Yarn 介绍

配置

在所有节点上修改 mapred-site.xml 和 yarn-site.xml 配置文件

配置 mapred-site.xml

<configuration>
          <!-- 设置执行 MapReduce Job 框架,比如 local, classic 或 yarn -->
        <property>
             <name>mapreduce.framework.name</name>
             <value>yarn</value>
        </property>
</configuration>

更多参数参考官网文档: https://hadoop.apache.org/docs/r2.8.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml

配置 yarn-site.xml

<configuration>
    <!-- 配置成 mapreduce_shuffle 才能运行 map reduce 程序  -->
    <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
    </property>
    <!-- resourcemanager 地址 -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>192.168.56.101</value>
    </property>
</configuration>

更多参数参考:https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-common/yarn-default.xml

登录 192.168.56.101 节点启动 ResourceManager

$ ./sbin/yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /home/hadoop/hadoop-2.8.1/logs/yarn-root-resourcemanager-vm1.out

登录 192.168.56.107 ~ 109 节点启动 NodeManager

$  ./sbin/yarn-daemon.sh start nodemanager
starting nodemanager, logging to /home/hadoop/hadoop-2.8.1/logs/yarn-root-nodemanager-vm7.out

配置启动 HistoryServer

登录 192.168.56.101,修改 mapred-site.xml

<configuration>
    <property>
          <name>mapreduce.jobhistory.webapp.address</name>
          <value>192.168.56.101:19888</value>
      </property>

    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>192.168.56.101:10020</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.intermediate-done-dir</name>
        <value>/mr-history/tmp</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.done-dir</name>
        <value>/mr-history/done</value>
    </property>
</configuration>

启动

$ ./sbin/mr-jobhistory-daemon.sh start historyserver
starting historyserver, logging to /home/hadoop/hadoop-2.8.1/logs/mapred-root-historyserver-vm1.out
$ ./bin/hdfs dfs -ls /mr-history
Found 2 items
drwxrwx---   - root supergroup          0 2017-10-21 19:38 /mr-history/done
drwxrwxrwt   - root supergroup          0 2017-10-21 19:38 /mr-history/tmp
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>2.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.8.1</version>
        </dependency>
    </dependencies>
package mrtest;

import java.io.File;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BookWordCounter {

    public static class WordCounterMapper extends Mapper<Object, Text, Text, IntWritable> {
        // Map 阶段阶输出值
        private final static IntWritable outputValue = new IntWritable(1);
        // Map 阶段阶输出键,即每一个单词
        private Text outputKey = new Text();

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                String eachWord = tokenizer.nextToken();
                // 清除标点符号和数字
                eachWord = eachWord.replaceAll("[\\pP\\p{Punct}0-9]", "").trim();
                outputKey.set(eachWord);

                context.write(outputKey, outputValue);
            }
        }
    }

    public static class WordCounterReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable str : values) {
                sum += str.get();
            }
            context.write(key, new IntWritable(sum));
        }

    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();

        // 创建作业实例
        Job job = Job.getInstance(conf, "BookWordCounter");
        job.setJarByClass(BookWordCounter.class);
        job.setMapperClass(WordCounterMapper.class);
        job.setReducerClass(WordCounterReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 如果是本地调试,在这里设置本地文件路径,如果在 hadoop 集群中运行,这里设置 hdfs 中的文件路径
        // 指定输入文件
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // 设置输出文件
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // //开始运行
        boolean r = job.waitForCompletion(true);
        System.out.println(r);
    }
$ mvn package
$ ./bin/yarn jar /mnt/vm_share/mrtest-0.0.1-SNAPSHOT.jar mrtest.BookWordCounter  /some_book_en_for_mr.txt /some_book_en_for_mr_result
17/10/21 21:13:20 INFO client.RMProxy: Connecting to ResourceManager at /192.168.56.101:8032
17/10/21 21:13:21 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
17/10/21 21:13:21 INFO input.FileInputFormat: Total input files to process : 1
17/10/21 21:13:21 INFO mapreduce.JobSubmitter: number of splits:11
17/10/21 21:13:21 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1508591572882_0001
17/10/21 21:13:22 INFO impl.YarnClientImpl: Submitted application application_1508591572882_0001
17/10/21 21:13:22 INFO mapreduce.Job: The url to track the job: http://vm1:8088/proxy/application_1508591572882_0001/
17/10/21 21:13:22 INFO mapreduce.Job: Running job: job_1508591572882_0001
17/10/21 21:13:33 INFO mapreduce.Job: Job job_1508591572882_0001 running in uber mode : false
17/10/21 21:13:33 INFO mapreduce.Job:  map 0% reduce 0%
17/10/21 21:14:13 INFO mapreduce.Job:  map 1% reduce 0%
17/10/21 21:14:14 INFO mapreduce.Job:  map 2% reduce 0%
17/10/21 21:14:16 INFO mapreduce.Job:  map 3% reduce 0%
17/10/21 21:14:17 INFO mapreduce.Job:  map 4% reduce 0%
17/10/21 21:14:20 INFO mapreduce.Job:  map 5% reduce 0%
17/10/21 21:14:21 INFO mapreduce.Job:  map 6% reduce 0%
17/10/21 21:14:23 INFO mapreduce.Job:  map 8% reduce 0%
17/10/21 21:14:24 INFO mapreduce.Job:  map 9% reduce 0%
17/10/21 21:14:26 INFO mapreduce.Job:  map 10% reduce 0%
17/10/21 21:14:28 INFO mapreduce.Job:  map 11% reduce 0%
17/10/21 21:14:30 INFO mapreduce.Job:  map 16% reduce 0%
17/10/21 21:14:32 INFO mapreduce.Job:  map 17% reduce 0%
17/10/21 21:14:33 INFO mapreduce.Job:  map 18% reduce 0%
17/10/21 21:14:34 INFO mapreduce.Job:  map 25% reduce 0%
17/10/21 21:14:36 INFO mapreduce.Job:  map 26% reduce 0%
17/10/21 21:14:38 INFO mapreduce.Job:  map 28% reduce 0%
17/10/21 21:14:40 INFO mapreduce.Job:  map 31% reduce 0%
17/10/21 21:14:42 INFO mapreduce.Job:  map 35% reduce 0%
17/10/21 21:14:45 INFO mapreduce.Job:  map 36% reduce 0%
17/10/21 21:14:47 INFO mapreduce.Job:  map 37% reduce 0%
17/10/21 21:14:48 INFO mapreduce.Job:  map 39% reduce 0%
17/10/21 21:14:49 INFO mapreduce.Job:  map 40% reduce 0%
17/10/21 21:14:53 INFO mapreduce.Job:  map 41% reduce 0%
17/10/21 21:14:56 INFO mapreduce.Job:  map 42% reduce 0%
17/10/21 21:15:00 INFO mapreduce.Job:  map 43% reduce 0%
17/10/21 21:15:14 INFO mapreduce.Job:  map 45% reduce 0%
17/10/21 21:15:16 INFO mapreduce.Job:  map 46% reduce 0%
17/10/21 21:15:19 INFO mapreduce.Job:  map 47% reduce 0%
17/10/21 21:15:20 INFO mapreduce.Job:  map 49% reduce 0%
17/10/21 21:15:22 INFO mapreduce.Job:  map 53% reduce 0%
17/10/21 21:15:25 INFO mapreduce.Job:  map 54% reduce 0%
17/10/21 21:15:28 INFO mapreduce.Job:  map 55% reduce 0%
17/10/21 21:15:34 INFO mapreduce.Job:  map 59% reduce 0%
17/10/21 21:15:35 INFO mapreduce.Job:  map 61% reduce 0%
17/10/21 21:15:36 INFO mapreduce.Job:  map 62% reduce 0%
17/10/21 21:15:38 INFO mapreduce.Job:  map 64% reduce 0%
17/10/21 21:15:40 INFO mapreduce.Job:  map 69% reduce 0%
17/10/21 21:15:42 INFO mapreduce.Job:  map 70% reduce 0%
17/10/21 21:15:44 INFO mapreduce.Job:  map 71% reduce 0%
17/10/21 21:15:45 INFO mapreduce.Job:  map 74% reduce 0%
17/10/21 21:15:47 INFO mapreduce.Job:  map 77% reduce 0%
17/10/21 21:15:49 INFO mapreduce.Job:  map 78% reduce 0%
17/10/21 21:15:50 INFO mapreduce.Job:  map 81% reduce 0%
17/10/21 21:15:51 INFO mapreduce.Job:  map 82% reduce 0%
17/10/21 21:16:05 INFO mapreduce.Job:  map 85% reduce 0%
17/10/21 21:16:07 INFO mapreduce.Job:  map 87% reduce 0%
17/10/21 21:16:09 INFO mapreduce.Job:  map 87% reduce 15%
17/10/21 21:16:11 INFO mapreduce.Job:  map 91% reduce 15%
17/10/21 21:16:12 INFO mapreduce.Job:  map 93% reduce 15%
17/10/21 21:16:13 INFO mapreduce.Job:  map 95% reduce 15%
17/10/21 21:16:15 INFO mapreduce.Job:  map 96% reduce 15%
17/10/21 21:16:16 INFO mapreduce.Job:  map 97% reduce 15%
17/10/21 21:16:17 INFO mapreduce.Job:  map 99% reduce 15%
17/10/21 21:16:18 INFO mapreduce.Job:  map 100% reduce 15%
17/10/21 21:16:21 INFO mapreduce.Job:  map 100% reduce 38%
17/10/21 21:16:27 INFO mapreduce.Job:  map 100% reduce 68%
17/10/21 21:16:34 INFO mapreduce.Job:  map 100% reduce 76%
17/10/21 21:16:40 INFO mapreduce.Job:  map 100% reduce 84%
17/10/21 21:16:46 INFO mapreduce.Job:  map 100% reduce 93%
17/10/21 21:16:52 INFO mapreduce.Job:  map 100% reduce 100%
17/10/21 21:16:52 INFO mapreduce.Job: Job job_1508591572882_0001 completed successfully
17/10/21 21:16:52 INFO mapreduce.Job: Counters: 49
    File System Counters
        FILE: Number of bytes read=1641052185
        FILE: Number of bytes written=2394963518
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=368971925
        HDFS: Number of bytes written=341707
        HDFS: Number of read operations=36
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=2
    Job Counters 
        Launched map tasks=11
        Launched reduce tasks=1
        Data-local map tasks=11
        Total time spent by all maps in occupied slots (ms)=1621725
        Total time spent by all reduces in occupied slots (ms)=59608
        Total time spent by all map tasks (ms)=1621725
        Total time spent by all reduce tasks (ms)=59608
        Total vcore-milliseconds taken by all map tasks=1621725
        Total vcore-milliseconds taken by all reduce tasks=59608
        Total megabyte-milliseconds taken by all map tasks=1660646400
        Total megabyte-milliseconds taken by all reduce tasks=61038592
    Map-Reduce Framework
        Map input records=7343500
        Map output records=67914600
        Map output bytes=616451597
        Map output materialized bytes=752280863
        Input split bytes=1265
        Combine input records=0
        Combine output records=0
        Reduce input groups=27344
        Reduce shuffle bytes=752280863
        Reduce input records=67914600
        Reduce output records=27344
        Spilled Records=216065635
        Shuffled Maps =11
        Failed Shuffles=0
        Merged Map outputs=11
        GC time elapsed (ms)=18825
        CPU time spent (ms)=264450
        Physical memory (bytes) snapshot=2256031744
        Virtual memory (bytes) snapshot=24974262272
        Total committed heap usage (bytes)=1519169536
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters 
        Bytes Read=368970660
    File Output Format Counters 
        Bytes Written=341707
true
$ /bin/hdfs dfs -tail /some_book_en_for_mr_result/part-r-00000
00
flicker    200
flickered    600
flickerin    100
flickering    600
flicking    100
flict    100
flicted    500
flier    200
fliers    100
flies    600
flight    3300
flinch    100
fling    200
...

启动 nodemanager



$ vim ./create_book_text_for_mr.sh 
#!/bin/bash
for i in {1..100} 
do 
    cat ./some_book_en.txt >> ./some_book_en_for_mr.txt
done
$ ./bin/hadoop fs -put /mnt/vm_share/some_book_en_for_mr.txt /
$ ./bin/hadoop fs -ls -h /some_book_en_for_mr.txt 
$ ./bin/hadoop fs -ls -h /some_book_en_for_mr.txt 
-rw-r--r--   2 root supergroup    351.8 M 2017-10-21 19:08 /some_book_en_for_mr.txt
./bin/hdfs fsck  /some_book_en_for_mr.txt -files -blocks -locations 
Connecting to namenode via http://vm1:50070/fsck?ugi=root&files=1&blocks=1&locations=1&path=%2Fsome_book_en_for_mr.txt
FSCK started by root (auth:SIMPLE) from /192.168.56.101 for path /some_book_en_for_mr.txt at Sat Oct 21 21:23:20 CST 2017
/some_book_en_for_mr.txt 368929700 bytes, 11 block(s):  OK
0. BP-525791482-192.168.56.101-1508552069917:blk_1073741993_1169 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK], DatanodeInfoWithStorage[192.168.56.108:50010,DS-4e6deea2-8fcb-4294-b5c4-4e4371cf5dc3,DISK]]
1. BP-525791482-192.168.56.101-1508552069917:blk_1073741994_1170 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK], DatanodeInfoWithStorage[192.168.56.108:50010,DS-4e6deea2-8fcb-4294-b5c4-4e4371cf5dc3,DISK]]
2. BP-525791482-192.168.56.101-1508552069917:blk_1073741995_1171 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK], DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK]]
3. BP-525791482-192.168.56.101-1508552069917:blk_1073741996_1172 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK], DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK]]
4. BP-525791482-192.168.56.101-1508552069917:blk_1073741997_1173 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK], DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK]]
5. BP-525791482-192.168.56.101-1508552069917:blk_1073741998_1174 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.108:50010,DS-4e6deea2-8fcb-4294-b5c4-4e4371cf5dc3,DISK], DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK]]
6. BP-525791482-192.168.56.101-1508552069917:blk_1073741999_1175 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK], DatanodeInfoWithStorage[192.168.56.108:50010,DS-4e6deea2-8fcb-4294-b5c4-4e4371cf5dc3,DISK]]
7. BP-525791482-192.168.56.101-1508552069917:blk_1073742000_1176 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.108:50010,DS-4e6deea2-8fcb-4294-b5c4-4e4371cf5dc3,DISK], DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK]]
8. BP-525791482-192.168.56.101-1508552069917:blk_1073742001_1177 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK], DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK]]
9. BP-525791482-192.168.56.101-1508552069917:blk_1073742002_1178 len=33554432 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK], DatanodeInfoWithStorage[192.168.56.107:50010,DS-1e7315b1-f8bd-42c3-929f-ac891c4c1464,DISK]]
10. BP-525791482-192.168.56.101-1508552069917:blk_1073742003_1179 len=33385380 Live_repl=2 [DatanodeInfoWithStorage[192.168.56.109:50010,DS-27a20178-43f1-47a1-a071-6684fdbcfa69,DISK], DatanodeInfoWithStorage[192.168.56.108:50010,DS-4e6deea2-8fcb-4294-b5c4-4e4371cf5dc3,DISK]]

Status: HEALTHY
 Total size:    368929700 B
 Total dirs:    0
 Total files:    1
 Total symlinks:        0
 Total blocks (validated):    11 (avg. block size 33539063 B)
 Minimally replicated blocks:    11 (100.0 %)
 Over-replicated blocks:    0 (0.0 %)
 Under-replicated blocks:    0 (0.0 %)
 Mis-replicated blocks:        0 (0.0 %)
 Default replication factor:    2
 Average block replication:    2.0
 Corrupt blocks:        0
 Missing replicas:        0 (0.0 %)
 Number of data-nodes:        3
 Number of racks:        1
FSCK ended at Sat Oct 21 21:23:20 CST 2017 in 3 milliseconds


The filesystem under path '/some_book_en_for_mr.txt' is HEALTHY

 ./sbin/start-yarn.sh 
starting yarn daemons
starting resourcemanager, logging to /home/hadoop/hadoop-2.8.1/logs/yarn-root-resourcemanager-vm1.out
192.168.56.109: starting nodemanager, logging to /home/hadoop/hadoop-2.8.1/logs/yarn-root-nodemanager-vm9.out
192.168.56.108: starting nodemanager, logging to /home/hadoop/hadoop-2.8.1/logs/yarn-root-nodemanager-vm8.out
192.168.56.107: starting nodemanager, logging to /home/hadoop/hadoop-2.8.1/logs/yarn-root-nodemanager-vm7.out

results matching ""

    No results matching ""