（十一）MapReduce案例之WordCount（2）-白红宇

（十一）MapReduce案例之WordCount（2）

阅读量：628 次

发布时间：2019-03-13

本文共 4423 字，大约阅读时间需要 14 分钟。

上一篇文章介绍了如何通过 jar 包的方式运行 wordcount 程序，这一篇文章介绍如何在本地 Windows 环境中直接运行 wordcount。

运行环境：windows、hadoop2.6.5

程序：wordcount（maven项目）

开发工具（IDE）：IntelliJ IDEA

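贴出pom文件如下：

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.5</version>
    </dependency>
    <!-- 注意：hadoop-hdfs 在上面已经声明过一次，此处为重复声明，可以删除 -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.5</version>
    </dependency>
</dependencies>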

一、配置项目

1、将hadoop集群中的配置文件：core-site.xml、mapred-site.xml、yarn-site.xml、log4j.properties复制到resource目录下。(里面的配置和集群保持一致)

2、下载工具包：hadoop-common-2.6.0-bin-master.zip

下载链接：

提取码：x1z8

3、将hadoop-common-2.6.0-bin-master.zip解压，把hadoop-common-2.6.0-bin-master/bin目录下的全部文件复制到本地Hadoop2.6.5安装目录的bin目录（即Hadoop2.6.5/bin）下，如图所示

4、将hadoop-common-2.6.0-bin-master下bin目录中的hadoop.dll复制到C:\Windows\System32下，然后重启电脑，否则会出现如下错误：Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z

二、在hdfs文件中创建文件

1、使用shell创建一个目录input

hadoop fs -mkdir -p /input

2、给目录赋予777权限

hadoop fs -chmod 777 /input

3、将要计算的文件传入input中

hadoop fs -put test.txt /input

4、给该文件777权限

hadoop fs -chmod 777 /input/test.txt

三、编写代码

package MapReduce;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Word-count MapReduce job: counts how many times each whitespace-separated
 * token occurs across one or more input paths and writes the totals to an
 * output path.
 *
 * <p>Invocation: {@code wordcount <in> [<in>...] <out>}. Every argument except
 * the last is added as an input path; the last argument is the output path.
 */
public class WordCount {

    public WordCount() {
    }

    /**
     * Configures the job, submits it, and exits the JVM with the job's status.
     *
     * @param args command-line arguments; whatever remains after
     *             {@link GenericOptionsParser} has processed them must contain
     *             at least one input path followed by one output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // The reducer only sums integers, so the same class is also installed
        // as the combiner.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // All arguments but the last are inputs; the last one is the output.
        int outputIndex = otherArgs.length - 1;
        for (int i = 0; i < outputIndex; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

        // Exit code 0 when the job reports success, 1 otherwise.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Sums all counts received for a word and emits {@code (word, total)}.
     */
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /** Single output value instance, overwritten before each write. */
        private final IntWritable result = new IntWritable();

        public IntSumReducer() {
        }

        /**
         * Adds up every value for {@code key} and writes the total.
         *
         * @param key     the word being counted
         * @param values  the partial counts collected for that word
         * @param context job context used to emit the {@code (word, total)} pair
         * @throws IOException          if writing the result fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            this.result.set(sum);
            context.write(key, this.result);
        }
    }

    /**
     * Splits each input line into tokens and emits {@code (token, 1)} for each.
     */
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

        /** Constant count of one attached to every emitted token. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Single output key instance, overwritten before each write. */
        private final Text word = new Text();

        public TokenizerMapper() {
        }

        /**
         * Tokenizes {@code value} with {@link StringTokenizer}'s default
         * delimiters and writes one {@code (token, 1)} pair per token.
         *
         * @param key     input key supplied by the framework; not used here
         * @param value   the text to tokenize
         * @param context job context used to emit the {@code (token, 1)} pairs
         * @throws IOException          if writing a pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                this.word.set(itr.nextToken());
                context.write(this.word, ONE);
            }
        }
    }
}