WordCount怎么实现单词计数

发布时间：2021-12-16 15:18:45 作者：iii
来源：亿速云阅读：196

这篇文章主要介绍“WordCount怎么实现单词计数”。在日常操作中，相信很多人在“WordCount怎么实现单词计数”这一问题上存在疑惑，小编查阅了各式资料，整理出简单好用的操作方法，希望对大家解答“WordCount怎么实现单词计数”的疑惑有所帮助！接下来，请跟着小编一起来学习吧！

实现一（使用 StringTokenizer 分词）：

public class WordCount {
	
	public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
		
		private final static IntWritable one = new IntWritable(1);
		
		private Text word = new Text();

		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			
			//将每一行拆分成一个个的单词，并肩<word，1>作为map方法的结果输出。
			StringTokenizer itr = new StringTokenizer(value.toString());
			
			//  测试其是否还有更多可用的标记
			while (itr.hasMoreTokens()) {
				word.set(itr.nextToken());
				context.write(word, one);
			}
		}
	}

	public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
		
		private IntWritable result = new IntWritable();
		/**
		 * reduce函数的输入也是一个key/value的形式，不过它的value是一个迭代器的形式Iterable<IntWritable> values，
		 * 		也就是说reduce的输入是一个key对应一组的值的value，reduce也有context和map的context作用一致。
		 * */
		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}
			result.set(sum);
			context.write(key, result);
		}
	}
	
	public static void main(String[] arg) throws Exception {
		
		//初始化Configuration，该类主要是读取mapreduce系统配置信息，这些信息包括hdfs还有mapreduce等。
		Configuration conf = new Configuration();
		
		//构建一个job，
		Job job = Job.getInstance(conf,"word count");
		//装载程序员编写好的计算程序
		job.setJarByClass(WordCount.class);
		
		//实现map函数，根据输入的<key,value>对生成中间结果。配置mapreduce如何运行map和reduce函数
		job.setMapperClass(TokenizerMapper.class);
		
		//Combiner类，实现combine函数，合并中间结果中具有相同key值的键值对。 默认为null 即不合并中间结果。
		job.setCombinerClass(IntSumReducer.class);
		
		//Reducer类 实现reduce函数 将中间结果合并，得到最终结果。
		job.setReducerClass(IntSumReducer.class);
		
		//定义输出的key/value的类型，也就是最终存储在hdfs上结果文件的key/value的类型
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		//第一行就是构建输入的数据文件，第二行是构建输出的数据文件，
		FileInputFormat.addInputPath(job, new Path("hdfs://192.168.226.129:9000/rootdir/mapreduce.txt"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.226.129:9000/rootdir/data/studytest/"+System.currentTimeMillis()+"/"));
		
		//如果job运行成功了，我们的程序就会正常退出
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

实现二（使用 String.split 分词）：

public class WordCount1 {

	public static final IntWritable ONE = new IntWritable(1);
	
	public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>{

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			String[] vs = value.toString().split("\\s");
			for (String string : vs) {
				context.write(new Text(string) , ONE);
			}
		}		
	}
	
	
	public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
			int count =0;
			for (IntWritable v : values) {
				count += v.get();
			}
			context.write(key,new IntWritable(count) );
		}
		
	}
	
	public static void main(String[] args) {
		try {
			Configuration conf = new Configuration();
			
			Job job = Job.getInstance(conf,"word count");
			job.setJarByClass(WordCount1.class);
			
			//实现map函数，根据输入的<key,value>对生成中间结果。
			job.setMapperClass(WordCountMapper.class);
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(IntWritable.class);
			job.setReducerClass(WordCountReduce.class);

			FileInputFormat.addInputPath(job, new Path("hdfs://192.168.226.129:9000/rootdir/mapreduce.txt"));
			FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.226.129:9000/rootdir/data/studytest/"+System.currentTimeMillis()+"/"));
			
			System.exit(job.waitForCompletion(true) ? 0 : 1);
		} catch (IllegalStateException e) {
			e.printStackTrace();
		} catch (IllegalArgumentException e) {
			e.printStackTrace();
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
	}

}

例如：

输入：

xujun xujun xj , cxj 
xxxx
cccc ddd xujun xj
yyy jjj
ccc yyy

输出：

,	1
ccc	1
cccc	1
cxj	1
ddd	1
jjj	1
xj	2
xujun	3
xxxx	1
yyy	2

到此，关于“WordCount怎么实现单词计数”的学习就结束了，希望能够解决大家的疑惑。理论与实践的搭配能更好地帮助大家学习，快去试试吧！若想继续学习更多相关知识，请继续关注亿速云网站，小编会继续努力为大家带来更多实用的文章！

WordCount怎么实现单词计数

相关阅读