Wrong o/p of wordcount while running program seprately

Hi Krishna,

I have written the code separately for word count as you explained but not getting the sum of word.I have tried many times but not giving the expected o/p.Please review the code and let me know what i am missing.


package mrd.training.sample;

import java.io.IOException;

import java.net.URISyntaxException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class productCost {

public static class mymapcall extends
Mapper<LongWritable, Text, Text, IntWritable> {

Text word = new Text();

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

String line = value.toString();

StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens())
{
word.set(tokenizer.nextToken());

if (word.toString().toLowerCase().contains(line.toLowerCase()))
{

context.write(word, new IntWritable(1));

}

}
}
}
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

int sum = 0;

for (IntWritable val : values) {

sum += val.get();
System.out.println("Sum ="+sum);

}
context.write(key, new IntWritable(sum));
}
}

public static void main(String[] args) throws IOException,
InterruptedException, ClassNotFoundException, URISyntaxException {

Configuration conf = new Configuration();
Job job = new Job(conf, "Word Counter");
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();

System.out.println(otherArgs[0].toString());
System.out.println(otherArgs[1].toString());

job.setJarByClass(productCost.class);

job.setMapperClass(productCost.mymapcall.class);

job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

// Boolean complete = job.waitForCompletion(true);
System.exit(job.waitForCompletion(true) ? 0 : 1);

// System.exit(1);

}
}

o/p

all 1
all 1
and 1
and 1
and 1
geeta 1
hi 1
hi 1
hi 1
me 1
rita 1
seeta 1
you 1

One thought on “Wrong o/p of wordcount while running program seprately

Leave a Reply