MapReduce-多路径输出

MapReduce 中实现多路径输出主要使用 MultipleOutputs 类

通过两个例子可以掌握

输入样例 mulitipleInput.txt

file1 001
file2 002
file3 003
file2 004
file1 005
file1 006
file3 007

输出:

file1和file3开头的记录归到一个文件下
file2和file3开头的记录归到一个文件下

代码

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MultipleOutputsExample extends Configured implements Tool{
public static class Muljava难学吗tipleMapper extends Mapper<Object, Teapplicationxt, Text, NullWritable>{
private MultipleOutputs<Text, NullWritable> mos;
@Override
protecappstoreted void setup(Mapper<Object, Text, Text, NullWrjava训练itable>.Context context)
throws IOExcAPPeption, IJavanterruptedException {
// TODO Auto-generated method stub
mos = new MultipleOutputs<Text, NullWritable>(context);
}
@Override
protected void map(Object key, Text value, Mapper<Object, Text, Text, NullWritable>.Context context)
throws IOEappleidxception, InterruptedException {
// TODO Auto-generated mjava面试ethod stjavahihi2018ub
String[] infosappear = value.toString().split(" ");
if(infos[0].equals("file1")){
mos.write("file1", value, NullWritable.get());
}else if (infos[0].equals("file2")) {
mos.write("file2", value, Nujava模拟器llWritable.get());
} else {
mos.write("file1", value, NullWritable.get());
mos.write("file2", value, NullWritable.get());
}
}
@Override
protected void cleanup(Mapper<Object, Text, Text, NullWritabapp装置下载le>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
mos.close();
}
}
@Ovejavahihi2018rride
public int run(Sjava初学tring[] args) throws Exception {
// TODO Autapp装置下载o-generatapproveed method stub
Configuraappreciatetion conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2){java是什么意思
System.erappearancer.println("Usage: Data Deduplication <in> <outjava难学吗>");
System.exit(2);java难学吗
}
Job job = Job.getInstance(conf);
job.setJarByClass(Muapp装置下载ltipleOutputsExample.class);
job.setMapperClass(MultipleMapper.class);java是什么意思
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
MultipleOutjava难学吗puts.addNamedOutput(job, "file1", TextOutputFormat.clasapproachs, Text.class, NullWritable.class);
MultipleOutputs.addNamedOutput(job,javahihi2018 "file2", TextOuappletputFormat.class, Text.class, NullWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputapproachFormat.setOutputPappstoreath(job, new Path(otherArgsjava训练[1]));
return job.waitForCompletioJavan(true)? 0:1;
}
public static void main(Strappreciateing[] args) throws Exception {appleid
System.exit(ToolRunner.run(new MultipleOutputsExample(), argjava是什么意思s));java训练
}
}

结果

$ hadoop fs -cat /user/test/wordTest/mulitipleOutput/file1-m-00000
file1 001
file3 003
file1 005
file1 006
file3 007

$ hadoop fs -cat /user/test/wordTest/mulitipleOutput/file2-m-00000
file2 002
file3 003
file2 004
file3 007
file2 008

如果想把file1和file2的内容放入不同的目录下,可以通过指定baseOutputPath,将file1开头的文件放在同一个目录中管理。
将 mos.write("file1", value, NullWritable.get()); mos.write("file2", value, NullWritable.get()); 改为 mos.write("file1", value, NullWritable.get(), "file1/part"); mos.write("file2", value, NullWritable.get(), "file2/part"); 或 mos.write(value, NullWritable.get(), "file1/part"); mos.write(value, NullWritable.get(), "file2/part"); 可以看到输出结果

$ hadoop fs -ls /user/test/wordTest/mulitipleOutput
Found 4 items
-rw-r--r--   3 hdfs hdfs          0 2018-06-30 16:18 /user/test/wordTest/mulitipleOutput/_SUCCESS
drwxr-xr-x   - hdfs hdfs          0 2018-06-30 16:18 /user/test/wordTest/mulitipleOutput/file1
drwxr-xr-x   - hdfs hdfs          0 2018-06-30 16:18 /user/test/wordTest/mulitipleOutput/file2
-rw-r--r--   3 hdfs hdfs          0 2018-06-30 16:18 /user/test/wordTest/mulitipleOutput/part-r-00000

指定baseOutputPath后,输出路径和输出文件名直接按照baseOutputPath指定,但是默认输出文件名后缀会跟上-r-00000,如果想更改可以继承FileOutputFormat重写RecordWriter实现。