MapReduce 读取 SequenceFile 文件中的数据
2019年05月16日
⁄ 综合
⁄ 共 2061字 ⁄ 字号
小 中 大
-
SequenceFile 中的数据是以 key/value 键值对的形式存储的。
-
通过 MapReduce 作业，可以读取 SequenceFile 中的数据。
-
public class MapReduceReadFile {
-
-
private static SequenceFile.Reader reader = null;
-
private static Configuration conf = new Configuration();
-
-
public static class ReadFileMapper extends
-
Mapper<LongWritable, Text, LongWritable, Text> {
-
-
-
-
-
@Override
-
public void map(LongWritable key, Text value, Context context) {
-
key = (LongWritable) ReflectionUtils.newInstance(
-
reader.getKeyClass(), conf);
-
value = (Text) ReflectionUtils.newInstance(
-
reader.getValueClass(), conf);
-
try {
-
while (reader.next(key, value)) {
-
System.out.printf("%s\t%s\n", key, value);
-
context.write(key, value);
-
}
-
} catch (IOException e1) {
-
e1.printStackTrace();
-
} catch (InterruptedException e) {
-
e.printStackTrace();
-
}
-
}
-
-
}
-
-
-
-
-
-
-
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
-
-
Job job = new Job(conf,"read seq file");
-
job.setJarByClass(MapReduceReadFile.class);
-
job.setMapperClass(ReadFileMapper.class);
-
job.setMapOutputValueClass(Text.class);
-
Path path = new Path("logfile2");
-
FileSystem fs = FileSystem.get(conf);
-
reader = new SequenceFile.Reader(fs, path, conf);
-
FileInputFormat.addInputPath(job, path);
-
FileOutputFormat.setOutputPath(job, new Path("result"));
-
System.exit(job.waitForCompletion(true)?0:1);
-
}
-