MapJoin的实现,Win10报错PriviledgedActionException,No FileSystem for scheme: 【OK】

来源:10-5 MapJoin功能实现

哈哈大圣618

2019-12-07

一、代码

package com.hahadasheng.bigdata.hadooplearning.reducejoin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

/**
 * @author Liucheng
 * @since 2019-12-07
 */
public class MapperJoinApp {

    public static void main(String[] args) throws Exception {

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(MapperJoinApp.class);

        // 配置Reducer Task任务个数为0
        job.setNumReduceTasks(0);

        job.setMapperClass(MapperJoinMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 小文件
        URI dept = new URI("E:/ImprovementWorkingSpace/hadoop-learning/src/main/resources/join/dept.txt");
        // 将小文件加到分布式缓存中
        job.addCacheFile(dept);

        // 大文件
        Path emp = new Path("E:\ImprovementWorkingSpace\hadoop-learning\src\main\resources\join\emp.txt");
        // 写入大文件
        FileInputFormat.setInputPaths(job, emp);

        Path outputPath = new Path("E:\ImprovementWorkingSpace\hadoop-learning\src\main\resources\join\map-join");
        FileSystem fileSystem = FileSystem.get(configuration);
        fileSystem.delete(outputPath, true);
        // 文件输出
        FileOutputFormat.setOutputPath(job, outputPath);

        job.waitForCompletion(true);

    }
}

/**
 * Map-only join mapper: loads the cached dept file (deptno -&gt; dname) into
 * memory during setup, then enriches each emp record with its department
 * name and emits the joined line as the key (value is NullWritable).
 */
class MapperJoinMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // In-memory lookup table built from the cached dept file: deptno -> dname.
    private Map<String, String> deptCache = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

        // The cache entry is a URI (e.g. file:///E:/.../dept.txt). Convert it
        // to a java.io.File instead of handing the raw URI string to
        // FileInputStream, which cannot parse "file:/..." paths.
        java.io.File deptFile;
        try {
            deptFile = new java.io.File(context.getCacheFiles()[0]);
        } catch (IllegalArgumentException e) {
            throw new IOException("Cached dept file is not a local file URI", e);
        }

        // try-with-resources: the reader is closed even if parsing fails
        // (the original leaked the BufferedReader).
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(deptFile)))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] datas = line.split("\t");
                // Skip malformed lines. The original used "return" here,
                // which aborted loading the whole file on the first bad line.
                if (datas.length < 3) {
                    continue;
                }
                String deptno = datas[0];
                String dname = datas[1];
                deptCache.put(deptno, dname);
            }
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] datas = value.toString().split("\t");

        // emp rows need at least 8 tab-separated fields.
        if (datas.length < 8) {
            return;
        }
        // Employee record fields of interest.
        String empno = datas[0];
        String ename = datas[1];
        String sal = datas[5];
        String deptno = datas[7];

        // Join against the cached dept table. Note: the original appended
        // empno twice; each field is emitted exactly once here. An unknown
        // deptno yields the literal "null", matching StringBuilder semantics.
        StringBuilder sb = new StringBuilder();
        sb.append(empno).append("\t")
                .append(ename).append("\t")
                .append(sal).append("\t")
                .append(deptno).append("\t")
                .append(deptCache.get(deptno));

        context.write(new Text(sb.toString()), NullWritable.get());
    }
}

二、报错信息

[INFO ] method:org.apache.hadoop.conf.Configuration.warnOnceIfDeprecated(Configuration.java:1274)
session.id is deprecated. Instead, use dfs.metrics.session-id
[INFO ] method:org.apache.hadoop.metrics.jvm.JvmMetrics.init(JvmMetrics.java:76)
Initializing JVM Metrics with processName=JobTracker, sessionId=
[WARN ] method:org.apache.hadoop.mapreduce.JobResourceUploader.uploadFiles(JobResourceUploader.java:64)
Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
[WARN ] method:org.apache.hadoop.mapreduce.JobResourceUploader.uploadFiles(JobResourceUploader.java:171)
No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
[INFO ] method:org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:253)
Cleaning up the staging area file:/tmp/hadoop-lc/mapred/staging/lc1750978452/.staging/job_local1750978452_0001
Exception in thread "main" java.io.IOException: No FileSystem for scheme: E
	at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2796)
	at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2810)
	at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:98)
	at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2853)
	at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2835)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:387)
	at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:223)
	at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestamps(ClientDistributedCacheManager.java:93)
	at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(ClientDistributedCacheManager.java:57)
	at org.apache.hadoop.mapreduce.JobResourceUploader.uploadFiles(JobResourceUploader.java:179)
	at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:99)
	at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:194)
	at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1307)
	at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1304)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
	at org.apache.hadoop.mapreduce.Job.submit(Job.java:1304)
	at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1325)
	at com.hahadasheng.bigdata.hadooplearning.reducejoin.MapperJoinApp.main(MapperJoinApp.java:57)
[WARN ] method:org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1927)
PriviledgedActionException as:lc (auth:SIMPLE) cause:java.io.IOException: No FileSystem for scheme: E
Picked up JAVA_TOOL_OPTIONS: -Dfile.encoding=UTF-8

Process finished with exit code 1
  1. 开始以为是用户权限问题,但是是用管理员登录的,还是不行
写回答

2回答

Michael_PK

2019-12-07

uri你的代码E盘符的找不到。win写完建议打包去服务器测试。win可能各种各样问题

0
5
哈哈大圣618
老师,在Linux环境下我也发现有问题,经过我较真的调试发现问题还不少 1. 每个节点的core-site.xml要添加配置 ``` dfs.permissions false ``` 2. main方法中的 ``` job.addCacheFile(new URI(xxx)) ``` 这里会在hdfs上找对应的文件,如果没有则会报错 3. Mapper中的setup ``` String filePath = context.getCacheFiles()[0].toString(); BufferedReader br = new BufferedReader(new FileReader(filePath)); ``` 这个是在本地文件系统找文件,因为调用的是Java标准库的API,没有使用Hadoop相关的流,我尝试过使用完整的路径,加上hdfs://hadoop:8020也会报错; 4. 最终,我在hdfs上创建了目录,并把dept.txt文件上传到该目录,然后在Linux本机同样创建相同的目录,将dept.txt文件也拷贝一份,终于运行成功了~~~^o^~~~~~~^o^~~~~~~^o^~~~~~~^o^~~~ 5. Win上测试老是报找不到盘符,不想使用Windows~~~~~~~~
2019-12-08
共5条回复

慕码人6064419

2020-03-21

前面加file:///

如:file:///E:/ImprovementWorkingSpace/hadoop-learning/src/main/resources/join/dept.txt

0
1
慕九州8702158
不管用啊
2020-08-17
共1条回复

Hadoop 系统入门+核心精讲

从Hadoop核心技术入手,掌握数据处理中ETL应用,轻松进军大数据

2397 学习 · 902 问题

查看课程