不能成功运行spark-submit
来源:5-4 -standalone模式spark-submit运行
ByronC
2019-11-16
运行下面这句代码后,出现了下面的Error,我在想是不是没有调用成功localhost:50070里面的wc.txt文件。(wc.txt已经成功上传到localhost:50070,并且运行spark0403.py 文件没问题)
运行命令:
spark-submit --master spark://XXX-MacBook-Pro.local:7077 --name spark-standalone spark0403.py hdfs://localhost:50070/wc.txt hdfs://localhost:50070/output
出现Error:
py4j.protocol.Py4JJavaError: An error occurred while calling o18.partitions.
: java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message end-group tag did not match expected tag.; Host Details : local host is: "bangrencs-MacBook-Pro.local/127.0.0.1"; destination host is: "localhost":50070;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:776)
at org.apache.hadoop.ipc.Client.call(Client.java:1479)
at org.apache.hadoop.ipc.Client.call(Client.java:1412)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
at com.sun.proxy.$Proxy22.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:771)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy23.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:2108)
at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1305)
at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1301)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1317)
at org.apache.hadoop.fs.Globber.getFileStatus(Globber.java:57)
at org.apache.hadoop.fs.Globber.glob(Globber.java:252)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1676)
at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:259)
at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:204)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spa
1回答
-
ByronC
提问者
2019-11-16
因为一开始不能成功 submit 问题,所以刚刚重复提交了好几份问题,不好意思。。。
00
相似问题