执行accessDF.show时报错: java.lang.ArrayIndexOutOfBoundsException
来源:9-8 -数据清洗之日志解析
BaconNUDT
2017-07-19
printSchema可以正常运行,但是show的时候报错,在Linux中也是这样的错误
17/07/19 17:03:35 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0) java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 7 if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url), StringType), true) AS url#0 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 0 :- null +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url), StringType), true) +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url), StringType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, cmsType), StringType), true) AS cmsType#1 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, cmsType), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 1 :- null +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, cmsType), StringType), true) +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, cmsType), StringType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, cmsType) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, cmsId), LongType) AS cmsId#2L +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, cmsId), LongType) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 2 :- null +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, cmsId), LongType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, cmsId) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 3, traffic), LongType) AS traffic#3L +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 3, traffic), LongType) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 3 :- null +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 3, traffic), LongType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 3, traffic) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 4, ip), StringType), true) AS ip#4 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 4, ip), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 4 :- null +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 4, ip), StringType), true) +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 4, ip), StringType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 4, ip) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 5, city), StringType), true) AS city#5 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 5, city), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 5 :- null +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 5, city), StringType), true) +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 5, city), StringType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 5, city) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 6, time), StringType), true) AS time#6 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 6, time), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 6 :- null +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 6, time), StringType), true) +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 6, time), StringType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 6, time) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 7, day), StringType), true) AS day#7 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 7, day), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 7 :- null +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 7, day), StringType), true) +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 7, day), StringType) +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 7, day) +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) +- input[0, org.apache.spark.sql.Row, true] at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:293) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:232) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:225) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:826) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:826) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.ArrayIndexOutOfBoundsException: 7 at org.apache.spark.sql.catalyst.expressions.GenericRow.get(rows.scala:173) at org.apache.spark.sql.Row$class.isNullAt(Row.scala:191) at org.apache.spark.sql.catalyst.expressions.GenericRow.isNullAt(rows.scala:165) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.evalIfCondExpr7$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply_2$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) ... 17 more 17/07/19 17:03:35 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 7 if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url), StringType), true) AS url#0 +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, url), StringType), true) :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt : :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object) : : +- input[0, org.apache.spark.sql.Row, true] : +- 0
写回答
1回答
-
BaconNUDT
提问者
2017-07-19
自己检查了一下,数组下标的确越界了,在Row()的时候落下字段了。
012017-07-19
相似问题