训练模型时出错(已经解决)
来源:5-4 -C SparkSession的使用
BaconNUDT
2017-07-27
object DigitRecognizer {
/**
 * Trains and evaluates a random-forest digit classifier on a CSV data set.
 *
 * Steps: read the CSV, assemble every column whose name starts with "p" into
 * one feature vector, reduce it to 10 principal components, split 70/30 into
 * training and test sets, pick the number of trees by 5-fold cross-validation,
 * and print the accuracy measured on the test split.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  System.setProperty("hadoop.home.dir", "C:\\Users\\bacon\\winutils")
  val spark = SparkSession.builder().appName("SparkSession").master("local[2]").getOrCreate()
  try {
    val dataDF = spark.read
      .option("header", "true")
      .option("inferSchema", "true")
      .csv("D:\\learn\\BD\\ML\\data\\DigitRecognizer\\train.csv")
    // dataDF.printSchema()
    // dataDF.show()

    // Feature columns are the ones whose name starts with "p"
    // (presumably the pixel columns of the data set -- confirm against the CSV header).
    val featuresArray = dataDF.schema.fieldNames.filter(_.startsWith("p"))

    val assembler = new VectorAssembler()
      .setInputCols(featuresArray)
      .setOutputCol("features")
    val trainDF = assembler.transform(dataDF)
    trainDF.printSchema()
    trainDF.show()

    val pca = new PCA()
      .setInputCol("features")
      .setOutputCol("pcaFeatures")
      .setK(10)
      .fit(trainDF)

    // Keep only the label and the reduced features. Without this projection the
    // very wide frame makes training fail with "SpecificOrdering ... grows beyond
    // 64 KB" (NOTE(review): most likely because randomSplit orders rows on every
    // column -- confirm against the Spark version in use).
    // The projection is part of the definition because a `val` cannot be reassigned.
    val pcaResult = pca.transform(trainDF).select("label", "pcaFeatures")
    // pcaResult.printSchema()

    val Array(trainingData, testData) = pcaResult.randomSplit(Array(0.7, 0.3))

    // Random forest classifier
    val rf = new RandomForestClassifier()
      .setFeaturesCol("pcaFeatures")
      .setLabelCol("label")

    // Evaluation metric
    val evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("label")
      .setMetricName("accuracy")
      .setPredictionCol("prediction")

    // Parameter grid, kept simple: only search for the best number of trees
    val paramGrid = new ParamGridBuilder()
      .addGrid(rf.numTrees, Array(5, 10, 15, 20))
      .build()

    // Cross-validation setup
    val cv = new CrossValidator()
      .setEstimator(rf)
      .setEvaluator(evaluator)
      .setEstimatorParamMaps(paramGrid)
      .setNumFolds(5)

    // Train the model (this is the step that failed before the projection above)
    val cvModel = cv.fit(trainingData)
    val testDF = cvModel.transform(testData)
    val accuracy = evaluator.evaluate(testDF)
    println(accuracy)
  } finally {
    // Release the session even when reading or training throws.
    spark.close()
  }
}
}
训练模型时出现下面这个异常,应该是数据太宽(列数太多)的原因:PCA 之后只保留 label 和 pcaFeatures 两列,把前面的原始特征列丢掉就可以了。
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:941) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) ... 62 more Caused by: org.codehaus.janino.JaninoRuntimeException: Code of method "compare(Lorg/apache/spark/sql/catalyst/InternalRow;Lorg/apache/spark/sql/catalyst/InternalRow;)I" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificOrdering" grows beyond 64 KB at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:949) at org.codehaus.janino.CodeContext.write(CodeContext.java:839) at org.codehaus.janino.UnitCompiler.writeOpcode(UnitCompiler.java:11081) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2409) at org.codehaus.janino.UnitCompiler.access$2700(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitReturnStatement(UnitCompiler.java:1387) at org.codehaus.janino.UnitCompiler$6.visitReturnStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$ReturnStatement.accept(Java.java:3011) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1436) at org.codehaus.janino.UnitCompiler.access$1600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1376) at 
org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1370) at org.codehaus.janino.Java$Block.accept(Java.java:2471) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2228) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1436) at org.codehaus.janino.UnitCompiler.access$1600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1376) at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1370) at org.codehaus.janino.Java$Block.accept(Java.java:2471) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2220) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2220) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at 
org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2238) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2811) at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262) at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894) at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377) at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369) at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:420) at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:374) at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:369) at org.codehaus.janino.Java$AbstractPackageMemberClassDeclaration.accept(Java.java:1309) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) at 
org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:345) at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:396) at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:311) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:196) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:91) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:935) ... 68 more
写回答
1回答
-
mllib的东西我不是太熟,正在学习中
012017-07-27
相似问题