训练模型时出错(已经解决)
来源:5-4 -C SparkSession的使用
BaconNUDT
2017-07-27
/**
 * Digit-recognizer training job: loads the training CSV, assembles every
 * column whose name starts with "p" into a feature vector, reduces it to 10
 * principal components, then tunes a random forest's tree count with 5-fold
 * cross-validation and prints the accuracy on a 30% hold-out split.
 */
object DigitRecognizer {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    System.setProperty("hadoop.home.dir", "C:\\Users\\bacon\\winutils")
    val spark = SparkSession.builder().appName("SparkSession").master("local[2]").getOrCreate()

    try {
      val dataDF = spark.read
        .option("header", "true")
        .option("inferSchema", "true")
        .csv("D:\\learn\\BD\\ML\\data\\DigitRecognizer\\train.csv")
      // dataDF.printSchema()
      // dataDF.show()

      // Feature columns are those whose name starts with "p".
      val featuresArray = dataDF.schema.fieldNames.filter(_.startsWith("p"))

      val assembler = new VectorAssembler()
      assembler.setInputCols(featuresArray).setOutputCol("features")
      val trainDF = assembler.transform(dataDF)
      trainDF.printSchema()
      trainDF.show()

      val pca = new PCA()
        .setInputCol("features")
        .setOutputCol("pcaFeatures")
        .setK(10)
        .fit(trainDF)

      // Keep only the label and the PCA output. Carrying the original wide
      // columns into cross-validation is what triggered the
      // "grows beyond 64 KB" code-generation failure; selecting these two
      // columns is the fix. Chained here because a `val` cannot be reassigned.
      val pcaResult = pca.transform(trainDF).select("label", "pcaFeatures")
      // pcaResult.printSchema()

      val Array(trainingData, testData) = pcaResult.randomSplit(Array(0.7, 0.3))

      // Random forest classifier
      val rf = new RandomForestClassifier()
      rf.setFeaturesCol("pcaFeatures").setLabelCol("label")

      // Evaluation metric
      val evaluator = new MulticlassClassificationEvaluator()
      evaluator.setLabelCol("label").setMetricName("accuracy").setPredictionCol("prediction")

      // Parameter space, kept simple: only search for the best number of trees
      val paramGrid = new ParamGridBuilder()
        .addGrid(rf.numTrees, Array(5, 10, 15, 20))
        .build()

      // Configure cross-validation
      val cv = new CrossValidator()
      cv.setEstimator(rf)
        .setEvaluator(evaluator)
        .setEstimatorParamMaps(paramGrid)
        .setNumFolds(5)

      // Train the model (this is the step that used to fail)
      val cvModel = cv.fit(trainingData)
      val testDF = cvModel.transform(testData)
      val accuracy = evaluator.evaluate(testDF)
      println(accuracy)
    } finally {
      // Release the session even when a stage above throws.
      spark.close()
    }
  }
}
训练模型时出现下面这个异常(生成的代码超过 64 KB),应该是数据列太多(太宽)的原因:PCA 之后只保留 label 和 pcaFeatures 两列,把原始的像素列和 features 列丢掉就可以了。
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:941) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) ... 62 more Caused by: org.codehaus.janino.JaninoRuntimeException: Code of method "compare(Lorg/apache/spark/sql/catalyst/InternalRow;Lorg/apache/spark/sql/catalyst/InternalRow;)I" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificOrdering" grows beyond 64 KB at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:949) at org.codehaus.janino.CodeContext.write(CodeContext.java:839) at org.codehaus.janino.UnitCompiler.writeOpcode(UnitCompiler.java:11081) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2409) at org.codehaus.janino.UnitCompiler.access$2700(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitReturnStatement(UnitCompiler.java:1387) at org.codehaus.janino.UnitCompiler$6.visitReturnStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$ReturnStatement.accept(Java.java:3011) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1436) at org.codehaus.janino.UnitCompiler.access$1600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1376) at 
org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1370) at org.codehaus.janino.Java$Block.accept(Java.java:2471) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2228) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1436) at org.codehaus.janino.UnitCompiler.access$1600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1376) at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1370) at org.codehaus.janino.Java$Block.accept(Java.java:2471) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2220) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2220) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at 
org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2238) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2811) at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262) at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894) at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377) at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369) at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:420) at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:374) at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:369) at org.codehaus.janino.Java$AbstractPackageMemberClassDeclaration.accept(Java.java:1309) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) at 
org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:345) at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:396) at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:311) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:196) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:91) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:935) ... 68 more
写回答
1回答
-
mllib的东西我不是太熟,正在学习中
012017-07-27
相似问题