0
回答
Spark 运行数据清洗作业失败：driver 收集的序列化结果总大小（75 个 task 共 1033.3 MB）超过了 spark.driver.maxResultSize 的限制（1024.0 MB），导致在 collectAsList（ManyMac.java:42）处 Job 被中止。

ResultStage 5 (collectAsList at ManyMac.java:42) failed in 88.705 s

17/03/25 03:27:14 INFO scheduler.DAGScheduler: Job 2 failed: collectAsList at ManyMac.java:42, took 364.058067 s

Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Total size of serialized results of 75 tasks (1033.3 MB) is bigger than spark.driver.maxResultSize (1024.0 MB)

at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1450)

at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1438)

at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1437)

at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1437)

at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)

at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)

at scala.Option.foreach(Option.scala:257)

at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)

at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1659)

at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1618)

at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1607)

at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)

at org.apache.spark.SparkContext.runJob(SparkContext.scala:1871)

at org.apache.spark.SparkContext.runJob(SparkContext.scala:1884)

at org.apache.spark.SparkContext.runJob(SparkContext.scala:1897)

at org.apache.spark.SparkContext.runJob(SparkContext.scala:1911)

at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:893)

at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)

at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)

at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)

at org.apache.spark.rdd.RDD.collect(RDD.scala:892)

at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:290)

at org.apache.spark.sql.Dataset$$anonfun$collectAsList$1$$anonfun$apply$14.apply(Dataset.scala:2176)

at org.apache.spark.sql.Dataset$$anonfun$collectAsList$1$$anonfun$apply$14.apply(Dataset.scala:2175)

at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)

at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2532)

at org.apache.spark.sql.Dataset$$anonfun$collectAsList$1.apply(Dataset.scala:2175)

at org.apache.spark.sql.Dataset$$anonfun$collectAsList$1.apply(Dataset.scala:2174)

at org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2545)

at org.apache.spark.sql.Dataset.collectAsList(Dataset.scala:2174)

at aom.abc.test.ManyMac.kmeansMac(ManyMac.java:42)

at aom.abc.test.ManyMac.main(ManyMac.java:81)

at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)

at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)

at java.lang.reflect.Method.invoke(Method.java:498)

at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:729)

at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)

at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)

at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)

at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

17/03/25 03:27:14 INFO spark.SparkContext: Invoking stop() from shutdown hook

 

<无标签>
举报
顶部