I am using Auto Loader to write a stream to a Delta table with this code:
```python
(df.writeStream
    .format("delta")
    .foreachBatch(lambda df, epochId: update_insert(df, epochId, cdm))
    .option("checkpointLocation", checkpoint_directory)
    .trigger(availableNow=True)
    .start())
```
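For context, `update_insert` is an upsert handler invoked per micro-batch. A minimal sketch of the kind of function involved (assuming, hypothetically, that `cdm` names the target Delta table and `id` is the merge key; the real handler may differ):

```python
from delta.tables import DeltaTable

def update_insert(batch_df, epoch_id, cdm):
    # Sketch only: merge each micro-batch into the target Delta table.
    # 'cdm' is assumed here to be the target table name and 'id' the
    # join key; the actual implementation may differ.
    target = DeltaTable.forName(batch_df.sparkSession, cdm)
    (target.alias("t")
        .merge(batch_df.alias("s"), "t.id = s.id")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute())
```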
I get the following error:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 649.0 failed 4 times, most recent failure: Lost task 4.3 in stage 649.0 (TID 2611) (172.20.12.5 executor 3): **org.apache.spark.SparkException: Task failed while writing rows**.
at com.databricks.photon.PhotonWriteStageExec.$anonfun$executeWrite$2(PhotonWriteStageExec.scala:132)
at com.databricks.photon.PhotonExec.$anonfun$executePhoton$6(PhotonExec.scala:579)
at com.databricks.photon.PhotonExec.$anonfun$executePhoton$6$adapted(PhotonExec.scala:422)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndexInternal$2(RDD.scala:916)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndexInternal$2$adapted(RDD.scala:916)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:406)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:370)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:179)
at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:142)
at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:41)
at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:99)
at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:104)
at scala.util.Using$.resource(Using.scala:269)
at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:103)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:142)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:97)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:904)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1713)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:907)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:761)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
**Caused by:** java.lang.ArithmeticException: long overflow
at java.lang.Math.multiplyExact(Math.java:892)
at org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:257)
at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:374)
at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:394)
at org.apache.spark.sql.catalyst.util.RebaseDateTime.rebaseGregorianToJulianMicros(RebaseDateTime.scala)
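The root cause is `Math.multiplyExact` overflowing in `DateTimeUtils.millisToMicros` while Spark rebases timestamps from the Gregorian to the Julian calendar on write, which usually points to a corrupt or extreme timestamp somewhere in the batch (values far outside the 0001–9999 year range overflow the 64-bit microsecond arithmetic). One way to sanity-check each micro-batch inside `update_insert`, assuming a timestamp column named `event_ts` (hypothetical, stands in for whatever timestamp column the stream actually carries):

```python
from pyspark.sql import functions as F

# Hypothetical check: rows with years outside 1..9999 are the usual
# culprits for the Gregorian-to-Julian rebase overflow.
suspect = batch_df.filter((F.year("event_ts") < 1) | (F.year("event_ts") > 9999))
suspect.show(truncate=False)
```

If no data is genuinely that old, note that the Gregorian-to-Julian rebase on write only runs when `spark.sql.parquet.datetimeRebaseModeInWrite` is set to `LEGACY`, so that setting may also be worth checking.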