Недействительные версии commons-lang3 в cloudera 6.1 со свечой 2.4

Мы установили cloudera 6.1 и работаем с spark 2.4. Мы используем oozie для запуска наших приложений spark. У нас есть код, который пишет и читает с использованием фреймов данных (мы читаем json из потокового потока spark и записываем его в hdfs в паркете). При запуске на новом кластере я получаю следующие ошибки:

2019-02-03 17:42:00 ERROR [JobScheduler] Logging$class:91 - Error running job streaming job 1549208520000 ms.0
java.lang.IllegalArgumentException: Illegal pattern component: XXX
    at org.apache.commons.lang3.time.FastDatePrinter.parsePattern(FastDatePrinter.java:282)
    at org.apache.commons.lang3.time.FastDatePrinter.init(FastDatePrinter.java:149)
    at org.apache.commons.lang3.time.FastDatePrinter.<init>(FastDatePrinter.java:142)
    at org.apache.commons.lang3.time.FastDateFormat.<init>(FastDateFormat.java:384)
    at org.apache.commons.lang3.time.FastDateFormat.<init>(FastDateFormat.java:369)
    at org.apache.commons.lang3.time.FastDateFormat$1.createInstance(FastDateFormat.java:91)
    at org.apache.commons.lang3.time.FastDateFormat$1.createInstance(FastDateFormat.java:88)
    at org.apache.commons.lang3.time.FormatCache.getInstance(FormatCache.java:82)
    at org.apache.commons.lang3.time.FastDateFormat.getInstance(FastDateFormat.java:165)
    at org.apache.spark.sql.catalyst.json.JSONOptions.<init>(JSONOptions.scala:87)
    at org.apache.spark.sql.catalyst.json.JSONOptions.<init>(JSONOptions.scala:44)
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:433)
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:419)


2019-02-03 17:34:03 ERROR [streaming-job-executor-0] Logging$class:91 - Aborting job 5f83993b-e354-4cb5-a8da-b3b890e2f4ee.
org.apache.spark.SparkException: Job aborted due to stage failure: 
Aborting TaskSet 0.0 because task 1 (partition 1)
cannot run anywhere due to node and executor blacklist.
Most recent failure:
Lost task 1.0 in stage 0.0 (TID 1, sfdev-dn2.frontline-pcb.com, executor 1): java.io.InvalidClassException: org.apache.commons.lang3.time.FastDateParser; local class incompatible: stream classdesc serialVersionUID = 3, local class serialVersionUID = 2
    at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:687)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1876)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1745)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2033)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:490)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1158)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:490)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1158)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:88)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)


Blacklisting behavior can be configured via spark.blacklist.*.

    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1890)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1877)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:929)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:929)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:929)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2111)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2060)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2049)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:740)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2073)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:166)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:159)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228)

Я проверил, и ни один из моих jar-файлов не содержит commons-lang3, а также кажется, что oozie shared lib и spark зависят от одного и того же jar-файла.

В чем может быть проблема?

1 ответ

Я просто столкнулся с той же проблемой со спарк-2.4. Это классическая "проблема адской зависимости" между классом, который вводит ваше приложение, и установленными свечами зажигания. Что сработало для меня, так это затенить пакет lang3 в моем приложении Uber JAR (используя плагин Shade). Та же стратегия для решения проблем с гуавой. Вот то, что я использовал, чтобы решить это на моей установке. Надеюсь, это поможет вам.

   ...  <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>3.2.0</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <createDependencyReducedPom>false</createDependencyReducedPom>
                        <transformers>
                            <transformer
                                implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                            </transformer>
                        </transformers>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <relocations>
                            <relocation>
                                <pattern>com.google.common</pattern>
                                <shadedPattern>shaded.com.google.common</shadedPattern>
                            </relocation>
                            <relocation>
                                <pattern>org.apache.commons.lang3</pattern>
                                <shadedPattern>shaded.org.apache.commons.lang3</shadedPattern>
                            </relocation>
                        </relocations>
                        <artifactSet>
                            <includes>
                                <include>*:*</include>
                            </includes>
                        </artifactSet>
                    </configuration>
                </execution>
            </executions>
        </plugin>
Другие вопросы по тегам