How do I fix InvalidRequestException (why: You have not logged in) when launching a Spark application?
When running a Spark application with DSE 5.0.5:
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

SparkConf conf = new SparkConf().setAppName("wordCount");
JavaSparkContext sc = new JavaSparkContext(conf);
// Load the input data.
JavaRDD<String> input = sc.textFile(inputFile);
// Split each line into words.
JavaRDD<String> words = input.flatMap(
    new FlatMapFunction<String, String>() {
      public Iterable<String> call(String x) {
        return Arrays.asList(x.split(" "));
      }
    });
// Pair each word with a count of one.
JavaPairRDD<String, Integer> ones = words.mapToPair(
    new PairFunction<String, String, Integer>() {
      public Tuple2<String, Integer> call(String x) {
        return new Tuple2<>(x, 1);
      }
    });
// Sum the counts for each word.
JavaPairRDD<String, Integer> counts = ones.reduceByKey(
    new Function2<Integer, Integer, Integer>() {
      public Integer call(Integer x, Integer y) {
        return x + y;
      }
    });
// Collect the results on the driver and print them.
List<Tuple2<String, Integer>> output = counts.collect();
for (Tuple2<?, ?> tuple : output) {
  System.out.println(tuple._1() + ": " + tuple._2());
}
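The job is packaged as a jar and launched through dse spark-submit (the DseSparkSubmitBootstrapper frames at the bottom of the trace below come from that launcher). A hypothetical invocation for reference, with the jar name and input path as placeholders:

dse spark-submit --class com.oreilly.learningsparkexamples.mini.java.WordCount wordcount.jar <input-path>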
I get the following error:
java.util.concurrent.RejectedExecutionException: Task java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask@4699684c rejected from java.util.concurrent.ScheduledThreadPoolExecutor@39bfefb[Shutting down, pool size = 1, active threads = 1, queued tasks = 0, completed tasks = 0]
at java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2047)
at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:823)
at java.util.concurrent.ScheduledThreadPoolExecutor.delayedExecute(ScheduledThreadPoolExecutor.java:326)
at java.util.concurrent.ScheduledThreadPoolExecutor.schedule(ScheduledThreadPoolExecutor.java:533)
at java.util.concurrent.ScheduledThreadPoolExecutor.execute(ScheduledThreadPoolExecutor.java:622)
at scala.concurrent.impl.ExecutionContextImpl.execute(ExecutionContextImpl.scala:122)
at scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:40)
at scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:248)
at scala.concurrent.Promise$class.complete(Promise.scala:55)
at scala.concurrent.impl.Promise$DefaultPromise.complete(Promise.scala:153)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:23)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Exception in thread "main" java.lang.RuntimeException: java.io.IOException: InvalidRequestException(why:You have not logged in)
at org.apache.hadoop.mapred.JobConf.getWorkingDirectory(JobConf.java:660)
at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:438)
at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:411)
at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$34.apply(SparkContext.scala:1021)
at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$34.apply(SparkContext.scala:1021)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:176)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:176)
at scala.Option.map(Option.scala:145)
at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:176)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:195)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.Partitioner$.defaultPartitioner(Partitioner.scala:65)
at org.apache.spark.api.java.JavaPairRDD.reduceByKey(JavaPairRDD.scala:526)
at com.oreilly.learningsparkexamples.mini.java.WordCount.main(WordCount.java:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.DseSparkSubmitBootstrapper$.main(DseSparkSubmitBootstrapper.scala:48)
at org.apache.spark.deploy.DseSparkSubmitBootstrapper.main(DseSparkSubmitBootstrapper.scala)
Caused by: java.io.IOException: InvalidRequestException(why:You have not logged in)
at com.datastax.bdp.util.CassandraProxyClient.initialize(CassandraProxyClient.java:486)
at com.datastax.bdp.util.CassandraProxyClient.<init>(CassandraProxyClient.java:404)
at com.datastax.bdp.util.CassandraProxyClient.newProxyConnection(CassandraProxyClient.java:283)
at com.datastax.bdp.util.CassandraProxyClient.access$000(CassandraProxyClient.java:50)
at com.datastax.bdp.util.CassandraProxyClient$Builder.newProxyConnection(CassandraProxyClient.java:237)
at com.datastax.bdp.hadoop.cfs.CassandraFileSystemThriftStore.connectToCassandra(CassandraFileSystemThriftStore.java:373)
at com.datastax.bdp.hadoop.cfs.CassandraFileSystemThriftStore.initialize(CassandraFileSystemThriftStore.java:269)
at com.datastax.bdp.hadoop.cfs.CassandraFileSystem.initialize(CassandraFileSystem.java:69)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2653)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:92)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2687)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2669)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:371)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:170)
at org.apache.hadoop.mapred.JobConf.getWorkingDirectory(JobConf.java:656)
... 41 more
Caused by: InvalidRequestException(why:You have not logged in)
at org.apache.cassandra.thrift.Cassandra$describe_keyspaces_result$describe_keyspaces_resultStandardScheme.read(Cassandra.java:33226)
at org.apache.cassandra.thrift.Cassandra$describe_keyspaces_result$describe_keyspaces_resultStandardScheme.read(Cassandra.java:33193)
at org.apache.cassandra.thrift.Cassandra$describe_keyspaces_result.read(Cassandra.java:33135)
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86)
at org.apache.cassandra.thrift.Cassandra$Client.recv_describe_keyspaces(Cassandra.java:1222)
at org.apache.cassandra.thrift.Cassandra$Client.describe_keyspaces(Cassandra.java:1210)
at com.datastax.bdp.util.CassandraProxyClient.initialize(CassandraProxyClient.java:453)
... 55 more
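Reading the trace above, the failure happens before any Spark work starts: JobConf.getWorkingDirectory opens the default file system, which under DSE is CFS (CassandraFileSystem), and the underlying Thrift describe_keyspaces call is rejected because the client never authenticated against a cluster that has authentication enabled. A minimal sketch of supplying credentials from the application itself, assuming DSE authentication is the cause; the spark.cassandra.auth.* names are standard Spark Cassandra Connector properties, while the cassandra.username/cassandra.password Hadoop properties for the CFS layer are an assumption based on DSE documentation, and "cassandra"/"cassandra" are placeholder credentials:

SparkConf conf = new SparkConf()
    .setAppName("wordCount")
    // Standard Spark Cassandra Connector credential properties.
    .set("spark.cassandra.auth.username", "cassandra")   // placeholder user
    .set("spark.cassandra.auth.password", "cassandra");  // placeholder password
JavaSparkContext sc = new JavaSparkContext(conf);
// The CFS layer seen in the trace reads credentials from the Hadoop
// configuration; these property names are an assumption, not verified
// against DSE 5.0.5.
sc.hadoopConfiguration().set("cassandra.username", "cassandra");
sc.hadoopConfiguration().set("cassandra.password", "cassandra");

Alternatively, the launcher itself can be authenticated on the command line, e.g. dse -u <user> -p <password> spark-submit ..., which keeps credentials out of the application code.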