Задание MR завершается с ошибкой при большом количестве небольших входных файлов: java.io.EOFException

Я столкнулся со странной проблемой: задание Hive MR всегда завершается неудачно, если входных файлов много (например, 1500), но всегда выполняется успешно, если входных файлов мало (например, 50).

Судя по журналам, приведённым ниже, все редьюсеры завершились с ошибкой, и этой ошибкой является EOFException, что запутывает меня ещё больше.

Есть ли какие-либо предложения, как это исправить?

Фрагмент системного журнала (syslog) задания:

2014-08-21 10:45:46,520 INFO org.apache.hadoop.io.compress.CodecPool: Got brand-new compressor
2014-08-21 10:45:46,794 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:45:46,794 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_5374920402716501410_79091174
2014-08-21 10:45:49,054 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:45:49,054 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_8556910516797493817_79091178
2014-08-21 10:45:49,995 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.IOException: Bad connect ack with firstBadLink 10.0.3.8:50010
2014-08-21 10:45:49,995 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_87853064200958346_79090990
2014-08-21 10:45:49,999 INFO org.apache.hadoop.hdfs.DFSClient: Waiting to find target node: 10.0.3.25:50010
2014-08-21 10:45:52,808 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:45:52,809 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-4129790945127001814_79091181
2014-08-21 10:45:55,080 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:45:55,081 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-2690563202614665609_79091181
2014-08-21 10:45:56,014 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:45:56,014 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_4352294950449307171_79091182
2014-08-21 10:45:58,829 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:45:58,829 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-4957810822584479267_79091182
2014-08-21 10:46:02,038 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:46:02,038 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_3020744167528136240_79091185
2014-08-21 10:46:04,850 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:46:04,850 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_8611834680667753742_79091185
2014-08-21 10:46:08,045 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
2014-08-21 10:46:08,045 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-6887269404411000429_79091185
2014-08-21 10:46:10,858 WARN org.apache.hadoop.hdfs.DFSClient: DataStreamer Exception: java.io.IOException: Unable to create new block.
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2845)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

2014-08-21 10:46:10,858 WARN org.apache.hadoop.hdfs.DFSClient: Error Recovery for block blk_8611834680667753742_79091185 bad datanode[0] nodes == null
2014-08-21 10:46:10,858 WARN org.apache.hadoop.hdfs.DFSClient: Could not get block locations. Source file "/tmp/hive-hadoop/hive_2014-08-21_10-21-17_165_6766238325705015804-1/_task_tmp.-ext-10002/operator=reliance/circle=203/monyear=may2014/calldate=2014-05-28/_tmp.000000_0" - Aborting...
2014-08-21 10:46:10,950 FATAL ExecReducer: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{},"value":{"_col0":1428057199190412239,"_col1":"53766FB70012FA17","_col2":107,"_col3":203,"_col4":"220.225.87.228","_col5":401,"_col6":501,"_col7":1,"_col8":"Rel_Infoline_Bihar","_col9":9334548651,"_col10":"NLNG","_col11":1,"_col12":0,"_col13":0,"_col14":121,"_col15":0,"_col16":1,"_col17":0,"_col18":0,"_col19":"2014-05-28 19:04:12","_col20":21,"_col21":"CRBT","_col22":"CT-EVERGREEN","_col23":"5017911","_col24":2,"_col25":"5017911","_col26":"5","_col27":"","_col28":"TumheinApnaBananeKi","_col29":"5017911","_col30":"2014-05-28 19:06:19","_col31":0,"_col32":"","_col33":"0000-00-00 00:00:00","_col34":51,"_col35":0,"_col36":0,"_col37":"TumheinApnaBananeKi","_col38":"","_col39":"","_col40":0,"_col41":"2014-05-28 19:06:19","_col42":null,"_col43":null,"_col44":null,"_col45":null,"_col46":"2014-05-28 21:00:42","_col47":19,"_col48":4,"_col49":12,"_col50":"","_col51":"","_col52":"","_col53":"","_col54":null,"_col55":"NA","_col56":"NA","_col57":"reliance","_col58":"203","_col59":"may2014","_col60":"2014-05-28"}}
    at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:258)
    at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
    at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.EOFException
    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:652)
    at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:501)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:842)
    at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
    at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:501)
    at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:249)
    ... 3 more
Caused by: java.io.EOFException
    at java.io.DataInputStream.readByte(DataInputStream.java:250)
    at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
    at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
    at org.apache.hadoop.io.Text.readString(Text.java:400)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

0 ответов

Другие вопросы по тегам