Мы используем Cassandra 2.2.7, и мы получаем странные проблемы с потоковой передачей при ремонте / загрузке / выводе из эксплуатации некоторых узлов. В качестве примера я пытался списать новый узел с начальной загрузкой и сразу получил следующие ошибки на узле:
ERROR [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,051 StreamSession.java:532 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.io.IOException: Broken pipe
at sun.nio.ch.FileDispatcherImpl.write0(Native Method) ~[na:1.8.0_101]
at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.write(IOUtil.java:51) ~[na:1.8.0_101]
at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) ~[na:1.8.0_101]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.doFlush(BufferedDataOutputStreamPlus.java:266) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.flush(BufferedDataOutputStreamPlus.java:274) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.sendMessage(ConnectionHandler.java:390) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.run(ConnectionHandler.java:361) [apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
INFO [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,102 StreamResultFuture.java:183 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Session with /54.XX.XXX.XXX is complete
WARN [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,102 StreamResultFuture.java:210 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Stream failed
ERROR [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,103 StreamSession.java:532 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.io.IOException: Broken pipe
at sun.nio.ch.FileDispatcherImpl.write0(Native Method) ~[na:1.8.0_101]
at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.write(IOUtil.java:51) ~[na:1.8.0_101]
at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) ~[na:1.8.0_101]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.doFlush(BufferedDataOutputStreamPlus.java:266) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.flush(BufferedDataOutputStreamPlus.java:274) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.sendMessage(ConnectionHandler.java:390) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.run(ConnectionHandler.java:369) [apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
ERROR [STREAM-IN-/54.XX.XXX.XXX] 2016-08-04 17:42:44,312 StreamSession.java:532 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.lang.RuntimeException: Outgoing stream handler has been closed
at org.apache.cassandra.streaming.ConnectionHandler.sendMessage(ConnectionHandler.java:143) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.StreamSession.receive(StreamSession.java:596) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.StreamSession.messageReceived(StreamSession.java:475) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:293) ~[apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
На других узлах я получаю следующие ошибки:
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,234 CompressedStreamReader.java:121 - [Stream d89c1640-5a6a-11e6-9152-91527bf14a77] Error while reading partition DecoratedKey(7971573768827100217, 39653161643838316130303031333963393436366330303863633031636637346565343435366233373663383633343039633963323039313161336333333034) from stream on ks='KEYSPACE_XXXXXXX' and table='TABLE_XXXXXXXX'.
ERROR [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,315 StreamSession.java:524 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.io.IOException: net.jpountz.lz4.LZ4Exception: Error decoding offset 33063 of input buffer
at org.apache.cassandra.io.compress.LZ4Compressor.uncompress(LZ4Compressor.java:92) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.compress.CompressedInputStream.decompress(CompressedInputStream.java:116) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.compress.CompressedInputStream.read(CompressedInputStream.java:94) ~[apache-cassandra-2.2.5.jar:2.2.5]
at java.io.DataInputStream.readUnsignedShort(DataInputStream.java:337) ~[na:1.8.0_77]
at org.apache.cassandra.utils.BytesReadTracker.readUnsignedShort(BytesReadTracker.java:140) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.utils.ByteBufferUtil.readShortLength(ByteBufferUtil.java:328) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.utils.ByteBufferUtil.readWithShortLength(ByteBufferUtil.java:338) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:382) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:366) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.OnDiskAtom$Serializer.deserializeFromSSTable(OnDiskAtom.java:75) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:52) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:46) ~[apache-cassandra-2.2.5.jar:2.2.5]
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143) ~[guava-16.0.jar:na]
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138) ~[guava-16.0.jar:na]
at org.apache.cassandra.io.sstable.format.big.BigTableWriter.appendFromStream(BigTableWriter.java:243) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.StreamReader.writeRow(StreamReader.java:187) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.compress.CompressedStreamReader.read(CompressedStreamReader.java:108) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:49) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:38) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:58) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:261) ~[apache-cassandra-2.2.5.jar:2.2.5]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_77]
Caused by: net.jpountz.lz4.LZ4Exception: Error decoding offset 33063 of input buffer
at net.jpountz.lz4.LZ4JNIFastDecompressor.decompress(LZ4JNIFastDecompressor.java:39) ~[lz4-1.3.0.jar:na]
at org.apache.cassandra.io.compress.LZ4Compressor.uncompress(LZ4Compressor.java:87) ~[apache-cassandra-2.2.5.jar:2.2.5]
... 21 common frames omitted
INFO [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,317 StreamResultFuture.java:182 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Session with /163.XXX.XXX.XX is complete
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,317 StreamResultFuture.java:209 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Stream failed
WARN [Thread-1988] 2016-08-04 17:42:43,319 CompressedInputStream.java:179 - Error while reading compressed input stream.
java.nio.channels.AsynchronousCloseException: null
at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:205) ~[na:1.8.0_77]
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:407) ~[na:1.8.0_77]
at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:59) ~[na:1.8.0_77]
at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:109) ~[na:1.8.0_77]
at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:103) ~[na:1.8.0_77]
at org.apache.cassandra.streaming.compress.CompressedInputStream$Reader.runMayThrow(CompressedInputStream.java:169) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) [apache-cassandra-2.2.5.jar:2.2.5]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_77]
Иногда я получаю такое:
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 09:00:16,649 CompressedStreamReader.java:121 - [Stream b93dbb30-5a21-11e6-af4d-cf5ff5ded900] Error while reading partition DecoratedKey(-4093657774327370252, 37663734383563316637313338363739613462306435393036663431666566613963336237623534343663366131626238396562393764656262373662373835) from stream on ks='KEYSPACE_XXXXXX' and table='TABLE_XXXXXX'.
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 09:00:17,720 StreamSession.java:655 - [Stream #b93dbb30-5a21-11e6-af4d-cf5ff5ded900] Retrying for following error
java.lang.IllegalArgumentException: Not enough bytes. Offset: 39. Length: 13113. Buffer size: 54
at org.apache.cassandra.db.composites.AbstractCType.checkRemaining(AbstractCType.java:362) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.composites.AbstractCompoundCellNameType.fromByteBuffer(AbstractCompoundCellNameType.java:98) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:382) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:366) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.OnDiskAtom$Serializer.deserializeFromSSTable(OnDiskAtom.java:75) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:52) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:46) ~[apache-cassandra-2.2.7.jar:2.2.7]
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143) ~[guava-16.0.jar:na]
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138) ~[guava-16.0.jar:na]
at org.apache.cassandra.io.sstable.format.big.BigTableWriter.appendFromStream(BigTableWriter.java:243) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.StreamReader.writeRow(StreamReader.java:187) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.compress.CompressedStreamReader.read(CompressedStreamReader.java:108) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:49) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:38) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:58) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:287) [apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
Что может быть причиной? Я не думаю, что это проблема сети, потому что эта ошибка «недостаточно байтов» появилась сразу на всех узлах при запуске команды вывода из эксплуатации, тогда как я могу без проблем «scp» в течение нескольких часов переключаться между узлами.
Даже ремонт вылетает с теми же ошибками, поэтому мы застряли с «нестабильным» кластером.
Вам действительно стоит обратить внимание на JIRA Cassandra, это хороший источник информации о странных сообщениях или поведении. Последний strace, кажется, связан с этот трекер например