<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="bbPress/1.0.3" -->
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>DataStax Support Forums &#187; Topic: Cassandra crashes during simultaneous reads and writes on EC2</title>
		<link>http://www.datastax.com/support-forums/topic/cassandra-crashes-during-simultaneous-reads-and-writes-on-ec2</link>
		<description>Software, Support, and Training for Apache Cassandra</description>
		<language>en-US</language>
		<pubDate>Thu, 20 Jun 2013 03:39:57 +0000</pubDate>
		<generator>http://bbpress.org/?v=1.0.3</generator>
		<textInput>
			<title><![CDATA[Search]]></title>
			<description><![CDATA[Search all topics from these forums.]]></description>
			<name>q</name>
			<link>http://www.datastax.com/support-forums/search.php</link>
		</textInput>
		<atom:link href="http://www.datastax.com/support-forums/rss/topic/cassandra-crashes-during-simultaneous-reads-and-writes-on-ec2" rel="self" type="application/rss+xml" />

		<item>
			<title>jbellis on "Cassandra crashes during simultaneous reads and writes on EC2"</title>
			<link>http://www.datastax.com/support-forums/topic/cassandra-crashes-during-simultaneous-reads-and-writes-on-ec2#post-1380</link>
			<pubDate>Thu, 15 Mar 2012 22:05:52 +0000</pubDate>
			<dc:creator>jbellis</dc:creator>
			<guid isPermaLink="false">1380@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;If the machine itself is crashing you have an OS- or hardware- level problem, perhaps this one: &#60;a href=&#34;http://wiki.apache.org/cassandra/FAQ#ubuntu_hangs&#34; rel=&#34;nofollow&#34;&#62;http://wiki.apache.org/cassandra/FAQ#ubuntu_hangs&#60;/a&#62;
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "Cassandra crashes during simultaneous reads and writes on EC2"</title>
			<link>http://www.datastax.com/support-forums/topic/cassandra-crashes-during-simultaneous-reads-and-writes-on-ec2#post-1347</link>
			<pubDate>Fri, 09 Mar 2012 17:13:17 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1347@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Already checked the metrics. As I mentioned earlier, disk utilization is very low, especially for writes ( writes is about 0.0* MB/sec). Heap size is 4GB and I'm not getting any OOM exceptions. &#34;Top&#34; doesn't provide much useful information in this case.&#60;/p&#62;
&#60;p&#62; This is an output of jstack&#60;/p&#62;
&#60;p&#62;&#34;Thrift:21&#34; daemon prio=10 tid=0x0000000040aae000 nid=0x1580 runnable [0x00007f6fdd2d6000]&#60;br /&#62;
   java.lang.Thread.State: RUNNABLE&#60;br /&#62;
        at java.net.SocketInputStream.socketRead0(Native Method)&#60;br /&#62;
        at java.net.SocketInputStream.read(SocketInputStream.java:129)&#60;br /&#62;
        at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)&#60;br /&#62;
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)&#60;br /&#62;
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)&#60;br /&#62;
        - locked &#38;lt;0x0000000712909f50&#38;gt; (a java.io.BufferedInputStream)&#60;br /&#62;
        at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127)&#60;br /&#62;
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:84)&#60;br /&#62;
        at org.apache.thrift.transport.TFramedTransport.readFrame(TFramedTransport.java:129)&#60;br /&#62;
        at org.apache.thrift.transport.TFramedTransport.read(TFramedTransport.java:101)&#60;br /&#62;
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:84)&#60;br /&#62;
        at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:378)&#60;br /&#62;
        at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:297)&#60;br /&#62;
        at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:204)&#60;br /&#62;
        at org.apache.cassandra.thrift.Cassandra$Processor.process(Cassandra.java:2877)&#60;br /&#62;
        at org.apache.cassandra.thrift.CustomTThreadPoolServer$WorkerProcess.run(CustomTThreadPoolServer.java:187)&#60;br /&#62;
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)&#60;br /&#62;
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)&#60;br /&#62;
        at java.lang.Thread.run(Thread.java:662)&#60;/p&#62;
&#60;p&#62;&#34;Thrift:20&#34; daemon prio=10 tid=0x0000000040859000 nid=0x157f runnable [0x00007f8548107000]&#60;br /&#62;
   java.lang.Thread.State: RUNNABLE&#60;br /&#62;
        at java.net.SocketInputStream.socketRead0(Native Method)&#60;br /&#62;
        at java.net.SocketInputStream.read(SocketInputStream.java:129)&#60;br /&#62;
        at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)&#60;br /&#62;
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)&#60;br /&#62;
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)&#60;br /&#62;
        - locked &#38;lt;0x000000070626b468&#38;gt; (a java.io.BufferedInputStream)&#60;br /&#62;
        at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127)&#60;br /&#62;
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:84)&#60;br /&#62;
        at org.apache.thrift.transport.TFramedTransport.readFrame(TFramedTransport.java:129)&#60;br /&#62;
        at org.apache.thrift.transport.TFramedTransport.read(TFramedTransport.java:101)&#60;br /&#62;
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:84)&#60;br /&#62;
        at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:378)&#60;br /&#62;
        at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:297)&#60;br /&#62;
        at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:204)&#60;br /&#62;
        at org.apache.cassandra.thrift.Cassandra$Processor.process(Cassandra.java:2877)&#60;br /&#62;
        at org.apache.cassandra.thrift.CustomTThreadPoolServer$WorkerProcess.run(CustomTThreadPoolServer.java:187)&#60;br /&#62;
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)&#60;br /&#62;
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)&#60;br /&#62;
        at java.lang.Thread.run(Thread.java:662)&#60;/p&#62;
&#60;p&#62;This is the dominant thread and we have a bunch of these in the output. It seems to me that a possible bottleneck might be in the Thrift API, reading all the write requests ...&#60;/p&#62;
&#60;p&#62;And, as I mentioned, system.log is clean. Also, the overload occurs so quickly that there's not much time I can spend on investigation. Once it starts increasing, the machine crashes within seconds.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>jbellis on "Cassandra crashes during simultaneous reads and writes on EC2"</title>
			<link>http://www.datastax.com/support-forums/topic/cassandra-crashes-during-simultaneous-reads-and-writes-on-ec2#post-1346</link>
			<pubDate>Fri, 09 Mar 2012 15:25:09 +0000</pubDate>
			<dc:creator>jbellis</dc:creator>
			<guid isPermaLink="false">1346@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Can you check which threads are consuming the CPU, as outlined here? &#60;a href=&#34;http://spyced.blogspot.com/2010/01/linux-performance-basics.html&#34; rel=&#34;nofollow&#34;&#62;http://spyced.blogspot.com/2010/01/linux-performance-basics.html&#60;/a&#62;&#60;/p&#62;
&#60;p&#62;(Also worth checking the other performance metrics described on that post.)
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "Cassandra crashes during simultaneous reads and writes on EC2"</title>
			<link>http://www.datastax.com/support-forums/topic/cassandra-crashes-during-simultaneous-reads-and-writes-on-ec2#post-1339</link>
			<pubDate>Fri, 09 Mar 2012 00:07:24 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1339@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Hi, &#60;/p&#62;
&#60;p&#62;We are currently using a cluster of 2 cassandra nodes on 2 large EC2 instances. Each of them has about 65G of used data. ( Same setup for Dev and Production ). &#60;/p&#62;
&#60;p&#62;The issue we are having is during writes on production - the CPU load becomes 40 + ( 2 core machine ) and eventually the machines become unavailable and need to be rebooted. I tried multiple &#34;tuning strategies&#34; such as decreasing the total memtable space, changing the ratio of eden space vs survivor space in the young generation, copying larger objects directly from Eden space to the Older Generation, optimizing compaction to run more frequently, using a smaller # of sstables ( I noticed that during the crash disk utilization was almost none, so I tried to relieve the memory usage).&#60;/p&#62;
&#60;p&#62;Looking at cassandra system.log, I wasn't able to see any ERRORS or WARNINGs.&#60;br /&#62;
Only thing that shows up during the crash is the StatusLogger :&#60;/p&#62;
&#60;p&#62; INFO [ScheduledTasks:1] 2012-03-08 08:21:21,741 StatusLogger.java (line 50) Pool Name                    Active   Pending   Blocked&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,741 StatusLogger.java (line 65) ReadStage                         1         1         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,742 StatusLogger.java (line 65) RequestResponseStage              0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,742 StatusLogger.java (line 65) ReadRepairStage                   0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,742 StatusLogger.java (line 65) MutationStage                     1       600         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,743 StatusLogger.java (line 65) ReplicateOnWriteStage             0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,743 StatusLogger.java (line 65) GossipStage                       0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,744 StatusLogger.java (line 65) AntiEntropyStage                  0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,744 StatusLogger.java (line 65) MigrationStage                    0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,744 StatusLogger.java (line 65) StreamStage                       0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,744 StatusLogger.java (line 65) MemtablePostFlusher               0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,745 StatusLogger.java (line 65) FlushWriter                       0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,760 StatusLogger.java (line 65) MiscStage                         0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,760 StatusLogger.java (line 65) InternalResponseStage             0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,761 StatusLogger.java (line 65) HintedHandoff                     0         0         0&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,761 StatusLogger.java (line 69) CompactionManager               n/a         2&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,761 StatusLogger.java (line 81) MessagingService                n/a      0,45&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,761 StatusLogger.java (line 85) ColumnFamily                Memtable ops,data  Row cache size/cap  Key cache size/cap&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,762 StatusLogger.java (line 88) system.NodeIdInfo                         0,0                 0/0                 0/1&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,762 StatusLogger.java (line 88) system.IndexInfo                          0,0                 0/0                 0/1&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,762 StatusLogger.java (line 88) system.LocationInfo                       0,0                 0/0                 2/2&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,762 StatusLogger.java (line 88) system.Versions                         3,103                 0/0                 0/2&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,763 StatusLogger.java (line 88) system.Migrations                         0,0                 0/0                 0/3&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,763 StatusLogger.java (line 88) system.HintsColumnFamily                  0,0                 0/0                 1/1&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,763 StatusLogger.java (line 88) system.Schema                             0,0                 0/0                 2/3&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,763 StatusLogger.java (line 88) test.popular_neighbors                    0,0                 0/0       105549/200000&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,764 StatusLogger.java (line 88) test.popular_neighbors_root                 0,0                 0/0            0/200000&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,764 StatusLogger.java (line 88) upp.topcat                             113,73                 0/0        31472/200000&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,764 StatusLogger.java (line 88) upp.fulllisting               295447,74865591                 0/0       101034/200000&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,764 StatusLogger.java (line 88) collector.seo_tags                        0,0                 0/0         1104/200000&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,765 StatusLogger.java (line 88) collector.seo_tags_full_ids                 0,0                 0/0            8/200000&#60;br /&#62;
 INFO [ScheduledTasks:1] 2012-03-08 08:21:21,765 StatusLogger.java (line 88) collector.seo_tags_full                   0,0                 0/0           14/200000&#60;/p&#62;
&#60;p&#62;If I run writes on the Dev cluster, everything runs smoothly - no errors, load is max 1.5-2.&#60;br /&#62;
If I reboot the 2 production instances, I can run reads and writes for a while before the crash occurs; otherwise, once the machines have been up for 2+ days, the crash occurs within minutes.&#60;/p&#62;
&#60;p&#62;Any suggestions, ideas would be highly appreciated.&#60;/p&#62;
&#60;p&#62;Thanks
&#60;/p&#62;</description>
		</item>

	</channel>
</rss>
