<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="bbPress/1.0.3" -->
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>DataStax Support Forums &#187; Topic: Number of Map Tasks</title>
		<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks</link>
		<description>Software, Support, and Training for Apache Cassandra</description>
		<language>en-US</language>
		<pubDate>Mon, 20 May 2013 04:28:05 +0000</pubDate>
		<generator>http://bbpress.org/?v=1.0.3</generator>
		<textInput>
			<title><![CDATA[Search]]></title>
			<description><![CDATA[Search all topics from these forums.]]></description>
			<name>q</name>
			<link>http://www.datastax.com/support-forums/search.php</link>
		</textInput>
		<atom:link href="http://www.datastax.com/support-forums/rss/topic/number-of-map-tasks" rel="self" type="application/rss+xml" />

		<item>
			<title>tjake on "Number of Map Tasks"</title>
			<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks#post-1507</link>
			<pubDate>Mon, 02 Apr 2012 16:59:04 +0000</pubDate>
			<dc:creator>tjake</dc:creator>
			<guid isPermaLink="false">1507@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Make it part of the TBLPROPERTIES&#60;/p&#62;
&#60;p&#62;The smaller the split size is the more mappers it will create.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "Number of Map Tasks"</title>
			<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks#post-1506</link>
			<pubDate>Mon, 02 Apr 2012 16:55:01 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1506@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Is it the same as setting HIVE_OPT while invoking hive? Or should we make it part of TBLPROPERTIES while defining an external table?&#60;/p&#62;
&#60;p&#62; &#60;code&#62;export HIVE_OPT=&#38;quot;-Dcassandra.input.split.size=268435456&#38;quot;&#60;/code&#62;&#60;/p&#62;
&#60;p&#62;We are trying to increase the number of map tasks that get kicked off as a result of a simple query like &#60;/p&#62;
&#60;p&#62; &#60;code&#62;select count(*) from tablefoo;&#60;/code&#62;
&#60;/p&#62;</description>
		</item>
		<item>
			<title>tjake on "Number of Map Tasks"</title>
			<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks#post-1504</link>
			<pubDate>Mon, 02 Apr 2012 16:32:54 +0000</pubDate>
			<dc:creator>tjake</dc:creator>
			<guid isPermaLink="false">1504@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;You can set it there or as part of the job using -D in hadoop cmd
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "Number of Map Tasks"</title>
			<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks#post-1503</link>
			<pubDate>Mon, 02 Apr 2012 16:31:17 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1503@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Thanks. Where do we set this parameter? In mapred-site.xml?
&#60;/p&#62;</description>
		</item>
		<item>
			<title>tjake on "Number of Map Tasks"</title>
			<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks#post-1501</link>
			<pubDate>Mon, 02 Apr 2012 16:21:01 +0000</pubDate>
			<dc:creator>tjake</dc:creator>
			<guid isPermaLink="false">1501@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Yes. The setting is &#34;cassandra.input.split.size&#34;; its default is 64k rows.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "Number of Map Tasks"</title>
			<link>http://www.datastax.com/support-forums/topic/number-of-map-tasks#post-1497</link>
			<pubDate>Mon, 02 Apr 2012 15:23:53 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1497@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;While using DSE 2.0, how can we get hadoop/hive to increase the number of map tasks for maximum performance? We are trying to run map reduce with the input data coming from cassandra. Are there things like cassandra input buffer size or blocks that map reduce can see and increase the number of map tasks?
&#60;/p&#62;</description>
		</item>

	</channel>
</rss>
