<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="bbPress/1.0.3" -->
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>DataStax Support Forums &#187; Topic: use lzo compressed file in S3 as the data source for hive tables</title>
		<link>http://www.datastax.com/support-forums/topic/use-lzo-compressed-file-in-s3-as-the-data-source-for-hive-tables</link>
		<description>Software, Support, and Training for Apache Cassandra</description>
		<language>en-US</language>
		<pubDate>Sat, 25 May 2013 10:23:53 +0000</pubDate>
		<generator>http://bbpress.org/?v=1.0.3</generator>
		<textInput>
			<title><![CDATA[Search]]></title>
			<description><![CDATA[Search all topics from these forums.]]></description>
			<name>q</name>
			<link>http://www.datastax.com/support-forums/search.php</link>
		</textInput>
		<atom:link href="http://www.datastax.com/support-forums/rss/topic/use-lzo-compressed-file-in-s3-as-the-data-source-for-hive-tables" rel="self" type="application/rss+xml" />

		<item>
			<title>spencerho on "use lzo compressed file in S3 as the data source for hive tables"</title>
			<link>http://www.datastax.com/support-forums/topic/use-lzo-compressed-file-in-s3-as-the-data-source-for-hive-tables#post-443</link>
			<pubDate>Mon, 15 Aug 2011 23:15:25 +0000</pubDate>
			<dc:creator>spencerho</dc:creator>
			<guid isPermaLink="false">443@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;We have all our data files stored in S3 lzo compressed.  Our current hadoop/hive cluster uses and produces this type of file.&#60;/p&#62;
&#60;p&#62;We are now in the process of evaluating whether brisk/cassandra can work with our existing data.  I created an external table pointing to S3&#60;/p&#62;
&#60;pre&#62;&#60;code&#62;create external table exp_web_xxx
(
    request  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY &#38;#039;9&#38;#039;
    LINES TERMINATED BY &#38;#039;10&#38;#039;
STORED AS TEXTFILE
LOCATION &#38;#039;s3n://......&#38;#039;;&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;to see if brisk/hive can get the data alright.  We are using demo AMI for our brisk cluster.  I changed the hadoop configuration according to&#60;/p&#62;
&#60;blockquote&#62;&#60;p&#62;&#60;a href=&#34;https://github.com/riptano/brisk/wiki/Installing-LZO-compression&#34; rel=&#34;nofollow&#34;&#62;https://github.com/riptano/brisk/wiki/Installing-LZO-compression&#60;/a&#62;
&#60;/p&#62;&#60;/blockquote&#62;
&#60;p&#62;Though the demo AMI has a different path setup than the production brisk image, I believe I have changed brisk-env.sh and hadoop's mapred-site.xml accordingly.  I also changed brisk/resources/hive/conf/hive-site.xml for lzo compression. However, I can't get hive to work with our lzo compressed files.  When I perform a basic operation like &#60;code&#62;select * from exp_web_xxx limit 10&#60;/code&#62;, the output is not decompressed. &#60;/p&#62;
&#60;p&#62;Does anyone have experience running brisk/cassandra with lzo compressed files as input data?
&#60;/p&#62;</description>
		</item>

	</channel>
</rss>
