<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="bbPress/1.0.3" -->
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>DataStax Support Forums &#187; User Favorites: dipesh</title>
		<link>http://www.datastax.com/support-forums/profile/dipesh</link>
		<description>Software, Support, and Training for Apache Cassandra</description>
		<language>en-US</language>
		<pubDate>Wed, 19 Jun 2013 22:18:30 +0000</pubDate>
		<generator>http://bbpress.org/?v=1.0.3</generator>
		<textInput>
			<title><![CDATA[Search]]></title>
			<description><![CDATA[Search all topics from these forums.]]></description>
			<name>q</name>
			<link>http://www.datastax.com/support-forums/search.php</link>
		</textInput>
		<atom:link href="http://www.datastax.com/support-forums/rss/profile/" rel="self" type="application/rss+xml" />

		<item>
			<title>dipesh on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table/page/2#post-9072</link>
			<pubDate>Tue, 19 Feb 2013 17:02:21 +0000</pubDate>
			<dc:creator>dipesh</dc:creator>
			<guid isPermaLink="false">9072@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Finally, I am back to this. Sorry for the delay.&#60;/p&#62;
&#60;p&#62;Composite Column Name = UTF8Type + LongType/IntegerType/UTF8Type&#60;br /&#62;
Note: For all above combinations of second column, I am getting the same exception.&#60;/p&#62;
&#60;p&#62;My Doubts:&#60;br /&#62;
- Looking at the exception log, &#34;value&#34; seems pretty odd. Is that a problem?&#60;br /&#62;
- createCompositeKey method in the UDF java code, value of third (end-of-component) and fourth (lastIsOne) is hard coded to '0' and 'true' respectively. Frankly, I didn't understand it completely, even when I read the java-doc of Composite type several times. Please advise.&#60;/p&#62;
&#60;p&#62;Here is the Exception from the Log:&#60;br /&#62;
&#60;pre&#62;&#60;code&#62;2013-02-19 22:15:32,802 null map = 100%,  reduce = 0%
[2013-02-19 22:15:34,916] FATAL {ExecReducer} -  org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {&#38;quot;key&#38;quot;:{&#38;quot;_col0&#38;quot;:&#38;quot;CSCO&#38;quot;,&#38;quot;_col1&#38;quot;:&#38;quot;price&#38;quot;,&#38;quot;_col2&#38;quot;:1360821839400},&#38;quot;value&#38;quot;:{&#38;quot;_col0&#38;quot;:{&#38;quot;count&#38;quot;:1,&#38;quot;sum&#38;quot;:25.13}},&#38;quot;alias&#38;quot;:0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:518)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:419)
	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:256)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: InvalidRequestException(why:Not enough bytes to read value of component 0)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:603)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
	at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
	at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
	at org.apache.hadoop.hive.ql.exec.GroupByOperator.forward(GroupByOperator.java:959)
	at org.apache.hadoop.hive.ql.exec.GroupByOperator.processAggr(GroupByOperator.java:798)
	at org.apache.hadoop.hive.ql.exec.GroupByOperator.processOp(GroupByOperator.java:724)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: InvalidRequestException(why:Not enough bytes to read value of component 0)
	at org.apache.hadoop.hive.cassandra.output.CassandraAbstractPut.commitChanges(CassandraAbstractPut.java:69)
	at org.apache.hadoop.hive.cassandra.output.CassandraPut.write(CassandraPut.java:139)
	at org.apache.hadoop.hive.cassandra.output.HiveCassandraOutputFormat$1.write(HiveCassandraOutputFormat.java:69)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:589)
	... 16 more&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;Here is my UDF based on your sample code. &#60;/p&#62;
&#60;pre&#62;&#60;code&#62;package com.cisco.iep.hive.plugins;

import java.nio.ByteBuffer;

import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;

public class WriteCompositeStringLong extends UDF{
	public BytesWritable evaluate(final String strCol, final Long longCol) {

		ByteBuffer byteBuff = createCompositeKey(strCol, longCol, 0, false);

		return new BytesWritable(byteBuff.array());
	}

	private ByteBuffer createCompositeKey(String strCol, Long longCol, int endOfComponent,
			boolean lastIsOne) {
		ByteBuffer bytes = ByteBufferUtil.bytes(strCol);
		int totalSize = 0;
		if (strCol != null) {
			totalSize += 2 + bytes.remaining() + 1;
			if (longCol != null) {
				totalSize += 2 + 8 + 1;
				if (endOfComponent != -1) {
					totalSize += 2 + 1 + 1;
				}
			}
		}

		ByteBuffer bb = ByteBuffer.allocate(totalSize);

		if (strCol != null) {
			bb.putShort((short) bytes.remaining());
			bb.put(bytes);
			bb.put(longCol == null &#38;amp;&#38;amp; lastIsOne ? (byte) 1 : (byte) 0);
			if (longCol != null) {
				//bb.putShort((short) 16);
				//bb.put(UUIDGen.decompose(intCol));
				bb.putShort((short) 8);	//8 for Long value
				bb.putLong(longCol);
				bb.put(endOfComponent == -1 &#38;amp;&#38;amp; lastIsOne ? (byte) 1 : (byte) 0);
				if (endOfComponent != -1) {
					// We are putting a byte only because our test use ints that
					// fit in a byte *and* IntegerType.fromString() will
					// return something compatible (i.e, putting a full int here
					// would break &#38;#039;fromStringTest&#38;#039;)
					bb.putShort((short) 1);
					bb.put((byte) endOfComponent);
					bb.put(lastIsOne ? (byte) 1 : (byte) 0);
				}
			}
		}
		bb.rewind();
		return bb;
	}
}&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;I could paste the all other missing pieces like Hive Create Table script, Source Table details etc. &#60;/p&#62;
&#60;p&#62;I feel guilty of not resolving this issue yet even after your so much help from you already.&#60;/p&#62;
&#60;p&#62;Really appreciate your time and help over this!!
&#60;/p&#62;</description>
		</item>
		<item>
			<title>dipesh on "Mapping composite column names on Hive"</title>
			<link>http://www.datastax.com/support-forums/topic/mapping-composite-column-names-on-hive#post-9018</link>
			<pubDate>Thu, 14 Feb 2013 11:45:49 +0000</pubDate>
			<dc:creator>dipesh</dc:creator>
			<guid isPermaLink="false">9018@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;If possible, can you please share a sample code for serializer and deserializer?&#60;br /&#62;
And/or provide necessary pointers that could help in that regard.&#60;/p&#62;
&#60;p&#62;Here is what I have, but not working.&#60;/p&#62;
&#60;pre&#62;&#60;code&#62;package com.cisco.iep.hive.plugins;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class WriteCompositeLongString extends UDF{

	public BytesWritable evaluate(final LongWritable l, final Text s) {

		List&#38;lt;byte[]&#38;gt; b = new ArrayList&#38;lt;byte[]&#38;gt;(2);
		b.add(longWritableToByteArray(l));
		b.add(textToByteArray(s));

		return new BytesWritable(makeComposite(b));
	}

	private byte[] textToByteArray(Text s){
		if(s==null){
			return null;
		}
		return s.getBytes();
	}

	private byte[] longWritableToByteArray(final LongWritable l){
		if(l==null){
			return null;
		}
		return ByteBuffer.allocate(Long.SIZE).putLong(l.get()).array();
	}

	private byte[] makeComposite(final List&#38;lt;byte[]&#38;gt; b) {
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		for (int i = 0; i &#38;lt; b.size(); i++) {
			bos.write((byte) ((b.get(i).length &#38;gt;&#38;gt; (7 + 1)) &#38;amp; 0xFF));
			bos.write((byte) (b.get(i).length &#38;amp; 0xFF));
			for (int j = 0; j &#38;lt; b.get(i).length; j++) {
				bos.write(b.get(i)[j] &#38;amp; 0xFF);
			}
			bos.write((byte) 0);
		}
		return bos.toByteArray();
	}

}&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;Thanks in advance!!
&#60;/p&#62;</description>
		</item>
		<item>
			<title>dipesh on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table/page/2#post-8989</link>
			<pubDate>Mon, 11 Feb 2013 05:23:03 +0000</pubDate>
			<dc:creator>dipesh</dc:creator>
			<guid isPermaLink="false">8989@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Thanks again, however I will need couple of more days to try out due to other priorities.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>alexliu on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-8945</link>
			<pubDate>Wed, 06 Feb 2013 17:56:32 +0000</pubDate>
			<dc:creator>alexliu</dc:creator>
			<guid isPermaLink="false">8945@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Here is composite column encoding according to the source code&#60;/p&#62;
&#60;p&#62;/*&#60;br /&#62;
 * The encoding of a CompositeType column name should be:&#60;br /&#62;
 *   &#38;lt;component&#38;gt;&#38;lt;component&#38;gt;&#38;lt;component&#38;gt; ...&#60;br /&#62;
 * where &#38;lt;component&#38;gt; is:&#60;br /&#62;
 *   &#38;lt;length of value&#38;gt;&#38;lt;value&#38;gt;&#38;lt;'end-of-component' byte&#38;gt;&#60;br /&#62;
 * where &#38;lt;length of value&#38;gt; is a 2 bytes unsigned short the and the&#60;br /&#62;
 * 'end-of-component' byte should always be 0 for actual column name.&#60;br /&#62;
 * However, it can set to 1 for query bounds. This allows to query for the&#60;br /&#62;
 * equivalent of 'give me the full super-column'. That is, if during a slice&#60;br /&#62;
 * query uses:&#60;br /&#62;
 *   start = &#38;lt;3&#38;gt;&#38;lt;&#34;foo&#34;.getBytes()&#38;gt;&#38;lt;0&#38;gt;&#60;br /&#62;
 *   end   = &#38;lt;3&#38;gt;&#38;lt;&#34;foo&#34;.getBytes()&#38;gt;&#38;lt;1&#38;gt;&#60;br /&#62;
 * then he will be sure to get *all* the columns whose first component is &#34;foo&#34;.&#60;br /&#62;
 * If for a component, the 'end-of-component' is != 0, there should not be any&#60;br /&#62;
 * following component. The end-of-component can also be -1 to allow&#60;br /&#62;
 * non-inclusive query. For instance:&#60;br /&#62;
 *   start = &#38;lt;3&#38;gt;&#38;lt;&#34;foo&#34;.getBytes()&#38;gt;&#38;lt;-1&#38;gt;&#60;br /&#62;
 * allows to query everything that is greater than &#38;lt;3&#38;gt;&#38;lt;&#34;foo&#34;.getBytes()&#38;gt;, but&#60;br /&#62;
 * not &#38;lt;3&#38;gt;&#38;lt;&#34;foo&#34;.getBytes()&#38;gt; itself.&#60;br /&#62;
 */&#60;/p&#62;
&#60;p&#62;here is a sample code to create encoding composite column&#60;/p&#62;
&#60;p&#62;    private ByteBuffer createCompositeKey(String s, UUID uuid, int i, boolean lastIsOne)&#60;br /&#62;
    {&#60;br /&#62;
        ByteBuffer bytes = ByteBufferUtil.bytes(s);&#60;br /&#62;
        int totalSize = 0;&#60;br /&#62;
        if (s != null)&#60;br /&#62;
        {&#60;br /&#62;
            totalSize += 2 + bytes.remaining() + 1;&#60;br /&#62;
            if (uuid != null)&#60;br /&#62;
            {&#60;br /&#62;
                totalSize += 2 + 16 + 1;&#60;br /&#62;
                if (i != -1)&#60;br /&#62;
                {&#60;br /&#62;
                    totalSize += 2 + 1 + 1;&#60;br /&#62;
                }&#60;br /&#62;
            }&#60;br /&#62;
        }&#60;/p&#62;
&#60;p&#62;        ByteBuffer bb = ByteBuffer.allocate(totalSize);&#60;/p&#62;
&#60;p&#62;        if (s != null)&#60;br /&#62;
        {&#60;br /&#62;
            bb.putShort((short) bytes.remaining());&#60;br /&#62;
            bb.put(bytes);&#60;br /&#62;
            bb.put(uuid == null &#38;amp;&#38;amp; lastIsOne ? (byte)1 : (byte)0);&#60;br /&#62;
            if (uuid != null)&#60;br /&#62;
            {&#60;br /&#62;
                bb.putShort((short) 16);&#60;br /&#62;
                bb.put(UUIDGen.decompose(uuid));&#60;br /&#62;
                bb.put(i == -1 &#38;amp;&#38;amp; lastIsOne ? (byte)1 : (byte)0);&#60;br /&#62;
                if (i != -1)&#60;br /&#62;
                {&#60;br /&#62;
                    // We are putting a byte only because our test use ints that fit in a byte *and* IntegerType.fromString() will&#60;br /&#62;
                    // return something compatible (i.e, putting a full int here would break 'fromStringTest')&#60;br /&#62;
                    bb.putShort((short) 1);&#60;br /&#62;
                    bb.put((byte)i);&#60;br /&#62;
                    bb.put(lastIsOne ? (byte)1 : (byte)0);&#60;br /&#62;
                }&#60;br /&#62;
            }&#60;br /&#62;
        }&#60;br /&#62;
        bb.rewind();&#60;br /&#62;
        return bb;&#60;br /&#62;
    }&#60;/p&#62;
&#60;p&#62;You need change your code to the same encoding as the composite column name
&#60;/p&#62;</description>
		</item>
		<item>
			<title>dipesh on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-8942</link>
			<pubDate>Wed, 06 Feb 2013 16:58:47 +0000</pubDate>
			<dc:creator>dipesh</dc:creator>
			<guid isPermaLink="false">8942@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Thanks alexliu!!&#60;/p&#62;
&#60;p&#62;I could get Composite Column working on SELECT statement, however got an exception when tried to write data into Composite Column using following hive script and custom UDF.&#60;/p&#62;
&#60;p&#62;Here is relevant log on Exception:&#60;/p&#62;
&#60;blockquote&#62;&#60;p&#62;
[2013-02-06 22:23:15,252] FATAL {ExecReducer} -  org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {&#34;key&#34;:{&#34;_col0&#34;:&#34;blr_01&#34;,&#34;_col1&#34;:1350465782,&#34;_col2&#34;:&#34;WARN&#34;},&#34;value&#34;:{&#34;_col0&#34;:1},&#34;alias&#34;:0}&#60;br /&#62;
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)&#60;br /&#62;
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:518)&#60;br /&#62;
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:419)&#60;br /&#62;
	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:256)&#60;br /&#62;
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: InvalidRequestException(why:Not enough bytes to read value of component 0)&#60;br /&#62;
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:603)&#60;br /&#62;
...&#60;/p&#62;
&#60;/blockquote&#62;
&#60;p&#62;Here is my hive script:&#60;br /&#62;
&#60;pre&#62;&#60;code&#62;CREATE TEMPORARY FUNCTION setCompositeLongString AS
       &#38;#039;com.cisco.iep.hive.plugins.WriteCompositeLongString&#38;#039;; 

INSERT OVERWRITE TABLE TestTable
SELECT src_id,
  setCompositeLongString(cast(alert_time as BIGINT), severity),
  COUNT(alert_type)
FROM alert_instance
GROUP BY src_id,
  cast(alert_time as BIGINT),
  severity;&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;And here is custom UDF to write data into Composite Column:&#60;/p&#62;
&#60;pre&#62;&#60;code&#62;package com.cisco.iep.hive.plugins;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class WriteCompositeLongString extends UDF{

	public BytesWritable evaluate(final LongWritable l, final Text s) {

		List&#38;lt;byte[]&#38;gt; b = new ArrayList&#38;lt;byte[]&#38;gt;(2);
		b.add(longWritableToByteArray(l));
		b.add(textToByteArray(s));

		return new BytesWritable(makeComposite(b));
	}

	private byte[] textToByteArray(Text s){
		if(s==null){
			return null;
		}
		return s.getBytes();
	}

	private byte[] longWritableToByteArray(final LongWritable l){
		if(l==null){
			return null;
		}
		return ByteBuffer.allocate(Long.SIZE).putLong(l.get()).array();
	}

	private byte[] makeComposite(final List&#38;lt;byte[]&#38;gt; b) {
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		for (int i = 0; i &#38;lt; b.size(); i++) {
			bos.write((byte) ((b.get(i).length &#38;gt;&#38;gt; (7 + 1)) &#38;amp; 0xFF));
			bos.write((byte) (b.get(i).length &#38;amp; 0xFF));
			for (int j = 0; j &#38;lt; b.get(i).length; j++) {
				bos.write(b.get(i)[j] &#38;amp; 0xFF);
			}
			bos.write((byte) 0);
		}
		return bos.toByteArray();
	}
}&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;Please advise.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>alexliu on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-8747</link>
			<pubDate>Mon, 28 Jan 2013 20:24:56 +0000</pubDate>
			<dc:creator>alexliu</dc:creator>
			<guid isPermaLink="false">8747@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;non-cql composite row key is the same format as a composite column.&#60;/p&#62;
&#60;p&#62;But cql composite row key is in  a little different format, so you should modify the UDF for cql composite row key.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>alexliu on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-8746</link>
			<pubDate>Mon, 28 Jan 2013 19:24:07 +0000</pubDate>
			<dc:creator>alexliu</dc:creator>
			<guid isPermaLink="false">8746@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;you should be able to do the same to Composite Row Key, because that composite row key is stored the same format as composite column.&#60;/p&#62;
&#60;p&#62;Basically you retrieve the key as binary data, then use UDF to retrieve/parse the data.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>dipesh on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-8593</link>
			<pubDate>Wed, 23 Jan 2013 06:38:08 +0000</pubDate>
			<dc:creator>dipesh</dc:creator>
			<guid isPermaLink="false">8593@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;I am wondering, will this technique work with Composite Row Key as well?&#60;/p&#62;
&#60;p&#62;So the table (CF) would look like this:&#60;/p&#62;
&#60;p&#62;create column family TestTable&#60;br /&#62;
with key_validation_class = 'CompositeType (UTF8Type, UTF8Type)'&#60;br /&#62;
and comparator = 'UTF8Type'&#60;br /&#62;
;
&#60;/p&#62;</description>
		</item>
		<item>
			<title>pierantonio.merlino on "Mapping composite column names on Hive"</title>
			<link>http://www.datastax.com/support-forums/topic/mapping-composite-column-names-on-hive#post-7442</link>
			<pubDate>Thu, 08 Nov 2012 14:07:24 +0000</pubDate>
			<dc:creator>pierantonio.merlino</dc:creator>
			<guid isPermaLink="false">7442@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Thank you for the answer.&#60;br /&#62;
I solved the problem mapping the composite columns in to a struct in Hive and writing a custom serializer/deserializer.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Piotr Kołaczkowski on "Mapping composite column names on Hive"</title>
			<link>http://www.datastax.com/support-forums/topic/mapping-composite-column-names-on-hive#post-7159</link>
			<pubDate>Mon, 29 Oct 2012 09:37:26 +0000</pubDate>
			<dc:creator>Piotr Kołaczkowski</dc:creator>
			<guid isPermaLink="false">7159@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;There is no dedicated support in Hive for composite columns. In wide-row mode, it scans each row sequentially and fetches each column to produce a triple (row-key, column-name, value). Outputting as triples is hardcoded. Anyway, this could make a nice feature-request.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>pierantonio.merlino on "Mapping composite column names on Hive"</title>
			<link>http://www.datastax.com/support-forums/topic/mapping-composite-column-names-on-hive#post-6763</link>
			<pubDate>Thu, 04 Oct 2012 10:00:30 +0000</pubDate>
			<dc:creator>pierantonio.merlino</dc:creator>
			<guid isPermaLink="false">6763@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Hi,&#60;br /&#62;
I've to map Cassandra columns families on Hive tables.&#60;br /&#62;
Since I've dynamic fields in Cassandra, I use the following string&#60;/p&#62;
&#60;pre&#62;&#60;code&#62;CREATE EXTERNAL TABLE Table1
(keyName string, columnName string, value string)
STORED BY &#38;#039;org.apache.hadoop.hive.cassandra.CassandraStorageHandler&#38;#039;
WITH SERDEPROPERTIES (&#38;quot;cassandra.columns.mapping&#38;quot; = &#38;quot;:key,:column,:value&#38;quot; );&#60;/code&#62;&#60;/pre&#62;
&#60;p&#62;in order to create the external table on Hive. The result is a table where each row is as follows:&#60;/p&#62;
&#60;p&#62;&#60;code&#62;rowName1 columnName1 value1&#60;/code&#62;&#60;br /&#62;
&#60;code&#62;rowName2 columnName2 value2&#60;/code&#62;&#60;br /&#62;
...&#60;/p&#62;
&#60;p&#62;Since I use composite column names in Cassandra (Composite(columnNameA, columnNameB)), is it possible to map them into two different columns on Hive?&#60;br /&#62;
What I want is as follows:&#60;/p&#62;
&#60;p&#62;&#60;code&#62;rowName1 columnNameA1 columnNameB1 value1&#60;/code&#62;.&#60;/p&#62;
&#60;p&#62;How can I map these columns on Hive?&#60;/p&#62;
&#60;p&#62;Thanks in advance.
&#60;/p&#62;</description>
		</item>
		<item>
			<title>tjake on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-1458</link>
			<pubDate>Thu, 29 Mar 2012 15:52:29 +0000</pubDate>
			<dc:creator>tjake</dc:creator>
			<guid isPermaLink="false">1458@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;awsome, thanks!
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-1457</link>
			<pubDate>Thu, 29 Mar 2012 15:51:33 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1457@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;I got it. Here is the UDF that works&#60;/p&#62;
&#60;pre&#62;&#60;code&#62;import java.nio.ByteBuffer;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;

public final class TestExtract extends UDF {

	public String evaluate(final BytesWritable s) {
		if (s == null) {
			return null;
		}
		// byte buffer
		ByteBuffer bb = ByteBuffer.wrap(s.getBytes());

		// first part of composite
		int colASize = (bb.get() &#38;amp; 0xFF) &#38;lt;&#38;lt; (7 + 1);
		colASize = (bb.get() &#38;amp; 0xFF);
		byte[] colA = new byte[colASize];
		bb.get(colA);
		ByteBuffer bf = ByteBuffer.wrap(colA);

		 bb.get();
		// second part of composite
		int colBSize = (bb.get() &#38;amp; 0xFF) &#38;lt;&#38;lt; (7 + 1);
		colBSize = (bb.get() &#38;amp; 0xFF);
		byte[] colB = new byte[colBSize];
		bb.get(colB);

		// return concatenated string
		return new String(bf.getLong() + &#38;quot;_&#38;quot; + new String(colB));
	}
}&#60;/code&#62;&#60;/pre&#62;</description>
		</item>
		<item>
			<title>Anonymous on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-1451</link>
			<pubDate>Wed, 28 Mar 2012 22:38:15 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1451@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;Here is what the data looks like from cassandra-cli&#60;/p&#62;
&#60;p&#62; (column=1332972763782:Test1, value=abc123, timestamp=1332972760270111)&#60;br /&#62;
 (column=1332972763782:Test2, value=test132, timestamp=1332972760269241)&#60;br /&#62;
 (column=1332972763782:Test3, value=blah, timestamp=1332972760270101)&#60;br /&#62;
 (column=1332972763782:Test4, value=blah2, timestamp=1332972760269340)&#60;br /&#62;
 (column=1332972763782:Test5, value=hello123, timestamp=1332972760270320)
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Anonymous on "How do you use composite columns (from cassandra) in a hive table?"</title>
			<link>http://www.datastax.com/support-forums/topic/how-do-you-use-composite-columns-from-cassandra-in-a-hive-table#post-1449</link>
			<pubDate>Wed, 28 Mar 2012 22:26:32 +0000</pubDate>
			<dc:creator>Anonymous</dc:creator>
			<guid isPermaLink="false">1449@http://www.datastax.com/support-forums/</guid>
			<description>&#60;p&#62;I went ahead and created a test example with DSE 2.0. I am still getting an error. Can you please confirm if my mapping is wrong&#60;/p&#62;
&#60;p&#62;create keyspace TestData;&#60;/p&#62;
&#60;p&#62;-- column family, compositeType columns are (timeinmiilis, string)&#60;/p&#62;
&#60;p&#62;create column family TestTable&#60;br /&#62;
    with key_validation_class = UTF8Type&#60;br /&#62;
    and comparator = 'CompositeType (LongType, UTF8Type)'&#60;br /&#62;
    and comment = 'A Test Column Family'&#60;br /&#62;
    and compression_options={sstable_compression:SnappyCompressor, chunk_length_kb:6};&#60;/p&#62;
&#60;p&#62;// test UDF java program for hive, it concatenates the long and string with an under_score in the middle&#60;br /&#62;
package com.foo.hive.udf;&#60;br /&#62;
import java.nio.ByteBuffer;&#60;br /&#62;
import org.apache.hadoop.hive.ql.exec.UDF;&#60;br /&#62;
import org.apache.hadoop.io.BytesWritable;&#60;/p&#62;
&#60;p&#62;public final class TestExtract extends UDF {&#60;/p&#62;
&#60;p&#62;	public String evaluate(final BytesWritable s) {&#60;br /&#62;
		if (s == null) {&#60;br /&#62;
			return null;&#60;br /&#62;
		}&#60;br /&#62;
		// byte buffer&#60;br /&#62;
		ByteBuffer bb = ByteBuffer.wrap(s.getBytes());&#60;/p&#62;
&#60;p&#62;		// header byte&#60;br /&#62;
		bb.get();&#60;/p&#62;
&#60;p&#62;		// first part of composite&#60;br /&#62;
		int colASize = (bb.get() &#38;amp; 0xFF) &#38;lt;&#38;lt; (7 + 1);&#60;br /&#62;
		colASize = (bb.get() &#38;amp; 0xFF);&#60;br /&#62;
		byte[] colA = new byte[colASize];&#60;br /&#62;
		bb.get(colA);&#60;/p&#62;
&#60;p&#62;		// second part of composite&#60;br /&#62;
		int colBSize = (bb.get() &#38;amp; 0xFF) &#38;lt;&#38;lt; (7 + 1);&#60;br /&#62;
		colBSize = (bb.get() &#38;amp; 0xFF);&#60;br /&#62;
		byte[] colB = new byte[colBSize];&#60;br /&#62;
		bb.get(colB);&#60;/p&#62;
&#60;p&#62;		// return concatenated string&#60;br /&#62;
		return new String(new String(colA) + '_' + new String(colB));&#60;br /&#62;
	}&#60;br /&#62;
}&#60;/p&#62;
&#60;p&#62;--export the jar&#60;br /&#62;
export HIVE_AUX_JARS_PATH=/tmp/custom.jar&#60;/p&#62;
&#60;p&#62;--login to hive&#60;br /&#62;
use TestData;  &#60;/p&#62;
&#60;p&#62;-- drop if exists&#60;br /&#62;
drop table TestTable;&#60;/p&#62;
&#60;p&#62;--create hive mapping&#60;br /&#62;
CREATE EXTERNAL TABLE TestTable (row_key string, param_name binary, param_value string)&#60;br /&#62;
      STORED BY 'org.apache.hadoop.hive.cassandra.CassandraStorageHandler'&#60;br /&#62;
      WITH SERDEPROPERTIES (&#34;cassandra.ks.name&#34; = &#34;TestData&#34;,&#60;br /&#62;
                            &#34;cassandra.columns.mapping&#34; =  &#34;:key,:column,:value&#34;,&#60;br /&#62;
                            &#34;cassandra.cf.validatortype&#34;= &#34;BytesType, BytesType, BytesType&#34;)&#60;br /&#62;
           TBLPROPERTIES ( &#34;cassandra.ks.name&#34; = &#34;TestData&#34;,&#60;br /&#62;
                           &#34;cassandra.range.size&#34; = &#34;100&#34;,&#60;br /&#62;
                           &#34;cassandra.slice.predicate.size&#34; = &#34;100&#34;);&#60;/p&#62;
&#60;p&#62;-- create temp function&#60;br /&#62;
create temporary function my_test as 'com.foo.hive.udf.TestExtract';  &#60;/p&#62;
&#60;p&#62;-- select&#60;br /&#62;
select row_key, my_test(param_name), param_value  from TestTable limit 5;&#60;/p&#62;
&#60;p&#62;--output comes out as&#60;br /&#62;
e8cf5f471d0211e18c8e6431504ba7b8	_6	abc123&#60;br /&#62;
e8cf5f471d0211e18c8e6431504ba7b8	_6	test132&#60;br /&#62;
e8cf5f471d0211e18c8e6431504ba7b8	_6	blah&#60;br /&#62;
e8cf5f471d0211e18c8e6431504ba7b8	_6	blah2&#60;br /&#62;
e8cf5f471d0211e18c8e6431504ba7b8	_6	hello123&#60;/p&#62;
&#60;p&#62;What am I doing wrong here?
&#60;/p&#62;</description>
		</item>

	</channel>
</rss>
