I went ahead and created a test example with DSE 2.0. I am still getting an error. Can you please confirm if my mapping is wrong
create keyspace TestData;
-- column family, compositeType columns are (timeinmiilis, string)
create column family TestTable
with key_validation_class = UTF8Type
and comparator = 'CompositeType (LongType, UTF8Type)'
and comment = 'A Test Column Family'
and compression_options={sstable_compression:SnappyCompressor, chunk_length_kb:6};
// test UDF java program for hive, it concatenates the long and string with an under_score in the middle
package com.foo.hive.udf;
import java.nio.ByteBuffer;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
public final class TestExtract extends UDF {
public String evaluate(final BytesWritable s) {
if (s == null) {
return null;
}
// byte buffer
ByteBuffer bb = ByteBuffer.wrap(s.getBytes());
// header byte
bb.get();
// first part of composite
int colASize = (bb.get() & 0xFF) << (7 + 1);
colASize = (bb.get() & 0xFF);
byte[] colA = new byte[colASize];
bb.get(colA);
// second part of composite
int colBSize = (bb.get() & 0xFF) << (7 + 1);
colBSize = (bb.get() & 0xFF);
byte[] colB = new byte[colBSize];
bb.get(colB);
// return concatenated string
return new String(new String(colA) + '_' + new String(colB));
}
}
--export the jar
export HIVE_AUX_JARS_PATH=/tmp/custom.jar
--login to hive
use TestData;
-- drop if exists
drop table TestTable;
--create hive mapping
CREATE EXTERNAL TABLE TestTable (row_key string, param_name binary, param_value string)
STORED BY 'org.apache.hadoop.hive.cassandra.CassandraStorageHandler'
WITH SERDEPROPERTIES ("cassandra.ks.name" = "TestData",
"cassandra.columns.mapping" = ":key,:column,:value",
"cassandra.cf.validatortype"= "BytesType, BytesType, BytesType")
TBLPROPERTIES ( "cassandra.ks.name" = "TestData",
"cassandra.range.size" = "100",
"cassandra.slice.predicate.size" = "100");
-- create temp function
create temporary function my_test as 'com.foo.hive.udf.TestExtract';
-- select
select row_key, my_test(param_name), param_value from TestTable limit 5;
--output comes out as
e8cf5f471d0211e18c8e6431504ba7b8 _6 abc123
e8cf5f471d0211e18c8e6431504ba7b8 _6 test132
e8cf5f471d0211e18c8e6431504ba7b8 _6 blah
e8cf5f471d0211e18c8e6431504ba7b8 _6 blah2
e8cf5f471d0211e18c8e6431504ba7b8 _6 hello123
What am I doing wrong here?