Technology•April 7, 2015

An Introduction to DSE Field Transformers

Edward Ribeiro

cqlsh> CREATE KEYSPACE solr_fit WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'Solr': 1};

cqlsh> use solr_fit;

cqlsh> CREATE TABLE cities (id int, json text, primary key (id));

mvn install:install-file -Dfile=/path/to/dse.jar -DgroupId=com.datastax \

 -DartifactId=dse -Dversion=4.6.1 -Dpackaging=jar

<dependency>

   <groupId>com.datastax</groupId>

   <artifactId>dse</artifactId>

   <version>4.6.1</version>

</dependency>

mvn install:install-file -Dfile=/path/to/solr-4.6.0.3.4-SNAPSHOT-uber.jar \

-DgroupId=org.apache.solr -DartifactId=solr-core -Dversion=4.6.0.3.4-SNAPSHOT -Dpackaging=jar

<dependency>

    <groupId>org.apache.solr</groupId>

    <artifactId>solr-core</artifactId>

    <version>4.6.0.3.4-SNAPSHOT</version>

</dependency>

<?xml version="1.0" encoding="UTF-8" ?>

<!-- Solr schema for the cities table: "id" and "json" match the CQL columns;
     "city", "state" and "country" are extra index-only fields. -->
<schema name="blog" version="1.1">

  <types>

    <fieldType name="int" class="solr.TrieIntField" multiValued="false"/>

    <fieldType name="string" class="solr.StrField"/>

  </types>

  <fields>

    <!-- Fields backed by the table columns (id int, json text). -->
    <field name="id" type="int" indexed="true" stored="true"/>

    <field name="json" type="string" indexed="true" stored="true"/>

    <!-- Indexed but not stored: these have no column of their own. The
         JsonFieldInputTransformer adds them to the document from the parsed
         "json" value, and the JsonFieldOutputTransformer re-creates them from
         the stored "json" value on read. -->
    <field name="city" type="string" indexed="true" stored="false"/>

    <field name="state" type="string" indexed="true" stored="false"/>

    <field name="country" type="string" indexed="true" stored="false"/>

  </fields>

   <!-- Queries that do not name a field are run against "id". -->
   <defaultSearchField>id</defaultSearchField>

   <uniqueKey>id</uniqueKey>

</schema>

<fieldInputTransformer name="dse" class="br.eribeiro.dse.search.JsonFieldInputTransformer">

</fieldInputTransformer>

<fieldOutputTransformer name="dse" class="br.eribeiro.dse.search.JsonFieldOutputTransformer">

</fieldOutputTransformer>

package br.eribeiro.dse.search;

/**
 * Plain bean for the JSON document kept in the {@code json} column, e.g.
 * {@code {"city":"Austin","state":"TX","country":"USA"}}.
 *
 * <p>The field transformers bind JSON to this class with Jackson's
 * {@code ObjectMapper.readValue(..., City.class)}, which is why it keeps an
 * implicit no-argument constructor and a getter/setter pair per attribute.
 * Any attribute missing from the JSON stays {@code null}.
 */
public class City {

    private String city;
    private String state;
    private String country;

    /** @return the city name, or {@code null} if it was never set */
    public String getCity() {
        return city;
    }

    /** @param city the city name */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return the state, or {@code null} if it was never set */
    public String getState() {
        return state;
    }

    /** @param state the state */
    public void setState(String state) {
        this.state = state;
    }

    /** @return the country, or {@code null} if it was never set */
    public String getCountry() {
        return country;
    }

    /** @param country the country */
    public void setCountry(String country) {
        this.country = country;
    }

    /**
     * @return a debug representation of the form
     *         {@code City{city='...', state='...', country='...'}}
     */
    @Override
    public String toString() {
        return "City{" +
                "city='" + city + "'" +
                ", state='" + state + "'" +
                ", country='" + country + "'" +
                '}';
    }
}

package br.eribeiro.dse.search;

import com.datastax.bdp.search.solr.FieldInputTransformer;

import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.lucene.document.Document;

import org.apache.solr.core.SolrCore;

import org.apache.solr.schema.IndexSchema;

import org.apache.solr.schema.SchemaField;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import java.io.IOException;

public class JsonFieldInputTransformer extends FieldInputTransformer

    private static final Logger LOGGER = LoggerFactory.getLogger(JsonFieldInputTransformer.class);

    @Override

    public boolean evaluate(String field)

        return field.equals("json");

    @Override

    public void addFieldToDocument(SolrCore core,

                                   IndexSchema schema,

                                   String key,

                                   Document doc,

                                   SchemaField fieldInfo,

                                   String fieldValue,

                                   float boost,

                                   DocumentHelper helper)

            throws IOException

try

            ObjectMapper mapper = new ObjectMapper();

            LOGGER.info("JsonFieldInputTransformer called");

            LOGGER.info("fieldValue: " + fieldValue);

            City city = mapper.readValue(fieldValue, City.class);

            SchemaField jsonCity = core.getLatestSchema().getFieldOrNull("city");

            SchemaField jsonState = core.getLatestSchema().getFieldOrNull("state");

            SchemaField jsonCountry = core.getLatestSchema().getFieldOrNull("country");

            helper.addFieldToDocument(core, core.getLatestSchema(), key, doc, jsonCity, city.getCity(), boost);

            helper.addFieldToDocument(core, core.getLatestSchema(), key, doc, jsonState, city.getState(), boost);

            helper.addFieldToDocument(core, core.getLatestSchema(), key, doc, jsonCountry, city.getCountry(), boost);

        catch (Exception ex)

            LOGGER.error(ex.getMessage());

            throw new RuntimeException(ex);

package br.eribeiro.dse.search;

import com.datastax.bdp.search.solr.FieldOutputTransformer;

import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.lucene.index.FieldInfo;

import org.apache.lucene.index.StoredFieldVisitor;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import java.io.IOException;

public class JsonFieldOutputTransformer extends FieldOutputTransformer

    private static final Logger LOGGER = LoggerFactory.getLogger(JsonFieldOutputTransformer.class);

    @Override

    public void stringField(FieldInfo fieldInfo,

                            String value,

                            StoredFieldVisitor visitor,

                            DocumentHelper helper) throws IOException

        ObjectMapper mapper = new ObjectMapper();

        LOGGER.info("name: " + fieldInfo.name + ", value: " + value);

try

            City city = mapper.readValue(value.getBytes(), City.class);

            FieldInfo json_city_fi = helper.getFieldInfo("city");

            FieldInfo json_state_fi = helper.getFieldInfo("state");

            FieldInfo json_country_fi = helper.getFieldInfo("country");

            if (city.getCity() != null)

                visitor.stringField(json_city_fi, city.getCity());

            if (city.getState() != null)

                visitor.stringField(json_state_fi, city.getState());

            if (city.getCountry() != null)

                visitor.stringField(json_country_fi, city.getCountry());

        catch (IOException e)

            LOGGER.error(fieldInfo.name + " " + e.getMessage());

            throw e;

$ curl http://localhost:8983/solr/resource/solr_fit.cities/solrconfig.xml --data-binary @solrconfig.xml -H 'Content-type:text/xml; charset=utf-8'

$ curl http://localhost:8983/solr/resource/solr_fit.cities/schema.xml --data-binary @schema.xml -H 'Content-type:text/xml; charset=utf-8'

$ curl -X POST "http://localhost:8983/solr/admin/cores?action=CREATE&name=solr_fit.cities"

cqlsh> insert into cities (id, json) values (1, '{"city":"Austin","state":"TX", "country":"USA"}');

cqlsh> insert into cities (id, json) values (2, '{"city":"San Francisco","state":"CA", "country":"USA"}');

cqlsh> insert into cities (id, json) values (3, '{"city":"Seattle","state":"WA", "country":"USA"}');

cqlsh:solr_fit> select * from cities;

id | json | solr_query

----+--------------------------------------------------------+------------

 1 | {"city":"Austin","state":"TX", "country":"USA"} | null

 2 | {"city":"San Francisco","state":"CA", "country":"USA"} | null

 3 | {"city":"Seattle","state":"WA", "country":"USA"} | null

(3 rows)

cqlsh:solr_fit> select * from cities where solr_query = '{"q":"country: USA"}';

id | json | solr_query

----+--------------------------------------------------------+------------

 1 | {"city":"Austin","state":"TX", "country":"USA"} | null

 2 | {"city":"San Francisco","state":"CA", "country":"USA"} | null

 3 | {"city":"Seattle","state":"WA", "country":"USA"} | null

(3 rows)

cqlsh:solr_fit> select * from cities where solr_query = '{"q":"city: Seat*"}';

id | json | solr_query

----+--------------------------------------------------------+------------

 3 | {"city":"Seattle","state":"WA", "country":"USA"} | null

(1 rows)

cqlsh:solr_fit>

cqlsh:solr_fit> select * from cities where solr_query = '{"q":"state: CA"}';

id | json | solr_query

----+--------------------------------------------------------+------------

2 | {"city":"San Francisco","state":"CA", "country":"USA"} | null

(1 rows)

More Technology

View All

Technology • April 25, 2024

How to Build a Crystal Image Search App with Vector Search

Technology • April 18, 2024

Knowledge Graphs for RAG without a GraphDB

Technology • April 17, 2024

How Winweb Built its AI Assistant with DataStax Astra DB and LangChain

Technology • April 16, 2024

Vercel + Astra DB: Get Data into Your GenAI Apps Fast

One-stop Data API for Production GenAI

Astra DB gives JavaScript developers a complete data API and out-of-the-box integrations that make it easier to build production RAG apps with high relevancy and low latency.

Learn More

Get Started for Free