TechnologyApril 7, 2015

An Introduction to DSE Field Transformers

Edward Ribeiro
Edward Ribeiro
An Introduction to DSE Field Transformers
cqlsh> CREATE KEYSPACE solr_fit WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'Solr': 1};
cqlsh> use solr_fit;
cqlsh> CREATE TABLE cities (id int, json text, primary key (id));
mvn install:install-file -Dfile=/path/to/dse.jar -DgroupId=com.datastax \
 -DartifactId=dse -Dversion=4.6.1 -Dpackaging=jar
<dependency>
   <groupId>com.datastax</groupId>
   <artifactId>dse</artifactId>
   <version>4.6.1</version>
</dependency>
mvn install:install-file -Dfile=/path/to/solr-4.6.0.3.4-SNAPSHOT-uber.jar \
-DgroupId=org.apache.solr -DartifactId=solr-core -Dversion=4.6.0.3.4-SNAPSHOT -Dpackaging=jar
<dependency>
    <groupId>org.apache.solr</groupId>
    <artifactId>solr-core</artifactId>
    <version>4.6.0.3.4-SNAPSHOT</version>
</dependency>
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema for the cities table (columns: id int, json text).
  Only id and json are stored; city, state and country are index-only fields
  that JsonFieldInputTransformer adds to each document from the parsed json value.
-->
<schema name="blog" version="1.1">
  <types>
    <fieldType name="int" class="solr.TrieIntField" multiValued="false"/>
    <fieldType name="string" class="solr.StrField"/>
  </types>
  <fields>
    <!-- Fields that correspond directly to table columns. -->
    <field name="id" type="int" indexed="true" stored="true"/>
    <field name="json" type="string" indexed="true" stored="true"/>
    <!-- Derived fields: searchable (indexed) but not stored. -->
    <field name="city" type="string" indexed="true" stored="false"/>
    <field name="state" type="string" indexed="true" stored="false"/>
    <field name="country" type="string" indexed="true" stored="false"/>
  </fields>
   <defaultSearchField>id</defaultSearchField>
   <uniqueKey>id</uniqueKey>
</schema>
<!-- Registers the custom transformer applied when documents are indexed. -->
<fieldInputTransformer name="dse" class="br.eribeiro.dse.search.JsonFieldInputTransformer">
</fieldInputTransformer>
<!-- Registers the custom transformer applied when stored fields are read back. -->
<fieldOutputTransformer name="dse" class="br.eribeiro.dse.search.JsonFieldOutputTransformer">
</fieldOutputTransformer>
package br.eribeiro.dse.search;
/**
 * Mutable bean holding the three attributes carried by the {@code json} column:
 * city, state and country.
 *
 * <p>It exposes a default constructor plus getter/setter pairs so that a JSON
 * mapper can populate it. Every attribute is {@code null} until it is set.
 */
public class City
{
    private String city;
    private String state;
    private String country;

    /** @return the city name, or {@code null} if it was never set */
    public String getCity()
    {
        return city;
    }

    /** @return the state, or {@code null} if it was never set */
    public String getState()
    {
        return state;
    }

    /** @return the country, or {@code null} if it was never set */
    public String getCountry()
    {
        return country;
    }

    /** @param city the city name to store */
    public void setCity(String city)
    {
        this.city = city;
    }

    /** @param state the state to store */
    public void setState(String state)
    {
        this.state = state;
    }

    /** @param country the country to store */
    public void setCountry(String country)
    {
        this.country = country;
    }

    /**
     * Renders the bean as {@code City{city='..', state='..', country='..'}}.
     * Unset attributes are printed as the literal text {@code null}.
     */
    @Override
    public String toString()
    {
        StringBuilder text = new StringBuilder("City{");
        text.append("city='").append(city).append("'");
        text.append(", state='").append(state).append("'");
        text.append(", country='").append(country).append("'");
        text.append('}');
        return text.toString();
    }
}
package br.eribeiro.dse.search;
import com.datastax.bdp.search.solr.FieldInputTransformer;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.lucene.document.Document;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Field input transformer that handles the {@code json} field: it parses the
 * field value into a {@link City} and adds the resulting city, state and country
 * values to the document under the schema fields {@code city}, {@code state}
 * and {@code country}.
 */
public class JsonFieldInputTransformer extends FieldInputTransformer
{
    private static final Logger LOGGER = LoggerFactory.getLogger(JsonFieldInputTransformer.class);

    /**
     * Shared mapper, created once instead of once per indexed row. Jackson documents
     * ObjectMapper as thread-safe as long as it is not reconfigured after first use;
     * it is never reconfigured here.
     */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Name of the only field this transformer accepts. */
    private static final String JSON_FIELD = "json";

    /**
     * Tells whether this transformer should process the given field.
     *
     * @param field the field name; {@code null} is treated as "not handled"
     * @return {@code true} only when the field is named {@code json}
     */
    @Override
    public boolean evaluate(String field)
    {
        // Constant-first comparison so a null field name yields false rather than an NPE.
        return JSON_FIELD.equals(field);
    }

    /**
     * Parses {@code fieldValue} as JSON into a {@link City} and adds its three
     * attributes to {@code doc} through {@code helper}, using the {@code city},
     * {@code state} and {@code country} fields of the core's latest schema.
     *
     * @param core       the Solr core; its latest schema supplies the target fields
     * @param schema     the schema handed in by the caller (the latest schema of
     *                   {@code core} is used instead, as in the original code)
     * @param key        the document key, forwarded to the helper
     * @param doc        the Lucene document being built
     * @param fieldInfo  the schema field of the value being transformed (unused)
     * @param fieldValue the raw JSON text
     * @param boost      the boost, forwarded unchanged for each derived field
     * @param helper     the helper that actually adds fields to the document
     * @throws RuntimeException wrapping any failure while parsing or adding fields
     */
    @Override
    public void addFieldToDocument(SolrCore core,
                                   IndexSchema schema,
                                   String key,
                                   Document doc,
                                   SchemaField fieldInfo,
                                   String fieldValue,
                                   float boost,
                                   DocumentHelper helper)
            throws IOException
    {
        try
        {
            LOGGER.info("JsonFieldInputTransformer called");
            LOGGER.info("fieldValue: {}", fieldValue);
            City city = MAPPER.readValue(fieldValue, City.class);

            // Read the latest schema once so all three derived fields come from the same schema.
            IndexSchema latestSchema = core.getLatestSchema();
            SchemaField jsonCity = latestSchema.getFieldOrNull("city");
            SchemaField jsonState = latestSchema.getFieldOrNull("state");
            SchemaField jsonCountry = latestSchema.getFieldOrNull("country");

            helper.addFieldToDocument(core, latestSchema, key, doc, jsonCity, city.getCity(), boost);
            helper.addFieldToDocument(core, latestSchema, key, doc, jsonState, city.getState(), boost);
            helper.addFieldToDocument(core, latestSchema, key, doc, jsonCountry, city.getCountry(), boost);
        }
        catch (Exception ex)
        {
            // Pass the exception itself so the stack trace is logged, not just the message.
            LOGGER.error("Failed to index JSON field value for document key " + key, ex);
            throw new RuntimeException(ex);
        }
    }
}
package br.eribeiro.dse.search;
import com.datastax.bdp.search.solr.FieldOutputTransformer;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Field output transformer that parses a stored string value as JSON into a
 * {@link City} and reports each non-null attribute to the visitor as the
 * {@code city}, {@code state} and {@code country} fields.
 */
public class JsonFieldOutputTransformer extends FieldOutputTransformer
{
    private static final Logger LOGGER = LoggerFactory.getLogger(JsonFieldOutputTransformer.class);

    /**
     * Shared mapper, created once instead of once per call. Jackson documents
     * ObjectMapper as thread-safe as long as it is not reconfigured after first use;
     * it is never reconfigured here.
     */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Parses {@code value} as a JSON {@link City} and forwards its non-null
     * attributes to {@code visitor}.
     *
     * @param fieldInfo the field whose stored value is being read (used for logging)
     * @param value     the stored JSON text
     * @param visitor   receives one {@code stringField} call per non-null attribute
     * @param helper    resolves the {@code FieldInfo} of the target fields by name
     * @throws IOException if the JSON cannot be parsed or the visitor fails
     */
    @Override
    public void stringField(FieldInfo fieldInfo,
                            String value,
                            StoredFieldVisitor visitor,
                            DocumentHelper helper) throws IOException
    {
        LOGGER.info("name: {}, value: {}", fieldInfo.name, value);
        try
        {
            // Parse the String directly. Going through value.getBytes() would encode with
            // the platform default charset and corrupt non-ASCII text when it is not UTF-8.
            City city = MAPPER.readValue(value, City.class);

            FieldInfo cityInfo = helper.getFieldInfo("city");
            FieldInfo stateInfo = helper.getFieldInfo("state");
            FieldInfo countryInfo = helper.getFieldInfo("country");

            // Attributes missing from the JSON stay null and are simply not emitted.
            if (city.getCity() != null)
            {
                visitor.stringField(cityInfo, city.getCity());
            }
            if (city.getState() != null)
            {
                visitor.stringField(stateInfo, city.getState());
            }
            if (city.getCountry() != null)
            {
                visitor.stringField(countryInfo, city.getCountry());
            }
        }
        catch (IOException e)
        {
            // Pass the exception itself so the stack trace is logged, then rethrow unchanged.
            LOGGER.error(fieldInfo.name + " " + e.getMessage(), e);
            throw e;
        }
    }
}
$ curl http://localhost:8983/solr/resource/solr_fit.cities/solrconfig.xml --data-binary @solrconfig.xml -H 'Content-type:text/xml; charset=utf-8'
$ curl http://localhost:8983/solr/resource/solr_fit.cities/schema.xml --data-binary @schema.xml -H 'Content-type:text/xml; charset=utf-8'
$ curl -X POST "http://localhost:8983/solr/admin/cores?action=CREATE&name=solr_fit.cities"
cqlsh> insert into cities (id, json) values (1, '{"city":"Austin","state":"TX", "country":"USA"}');
cqlsh> insert into cities (id, json) values (2, '{"city":"San Francisco","state":"CA", "country":"USA"}');
cqlsh> insert into cities (id, json) values (3, '{"city":"Seattle","state":"WA", "country":"USA"}');
cqlsh:solr_fit> select * from cities;
id | json | solr_query
----+--------------------------------------------------------+------------
 1 | {"city":"Austin","state":"TX", "country":"USA"} | null
 2 | {"city":"San Francisco","state":"CA", "country":"USA"} | null
 3 | {"city":"Seattle","state":"WA", "country":"USA"} | null
(3 rows)
cqlsh:solr_fit> select * from cities where solr_query = '{"q":"country: USA"}';
id | json | solr_query
----+--------------------------------------------------------+------------
 1 | {"city":"Austin","state":"TX", "country":"USA"} | null
 2 | {"city":"San Francisco","state":"CA", "country":"USA"} | null
 3 | {"city":"Seattle","state":"WA", "country":"USA"} | null
(3 rows)
cqlsh:solr_fit> select * from cities where solr_query = '{"q":"city: Seat*"}';
id | json | solr_query
----+--------------------------------------------------------+------------
 3 | {"city":"Seattle","state":"WA", "country":"USA"} | null
(1 rows)
cqlsh:solr_fit>
cqlsh:solr_fit> select * from cities where solr_query = '{"q":"state: CA"}';
id | json | solr_query
----+--------------------------------------------------------+------------
2 | {"city":"San Francisco","state":"CA", "country":"USA"} | null
(1 rows)
Share

One-stop Data API for Production GenAI

Astra DB gives JavaScript developers a complete data API and out-of-the-box integrations that make it easier to build production RAG apps with high relevancy and low latency.