Technology · February 5, 2024

How to Build an Image Search App with Astra DB and CLIP Models

Aaron Ploetz
Aaron Ploetz, Developer Relations, DataStax
How to Build an Image Search App with Astra DB and CLIP Models
python --version
Python 3.9.6
pip install -r requirements.txt
carImageSearch/
      templates/
      static/
            css/
            images/
            input_images/
            web_images/
export ASTRA_DB_APPLICATION_TOKEN=AstraCS:abcdefNOTREALghijk:lmnopqrsBLAHBLAHBLAHtuvwxyz
export ASTRA_DB_API_ENDPOINT=https://b9aff773-also-not-real.apps.astra.datastax.com
# Astra DB credentials come from the environment; os.environ.get() yields
# None for a variable that is not set.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_API_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT")

# Client handle for the database identified by the endpoint above.
db = AstraDB(
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
)

# Vector collection that will hold one document per image. The dimension
# must equal the length of every "$vector" stored in it; similarity is
# measured with the cosine metric.
col = db.create_collection("car_images", dimension=512, metric="cosine")
# CLIP model used to turn each image into an embedding vector.
model = SentenceTransformer('clip-ViT-B-32')
IMAGE_DIR = "static/images/"

# Embed every entry found in IMAGE_DIR and store one document per image.
# The enumeration index (as a string) becomes the document "_id" and the
# file name is kept in "text" so the image can be located again later.
for image_id, image_name in enumerate(os.listdir(IMAGE_DIR)):
    # The context manager closes the image file as soon as it is encoded.
    with Image.open(IMAGE_DIR + image_name) as image:
        img_emb = model.encode(image)

    # Build the document as a dict rather than concatenating JSON text, so
    # file names containing quotes or backslashes cannot corrupt it.
    doc = {
        "_id": str(image_id),
        "text": image_name,
        "$vector": img_emb.tolist(),
    }
    col.insert_one(doc)
python carImageLoader.py
async def get_car_by_text(search_text):
    """Return the path of the image that best matches *search_text*.

    The text is embedded with the module-level CLIP model (created on first
    use) and the embedding is used for a single-result vector search.

    Args:
        search_text: Text to embed and search with.

    Returns:
        ``IMAGE_DIR`` joined with the ``"text"`` field of the first result.

    Raises:
        IndexError: If the vector search returns no results.
    """
    global model

    # Create the model lazily so it is only loaded when a search is made.
    if model is None:
        model = SentenceTransformer('clip-ViT-B-32')

    # Embed the query and ask for the single nearest document.
    query_embedding = model.encode(search_text)
    matches = await get_by_vector(COLLECTION_NAME, query_embedding, 1)

    # Only one match was requested, so the first entry is the answer.
    best_match = matches[0]
    return IMAGE_DIR + best_match["text"]
async def get_by_vector(table_name, vector_embedding, limit=1):
    """Run a vector similarity search against an Astra DB collection.

    Args:
        table_name: Name of the collection to open on the first call.
        vector_embedding: Query embedding; must provide ``tolist()``.
        limit: Maximum number of documents to return.

    Returns:
        The value returned by ``collection.vector_find`` for this query,
        restricted to the ``"text"`` and ``"$vector"`` fields.
    """
    global db
    global collection

    # Connect lazily and reuse the handle on subsequent calls.
    # NOTE(review): the handle is cached from the first call, so a later
    # call with a different table_name still queries the first collection.
    if collection is None:
        db = AstraDB(
            token=ASTRA_DB_APPLICATION_TOKEN,
            api_endpoint=ASTRA_DB_API_ENDPOINT,
        )
        collection = db.collection(table_name)

    # Pass the projection as an ordered list of field names instead of a
    # set literal, which is unordered and not JSON-serialisable.
    return collection.vector_find(
        vector_embedding.tolist(),
        limit=limit,
        fields=["text", "$vector"],
    )
flask run -p 8000
 * Serving Flask app 'carSearch'
 * Debug mode: off
WARNING: This is a development server. Do not use it in a production
deployment. Use a production WSGI server instead.
 * Running on http://127.0.0.1:8000
Press CTRL+C to quit
Discover more
Vector Search · GenAI
Share

One-stop Data API for Production GenAI

Astra DB gives JavaScript developers a complete data API and out-of-the-box integrations that make it easier to build production RAG apps with high relevancy and low latency.