TechnologyDecember 13, 2023

Can I Ask You a Question? Building a Taylor Swift Chatbot with the Astra DB Vector Database

Carter Rabasa
Carter RabasaDeveloper relations
Can I Ask You a Question? Building a Taylor Swift Chatbot with the Astra DB Vector Database
KEY=value
import { AstraDB } from "@datastax/astra-db-ts";
import { PuppeteerWebBaseLoader } from "langchain/document_loaders/web/puppeteer";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import 'dotenv/config'
import { CohereClient } from "cohere-ai";
import { SimilarityMetric } from "../app/hooks/useConfiguration";
const { COHERE_API_KEY } = process.env;

const cohere = new CohereClient({
  token: COHERE_API_KEY,
});

const { ASTRA_DB_APPLICATION_TOKEN, ASTRA_DB_ID } = process.env;

const astraDb = new AstraDB(ASTRA_DB_APPLICATION_TOKEN, ASTRA_DB_ID);

const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1000,
  chunkOverlap: 200,
});
const createCollection = async (similarityMetric: SimilarityMetric = 'dot_product') => {
  const res = await astraDb.createCollection(ASTRA_DB_COLLECTION, {
    vector: {
      size: 384,
      function: similarityMetric,
    }
  });
};
const scrapePage = async (url: string) => {
  const loader = new PuppeteerWebBaseLoader(url, {
    launchOptions: {
      headless: "new"
    },
    gotoOptions: {
      waitUntil: "domcontentloaded",
    },
    evaluate: async (page, browser) => {
      const result = await page.evaluate(() => document.body.innerHTML);
      await browser.close();
      return result;
    },
  });
  return (await loader.scrape())?.replace(/<[^>]*>?/gm, '');
};
const loadSampleData = async (similarityMetric: SimilarityMetric = 'dot_product') => {
  const collection = await astraDb.collection(ASTRA_DB_COLLECTION);
  for await (const url of taylorData) {
    console.log(`Processing url ${url}`);
    const content = await scrapePage(url);
    const chunks = await splitter.splitText(content);
    let i = 0;
    for await (const chunk of chunks) {
      const embedded = await cohere.embed({
        texts: [chunk],
        model: "embed-english-light-v3.0",
        inputType: "search_document",
      });

      const res = await collection.insertOne({
        $vector: embedded[0]?.embedding,
        text: chunk
      });
      console.log(res)
      i++;
    }
  }
};
"use client";
import { useChat } from 'ai/react';
import { Message } from 'ai';
const { append, messages, input, handleInputChange, handleSubmit } = useChat();
useEffect(() => {
  scrollToBottom();
 }, [messages]);
const handleSend = (e) => {
  handleSubmit(e, { options: { body: { useRag, llm, similarityMetric}}});
}
import { CohereClient } from "cohere-ai";
import OpenAI from 'openai';
import { OpenAIStream, StreamingTextResponse } from "ai";
import { AstraDB } from "@datastax/astra-db-ts";

const {
  ASTRA_DB_APPLICATION_TOKEN,
  ASTRA_DB_ID,
  ASTRA_DB_COLLECTION,
  COHERE_API_KEY,
  OPENAI_API_KEY
} = process.env;

const cohere = new CohereClient({
  token: COHERE_API_KEY,
});

const openai = new OpenAI({
  apiKey: OPENAI_API_KEY,
  baseURL: "https://open-assistant-ai.astra.datastax.com/v1",
  defaultHeaders: {
    "astra-api-token": ASTRA_DB_APPLICATION_TOKEN,
  }
});


const astraDb = new AstraDB(ASTRA_DB_APPLICATION_TOKEN, ASTRA_DB_ID);
export async function POST(req: Request) {
  try {
    const { messages, useRag, llm, similarityMetric } = await req.json();
    const latestMessage = messages[messages?.length - 1]?.content;
    let docContext = '';
    if (useRag) {

      const embedded = await cohere.embed({
        texts: [latestMessage],
        model: "embed-english-light-v3.0",
        inputType: "search_query",
      });

      try {
        const collection = await astraDb.collection(ASTRA_DB_COLLECTION);
        const cursor = collection.find(null, {
          sort: {
            $vector: embedded?.embeddings[0],
          },
          limit: 10,
        });

        const documents = await cursor.toArray();

        const docsMap = documents?.map(doc => doc.text);
    
        docContext = JSON.stringify(docsMap);
      } catch (e) {
        console.log("Error querying db...");
        docContext = "";
      }
    }
    const Template = {
      role: 'system',
      content: `You are an AI assistant who is a Taylor Swift super fan. Use the below context to
 augment what you know about Taylor Swift and her music.
        The context will provide you with the most recent page data from her wikipedia, tour
 website and others.
        If the context doesn't include the information you need to answer based on your existing knowledge
 and don't mention the source of your information or what the context does or doesn't include.
        Format responses using markdown where applicable and don't return images.
        ----------------
        START CONTEXT
        ${ docContext }
        END CONTEXT
        ----------------
        QUESTION: $ {latestMessage }
        ----------------      
        `
    };

    const response = await openai.chat.completions.create(
      {
        model: llm ?? 'gpt-4',
        stream: true,
        messages: [Template, ...messages],
      }
    );
    const stream = OpenAIStream(response);

    return new StreamingTextResponse(stream);
  } catch (e) {
    throw e;
  } 
Discover more
Vector SearchDataStax Astra DB
Share

One-stop Data API for Production GenAI

Astra DB gives JavaScript developers a complete data API and out-of-the-box integrations that make it easier to build production RAG apps with high relevancy and low latency.