File size: 696 Bytes
ba2e3fe
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

def build_qa(
    transcript_text: str,
    persist_dir: str = "chroma_db",
    *,
    chunk_size: int = 2000,
) -> "RetrievalQA":
    """Build a retrieval question-answering chain over a transcript.

    The transcript is cut into fixed-size character chunks, the chunks are
    embedded with ``OpenAIEmbeddings`` and stored in a Chroma vector store,
    and a ``RetrievalQA`` chain backed by ``ChatOpenAI`` (``gpt-4``,
    temperature 0) is created on top of that store's retriever.

    Args:
        transcript_text: Full transcript text to index. Must be non-empty.
        persist_dir: Directory passed to Chroma as ``persist_directory``.
        chunk_size: Maximum number of characters per chunk. Must be
            positive. Defaults to 2000.

    Returns:
        The ``RetrievalQA`` chain built from the indexed chunks.

    Raises:
        ValueError: If ``transcript_text`` is empty or ``chunk_size`` is not
            a positive integer.
    """
    # Fail fast: an empty transcript would produce zero chunks and only
    # surface as an opaque error inside the embedding / vector-store call.
    if not transcript_text:
        raise ValueError("transcript_text must be a non-empty string")
    # A non-positive step would either raise inside range() or silently
    # yield no chunks at all.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Simple split by character count; chunks may cut through words or
    # sentences because no boundary detection is attempted.
    chunks = [
        transcript_text[start:start + chunk_size]
        for start in range(0, len(transcript_text), chunk_size)
    ]

    embeddings = OpenAIEmbeddings()  # or another provider
    # NOTE(review): presumably re-running with the same persist_dir adds the
    # chunks to an existing collection rather than replacing it -- confirm
    # against the installed Chroma/LangChain version.
    db = Chroma.from_texts(chunks, embeddings, persist_directory=persist_dir)

    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name="gpt-4", temperature=0),
        retriever=db.as_retriever(),
    )