# NOTE(review): these `langchain.*` paths are deprecated in recent LangChain
# releases (moved to `langchain_community` / `langchain_openai`) — confirm the
# pinned langchain version before migrating.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA


def build_qa(transcript_text, persist_dir="chroma_db", chunk_size=2000, chunk_overlap=0):
    """Build a RetrievalQA chain over a transcript.

    The transcript is split into fixed-size character chunks, embedded with
    OpenAI embeddings, stored in a persistent Chroma collection, and wrapped
    in a RetrievalQA chain backed by a GPT-4 chat model.

    Args:
        transcript_text: Full transcript as a single string. Must be non-empty.
        persist_dir: Directory where the Chroma index is persisted.
        chunk_size: Maximum characters per chunk (default 2000, matching the
            original hard-coded split).
        chunk_overlap: Characters shared between consecutive chunks. A small
            overlap reduces the chance that a relevant sentence is cut in half
            at a chunk boundary. Default 0 preserves the original behavior.

    Returns:
        A ready-to-query RetrievalQA chain.

    Raises:
        ValueError: If the transcript is empty or the chunk parameters are
            inconsistent.

    NOTE(review): requires OPENAI_API_KEY in the environment — both the
    embedding step and the chat model call the OpenAI API.
    """
    # Guard clauses: fail fast with a clear message instead of a cryptic
    # error deep inside Chroma.from_texts on an empty chunk list.
    if not transcript_text:
        raise ValueError("transcript_text is empty; nothing to index")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError("chunk_overlap must be >= 0 and smaller than chunk_size")

    # Simple character-based splitting; the optional overlap keeps context
    # that would otherwise be lost at hard chunk boundaries.
    step = chunk_size - chunk_overlap
    chunks = [transcript_text[i:i + chunk_size]
              for i in range(0, len(transcript_text), step)]

    embeddings = OpenAIEmbeddings()  # or another provider
    db = Chroma.from_texts(chunks, embeddings, persist_directory=persist_dir)
    retriever = db.as_retriever()

    # temperature=0 keeps answers deterministic and grounded in the
    # retrieved chunks rather than free generation.
    qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name="gpt-4", temperature=0),
        retriever=retriever,
    )
    return qa