Vedio-Summerizer / utils /qa_module.py
Omkar1872's picture
Upload 8 files
ba2e3fe verified
raw
history blame contribute delete
696 Bytes
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
def build_qa(transcript_text: str, persist_dir: str = "chroma_db", chunk_size: int = 2000):
    """Build a retrieval question-answering chain over a transcript.

    The transcript is cut into fixed-width character chunks, the chunks are
    embedded with ``OpenAIEmbeddings`` and stored in a Chroma vector store,
    and a ``RetrievalQA`` chain backed by ``ChatOpenAI`` (``gpt-4``,
    temperature 0) is built on top of that store's retriever.

    Args:
        transcript_text: Full transcript to index. Must contain at least one
            non-whitespace character.
        persist_dir: Directory passed to Chroma as ``persist_directory``.
        chunk_size: Number of characters per chunk. Must be positive.
            Defaults to 2000.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or the transcript is
            empty or whitespace-only.
    """
    # Validate before any embedding/LLM client is created, so bad input fails
    # fast with a clear message instead of an opaque downstream error.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not transcript_text or not transcript_text.strip():
        raise ValueError("transcript_text must be a non-empty string")

    # Split transcript into chunks (simple split by character count; the cut
    # points ignore word and sentence boundaries).
    chunks = [
        transcript_text[start:start + chunk_size]
        for start in range(0, len(transcript_text), chunk_size)
    ]

    embeddings = OpenAIEmbeddings()  # or another provider
    db = Chroma.from_texts(chunks, embeddings, persist_directory=persist_dir)
    retriever = db.as_retriever()

    qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name="gpt-4", temperature=0),
        retriever=retriever,
    )
    return qa