"""Streamlit app that answers questions over a CSV of FAQs.

The CSV is embedded into a local FAISS index (built on first run) and
queried through a LangChain ``RetrievalQA`` chain backed by a LLaMA model
served from Groq's OpenAI-compatible endpoint.
"""

import os

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# SECURITY: the API key must never live in source control. It is read from
# the GROQ_API_KEY environment variable (or the .env file loaded above).
# Any key that was previously committed to this file should be considered
# compromised and revoked in the Groq console.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error(
        "GROQ_API_KEY is not set. Add it to your environment or to a .env file."
    )
    st.stop()

# Create Groq LLaMA LLM
llm = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_api_key,
    model_name="llama3-8b-8192",
    temperature=0.1,
)

embedding_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
vectordb_file_path = "faiss_index"


def create_vector_db():
    """Build a FAISS index from ``codebasics_faqs.csv`` and save it locally.

    Each CSV row becomes one document whose source is taken from the
    ``prompt`` column. The index is written to ``vectordb_file_path``.
    """
    loader = CSVLoader(file_path='codebasics_faqs.csv', source_column="prompt")
    data = loader.load()
    vectordb = FAISS.from_documents(documents=data, embedding=embedding_model)
    vectordb.save_local(vectordb_file_path)


def get_qa_chain():
    """Load the saved FAISS index and return a ``RetrievalQA`` chain.

    The chain takes its input under the ``"query"`` key and is configured
    to return the source documents alongside the generated answer.
    """
    vectordb = FAISS.load_local(vectordb_file_path, embedding_model)
    # NOTE(review): score_threshold is passed straight to as_retriever();
    # confirm the installed LangChain version honors it in this form.
    retriever = vectordb.as_retriever(score_threshold=0.7)

    # Adjacent literals keep the prompt text identical to the original
    # while staying within a readable line length.
    prompt_template = (
        "Given the following context and a question, generate an answer based on "
        "this context only. In the answer try to provide as much text as possible "
        'from "response" section in the source document context without making '
        "much changes. If the answer is not found in the context, kindly state "
        "\"I don't know.\" Don't try to make up an answer. \n"
        "CONTEXT: {context} QUESTION: {question}"
    )
    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        input_key="query",
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain


# Streamlit UI
st.title("📊 Ask Questions About Your CSV")

# Build the vector index only once; later runs reuse the files on disk.
if not os.path.exists(os.path.join(vectordb_file_path, "index.faiss")):
    with st.spinner("Creating vector DB..."):
        create_vector_db()

user_input = st.text_input("Enter your question:")

if user_input:
    qa_chain = get_qa_chain()
    result = qa_chain({"query": user_input})

    st.write("### Answer:")
    st.write(result["result"])

    with st.expander("Show Source Document(s)"):
        for doc in result["source_documents"]:
            st.markdown(f"**Source:** {doc.metadata}")
            st.text(doc.page_content)