import streamlit as st
from langchain.vectorstores import FAISS
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment so secrets can be supplied without living in the source code.
load_dotenv()

# The Groq API key is a secret and must not be committed to source control.
# It is read from the GROQ_API_KEY environment variable (or a .env entry).
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your environment or .env file."
    )

# Create Groq LLaMA LLM through Groq's OpenAI-compatible endpoint.
# NOTE(review): `OpenAI` is the completion-style wrapper; confirm that the
# Groq endpoint accepts completion requests for this model, otherwise a
# chat-model wrapper is needed here.
llm = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_api_key,
    model_name="llama3-8b-8192",
    temperature=0.1,
)

# Embedding model used both when building the index and when loading it;
# the same model must be used for both so query and document vectors match.
embedding_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
# Directory the FAISS index is saved to and loaded from.
vectordb_file_path = "faiss_index"

def create_vector_db():
    """Build the FAISS index from the FAQ CSV and save it to disk.

    Loads ``codebasics_faqs.csv`` with the ``prompt`` column as each
    document's source, embeds the documents with ``embedding_model`` and
    writes the resulting index to ``vectordb_file_path``.
    """
    faq_documents = CSVLoader(
        file_path='codebasics_faqs.csv',
        source_column="prompt",
    ).load()

    faiss_index = FAISS.from_documents(
        documents=faq_documents,
        embedding=embedding_model,
    )
    faiss_index.save_local(vectordb_file_path)

def get_qa_chain():
    """Build a RetrievalQA chain over the FAISS index saved on disk.

    The index is loaded from ``vectordb_file_path`` with ``embedding_model``
    and wrapped in a retriever that keeps only documents whose relevance
    score is at least 0.7. Retrieved documents are stuffed into a prompt that
    tells the model to answer from the context only.

    Returns:
        A RetrievalQA chain that takes its question under the ``"query"`` key
        and is configured to return the source documents with the answer.
    """
    # NOTE(review): some LangChain releases require an explicit
    # `allow_dangerous_deserialization=True` here because the index metadata
    # is pickled; confirm against the installed version.
    vectordb = FAISS.load_local(vectordb_file_path, embedding_model)

    # The threshold only takes effect when passed through `search_kwargs`
    # together with the score-threshold search type; a bare
    # `score_threshold=` keyword on `as_retriever` is not applied.
    retriever = vectordb.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )

    prompt_template = """Given the following context and a question, generate an answer based on this context only.
    In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
    If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

    CONTEXT: {context}

    QUESTION: {question}"""

    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        input_key="query",
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain

# Streamlit UI
st.title("📊 Ask Questions About Your CSV")

# Build the index only when it has not been saved yet; later runs reuse the
# files already on disk.
if not os.path.exists(f"{vectordb_file_path}/index.faiss"):
    with st.spinner("Creating vector DB..."):
        create_vector_db()

# Strip surrounding whitespace so a blank or whitespace-only entry does not
# trigger an embedding lookup and an LLM call.
user_input = st.text_input("Enter your question:").strip()
if user_input:
    qa_chain = get_qa_chain()
    result = qa_chain({"query": user_input})
    st.write("### Answer:")
    st.write(result["result"])

    # Show the retrieved documents so the answer can be checked against them.
    with st.expander("Show Source Document(s)"):
        for doc in result["source_documents"]:
            st.markdown(f"**Source:** {doc.metadata}")
            st.text(doc.page_content)