import os
import gradio as gr

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Imported Chroma from the dedicated langchain_chroma package
from langchain_chroma import Chroma

from langchain_huggingface import (
    HuggingFaceEmbeddings,
    HuggingFaceEndpoint
)

# =====================================================
# HUGGING FACE TOKEN
# =====================================================

# Hugging Face API token, read from the environment (None when HF_TOKEN is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# =====================================================
# EMBEDDING MODEL
# =====================================================

# Sentence-transformers model used to embed PDF chunks and questions.
# Configured to run on CPU and to emit normalized embedding vectors.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

# =====================================================
# LLM MODEL
# =====================================================

# Text-generation endpoint that produces the final answer from the prompt.
llm = HuggingFaceEndpoint(
    repo_id="NousResearch/Llama-2-7b-chat-hf",
    task="text-generation",
    huggingfacehub_api_token=HF_TOKEN,
    max_new_tokens=512,
    temperature=0.5,
)

# =====================================================
# VECTOR DATABASE
# =====================================================

# Module-wide handle to the current vector store; stays None until a PDF
# has been processed successfully.
db = None

# =====================================================
# PDF PROCESSING FUNCTION
# =====================================================
def process_pdf(pdf_file):
    """Index an uploaded PDF into a fresh in-memory Chroma collection.

    The PDF is loaded, split into overlapping chunks, embedded with
    ``embedding_model`` and stored in a new collection. On success the
    module-level ``db`` is replaced; on failure it is left untouched so a
    previously indexed PDF stays usable.

    NOTE: ``db`` is a single module-level handle, so every caller of this
    process shares the most recently processed PDF.

    Args:
        pdf_file: Value delivered by the file-upload input. Either a path
            string or an object exposing the path as ``.name``; ``None``
            when nothing was uploaded.

    Returns:
        A human-readable status message. Errors are reported as text
        rather than raised so the UI always has something to display.
    """
    global db

    if pdf_file is None:
        return "Please upload a PDF file."

    # Function-scope import keeps this block self-contained.
    import uuid

    try:
        # Accept both a plain path string and a file-like wrapper with `.name`.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        pages = PyPDFLoader(pdf_path).load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = splitter.split_documents(pages)

        # Nothing to embed (e.g. presumably an image-only PDF): report it
        # clearly instead of surfacing a vector-store error.
        if not chunks:
            return "No extractable text was found in the PDF."

        # A unique collection name per upload prevents the chunks of a new
        # PDF from being appended to the collection of a previous one when
        # the in-memory Chroma backend is reused within the same process.
        new_db = Chroma.from_documents(
            documents=chunks,
            embedding=embedding_model,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

    # Swap only after the new index is fully built.
    previous_db, db = db, new_db

    if previous_db is not None:
        try:
            # Free the superseded collection's memory.
            previous_db.delete_collection()
        except Exception:
            # Deliberate best-effort cleanup: the new index is already
            # active, so a failed delete must not turn success into an error.
            pass

    return "PDF processed and indexed in ChromaDB successfully!"

# =====================================================
# QUESTION ANSWERING FUNCTION
# =====================================================
def ask_question(question):
    """Answer a question using chunks retrieved from the indexed PDF.

    The three chunks most similar to the question are fetched from the
    module-level ``db``, joined into a context block, and passed to ``llm``
    inside a prompt that restricts the answer to that context.

    Args:
        question: The user's question text. ``None``, empty and
            whitespace-only values are rejected with a message.

    Returns:
        The model's answer, or a human-readable message when no PDF has
        been processed, the question is blank, nothing was retrieved, or an
        error occurred. Errors are returned as text rather than raised.
    """
    if db is None:
        return "Please upload and process a PDF first."

    # Guard against None as well as blank strings before calling .strip().
    if not question or not question.strip():
        return "Please enter a question."
    question = question.strip()

    try:
        # Retrieve the most relevant chunks from the vector store.
        matches = db.similarity_search(question, k=3)

        # Without any context the only valid answer is the fallback, so
        # skip the model call entirely.
        if not matches:
            return "I could not find the answer in the PDF."

        context = "\n\n".join(doc.page_content for doc in matches)

        # Built from flush-left pieces so that source-code indentation is
        # not sent to the model as part of the prompt.
        prompt = (
            "You are a helpful PDF question answering assistant.\n\n"
            "Answer the question ONLY from the provided context.\n\n"
            "If the answer is not in the context, say:\n"
            '"I could not find the answer in the PDF."\n\n'
            f"Context:\n{context}\n\n"
            f"Question:\n{question}\n\n"
            "Answer:"
        )

        response = llm.invoke(prompt)
        # Trim surrounding whitespace from plain-string completions.
        return response.strip() if isinstance(response, str) else response

    except Exception as e:
        return f"Error generating answer: {str(e)}"

# =====================================================
# GRADIO UI
# =====================================================

with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answering Bot (Powered by ChromaDB)")

    # Upload and indexing widgets (created first, in the same order as before).
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    process_btn = gr.Button("Process PDF")
    process_output = gr.Textbox(label="PDF Status")

    # Question and answer widgets.
    question_input = gr.Textbox(label="Ask a Question")
    ask_btn = gr.Button("Get Answer")
    answer_output = gr.Textbox(label="Answer", lines=10)

    # Event wiring: each button feeds one input to its handler and shows
    # the returned text in the matching output box.
    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=process_output)
    ask_btn.click(fn=ask_question, inputs=question_input, outputs=answer_output)

# =====================================================
# LAUNCH APP
# =====================================================
# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()