import os
import gradio as gr
from transformers import pipeline

# specific imports to fix "ModuleNotFoundError"
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline

# ------------------ LOAD EMBEDDINGS ------------------
# We use a standard efficient embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# ------------------ LOAD VECTOR STORE ------------------
# Check if vectorstore exists to avoid crashing
if not os.path.exists("vectorstore/faiss_index"):
    print("❌ ERROR: 'vectorstore/faiss_index' folder not found.")
    print("   Please run your ingest/indexing script first to create the database.")
    # Create a dummy empty DB just so the app doesn't crash immediately (optional)
    db = FAISS.from_texts(["Empty index"], embeddings)
else:
    db = FAISS.load_local(
        "vectorstore/faiss_index",
        embeddings,
        allow_dangerous_deserialization=True
    )

# ------------------ LOAD LLM ------------------
# Using phi-2. 
# WARNING: If the Space crashes with "OOM" (Out of Memory), change this to "google/flan-t5-small"
print("Loading Model...")
text_gen_pipeline = pipeline(
    "text-generation",
    model="microsoft/phi-2",
    max_new_tokens=256,   # Reduced slightly to save memory
    temperature=0.2,
    do_sample=True,
    truncation=True
)

llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

# ------------------ RAG CHAIN ------------------
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    chain_type="stuff",
)

# ------------------ CHAT FUNCTION ------------------
def chat(user_message, history):
    if not user_message.strip():
        return history

    try:
        # 'invoke' is the new standard, but 'run' is kept for compatibility with your code
        answer = qa_chain.run(user_message)
    except Exception as e:
        answer = f"Error generating answer: {str(e)}"

    history.append((user_message, answer))
    return history

# ------------------ GRADIO UI ------------------
with gr.Blocks(title="Document RAG Chatbot") as demo:
    gr.Markdown(
        """
        # 📚 Document RAG Chatbot  
        Answers are generated **strictly from the provided documents** using Retrieval-Augmented Generation.
        """
    )

    chatbot = gr.Chatbot(height=400)
    query = gr.Textbox(
        label="Ask a question",
        placeholder="Ask something from the documents..."
    )
    
    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear Chat")

    # Wire up the buttons
    query.submit(chat, [query, chatbot], chatbot)
    submit_btn.click(chat, [query, chatbot], chatbot)
    clear_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    demo.launch()