import os
import requests
import gradio as gr
from pypdf import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document

# ── Config ────────────────────────────────────────────────────
# Sentence-transformers model used to embed document chunks.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Hosted generation model (free, no special permissions needed).
LLM_MODEL = "google/flan-t5-large"
# Directory handed to Chroma as its persistence location.
CHROMA_DIR = "/tmp/chroma_db"
# Optional API token read from the environment; empty string when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Inference endpoint for LLM_MODEL.
HF_API_URL = "https://api-inference.huggingface.co/models/" + LLM_MODEL

# ── Embeddings ────────────────────────────────────────────────
# The embedding model is loaded once at import time and reused for every
# uploaded document; the two prints bracket the (potentially slow) load.
print("Loading embedding model...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)
print("Embeddings ready ✓")

# ── State ─────────────────────────────────────────────────────
# Module-level session state, rebound by process_pdf() and clear_db():
#   vectorstore — Chroma index built from the uploaded PDF, or None
#   current_doc — base filename of that PDF, or None
vectorstore = current_doc = None

# ── LLM call via HF Inference API (classic, no router) ────────
def _extract_generated_text(result) -> str:
    """Pull the generated text out of a decoded Inference API reply.

    Accepts either a list whose first item is a dict or a bare dict. Raises
    ``RuntimeError`` when the reply is an error report; falls back to
    ``str(result)`` for any other shape.
    """
    if isinstance(result, list):
        if not result:
            # Nothing was generated; indexing [0] here would raise IndexError.
            return ""
        first = result[0]
    else:
        first = result
    if isinstance(first, dict):
        if "error" in first:
            # Surface API-side failures instead of showing them as an answer.
            raise RuntimeError(f"Inference API error: {first['error']}")
        if "generated_text" in first or isinstance(result, list):
            # `or ""` covers an explicit null value in the reply.
            return str(first.get("generated_text") or "").strip()
    return str(result)


def call_llm(prompt: str) -> str:
    """Send *prompt* to the hosted model and return the generated text.

    Posts to ``HF_API_URL`` (with a bearer token when ``HF_TOKEN`` is set)
    and extracts ``generated_text`` from the JSON reply.

    Args:
        prompt: Full prompt text, sent as the request's ``inputs`` field.

    Returns:
        The generated text with surrounding whitespace removed, or an empty
        string when the reply contains no generation.

    Raises:
        requests.HTTPError: The API answered with an error status code.
        requests.RequestException: The request failed or exceeded the
            30-second timeout.
        RuntimeError: The API answered successfully but the body is an
            error report.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            # NOTE(review): temperature is normally ignored while do_sample
            # is False — confirm which of the two settings is intended.
            "temperature": 0.3,
            "do_sample": False,
        },
    }
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)
    resp.raise_for_status()
    return _extract_generated_text(resp.json())

# ── PDF processing ────────────────────────────────────────────
def process_pdf(pdf_file):
    """Extract, chunk, and index an uploaded PDF, replacing any previous index.

    Args:
        pdf_file: The upload from the file component — either an object that
            exposes the file path as ``.name`` or a plain path string — or
            ``None`` when nothing was uploaded.

    Returns:
        A Markdown status message. Failures are reported in the message
        rather than raised.
    """
    global vectorstore, current_doc
    if pdf_file is None:
        return "Please upload a PDF file."
    try:
        # Accept both a file-like wrapper (path in .name) and a bare path.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        reader = PdfReader(pdf_path)
        # `or ""` keeps a page without extractable text from breaking the
        # concatenation; join avoids repeated string re-allocation.
        text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
        if not text.strip():
            return "Could not extract text from this PDF."
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_text(text)
        docs = [Document(page_content=c) for c in chunks]

        # from_documents() adds to whatever the collection in CHROMA_DIR
        # already holds, so drop the old collection first; otherwise chunks
        # from earlier uploads would be retrieved for the new document.
        # State is reset beforehand so a failure below leaves "no document
        # loaded" instead of a handle to a deleted collection.
        vectorstore = None
        current_doc = None
        Chroma(
            persist_directory=CHROMA_DIR,
            embedding_function=embeddings,
        ).delete_collection()

        vectorstore = Chroma.from_documents(
            documents=docs,
            embedding=embeddings,
            persist_directory=CHROMA_DIR,
        )
        current_doc = os.path.basename(pdf_path)
        return f"✅ Processed **{current_doc}** — {len(reader.pages)} pages, {len(chunks)} chunks indexed. Ready!"
    except Exception as e:
        return f"❌ Error: {str(e)}"

# ── RAG query ─────────────────────────────────────────────────
def _with_exchange(history, question, reply):
    """Return a new history list with one user/assistant exchange appended."""
    return history + [
        {"role": "user",      "content": question},
        {"role": "assistant", "content": reply},
    ]


def answer_question(question, history):
    """Answer *question* from the indexed document and extend the chat history.

    Retrieves the three most relevant chunks from the current vector store,
    builds a prompt around them, and asks the LLM for an answer.

    Args:
        question: Text typed by the user; ``None`` or whitespace-only input
            is ignored.
        history: Chat history as a list of ``{"role", "content"}`` dicts;
            ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string that is written back to the question textbox. Errors
        are reported as an assistant message rather than raised.
    """
    history = history or []
    if not (question or "").strip():
        return history, ""
    if vectorstore is None:
        return _with_exchange(history, question, "⚠️ Please upload a PDF first."), ""
    try:
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        relevant_docs = retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in relevant_docs)

        prompt = (
            "Answer the question based on the context below.\n"
            "If the answer is not in the context, say "
            "\"I couldn't find that in the document.\"\n"
            "\n"
            f"Context: {context}\n"
            "\n"
            f"Question: {question}\n"
            "Answer:"
        )

        reply = call_llm(prompt)
        # flan-t5 returns only the answer, strip the prompt if echoed
        if "Answer:" in reply:
            reply = reply.split("Answer:")[-1].strip()
        if not reply:
            # Avoid rendering a blank assistant message.
            reply = "⚠️ The model returned an empty answer."
    except Exception as e:
        reply = f"❌ Error: {str(e)}"
    return _with_exchange(history, question, reply), ""

def clear_chat():
    """Return a fresh empty chat history and an empty question string."""
    fresh_history, blank_question = [], ""
    return fresh_history, blank_question

def clear_db():
    """Forget the indexed document and delete its chunks from the store.

    Returns:
        A ``(status_markdown, chat_history, question_text)`` tuple: a status
        message, an empty chat history, and an empty question string. If
        the stored collection cannot be deleted, the status message says so;
        the in-memory state is reset either way.
    """
    global vectorstore, current_doc
    store = vectorstore
    vectorstore = None
    current_doc = None
    if store is not None:
        try:
            # Dropping only the Python reference would leave the chunks in
            # the persisted collection, where a later from_documents() call
            # would add to them.
            store.delete_collection()
        except Exception as e:
            return f"⚠️ Document cleared, but its index could not be removed: {e}", [], ""
    return "🗑️ Document cleared.", [], ""

# ── UI ────────────────────────────────────────────────────────
# Page definition: a header, a two-column row (upload controls and help text
# in the first column, the chat in the second), the event wiring, and a footer.
with gr.Blocks(title="Document Q&A · RAG") as demo:
    gr.Markdown("""
    # 📄 Document Q&A — RAG Pipeline
    Upload a PDF and ask questions. Powered by **LangChain + ChromaDB + Flan-T5**.
    """)

    with gr.Row():
        # First column (scale=1): upload, processing status, and usage notes.
        with gr.Column(scale=1):
            pdf_input     = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn    = gr.Button("→ Process PDF", variant="primary")
            # Receives the status string returned by process_pdf / clear_db.
            upload_status = gr.Markdown("Upload a PDF to get started.")
            clear_btn     = gr.Button("🗑️ Clear document", variant="secondary")
            gr.Markdown("""
            ### How it works
            1. Upload any PDF document
            2. Text is chunked and embedded into ChromaDB
            3. Your question retrieves the most relevant chunks
            4. Flan-T5 generates an answer from those chunks

            ### Tips
            - Ask specific questions about the document
            - Works best with text-based PDFs
            - Try: *"What is the main topic?"*
            """)

        # Second column (scale=2): chat transcript and question entry.
        with gr.Column(scale=2):
            # NOTE(review): the handlers exchange history as role/content
            # dicts — confirm the installed Gradio version's Chatbot accepts
            # that format without an explicit `type` argument.
            chatbot  = gr.Chatbot(label="Chat", height=450)
            question = gr.Textbox(
                label="Ask a question about the document",
                placeholder="e.g. What are the main conclusions?",
                lines=2,
            )
            with gr.Row():
                ask_btn        = gr.Button("→ Ask", variant="primary")
                clear_chat_btn = gr.Button("Clear chat")

    # Event wiring. Indexing writes its status message to upload_status.
    upload_btn.click(fn=process_pdf, inputs=pdf_input, outputs=upload_status)
    # The Ask button and the textbox's submit event share one handler; its
    # second return value ("") is written back to the question box.
    ask_btn.click(fn=answer_question, inputs=[question, chatbot], outputs=[chatbot, question])
    question.submit(fn=answer_question, inputs=[question, chatbot], outputs=[chatbot, question])
    # clear_chat resets only the conversation; clear_db also resets the
    # document state and the status line.
    clear_chat_btn.click(fn=clear_chat, outputs=[chatbot, question])
    clear_btn.click(fn=clear_db, outputs=[upload_status, chatbot, question])

    gr.Markdown("---\nPart of the [AI Engineer Portfolio](https://github.com/amarshiv86)")

if __name__ == "__main__":
    # Serve on all interfaces at port 7860 when run as a script.
    host, port = "0.0.0.0", 7860
    demo.launch(server_name=host, server_port=port)