# Patch cached_download import for compatibility with newer huggingface-hub
import sys
import types

try:
    from huggingface_hub import cached_download
except ImportError:
    # Newer huggingface-hub removed cached_download; stub it so legacy
    # dependencies that still reference it can import without crashing.
    import huggingface_hub
    huggingface_hub.cached_download = lambda *args, **kwargs: None

import os
import io
import requests
import pdfplumber
import numpy as np
import faiss
import gradio as gr
from sklearn.preprocessing import normalize
from sentence_transformers import SentenceTransformer

# =========================================================
# ✅ Global Variables
# =========================================================
DOCS = []            # list of {"text": chunk} dicts for the currently processed PDF
FAISS_INDEX = None   # FAISS index built over the chunk embeddings
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# =========================================================
# ✅ Embedding Model Setup
# =========================================================
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# =========================================================
# ✅ Helper Functions
# =========================================================
def extract_text_from_pdf(file_bytes):
    """Extract plain text from a PDF given its raw bytes.

    Pages with no extractable text contribute an empty string rather
    than aborting extraction.
    """
    text = ""
    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text() or ""
            text += page_text + "\n"
    return text.strip()


def chunk_text(text, chunk_size=700):
    """Split *text* into whitespace-delimited chunks of ~chunk_size words."""
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


def embed_texts(texts):
    """Encode *texts* into L2-normalized float32 embeddings for FAISS."""
    embeddings = embedder.encode(texts)
    embeddings = normalize(embeddings)
    return np.array(embeddings).astype("float32")


def build_faiss_index(embeddings):
    """Build a flat L2 FAISS index over the given embedding matrix.

    Embeddings are normalized upstream, so L2 distance ranks the same
    as cosine similarity.
    """
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index


def search_docs(query, k=4):
    """Return up to *k* chunk texts most relevant to *query*."""
    global DOCS, FAISS_INDEX
    if not DOCS or FAISS_INDEX is None:
        return ["⚠️ Please upload and process a PDF first."]
    q_emb = embed_texts([query])
    # Never ask FAISS for more neighbours than there are documents.
    k = min(k, len(DOCS))
    D, I = FAISS_INDEX.search(q_emb, k)
    # FAISS pads missing results with -1; a negative index would otherwise
    # silently wrap around and return the LAST chunk via Python indexing.
    return [DOCS[i]["text"] for i in I[0] if i >= 0]

# =========================================================
# ✅ GROQ API Chat Function
# =========================================================
def call_groq_chat(system_prompt, user_prompt):
    """Send a single system+user exchange to the Groq chat API.

    Returns the assistant's reply text, or a human-readable error
    string (never raises) so the UI can display failures inline.
    """
    if not GROQ_API_KEY:
        return "⚠️ Missing GROQ_API_KEY. Please set it in Hugging Face Space secrets."

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
    body = {
        "model": "llama-3.1-8b-instant",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.3
    }

    try:
        resp = requests.post(url, headers=headers, json=body, timeout=30)
        # Map the common failure codes to friendly messages before raising.
        if resp.status_code == 401:
            return "❌ Unauthorized: Invalid or missing Groq API key."
        if resp.status_code == 404:
            return "❌ API endpoint or model not found."
        if resp.status_code == 429:
            return "⚠️ Too many requests. Please try again later."
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"❌ Error contacting Groq API: {str(e)}"

# =========================================================
# ✅ Process PDF
# =========================================================
def process_pdf(file_obj):
    """Generator: ingest an uploaded PDF and build the retrieval index.

    Yields progress messages for the status textbox. Accepts the three
    shapes Gradio may hand us: a {"data": bytes} dict, a file-like
    object, or a filesystem path string.
    """
    global DOCS, FAISS_INDEX
    if file_obj is None:
        yield "⚠️ Please upload a PDF first."
        return

    try:
        yield "📥 Reading PDF..."
        raw = None
        if isinstance(file_obj, dict) and "data" in file_obj:
            raw = file_obj["data"]
        elif hasattr(file_obj, "read"):
            raw = file_obj.read()
        elif isinstance(file_obj, str) and os.path.exists(file_obj):
            with open(file_obj, "rb") as f:
                raw = f.read()
        if raw is None:
            yield f"❌ Unsupported file type: {type(file_obj)}"
            return

        yield "✏️ Extracting text..."
        text = extract_text_from_pdf(raw)
        if not text.strip():
            yield "⚠️ No extractable text found."
            return

        yield "📄 Splitting text into chunks..."
        chunks = chunk_text(text)

        yield "🧠 Creating embeddings..."
        DOCS = [{"text": c} for c in chunks]
        embs = embed_texts([d["text"] for d in DOCS])

        yield "📦 Building FAISS index..."
        FAISS_INDEX = build_faiss_index(embs)
        yield f"✅ Successfully processed {len(chunks)} chunks."
    except Exception as e:
        yield f"❌ Error processing PDF: {str(e)}"

# =========================================================
# ✅ Answer Question
# =========================================================
def answer_question(query, history):
    """Answer *query* from the indexed PDF and return the updated history."""
    history = history or []  # Gradio may pass None before the first turn
    if not DOCS or FAISS_INDEX is None:
        # Keep the question in the user slot; the warning is the assistant reply
        # (the old code put the warning in the user slot and dropped the query).
        return history + [[query, "⚠️ Please upload and process a PDF first."]]

    related = search_docs(query)
    context = "\n\n".join(related)

    system_prompt = "You are a helpful assistant answering based on the provided document."
    user_prompt = f"Document context:\n{context}\n\nUser question: {query}"

    answer = call_groq_chat(system_prompt, user_prompt)
    history.append([query, answer])
    return history

# =========================================================
# ✅ UI Design (Modern Look)
# =========================================================
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray"),
    css="""
    body {background: linear-gradient(135deg, #e3f2fd, #bbdefb);}
    .gradio-container {max-width: 900px !important; margin: auto;}
    .chatbox {height: 400px; overflow: auto; background: white; border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); padding: 10px;}
    .status-box {background: #f0f8ff; border-radius: 8px; padding: 10px; color: #333;}
    h1 {text-align:center; font-size: 2em; color: #0d47a1;}
    """
) as app:
    # NOTE(review): the original heading markup was lost in a bad paste; the
    # CSS above styles an h1, so a title heading is restored here — confirm
    # the exact original wording.
    gr.Markdown(
        "# 📘 PDF Q&A Assistant\n"
        "Powered by Groq + FAISS + Gradio"
    )

    with gr.Row():
        pdf_file = gr.File(label="📂 Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("⚙️ Process PDF", variant="primary")

    status_box = gr.Textbox(label="📊 Status", elem_classes="status-box", interactive=False)
    process_btn.click(process_pdf, inputs=pdf_file, outputs=status_box)

    gr.Markdown("### 💬 Ask Questions About Your PDF")
    chatbot = gr.Chatbot(label="Chat", elem_classes="chatbox", bubble_full_width=False)
    query_box = gr.Textbox(label="Type your question here...")
    clear_btn = gr.Button("🧹 Clear Chat")

    query_box.submit(answer_question, [query_box, chatbot], chatbot)
    clear_btn.click(lambda: None, None, chatbot, queue=False)

# =========================================================
# ✅ Launch
# =========================================================
if __name__ == "__main__":
    app.launch()