import gradio as gr
import os
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# Load the Groq API key from the environment and fail fast at import time if
# it is missing, so a misconfigured deployment stops here instead of failing
# on the first question.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("Please set the GROQ_API_KEY as a secret/environment variable in the Hugging Face Space.")

# Single Groq client shared by all chat-completion requests in this module.
groq_client = Groq(api_key=GROQ_API_KEY)

# Sentence-embedding model, loaded once at import time and reused for both
# document chunks and user questions.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Prompt templates.
# SYSTEM_TEMPLATE is sent verbatim as the system message.
SYSTEM_TEMPLATE = (
    "You are a helpful academic supervisor who helps a student understand their paper and answer questions based only on the document."
)
# USER_TEMPLATE is filled with str.format: {context} receives the retrieved
# document chunks and {question} the user's question.
USER_TEMPLATE = (
    "Based on the provided document, answer the following question:\n\n{context}\n\nQuestion: {question}\n\nAnswer:"
)

# Module-level state shared by the upload and question handlers.
faiss_index = None  # vector index of the current document; None until a PDF has been indexed
stored_chunks = []  # chunk texts; position i holds the text for vector i of the index
dimension = 384  # embedding size (presumably the output width of the model above; confirm if the model changes)

# PDF to text
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of all pages of a PDF.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        The text of every page that yielded any, each followed by a newline,
        or ``None`` when the result contains nothing but whitespace.
    """
    document = PdfReader(pdf_path)
    # Lazily pull the text of each page; pages without text are skipped below.
    per_page = (page.extract_text() for page in document.pages)
    combined = "".join(page_text + "\n" for page_text in per_page if page_text)
    if not combined.strip():
        return None
    return combined

# Chunk text
def chunk_text(text, chunk_size=500):
    """Split text into chunks of whole sentences of roughly ``chunk_size`` chars.

    Sentences are delimited by ``". "``. Consecutive sentences are packed
    into one chunk, joined by a single space, for as long as the chunk stays
    within ``chunk_size`` characters.

    Args:
        text: The text to split. ``None``, empty or whitespace-only input
            yields an empty list.
        chunk_size: Maximum chunk length in characters. A single sentence
            longer than this is kept whole in a chunk of its own rather than
            being cut mid-sentence.

    Returns:
        A list of non-empty chunk strings in document order.
    """
    if not text or not text.strip():
        return []

    pieces = text.split(". ")
    last = len(pieces) - 1
    sentences = []
    for position, piece in enumerate(pieces):
        piece = piece.strip()
        if not piece:
            continue
        # split() consumed the period of every piece except the final one,
        # which still carries whatever punctuation the text ended with.
        sentences.append((piece + ".") if position < last else piece)

    chunks, current = [], ""
    for sentence in sentences:
        candidate = f"{current} {sentence}" if current else sentence
        # Only flush when there is something to flush; otherwise an oversized
        # first sentence would emit an empty chunk.
        if current and len(candidate) > chunk_size:
            chunks.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks

# Upload & index
def upload_and_index(pdf):
    """Extract, chunk, embed and index an uploaded PDF for later questions.

    On success the module-level ``faiss_index`` and ``stored_chunks`` are
    replaced together; on any failure the previously indexed document (if
    any) is left untouched.

    Args:
        pdf: The value Gradio passes for the file input: ``None`` when
            nothing was uploaded, otherwise either a file path or an object
            exposing the path as ``.name``, depending on the Gradio version.

    Returns:
        A status message for the UI, starting with "✅" on success and "❌"
        on failure.
    """
    global faiss_index, stored_chunks
    if pdf is None:
        return "❌ No PDF uploaded. Please upload a file."

    try:
        # Accept both a plain path and a temp-file wrapper with ``.name``.
        pdf_path = pdf if isinstance(pdf, (str, os.PathLike)) else pdf.name
        text = extract_text_from_pdf(pdf_path)

        if text is None:
            return "❌ Failed to extract text. PDF may be scanned or empty."

        # Drop blank chunks so no empty string is embedded and indexed.
        chunks = [chunk for chunk in chunk_text(text) if chunk.strip()]
        if not chunks:
            return "❌ Failed to split text into chunks."

        vectors = np.array(embedder.encode(chunks), dtype=np.float32)

        # Build the new index completely before publishing it, and size it
        # from the actual embeddings so it always matches the loaded model.
        new_index = faiss.IndexFlatL2(vectors.shape[1])
        new_index.add(vectors)

        faiss_index = new_index
        stored_chunks[:] = chunks

        return "✅ Document indexed successfully! You can now ask questions."
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        return f"❌ Error during upload/indexing: {e}"

# Answer question
def answer_question(question):
    """Answer a question using the most similar chunks of the indexed PDF.

    The question is embedded, up to three nearest chunks are retrieved from
    the FAISS index, and the result is sent to the Groq chat model together
    with the prompt templates.

    Args:
        question: The user's question text.

    Returns:
        The model's answer, or a message starting with "❌" when the question
        is empty, no document is indexed, or the request fails.
    """
    if not question or not question.strip():
        return "❌ Please enter a question."
    if faiss_index is None or faiss_index.ntotal == 0:
        return "❌ Please upload & index a document first."

    try:
        q_embedding = np.array(embedder.encode([question]), dtype=np.float32)
        # Never ask for more neighbours than the index holds.
        top_k = min(3, faiss_index.ntotal)
        _, indices = faiss_index.search(q_embedding, k=top_k)

        # FAISS marks missing results with -1; without the lower bound that
        # value would silently select the last chunk.
        context = "\n\n".join(
            stored_chunks[idx] for idx in indices[0] if 0 <= idx < len(stored_chunks)
        )

        if not context:
            return "❌ Could not find relevant context."

        prompt = USER_TEMPLATE.format(context=context, question=question)

        # NOTE(review): confirm this model id is still served by Groq; hosted
        # model ids get retired over time.
        chat_completion = groq_client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": SYSTEM_TEMPLATE},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            max_tokens=512,
        )

        answer = chat_completion.choices[0].message.content
        if not answer:
            return "❌ The model returned an empty answer."
        return answer.strip()
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        return f"❌ Error while answering: {e}"

# Gradio Interface
# One row with two columns: the left column uploads and indexes a PDF, the
# right column asks a question and shows the answer.
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Academic Supervisor — Upload Paper & Ask Questions")
    with gr.Row():
        with gr.Column():
            # Upload side: file picker limited to .pdf, trigger button, status text.
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Upload & Index")
            upload_output = gr.Textbox(label="Status")
        with gr.Column():
            # Question side: question text, trigger button, answer text.
            question_input = gr.Textbox(label="Your Question")
            ask_btn = gr.Button("Get Answer")
            answer_output = gr.Textbox(label="Answer")

    # Wire each button to its handler; the handler's returned string is shown
    # in the corresponding output textbox.
    upload_btn.click(upload_and_index, inputs=pdf_input, outputs=upload_output)
    ask_btn.click(answer_question, inputs=question_input, outputs=answer_output)

# Start the app. There is no __main__ guard, so this runs whenever the module
# is executed or imported.
demo.launch()