"""AI Revision Assistant.

Indexes uploaded PDF study material with FAISS for semantic retrieval and
uses a local instruction-tuned LLM to answer revision questions and
generate quiz questions, exposed through a Gradio web UI.
"""

import os
import pickle

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# On-disk cache for the FAISS index and the text chunks it was built from.
INDEX_FILE = "faiss_index.bin"
CHUNKS_FILE = "chunks.pkl"

# Small CPU-friendly sentence embedder used for retrieval.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Instruction-tuned LLM for question answering / quiz generation.
# device=-1 forces CPU inference.
llm = pipeline(
    "text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.2",
    tokenizer="mistralai/Mistral-7B-Instruct-v0.2",
    device=-1,
)


def load_pdf(file):
    """Extract and return the concatenated text of every page in *file*.

    Parameters:
        file: path to a PDF file.

    Returns:
        All page text joined with newlines.
    """
    # Context manager closes the document handle (the original leaked it).
    with fitz.open(file) as doc:
        return "\n".join(page.get_text() for page in doc)


def split_text(text, chunk_size=500):
    """Split *text* into word chunks of at most *chunk_size* words each.

    Returns a list of strings; the last chunk may be shorter.
    """
    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]


def create_or_load_index(chunks):
    """Return a FAISS index and its chunks, loading from cache if present.

    If both cache files exist, the cached index and chunks are loaded and
    the *chunks* argument is ignored; otherwise the chunks are embedded,
    indexed with L2 distance, and persisted to disk.

    NOTE(review): when the cache exists, freshly uploaded documents are
    silently discarded in favor of the cached data. This may be intended
    persistence across restarts, but it means new uploads never reach the
    index until the cache files are deleted — confirm with the author.

    Returns:
        (index, chunks) tuple.
    """
    if os.path.exists(INDEX_FILE) and os.path.exists(CHUNKS_FILE):
        with open(CHUNKS_FILE, "rb") as f:
            chunks = pickle.load(f)
        index = faiss.read_index(INDEX_FILE)
    else:
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        faiss.write_index(index, INDEX_FILE)
        with open(CHUNKS_FILE, "wb") as f:
            pickle.dump(chunks, f)
    return index, chunks


def retrieve_context(query, index, chunks, top_k=3):
    """Return the *top_k* most relevant chunks for *query*, joined by blank lines.

    FAISS pads results with index -1 when the index holds fewer than
    *top_k* vectors, so out-of-range ids are filtered out to avoid
    accidentally returning chunks[-1].
    """
    query_emb = model.encode([query])
    _, indices = index.search(np.array(query_emb), top_k)
    hits = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    return "\n\n".join(hits)


def answer_question(query, index, chunks):
    """Answer *query* using context retrieved from the indexed chunks.

    Builds a context-grounded prompt, runs greedy decoding, and returns
    only the text generated after the final "Answer:" marker.
    """
    context = retrieve_context(query, index, chunks)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    response = llm(prompt, max_new_tokens=256, do_sample=False)
    return response[0]["generated_text"].split("Answer:")[-1].strip()


def generate_quiz(index, chunks):
    """Generate multiple-choice quiz questions from the indexed material.

    Retrieves context with a generic quiz-oriented query, prompts the LLM,
    and returns only the text generated after the final "Questions:" marker.
    """
    context = retrieve_context("generate quiz questions", index, chunks)
    prompt = (
        "Based on the following context, generate 3 quiz questions "
        f"with multiple choice answers:\n\n{context}\n\nQuestions:"
    )
    response = llm(prompt, max_new_tokens=512, do_sample=False)
    return response[0]["generated_text"].split("Questions:")[-1].strip()


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Module-level mutable state shared by the event handlers below.
    state = {"index": None, "chunks": []}

    gr.Markdown("# 📘 AI Revision Assistant")

    with gr.Row():
        file_input = gr.File(
            file_types=[".pdf"],
            file_count="multiple",
            label="Upload your revision PDFs",
        )
        status_output = gr.Textbox(label="Status", interactive=False)

    def process(files):
        """Extract, chunk, and index the uploaded PDFs; report status."""
        # Gradio fires `change` with None when the file list is cleared.
        if not files:
            return "No files uploaded."
        all_chunks = []
        for file in files:
            text = load_pdf(file.name)
            all_chunks.extend(split_text(text))
        index, chunks = create_or_load_index(all_chunks)
        state["index"] = index
        state["chunks"] = chunks
        return (
            f"Processed {len(files)} files. "
            "You can now ask questions or generate quizzes."
        )

    file_input.change(fn=process, inputs=file_input, outputs=status_output)

    question_input = gr.Textbox(label="Ask a revision question")
    answer_output = gr.Textbox(label="Answer", lines=5)
    question_input.submit(
        fn=lambda q: (
            answer_question(q, state["index"], state["chunks"])
            if state["index"]
            else "Please upload files first."
        ),
        inputs=question_input,
        outputs=answer_output,
    )

    quiz_btn = gr.Button("Quiz Me")
    quiz_output = gr.Textbox(label="Generated Quiz Questions", lines=6)
    quiz_btn.click(
        fn=lambda: (
            generate_quiz(state["index"], state["chunks"])
            if state["index"]
            else "Please upload files first."
        ),
        outputs=quiz_output,
    )

demo.launch(debug=True)