"""AI Revision Assistant.

Indexes uploaded PDF study material with FAISS for semantic retrieval and
uses a local instruction-tuned LLM to answer revision questions and
generate quiz questions, exposed through a Gradio web UI.
"""

import os
import pickle

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# On-disk cache for the FAISS index and the text chunks it was built from.
INDEX_FILE = "faiss_index.bin"
CHUNKS_FILE = "chunks.pkl"

# Small CPU-friendly sentence embedder used for retrieval.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Instruction-tuned LLM for question answering / quiz generation.
# device=-1 forces CPU inference.
llm = pipeline(
    "text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.2",
    tokenizer="mistralai/Mistral-7B-Instruct-v0.2",
    device=-1,
)


def load_pdf(file):
    """Extract and return the concatenated text of every page in *file*.

    Parameters:
        file: path to a PDF file.

    Returns:
        All page text joined with newlines.
    """
    # Context manager closes the document handle (the original leaked it).
    with fitz.open(file) as doc:
        return "\n".join(page.get_text() for page in doc)


def split_text(text, chunk_size=500):
    """Split *text* into word chunks of at most *chunk_size* words each.

    Returns a list of strings; the last chunk may be shorter.
    """
    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]


def create_or_load_index(chunks):
    """Return a FAISS index and its chunks, loading from cache if present.

    If both cache files exist, the cached index and chunks are loaded and
    the *chunks* argument is ignored; otherwise the chunks are embedded,
    indexed with L2 distance, and persisted to disk.

    NOTE(review): when the cache exists, freshly uploaded documents are
    silently discarded in favor of the cached data. This may be intended
    persistence across restarts, but it means new uploads never reach the
    index until the cache files are deleted — confirm with the author.

    Returns:
        (index, chunks) tuple.
    """
    if os.path.exists(INDEX_FILE) and os.path.exists(CHUNKS_FILE):
        with open(CHUNKS_FILE, "rb") as f:
            chunks = pickle.load(f)
        index = faiss.read_index(INDEX_FILE)
    else:
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        faiss.write_index(index, INDEX_FILE)
        with open(CHUNKS_FILE, "wb") as f:
            pickle.dump(chunks, f)
    return index, chunks


def retrieve_context(query, index, chunks, top_k=3):
    """Return the *top_k* most relevant chunks for *query*, joined by blank lines.

    FAISS pads results with index -1 when the index holds fewer than
    *top_k* vectors, so out-of-range ids are filtered out to avoid
    accidentally returning chunks[-1].
    """
    query_emb = model.encode([query])
    _, indices = index.search(np.array(query_emb), top_k)
    hits = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    return "\n\n".join(hits)


def answer_question(query, index, chunks):
    """Answer *query* using context retrieved from the indexed chunks.

    Builds a context-grounded prompt, runs greedy decoding, and returns
    only the text generated after the final "Answer:" marker.
    """
    context = retrieve_context(query, index, chunks)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    response = llm(prompt, max_new_tokens=256, do_sample=False)
    return response[0]["generated_text"].split("Answer:")[-1].strip()


def generate_quiz(index, chunks):
    """Generate multiple-choice quiz questions from the indexed material.

    Retrieves context with a generic quiz-oriented query, prompts the LLM,
    and returns only the text generated after the final "Questions:" marker.
    """
    context = retrieve_context("generate quiz questions", index, chunks)
    prompt = (
        "Based on the following context, generate 3 quiz questions "
        f"with multiple choice answers:\n\n{context}\n\nQuestions:"
    )
    response = llm(prompt, max_new_tokens=512, do_sample=False)
    return response[0]["generated_text"].split("Questions:")[-1].strip()


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Module-level mutable state shared by the event handlers below.
    state = {"index": None, "chunks": []}

    gr.Markdown("# 📘 AI Revision Assistant")

    with gr.Row():
        file_input = gr.File(
            file_types=[".pdf"],
            file_count="multiple",
            label="Upload your revision PDFs",
        )
        status_output = gr.Textbox(label="Status", interactive=False)

    def process(files):
        """Extract, chunk, and index the uploaded PDFs; report status."""
        # Gradio fires `change` with None when the file list is cleared.
        if not files:
            return "No files uploaded."
        all_chunks = []
        for file in files:
            text = load_pdf(file.name)
            all_chunks.extend(split_text(text))
        index, chunks = create_or_load_index(all_chunks)
        state["index"] = index
        state["chunks"] = chunks
        return (
            f"Processed {len(files)} files. "
            "You can now ask questions or generate quizzes."
        )

    file_input.change(fn=process, inputs=file_input, outputs=status_output)

    question_input = gr.Textbox(label="Ask a revision question")
    answer_output = gr.Textbox(label="Answer", lines=5)
    question_input.submit(
        fn=lambda q: (
            answer_question(q, state["index"], state["chunks"])
            if state["index"]
            else "Please upload files first."
        ),
        inputs=question_input,
        outputs=answer_output,
    )

    quiz_btn = gr.Button("Quiz Me")
    quiz_output = gr.Textbox(label="Generated Quiz Questions", lines=6)
    quiz_btn.click(
        fn=lambda: (
            generate_quiz(state["index"], state["chunks"])
            if state["index"]
            else "Please upload files first."
        ),
        outputs=quiz_output,
    )

demo.launch(debug=True)