# AI Revision Assistant — Gradio app (RAG over uploaded PDFs with FAISS + Mistral-7B).
# NOTE: the original paste carried a Hugging Face Spaces "Runtime error" banner here.
import os
import pickle

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Load or create FAISS index and associated data
# On-disk cache locations for the serialized FAISS index and the matching chunk list.
INDEX_FILE = "faiss_index.bin"
CHUNKS_FILE = "chunks.pkl"

# Sentence embedder used for both document chunks and incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Instruction-tuned generator; device=-1 pins the pipeline to CPU.
# NOTE(review): running Mistral-7B on CPU needs tens of GB of RAM and is very
# slow — a likely cause of the Spaces runtime error; confirm host resources.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", device=-1)
def load_pdf(file):
    """Extract all text from a PDF.

    Parameters
    ----------
    file : str
        Path to the PDF file on disk.

    Returns
    -------
    str
        The text of every page, joined with newlines.
    """
    # Context manager releases the underlying file handle even if extraction
    # raises — the original left the Document open (resource leak).
    with fitz.open(file) as doc:
        return "\n".join(page.get_text() for page in doc)
def split_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; empty input yields an empty list.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        pieces.append(" ".join(tokens[start:start + chunk_size]))
    return pieces
def create_or_load_index(chunks):
    """Build (and persist) a FAISS L2 index over *chunks*, or load the cache.

    Bug fixed: the original loaded the on-disk cache whenever it existed,
    silently discarding any freshly supplied chunks — so newly uploaded PDFs
    never reached the index. Now a non-empty *chunks* always rebuilds the
    index and overwrites the cache; the cache is used only when no chunks
    are supplied.

    Parameters
    ----------
    chunks : list[str]
        Text chunks to embed and index (may be empty to request the cache).

    Returns
    -------
    tuple
        (faiss index, list of chunks the index was built from).
    """
    if not chunks and os.path.exists(INDEX_FILE) and os.path.exists(CHUNKS_FILE):
        # No new data: fall back to the previously persisted index/chunks.
        with open(CHUNKS_FILE, "rb") as f:
            chunks = pickle.load(f)
        index = faiss.read_index(INDEX_FILE)
    else:
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        # FAISS requires contiguous float32 input.
        index.add(np.asarray(embeddings, dtype="float32"))
        faiss.write_index(index, INDEX_FILE)
        with open(CHUNKS_FILE, "wb") as f:
            pickle.dump(chunks, f)
    return index, chunks
def retrieve_context(query, index, chunks, top_k=3):
    """Return the *top_k* chunks most similar to *query*, joined by blank lines.

    Bug fixed: when fewer than *top_k* vectors are in the index, FAISS pads
    the result with index -1; the original then evaluated ``chunks[-1]`` and
    silently returned the last chunk. We clamp k to the corpus size and drop
    any out-of-range ids.

    Parameters
    ----------
    query : str
        Natural-language query to embed and search with.
    index : faiss.Index
        Index built over *chunks* (same order).
    chunks : list[str]
        Chunk texts aligned with the index rows.
    top_k : int
        Maximum number of chunks to return.

    Returns
    -------
    str
        Matching chunks joined with "\\n\\n"; empty string if no chunks.
    """
    if not chunks:
        return ""
    query_emb = model.encode([query])
    k = min(top_k, len(chunks))
    _, indices = index.search(np.asarray(query_emb, dtype="float32"), k)
    hits = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    return "\n\n".join(hits)
def answer_question(query, index, chunks):
    """Answer *query* with the LLM, grounded in retrieved chunk context."""
    retrieved = retrieve_context(query, index, chunks)
    prompt = f"Context:\n{retrieved}\n\nQuestion: {query}\nAnswer:"
    generation = llm(prompt, max_new_tokens=256, do_sample=False)
    full_text = generation[0]["generated_text"]
    # The pipeline echoes the prompt; keep only what follows the last "Answer:".
    return full_text.split("Answer:")[-1].strip()
def generate_quiz(index, chunks):
    """Produce three multiple-choice quiz questions from retrieved context."""
    retrieved = retrieve_context("generate quiz questions", index, chunks)
    prompt = f"Based on the following context, generate 3 quiz questions with multiple choice answers:\n\n{retrieved}\n\nQuestions:"
    generation = llm(prompt, max_new_tokens=512, do_sample=False)
    full_text = generation[0]["generated_text"]
    # Strip the echoed prompt: keep only text after the last "Questions:".
    return full_text.split("Questions:")[-1].strip()
# Gradio UI
with gr.Blocks() as demo:
    # Mutable app state captured by the callbacks below.
    # NOTE(review): this dict is module-level, so it is shared across all
    # concurrent user sessions; gr.State would isolate per-user — confirm
    # whether multi-user use is intended.
    state = {"index": None, "chunks": []}
    gr.Markdown("# ๐ AI Revision Assistant")  # NOTE(review): leading char looks like a garbled emoji — verify intended glyph
    with gr.Row():
        file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload your revision PDFs")
        status_output = gr.Textbox(label="Status", interactive=False)

    def process(files):
        # Extract and chunk every uploaded PDF, then build/load the FAISS
        # index and stash it in the shared state for the other callbacks.
        all_chunks = []
        for file in files:
            text = load_pdf(file.name)
            chunks = split_text(text)
            all_chunks.extend(chunks)
        index, chunks = create_or_load_index(all_chunks)
        state["index"] = index
        state["chunks"] = chunks
        return f"Processed {len(files)} files. You can now ask questions or generate quizzes."

    # Re-run processing whenever the uploaded file set changes.
    file_input.change(fn=process, inputs=file_input, outputs=status_output)

    question_input = gr.Textbox(label="Ask a revision question")
    answer_output = gr.Textbox(label="Answer", lines=5)
    # Guard clause: refuse to answer until an index has been built.
    question_input.submit(fn=lambda q: answer_question(q, state["index"], state["chunks"]) if state["index"] else "Please upload files first.", inputs=question_input, outputs=answer_output)

    quiz_btn = gr.Button("Quiz Me")
    quiz_output = gr.Textbox(label="Generated Quiz Questions", lines=6)
    # Same guard as above; no inputs — the lambda takes no arguments.
    quiz_btn.click(fn=lambda: generate_quiz(state["index"], state["chunks"]) if state["index"] else "Please upload files first.", outputs=quiz_output)

demo.launch(debug=True)