# RevisionHelp / app.py
# (Hugging Face Space upload by traleela — "Upload 2 files", commit 473affa verified)
import os
import fitz # PyMuPDF
import faiss
import numpy as np
import pickle
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import gradio as gr
# Load or create FAISS index and associated data
INDEX_FILE = "faiss_index.bin"  # on-disk FAISS index cache
CHUNKS_FILE = "chunks.pkl"  # pickled list of text chunks aligned row-for-row with the index
# Sentence-embedding model used to vectorize chunks and queries for retrieval.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Instruction-tuned LLM used for answering and quiz generation; device=-1 runs on CPU.
# NOTE(review): a 7B model is loaded eagerly at import time — this is very heavy on
# CPU-only hardware; confirm the deployment target can hold it in memory.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", device=-1)
def load_pdf(file):
    """Extract all text from a PDF at path *file*, pages joined by newlines.

    Fix: the original never closed the PyMuPDF document, leaking the file
    handle; the context manager guarantees it is closed.
    """
    with fitz.open(file) as doc:
        return "\n".join(page.get_text() for page in doc)
def split_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words."""
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        pieces.append(" ".join(tokens[start:start + chunk_size]))
    return pieces
def create_or_load_index(chunks):
    """Return (index, chunks): a FAISS L2 index over *chunks*, using the disk cache when valid.

    Fix: the original returned the cached index/chunks unconditionally whenever
    the cache files existed, so any newly uploaded documents were silently
    ignored after the first run. The cache is now reused only when it matches
    the requested chunks (or when no chunks were supplied); otherwise the index
    is rebuilt and the cache rewritten.
    """
    if os.path.exists(INDEX_FILE) and os.path.exists(CHUNKS_FILE):
        # NOTE(review): pickle.load is unsafe on untrusted files; acceptable
        # here only because the cache is written exclusively by this app.
        with open(CHUNKS_FILE, "rb") as f:
            cached_chunks = pickle.load(f)
        # Reuse the cache only if it corresponds to the corpus being requested.
        if not chunks or cached_chunks == chunks:
            return faiss.read_index(INDEX_FILE), cached_chunks
    # Build a fresh flat L2 index from the chunk embeddings and persist it.
    embeddings = model.encode(chunks)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.array(embeddings))
    faiss.write_index(index, INDEX_FILE)
    with open(CHUNKS_FILE, "wb") as f:
        pickle.dump(chunks, f)
    return index, chunks
def retrieve_context(query, index, chunks, top_k=3):
    """Return the *top_k* chunks most similar to *query*, joined by blank lines.

    Fix: FAISS pads search results with -1 when the index holds fewer than
    top_k vectors; the original then evaluated chunks[-1], silently duplicating
    the last chunk. Invalid ids are now filtered out.
    """
    query_emb = model.encode([query])
    _, neighbor_ids = index.search(np.array(query_emb), top_k)
    valid_ids = [i for i in neighbor_ids[0] if 0 <= i < len(chunks)]
    return "\n\n".join(chunks[i] for i in valid_ids)
def answer_question(query, index, chunks):
    """Answer a revision question from the most relevant indexed chunks."""
    retrieved = retrieve_context(query, index, chunks)
    prompt = f"Context:\n{retrieved}\n\nQuestion: {query}\nAnswer:"
    generations = llm(prompt, max_new_tokens=256, do_sample=False)
    full_text = generations[0]["generated_text"]
    # Keep only the text after the final "Answer:" marker (the model's reply).
    _, _, reply = full_text.rpartition("Answer:")
    return reply.strip()
def generate_quiz(index, chunks):
    """Generate three multiple-choice quiz questions from the indexed material."""
    material = retrieve_context("generate quiz questions", index, chunks)
    quiz_prompt = f"Based on the following context, generate 3 quiz questions with multiple choice answers:\n\n{material}\n\nQuestions:"
    output = llm(quiz_prompt, max_new_tokens=512, do_sample=False)
    # The pipeline echoes the prompt; keep only what follows the final marker.
    _, _, quiz_text = output[0]["generated_text"].rpartition("Questions:")
    return quiz_text.strip()
# Gradio UI
with gr.Blocks() as demo:
    # Shared retrieval state for the app.
    # NOTE(review): a module-level dict is shared across ALL concurrent browser
    # sessions; gr.State would give per-user isolation — confirm sharing is intended.
    state = {"index": None, "chunks": []}
    gr.Markdown("# 📘 AI Revision Assistant")  # fix: header emoji was mojibake ("๐Ÿ“˜")
    with gr.Row():
        file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload your revision PDFs")
        status_output = gr.Textbox(label="Status", interactive=False)

    def process(files):
        """Extract, chunk, and index the uploaded PDFs, then update shared state."""
        all_chunks = []
        for file in files:
            # Gradio file objects expose the uploaded temp-file path via .name.
            text = load_pdf(file.name)
            all_chunks.extend(split_text(text))
        index, chunks = create_or_load_index(all_chunks)
        state["index"] = index
        state["chunks"] = chunks
        return f"Processed {len(files)} files. You can now ask questions or generate quizzes."

    file_input.change(fn=process, inputs=file_input, outputs=status_output)

    # Question answering over the indexed material.
    question_input = gr.Textbox(label="Ask a revision question")
    answer_output = gr.Textbox(label="Answer", lines=5)
    question_input.submit(
        fn=lambda q: answer_question(q, state["index"], state["chunks"]) if state["index"] else "Please upload files first.",
        inputs=question_input,
        outputs=answer_output,
    )

    # One-click quiz generation from the same index.
    quiz_btn = gr.Button("Quiz Me")
    quiz_output = gr.Textbox(label="Generated Quiz Questions", lines=6)
    quiz_btn.click(
        fn=lambda: generate_quiz(state["index"], state["chunks"]) if state["index"] else "Please upload files first.",
        outputs=quiz_output,
    )

demo.launch(debug=True)