traleela committed on
Commit
b6f944c
·
verified ·
1 Parent(s): f744708

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import fitz # PyMuPDF
3
+ import os
4
+ import numpy as np
5
+ import faiss
6
+ from sentence_transformers import SentenceTransformer
7
+ import gradio as gr
8
+
9
def load_pdf(file_path):
    """Extract the plain text of every page in a PDF.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages, in page order, joined with newlines.
    """
    # Open the document as a context manager so the underlying file handle
    # is released even if text extraction fails part-way through.
    with fitz.open(file_path) as doc:
        return "\n".join(page.get_text() for page in doc)
12
+
13
def split_into_chunks(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (including newlines) is collapsed to a single space inside a chunk.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings; empty when ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step would make range() yield nothing and silently drop the
    # whole document; a zero step fails with an unrelated-looking message.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
16
+
17
def create_index(chunks, model_name='all-MiniLM-L6-v2'):
    """Embed text chunks and build an exact L2 FAISS index over them.

    Args:
        chunks: Non-empty list of text chunks to embed.
        model_name: Name of the SentenceTransformer model to load.

    Returns:
        A tuple ``(index, embeddings, chunks, model)``: the populated FAISS
        index, the embedding matrix added to it, the chunks as passed in,
        and the loaded model (needed later to embed queries).

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    # Without any chunk there is no embedding dimension to size the index.
    if len(chunks) == 0:
        raise ValueError("cannot build an index from an empty list of chunks")

    model = SentenceTransformer(model_name)
    # FAISS requires a C-contiguous float32 matrix; normalise once here so
    # the stored embeddings and the indexed vectors are the same array.
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings, chunks, model
23
+
24
def ask_question(query, index, embeddings, chunks, model, top_k=3, context_window=1):
    """Return the chunks most relevant to ``query`` plus their neighbours.

    Args:
        query: The question text to embed and search for.
        index: Search index exposing ``search(vectors, k)``; ``None`` when
            nothing has been indexed yet.
        embeddings: Chunk embeddings; accepted for interface compatibility
            but not used by the search.
        chunks: The text chunks, in the same order they were indexed.
        model: Embedding model exposing ``encode(list_of_texts)``.
        top_k: Maximum number of nearest chunks to retrieve.
        context_window: Number of chunks on each side of every hit to
            include as surrounding context.

    Returns:
        The selected chunks in document order joined by blank lines, or an
        empty string when there is nothing to search.
    """
    if index is None or model is None or len(chunks) == 0:
        return ""

    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(chunks))
    if k < 1:
        return ""

    query_emb = np.asarray(model.encode([query]), dtype=np.float32)
    _distances, indices = index.search(query_emb, k)

    selected = set()
    for hit in indices[0]:
        hit = int(hit)
        # FAISS pads missing results with -1; treating that as a position
        # would wrongly pull in the first chunk of the document.
        if hit < 0 or hit >= len(chunks):
            continue
        first = max(0, hit - context_window)
        last = min(len(chunks), hit + context_window + 1)
        selected.update(range(first, last))

    return "\n\n".join(chunks[i] for i in sorted(selected))
35
+
36
def generate_simple_flashcards(chunks, max_flashcards=10):
    """Build question/answer pairs from short "X is Y" sentences.

    Each chunk is split on ``'. '``; a sentence containing ``" is "`` with
    fewer than 25 words becomes ``("What is X?", "Y")``.

    Args:
        chunks: Iterable of text chunks to scan, in order.
        max_flashcards: Maximum number of cards to return.

    Returns:
        A list of ``(question, answer)`` tuples, at most ``max_flashcards``
        long, in the order the sentences were encountered.
    """
    flashcards = []
    # The limit is checked after each append below, so a non-positive limit
    # must be handled up front or one card would still be produced.
    if max_flashcards <= 0:
        return flashcards

    for chunk in chunks:
        for sentence in chunk.split('. '):
            if " is " not in sentence or len(sentence.split()) >= 25:
                continue
            subject, _, definition = sentence.partition(" is ")
            subject = subject.strip()
            definition = definition.strip().rstrip('.').rstrip()
            # Skip fragments that would yield "What is ?" or a blank answer.
            if not subject or not definition:
                continue
            flashcards.append((f"What is {subject}?", definition))
            if len(flashcards) >= max_flashcards:
                return flashcards
    return flashcards
49
+
50
# Module-level store shared by the UI callbacks: the search index, the chunk
# embeddings, the text chunks and the embedding model of the uploaded PDFs.
state = dict(index=None, embeddings=None, chunks=[], model=None)
51
+
52
def process_pdfs(files):
    """Index the uploaded PDFs and store the result in ``state``.

    Args:
        files: The value of the upload component: a list of uploaded files
            (objects with a ``.name`` path attribute or plain path strings),
            or ``None``/empty when the upload has been cleared.

    Returns:
        A short status message for display in the UI.
    """
    empty_state = {"index": None, "embeddings": None, "chunks": [], "model": None}

    # The change event also fires when the upload is cleared; drop the old
    # index so questions are not answered from documents no longer listed.
    if not files:
        state.update(empty_state)
        return "No files uploaded."

    all_chunks = []
    for file in files:
        # Depending on the Gradio version the item is a tempfile-like object
        # exposing ``.name`` or already a path string.
        path = getattr(file, "name", file)
        all_chunks.extend(split_into_chunks(load_pdf(path)))

    if not all_chunks:
        # Do not keep serving a stale index from a previous upload.
        state.update(empty_state)
        return "No text extracted."

    index, emb, chks, model = create_index(all_chunks)
    state.update({"index": index, "embeddings": emb, "chunks": chks, "model": model})
    return f"Processed {len(files)} PDF(s)."
64
+
65
def ask(query):
    """Answer a question from the currently indexed PDFs.

    Args:
        query: The question text typed by the user.

    Returns:
        The relevant passages found by ``ask_question``, or a short hint
        when no PDF has been indexed yet or the question is blank.
    """
    # Before the first successful upload the index and model are None and
    # the search would fail with an AttributeError.
    if state["index"] is None or not state["chunks"]:
        return "Please upload at least one PDF first."
    if not query or not query.strip():
        return "Please enter a question."
    return ask_question(
        query,
        state["index"],
        state["embeddings"],
        state["chunks"],
        state["model"],
    )
67
+
68
def flashcards():
    """Render flashcards generated from the currently stored chunks.

    Returns:
        The cards formatted as ``Q: ...`` / ``A: ...`` pairs separated by
        blank lines, or a short hint when no card could be generated.
    """
    pairs = generate_simple_flashcards(state["chunks"])
    # Give the user feedback instead of an empty box when nothing has been
    # uploaded or no suitable sentence was found.
    if not pairs:
        return "No flashcards could be generated. Upload a PDF first."
    return "\n\n".join(f"Q: {q}\nA: {a}" for q, a in pairs)
71
+
72
# Gradio UI: an upload row with a status box, a question tab and a
# flashcard tab, wired to the callbacks defined above.
# NOTE(review): nesting is reconstructed from the blank-line grouping of the
# original listing — confirm the status box belongs inside the row.
with gr.Blocks() as demo:
    gr.Markdown("## 📘 AI Revision Assistant (Free Version) – Ask, Review, Quiz Yourself")
    with gr.Row():
        file_input = gr.File(label="Upload PDFs", file_count="multiple")
        status = gr.Textbox(label="Status")
    # Re-index whenever the set of uploaded files changes.
    file_input.change(process_pdfs, inputs=file_input, outputs=status)

    with gr.Tab("Ask a Question"):
        qbox = gr.Textbox(label="Your Question")
        abox = gr.Textbox(label="Answer")
        qbox.submit(ask, inputs=qbox, outputs=abox)

    with gr.Tab("Simple Flashcards"):
        fbox = gr.Textbox(label="Auto-Generated Flashcards")
        gr.Button("Generate").click(flashcards, outputs=fbox)

# Only start the server when run as a script, so importing this module
# (e.g. for tests or reuse of the helpers) does not launch the app.
if __name__ == "__main__":
    demo.launch()