# Exam_Helper_APP / app.py
# Source: JaveriaZia's Hugging Face Space (commit ef66c93, "Create app.py")
import gradio as gr
import fitz # PyMuPDF
import os
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import uuid
from groq import Groq
# Load embedding model used to vectorize both the PDF chunks and user queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Initialize vector store and document store.
# These module-level globals hold the state of the most recently indexed PDF;
# they are (re)populated by index_pdf() and read by retrieve_relevant_chunks().
document_chunks = []   # list[str]: text chunks of the indexed PDF
doc_embeddings = []    # float32 array of chunk embeddings (set in index_pdf)
doc_ids = []           # one uuid4 string per chunk
index = None           # FAISS index; None until a PDF has been indexed
# Get Groq API key from environment variable (must be set in the Space secrets;
# Groq() will receive api_key=None otherwise and API calls will fail).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# Load and split PDF
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF, concatenated in order.

    Args:
        pdf_path: Filesystem path of the PDF file.

    Returns:
        A single string with the text of all pages.
    """
    # Context manager closes the document even if a page raises — the original
    # never called doc.close(), leaking the file handle.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
# Chunking logic
def chunk_text(text, max_tokens=500):
    """Greedily group sentences into chunks of at most ~max_tokens words.

    "Tokens" are approximated as whitespace-separated words. Sentences are
    split after '.', '!' or '?' followed by a space. A single sentence longer
    than max_tokens becomes its own (oversized) chunk.

    Args:
        text: Raw document text.
        max_tokens: Soft upper bound on words per chunk.

    Returns:
        List of non-empty, stripped chunk strings ([] for empty/blank input —
        the original returned [""] in that case and could also emit an empty
        leading chunk when the first sentence exceeded max_tokens).
    """
    import re  # local import kept to match the original's style
    sentences = re.split(r'(?<=[.!?]) +', text)
    chunks = []
    chunk = ""
    tokens = 0
    for sentence in sentences:
        sentence_tokens = len(sentence.split())
        # Only flush when the accumulated chunk is non-empty; otherwise an
        # oversized first sentence would append an empty "" chunk.
        if tokens + sentence_tokens > max_tokens and chunk.strip():
            chunks.append(chunk.strip())
            chunk = sentence
            tokens = sentence_tokens
        else:
            chunk += " " + sentence
            tokens += sentence_tokens
    # Skip the trailing flush when nothing substantive was accumulated.
    if chunk.strip():
        chunks.append(chunk.strip())
    return chunks
# Indexing
def index_pdf(pdf_file):
    """Extract, chunk, embed and index an uploaded PDF into FAISS.

    Repopulates the module-level stores (document_chunks, doc_embeddings,
    doc_ids, index) as a side effect.

    Args:
        pdf_file: Path string (gr.File with type="filepath") or a tempfile
            wrapper exposing .name (older Gradio versions).

    Returns:
        A status message string for the UI.
    """
    global document_chunks, doc_embeddings, doc_ids, index
    if not pdf_file:
        return "❌ Please upload a PDF file."
    # gr.File(type="filepath") passes a plain str path; the original's
    # unconditional pdf_file.name would raise AttributeError on it.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    text = extract_text_from_pdf(pdf_path)
    document_chunks = chunk_text(text)
    if not document_chunks:
        # Without this guard, shape[1] below raises IndexError on a PDF
        # with no extractable text (e.g. scanned images).
        return "❌ No extractable text found in the PDF."
    doc_embeddings = np.asarray(embedder.encode(document_chunks), dtype="float32")
    dimension = doc_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)  # exact L2 nearest-neighbour search
    index.add(doc_embeddings)
    doc_ids = [str(uuid.uuid4()) for _ in document_chunks]
    # Original literal was mojibake ("βœ…") — restored to the intended emoji.
    return "✅ PDF indexed successfully. You can now ask questions."
# Retrieve top chunks
def retrieve_relevant_chunks(query, k=3):
    """Return the k indexed chunks nearest to the query embedding (L2).

    Assumes index_pdf() has already populated the module-level `index`
    and `document_chunks`.
    """
    query_vec = embedder.encode([query]).astype("float32")
    _distances, neighbor_ids = index.search(query_vec, k)
    return [document_chunks[pos] for pos in neighbor_ids[0]]
# Generate answer using Groq
def generate_answer(user_query):
    """Answer user_query with the Groq LLM, grounded in retrieved PDF chunks.

    Returns the model's answer, or a user-facing error message when no PDF
    has been indexed yet or the API call fails.
    """
    # Guard clause: nothing to retrieve from until a PDF is indexed.
    if index is None:
        return "❌ Please upload and index a PDF first."
    context = "\n\n".join(retrieve_relevant_chunks(user_query, k=3))
    chat = [
        {"role": "system", "content": "You are a helpful academic assistant who answers questions based on uploaded PDF papers."},
        {"role": "user", "content": f"Context: {context}\n\nQuestion: {user_query}"},
    ]
    try:
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=chat,
        )
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        # Surface API/network failures to the UI rather than crashing Gradio.
        return f"❌ Error generating response: {exc}"
# Gradio UI
# Gradio UI: one tab to upload/index a PDF, one tab to ask questions.
# NOTE(review): the original source had all emoji literals mojibake-corrupted
# (e.g. "πŸ“˜" for 📘); they are restored to the intended characters here.
with gr.Blocks(title="📘 PDF Question Assistant") as demo:
    gr.Markdown("# 📘 Ask Questions About Your PDF")
    with gr.Tab("📄 Upload & Index"):
        with gr.Row():
            pdf_input = gr.File(label="Upload PDF File", type="filepath", file_types=[".pdf"])
            upload_btn = gr.Button("🔍 Index PDF", variant="primary")
        upload_status = gr.Textbox(label="", interactive=False, placeholder="Status will appear here...")
    with gr.Tab("❓ Ask a Question"):
        with gr.Row():
            query = gr.Textbox(label="Ask something from the PDF", placeholder="E.g. What is the main argument of the paper?")
            query_btn = gr.Button("🧠 Get Answer")
        answer = gr.Textbox(label="Answer", placeholder="AI-generated answer will appear here...", lines=8)
    # Wire buttons to the backend functions defined above.
    upload_btn.click(fn=index_pdf, inputs=[pdf_input], outputs=[upload_status])
    query_btn.click(fn=generate_answer, inputs=[query], outputs=[answer])

if __name__ == "__main__":
    demo.launch()