# student_rag / app.py
import os
import fitz  # PyMuPDF
import faiss
import numpy as np
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
# ✅ Load Groq API key from Hugging Face Secrets
client = Groq(api_key=os.environ["GROQ_API_KEY"])
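# The key is expected in the Space's secrets under GROQ_API_KEY; os.environ[...]
# raises KeyError if it is missing, so the app fails fast at startup.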
# ✅ Sentence embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
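# all-MiniLM-L6-v2 produces 384-dimensional embeddings; the FAISS index below
# is sized to match. Quick sanity check (sketch):
#   assert embedding_model.encode(["hello"]).shape[1] == 384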
# === PDF → Text extraction ===
def extract_text_from_pdf(pdf_path):
    text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            text += page.get_text()
    return text
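# Caveat: page.get_text() only reads the PDF's embedded text layer; scanned /
# image-only PDFs come back empty and would need an OCR step (e.g. pytesseract)
# before chunking.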
# === Chunking text ===
def chunk_text(text, chunk_size=500):
    sentences = text.split(". ")
    chunks, current = [], ""
    for sentence in sentences:
        if len(current) + len(sentence) < chunk_size:
            current += sentence + ". "
        else:
            chunks.append(current.strip())
            current = sentence + ". "
    if current:
        chunks.append(current.strip())
    return chunks
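# Worked example of the chunking above (traced by hand, not executed):
#   chunk_text("One. Two. Three.", chunk_size=12)
#   -> ["One. Two.", "Three.."]
# The doubled final period is a side effect of re-appending ". " after splitting.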
# === Vector store (FAISS) ===
class VectorStore:
    def __init__(self):
        # Dimension must match the embedding model (all-MiniLM-L6-v2 -> 384)
        self.index = faiss.IndexFlatL2(384)
        self.chunks = []

    def add(self, embeddings, texts):
        self.index.add(np.asarray(embeddings, dtype="float32"))
        self.chunks.extend(texts)

    def search(self, query, top_k=5):
        vec = embedding_model.encode([query])
        _, I = self.index.search(np.asarray(vec, dtype="float32"), top_k)
        # FAISS pads results with -1 when fewer than top_k vectors are indexed
        return [self.chunks[i] for i in I[0] if i != -1]
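# Minimal usage sketch for VectorStore (reuses the embedding model above):
#   store = VectorStore()
#   texts = ["Paris is in France.", "The sky is blue."]
#   store.add(embedding_model.encode(texts), texts)
#   store.search("Where is Paris?", top_k=1)  # expected: ["Paris is in France."]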
vs = VectorStore()
system_prompt = "You are a study supervisor helping students understand their uploaded documents."
# === Ask LLaMA 3 using Groq ===
def ask_llama3(system_prompt, user_prompt):
    try:
        result = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return result.choices[0].message.content
    except Exception as e:
        return f"❌ Groq API Error: {e}"
# === PDF upload handler ===
def upload_pdf(pdf_file):
    if pdf_file is None:
        return "⚠️ Please select a PDF file first."
    try:
        text = extract_text_from_pdf(pdf_file.name)
        chunks = chunk_text(text)
        embeddings = embedding_model.encode(chunks)
        vs.add(embeddings, chunks)
        return "✅ Document uploaded and processed!"
    except Exception as e:
        return f"❌ PDF Processing Error: {e}"
# === QA handler ===
def ask_question(question):
    if not vs.chunks:
        return "⚠️ Please upload and process a PDF document first."
    try:
        docs = vs.search(question)
        context = "\n".join(docs)
        prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {question}"
        return ask_llama3(system_prompt, prompt)
    except Exception as e:
        return f"❌ Question Answering Error: {e}"
# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 📚 RAG PDF QA using LLaMA 3 via Groq API")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF Document")
        upload_button = gr.Button("Process PDF")
    with gr.Row():
        question = gr.Textbox(label="Ask a question from the document")
        ask_button = gr.Button("Ask")
    answer = gr.Textbox(label="Answer", lines=6)

    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer)
    ask_button.click(ask_question, inputs=question, outputs=answer)

demo.launch()
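# Note (sketch): on Hugging Face Spaces demo.launch() is sufficient; when running
# locally, demo.launch(share=True) creates a temporary public Gradio link.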