Spaces:

Dani786
/

student_rag

Sleeping

App Files Community

Dani786 committed on Jul 14, 2025

Commit

722f7a0

verified ·

1 Parent(s): 6d05c60

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -23

app.py CHANGED Viewed

@@ -1,17 +1,18 @@
 import os
-import fitz
 import faiss
 import numpy as np
 import gradio as gr
 from groq import Groq
 from sentence_transformers import SentenceTransformer
-# === SET YOUR GROQ API KEY HERE ===
-os.environ["GROQ_API_KEY"] = "sk-your_actual_key_here"
 client = Groq(api_key=os.environ["GROQ_API_KEY"])
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-# === PDF → Text ===
 def extract_text_from_pdf(pdf_path):
     text = ""
     with fitz.open(pdf_path) as doc:
@@ -19,7 +20,7 @@ def extract_text_from_pdf(pdf_path):
             text += page.get_text()
     return text
-# === Chunking ===
 def chunk_text(text, chunk_size=500):
     sentences = text.split(". ")
     chunks, current = [], ""
@@ -33,7 +34,7 @@ def chunk_text(text, chunk_size=500):
         chunks.append(current.strip())
     return chunks
-# === Embedding + FAISS ===
 class VectorStore:
     def __init__(self):
         self.index = faiss.IndexFlatL2(384)
@@ -44,14 +45,14 @@ class VectorStore:
         self.chunks.extend(texts)
     def search(self, query, top_k=5):
-        query_vec = embedding_model.encode([query])
-        D, I = self.index.search(np.array(query_vec), top_k)
         return [self.chunks[i] for i in I[0]]
 vs = VectorStore()
-system_prompt = "You are a study supervisor helping students understand their documents."
-# === Groq LLaMA 3 Inference ===
 def ask_llama3(system_prompt, user_prompt):
     try:
         result = client.chat.completions.create(
@@ -65,7 +66,7 @@ def ask_llama3(system_prompt, user_prompt):
     except Exception as e:
         return f"❌ Groq API Error: {e}"
-# === Gradio Logic ===
 def upload_pdf(pdf_file):
     try:
         text = extract_text_from_pdf(pdf_file.name)
@@ -74,31 +75,32 @@ def upload_pdf(pdf_file):
         vs.add(embeddings, chunks)
         return "✅ Document uploaded and processed!"
     except Exception as e:
-        return f"❌ Error in PDF processing: {e}"
 def ask_question(question):
     if not vs.chunks:
-        return "⚠️ Please upload a document first."
     try:
         docs = vs.search(question)
         context = "\n".join(docs)
-        user_prompt = f"Use this context to answer the question:\n\n{context}\n\nQuestion: {question}"
-        return ask_llama3(system_prompt, user_prompt)
     except Exception as e:
-        return f"❌ Error during question answering: {e}"
 # === Gradio UI ===
 with gr.Blocks() as demo:
-    gr.Markdown("## 📚 RAG PDF QA with LLaMA3 + Groq")
     with gr.Row():
-        pdf_file = gr.File(label="Upload PDF")
         upload_button = gr.Button("Process PDF")
     with gr.Row():
-        user_question = gr.Textbox(label="Ask your question here")
-        submit_button = gr.Button("Ask")
-        answer_box = gr.Textbox(label="Answer", lines=5)
-    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer_box)
-    submit_button.click(ask_question, inputs=user_question, outputs=answer_box)
 demo.launch()

 import os
+import fitz  # PyMuPDF
 import faiss
 import numpy as np
 import gradio as gr
 from groq import Groq
 from sentence_transformers import SentenceTransformer
+# ✅ Load Groq API key from Hugging Face Secrets
 client = Groq(api_key=os.environ["GROQ_API_KEY"])
+# ✅ Sentence embedding model
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# === PDF → Text extraction ===
 def extract_text_from_pdf(pdf_path):
     text = ""
     with fitz.open(pdf_path) as doc:
             text += page.get_text()
     return text
+# === Chunking text ===
 def chunk_text(text, chunk_size=500):
     sentences = text.split(". ")
     chunks, current = [], ""
         chunks.append(current.strip())
     return chunks
+# === Vector store (FAISS) ===
 class VectorStore:
     def __init__(self):
         self.index = faiss.IndexFlatL2(384)
         self.chunks.extend(texts)
     def search(self, query, top_k=5):
+        vec = embedding_model.encode([query])
+        _, I = self.index.search(np.array(vec), top_k)
         return [self.chunks[i] for i in I[0]]
 vs = VectorStore()
+system_prompt = "You are a study supervisor helping students understand their uploaded documents."
+# === Ask LLaMA 3 using Groq ===
 def ask_llama3(system_prompt, user_prompt):
     try:
         result = client.chat.completions.create(
     except Exception as e:
         return f"❌ Groq API Error: {e}"
+# === PDF upload handler ===
 def upload_pdf(pdf_file):
     try:
         text = extract_text_from_pdf(pdf_file.name)
         vs.add(embeddings, chunks)
         return "✅ Document uploaded and processed!"
     except Exception as e:
+        return f"❌ PDF Processing Error: {e}"
+# === QA handler ===
 def ask_question(question):
     if not vs.chunks:
+        return "⚠️ Please upload and process a PDF document first."
     try:
         docs = vs.search(question)
         context = "\n".join(docs)
+        prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {question}"
+        return ask_llama3(system_prompt, prompt)
     except Exception as e:
+        return f"❌ Question Answering Error: {e}"
 # === Gradio UI ===
 with gr.Blocks() as demo:
+    gr.Markdown("## 📚 RAG PDF QA using LLaMA3 via Groq API")
     with gr.Row():
+        pdf_file = gr.File(label="Upload PDF Document")
         upload_button = gr.Button("Process PDF")
     with gr.Row():
+        question = gr.Textbox(label="Ask a question from the document")
+        ask_button = gr.Button("Ask")
+        answer = gr.Textbox(label="Answer", lines=6)
+    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer)
+    ask_button.click(ask_question, inputs=question, outputs=answer)
 demo.launch()