Spaces:

Dani786
/

student_rag

Sleeping

App Files Files Community

Dani786 committed on Jul 14, 2025

Commit

8107894

verified ·

1 Parent(s): 7a5a5e8

Create app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+import fitz
+import faiss
+import numpy as np
+import gradio as gr
+from groq import Groq
+from sentence_transformers import SentenceTransformer
# === Groq API key ===
# The key is read from the GROQ_API_KEY environment variable (for a hosted
# app, configure it as a secret in the hosting platform's settings). It must
# never be written into this file: anything committed here is readable by
# everyone with access to the repository, so a key that was ever committed
# should be treated as leaked and revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Provide it as an environment variable/secret "
        "before starting the app."
    )
client = Groq(api_key=_groq_api_key)
# Sentence-embedding model shared by document indexing and query encoding.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# === PDF → Text ===
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path* as one string.

    Pages are concatenated in document order; no separator is inserted
    between them.
    """
    with fitz.open(pdf_path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return "".join(page_texts)
# === Chunking ===
def chunk_text(text, chunk_size=500):
    """Split *text* into sentence-aligned chunks of at most *chunk_size* chars.

    Sentences are detected by the ``". "`` separator and packed greedily into
    chunks, joined by a single space.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters. A single sentence
            longer than this is kept whole and becomes its own (oversized)
            chunk rather than being cut mid-sentence.

    Returns:
        A list of non-empty chunk strings; ``[]`` when *text* contains no
        sentence content.
    """
    chunks, current = [], ""
    for piece in text.split(". "):
        sentence = piece.strip()
        if not sentence:
            # Empty/whitespace-only input or a trailing ". " leaves blank pieces.
            continue
        # split() consumed the ". " separator, so put the period back, but not
        # on a sentence that still carries its own terminator (the last one
        # usually does), which would otherwise end up as "..".
        if not sentence.endswith((".", "!", "?")):
            sentence += "."
        candidate = f"{current} {sentence}" if current else sentence
        if len(candidate) <= chunk_size:
            current = candidate
        else:
            # Only flush real content: when the very first sentence is already
            # too long there is nothing accumulated yet.
            if current:
                chunks.append(current)
            current = sentence
    if current:
        chunks.append(current)
    return chunks
# === Embedding + FAISS ===
class VectorStore:
    """In-memory FAISS L2 index paired with the text chunk behind each vector.

    Row ``i`` of the index corresponds to ``self.chunks[i]``, so vectors and
    texts must always be added together through :meth:`add`.
    """

    def __init__(self, dim=384):
        """Create an empty store.

        Args:
            dim: Dimensionality of the indexed vectors. It must equal the
                output size of the model that produces the embeddings.
        """
        self.index = faiss.IndexFlatL2(dim)
        self.chunks = []

    def add(self, embeddings, texts):
        """Index *embeddings* and remember the matching *texts*.

        Args:
            embeddings: 2-D array-like with one row per text.
            texts: The chunk strings, in the same order as the rows.

        Raises:
            ValueError: If the number of rows differs from the number of
                texts, which would silently misalign search results.
        """
        texts = list(texts)
        if not texts:
            # Nothing to index; avoids handing FAISS an empty/1-D array.
            return
        # FAISS expects a C-contiguous float32 matrix.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
        if vectors.ndim != 2 or vectors.shape[0] != len(texts):
            raise ValueError(
                f"expected {len(texts)} embedding rows, got array of shape {vectors.shape}"
            )
        self.index.add(vectors)
        self.chunks.extend(texts)

    def search(self, query, top_k=5):
        """Return up to *top_k* stored chunks closest to *query*, best first.

        Args:
            query: The query string; it is encoded with ``embedding_model``.
            top_k: Maximum number of chunks to return.

        Returns:
            A list of chunk strings; empty when the store is empty or
            *top_k* is not positive.
        """
        if not self.chunks or top_k <= 0:
            return []
        # Never ask for more neighbours than exist: FAISS pads missing results
        # with index -1, which as a Python index would return the last chunk.
        top_k = min(top_k, len(self.chunks))
        query_vec = np.ascontiguousarray(
            embedding_model.encode([query]), dtype=np.float32
        )
        _, indices = self.index.search(query_vec, top_k)
        return [self.chunks[i] for i in indices[0] if i >= 0]
# System message sent to the model with every request.
system_prompt = "You are a study supervisor helping students understand their documents."
# Module-wide store that collects the chunks of each processed PDF.
vs = VectorStore()
# === Groq LLaMA 3 Inference ===
def ask_llama3(system_prompt, user_prompt, model="llama3-8b-8192"):
    """Send one system + user message pair to Groq and return the reply text.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        model: Groq model id to query.
            NOTE(review): hosted model ids get retired over time; confirm the
            default is still offered before relying on it.

    Returns:
        The content of the first completion choice, or a string starting
        with "❌ Groq API Error:" if anything goes wrong. Errors are returned
        rather than raised so the UI can display them in the answer box.
    """
    try:
        result = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return result.choices[0].message.content
    except Exception as e:
        return f"❌ Groq API Error: {e}"
# === Gradio Logic ===
def upload_pdf(pdf_file):
    """Extract, chunk, embed and index the uploaded PDF.

    Args:
        pdf_file: Value of the file-upload component: ``None`` when nothing
            was selected, otherwise an object exposing the path as ``.name``
            or the path itself.

    Returns:
        A status message for the UI. Failures are reported as text instead of
        being raised.
    """
    if pdf_file is None:
        return "⚠️ Please select a PDF file first."
    try:
        # Accept both an upload object with a ``.name`` path and a bare path.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        text = extract_text_from_pdf(pdf_path)
        # Drop blank chunks so no meaningless vectors are indexed.
        chunks = [chunk for chunk in chunk_text(text) if chunk.strip()]
        if not text.strip() or not chunks:
            return "⚠️ No extractable text found in this PDF."
        embeddings = embedding_model.encode(chunks)
        vs.add(embeddings, chunks)
        return "✅ Document uploaded and processed!"
    except Exception as e:
        return f"❌ Error in PDF processing: {e}"
def ask_question(question):
    """Answer *question* using the most relevant chunks of the indexed PDFs.

    The chunks returned by the vector store are joined into a context block
    that is sent to the model together with the question.

    Args:
        question: The user's question text.

    Returns:
        The model's answer, or a warning/error message for the UI when the
        question is blank, no document has been indexed yet, or the lookup
        fails.
    """
    # A blank question would only waste an embedding and an API call.
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    if not vs.chunks:
        return "⚠️ Please upload a document first."
    try:
        docs = vs.search(question)
        context = "\n".join(docs)
        user_prompt = f"Use this context to answer the question:\n\n{context}\n\nQuestion: {question}"
        return ask_llama3(system_prompt, user_prompt)
    except Exception as e:
        return f"❌ Error during question answering: {e}"
# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 📚 RAG PDF QA with LLaMA3 + Groq")

    # First row: pick a PDF and trigger its processing.
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF")
        upload_button = gr.Button("Process PDF")

    # Second row: question input, submit button and the output box.
    with gr.Row():
        user_question = gr.Textbox(label="Ask your question here")
        submit_button = gr.Button("Ask")
        answer_box = gr.Textbox(label="Answer", lines=5)

    # Both handlers write into the same textbox: upload status and answers.
    upload_button.click(fn=upload_pdf, inputs=pdf_file, outputs=answer_box)
    submit_button.click(fn=ask_question, inputs=user_question, outputs=answer_box)

# Start serving the interface.
demo.launch()