Spaces:

Muqadas-13
/

Smart_RAG_Document_QA_Assistant

Build error

App Files Files Community

Muqadas-13 committed on Jul 16, 2025

Commit

2153a88

verified ·

1 Parent(s): 0544644

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -100

app.py CHANGED Viewed

@@ -1,104 +1,97 @@
-import os
-import streamlit as st
-from PyPDF2 import PdfReader
-from docx import Document
 import faiss
 import numpy as np
-import torch
 from groq import Groq
-from sentence_transformers import SentenceTransformer
-# ✅ Force CPU to avoid meta tensor issues
-torch.set_default_device("cpu")
-# ✅ Load Groq API key from environment
-client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# ✅ Load sentence transformer model safely
-embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", trust_remote_code=True)
-# ✅ Initialize FAISS index and chunk storage
-INDEX = faiss.IndexFlatL2(384)
-stored_chunks = []
-# ✅ Streamlit UI Styling
-st.markdown("""
-    <style>
-    .main-title {
-        font-size: 40px;
-        color: #2E86C1;
-        font-weight: bold;
-        text-align: center;
-        margin-bottom: 30px;
-    }
-    .card {
-        background-color: #ffffff;
-        padding: 20px;
-        border-radius: 15px;
-        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
-        margin-top: 20px;
-    }
-    body {
-        background-color: #f8fbfd;
-    }
-    </style>
-""", unsafe_allow_html=True)
-st.markdown('<div class="main-title">📄 Smart RAG Document QA Assistant</div>', unsafe_allow_html=True)
-# ✅ Extract text from uploaded files
-def extract_text(file):
-    if file.type == "application/pdf":
-        reader = PdfReader(file)
-        return " ".join([page.extract_text() or "" for page in reader.pages])
-    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        doc = Document(file)
-        return "\n".join([p.text for p in doc.paragraphs])
-    elif file.type.startswith("text"):
-        return file.read().decode("utf-8")
-    return ""
-# ✅ Chunk long text into smaller pieces
-def chunk_text(text, chunk_size=200):
-    words = text.split()
-    return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
-# ✅ Store embeddings in FAISS
-def store_embeddings(chunks):
-    vectors = embed_model.encode(chunks)
-    INDEX.add(np.array(vectors, dtype=np.float32))
-    stored_chunks.extend(chunks)
-# ✅ Retrieve most relevant chunks for a query
-def retrieve_similar_chunks(query, top_k=3):
-    query_vector = embed_model.encode([query])
-    distances, indices = INDEX.search(np.array(query_vector, dtype=np.float32), top_k)
-    return [stored_chunks[i] for i in indices[0]]
-# ✅ Use Groq to answer based on context
-def get_llm_answer(query, context):
-    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
-    chat_completion = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="llama3-70b-8192"
-    )
-    return chat_completion.choices[0].message.content
-# ✅ Streamlit App Interface
-uploaded_file = st.file_uploader("📁 Upload your document", type=["pdf", "docx", "txt"])
-query = st.text_input("💬 Ask a question about your document")
-if uploaded_file:
-    with st.spinner("Processing file..."):
-        text = extract_text(uploaded_file)
-        chunks = chunk_text(text)
-        store_embeddings(chunks)
-    st.success("✅ Document uploaded and indexed!")
-if st.button("🧠 Get Answer") and query:
-    with st.spinner("Thinking..."):
-        context = "\n\n".join(retrieve_similar_chunks(query))
-        answer = get_llm_answer(query, context)
-        st.markdown(f'<div class="card"><b>Answer:</b><br>{answer}</div>', unsafe_allow_html=True)
-st.markdown("<br><center style='color: grey;'>Built by Muqadas with ❤️ using Streamlit + Groq + FAISS</center>", unsafe_allow_html=True)

+!pip install -q gradio sentence-transformers faiss-cpu pdfplumber groq
+import gradio as gr
+import pdfplumber
+from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 from groq import Groq
+# Global vars
+model = SentenceTransformer("all-MiniLM-L6-v2")
+documents, embeddings, index, text_chunks, client = [], None, None, [], None
+def ask_llama3(system_prompt, user_prompt):
+    global client
+    try:
+        chat_completion = client.chat.completions.create(
+            model="llama-3.1-8b-instant",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ]
+        )
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        return f"❌ LLaMA3 error: {e}"
+def pdf_to_chunks(pdf_file, user_key):
+    global text_chunks, embeddings, index, client
+    try:
+        client = Groq(api_key=user_key)
+    except Exception as e:
+        return f"❌ API key error: {e}"
+    text_chunks = []
+    try:
+        with pdfplumber.open(pdf_file.name) as pdf:
+            for page in pdf.pages:
+                text = page.extract_text()
+                if text:
+                    sentences = text.split(". ")
+                    text_chunks.extend(sentences)
+        if not text_chunks:
+            return "❗ No text found in PDF."
+        embeddings = model.encode(text_chunks, convert_to_tensor=False)
+        embeddings = np.array(embeddings).astype("float32")
+        dimension = embeddings.shape[1]
+        index = faiss.IndexFlatL2(dimension)
+        index.add(embeddings)
+        return "✅ PDF processed and indexed successfully."
+    except Exception as e:
+        return f"❌ PDF processing error: {e}"
+def query_document(question):
+    global index, text_chunks, model
+    if index is None or not text_chunks:
+        return "❗ Please upload and process a PDF first."
+    try:
+        q_embedding = model.encode([question])[0].astype("float32")
+        D, I = index.search(np.array([q_embedding]), 5)
+        retrieved_chunks = [text_chunks[i] for i in I[0]]
+        context = "\n".join(retrieved_chunks)
+        system_prompt = "You are a helpful study supervisor. Use the provided context to answer clearly."
+        user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}"
+        return ask_llama3(system_prompt, user_prompt)
+    except Exception as e:
+        return f"❌ Query error: {e}"
+# UI: a single-column Gradio Blocks layout wired to pdf_to_chunks and query_document.
+with gr.Blocks() as demo:
+    # Header banner rendered from inline HTML.
+    gr.Markdown("""
+    <div style="text-align:center; background:#f97316; color:white; padding: 12px; border-radius: 10px;">
+        <h2>📘 PDF Study Assistant</h2>
+        <p>Ask questions from your uploaded PDF using Groq + LLaMA3</p>
+    </div>
+    """)
+    with gr.Column():
+        # Indexing inputs: the API key and the PDF are both passed to pdf_to_chunks.
+        api_input = gr.Textbox(label="🔑 Groq API Key", type="password")
+        pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
+        upload_btn = gr.Button("📥 Extract & Index PDF", variant="primary")
+        # Read-only box showing the status string returned by pdf_to_chunks.
+        status_output = gr.Textbox(label="🛠️ Status", interactive=False)
+        # Question input and read-only answer box.
+        question = gr.Textbox(label="❓ Ask a Question", lines=2)
+        get_answer_btn = gr.Button("💬 Get Answer")
+        answer_output = gr.Textbox(label="📢 Answer", lines=10, interactive=False)
+    # Clicking the index button calls pdf_to_chunks(pdf, key) and shows its return value.
+    upload_btn.click(fn=pdf_to_chunks, inputs=[pdf_input, api_input], outputs=[status_output])
+    # Clicking the answer button calls query_document(question) and shows its return value.
+    get_answer_btn.click(fn=query_document, inputs=[question], outputs=[answer_output])
+# Start the app. This runs whenever the module is executed or imported,
+# since it is not placed under an `if __name__ == "__main__":` guard.
+demo.launch()