Upload 2 files
- app.py +114 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,114 @@
import gradio as gr
import fitz  # PyMuPDF
import os
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from groq import Groq

# Initialize Groq client (set GROQ_API_KEY in the Space's secrets; never
# hardcode API keys in source -- a committed key must be treated as leaked)
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
model = "llama3-8b-8192"

embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Global state
state = {
    "document_chunks": [],
    "metadata": [],
    "index": None,
    "embeddings": None
}
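
# Note: this state dict lives at module level, so it is shared by every visitor
# to the app: one user's upload replaces another's index. Per-session storage
# (e.g. gr.State) would isolate users; a global is kept here to match the
# original single-user design.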

# Extract text from PDF using file path
def extract_text_from_pdf(file_path):
    doc = fitz.open(file_path)
    texts = []
    for i, page in enumerate(doc):
        text = page.get_text().strip()
        if text:
            texts.append({"text": text, "page": i + 1})
    return texts
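
# Chunking is one chunk per non-empty page, so retrieval granularity (and the
# "Page" cited in answers) is a whole page. For example (hypothetical file name):
#   extract_text_from_pdf("book.pdf")
#   # -> [{"text": "...", "page": 1}, {"text": "...", "page": 3}, ...]
#   # pages with no extractable text (e.g. scanned images) are skipped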

# Process PDFs: extract page chunks, embed them, and build the FAISS index
def process_pdfs(files):
    if not files:
        return "⚠️ Please upload at least one PDF first."

    state["document_chunks"] = []
    state["metadata"] = []

    for file in files:
        file_name = os.path.basename(file.name)
        chunks = extract_text_from_pdf(file.name)
        for chunk in chunks:
            state["document_chunks"].append(chunk['text'])
            state["metadata"].append({"file": file_name, "page": chunk['page']})

    if not state["document_chunks"]:
        return "⚠️ No extractable text found in the uploaded PDF(s)."

    embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    state["index"] = index
    state["embeddings"] = embeddings

    return "✅ Book(s) loaded successfully!"
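
# IndexFlatL2 ranks chunks by (squared) Euclidean distance. all-MiniLM-L6-v2
# embeddings are usually compared by cosine similarity; an alternative sketch
# (not in the original code) L2-normalizes the vectors and uses inner product,
# which equals cosine similarity on unit vectors:
#   faiss.normalize_L2(embeddings)     # in-place normalization to unit length
#   index = faiss.IndexFlatIP(dim)     # inner-product (cosine) index
#   index.add(embeddings)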

# Retrieve top chunks for a question
def retrieve_chunks(question, top_k=3):
    if state["index"] is None:
        return []
    q_embedding = embedder.encode([question])
    D, I = state["index"].search(q_embedding, top_k)
    return [(state["document_chunks"][i], state["metadata"][i]) for i in I[0]]
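
# index.search returns D (squared-L2 distances) and I (row indices of the top_k
# nearest chunks); both have shape (1, top_k) because a single query was encoded.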

# Generate answer with source references
def generate_answer(context, question):
    context_text = "\n\n".join(
        f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]"
        for chunk, meta in context
    )
    prompt = f"""You are a helpful assistant. Use the context below to answer the question.
Include the source references (file name and page number) in your answer.

Context:
{context_text}

Question:
{question}

Answer (with sources):"""

    response = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )
    return response.choices[0].message.content
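
# llama3-8b-8192 advertises an 8192-token context window; with page-sized chunks,
# top_k=3 pages plus the prompt template can approach that limit for dense books.
# Truncating context_text (e.g. context_text[:8000], a character cap used as a
# rough heuristic, not a token count) is a cheap safeguard if requests fail.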

# Chat function for ChatInterface
def chatbot_interface_fn(message, history):
    if not state["document_chunks"]:
        return "⚠️ Please upload PDF files first."
    context = retrieve_chunks(message)
    return generate_answer(context, message)
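
# ChatInterface passes the running chat history into fn, but it is intentionally
# unused here: every question is answered statelessly from the retrieved chunks.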

# Gradio UI
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# 📚 Enhanced RAG Chatbot\nUpload books and chat naturally!")

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="📄 Upload PDFs")
        upload_btn = gr.Button("Upload & Process PDFs")
        status = gr.Textbox(label="Status", interactive=False)

    upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])

    gr.ChatInterface(
        fn=chatbot_interface_fn,
        chatbot=gr.Chatbot(height=400, type="messages"),
        textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
        title="📖 PDF Chat",
        description="Ask questions based on uploaded PDF content.",
        submit_btn="Send"
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt
ADDED
@@ -0,0 +1,6 @@
gradio>=4.0.0
PyMuPDF
sentence-transformers
faiss-cpu
numpy
groq