Zohaib366 committed on
Commit
ac02361
Β·
verified Β·
1 Parent(s): 215d2e6

UPDATED APP.PY

Browse files
Files changed (1) hide show
  1. app.py +119 -114
app.py CHANGED
@@ -1,114 +1,119 @@
1
- import gradio as gr
2
- import fitz # PyMuPDF
3
- import os
4
- from sentence_transformers import SentenceTransformer
5
- import numpy as np
6
- import faiss
7
- from groq import Groq
8
-
9
- # Initialize Groq client
10
- groq_client = Groq(api_key="<REDACTED_GROQ_API_KEY>")
11
- model = "llama3-8b-8192"
12
-
13
- embedder = SentenceTransformer('all-MiniLM-L6-v2')
14
-
15
- # Global state
16
- state = {
17
- "document_chunks": [],
18
- "metadata": [],
19
- "index": None,
20
- "embeddings": None
21
- }
22
-
23
- # Extract text from PDF using file path
24
- def extract_text_from_pdf(file_path):
25
- doc = fitz.open(file_path)
26
- texts = []
27
- for i, page in enumerate(doc):
28
- text = page.get_text().strip()
29
- if text:
30
- texts.append({"text": text, "page": i + 1})
31
- return texts
32
-
33
- # Process PDFs
34
- def process_pdfs(files):
35
- state["document_chunks"] = []
36
- state["metadata"] = []
37
-
38
- for file in files:
39
- file_name = os.path.basename(file.name)
40
- chunks = extract_text_from_pdf(file.name)
41
- for chunk in chunks:
42
- state["document_chunks"].append(chunk['text'])
43
- state["metadata"].append({"file": file_name, "page": chunk['page']})
44
-
45
- embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
46
- dim = embeddings.shape[1]
47
- index = faiss.IndexFlatL2(dim)
48
- index.add(np.array(embeddings))
49
- state["index"] = index
50
- state["embeddings"] = embeddings
51
-
52
- return "βœ… Book(s) loaded successfully!"
53
-
54
- # Retrieve top chunks
55
- def retrieve_chunks(question, top_k=3):
56
- if not state["index"]:
57
- return []
58
- q_embedding = embedder.encode([question])
59
- D, I = state["index"].search(q_embedding, top_k)
60
- return [(state["document_chunks"][i], state["metadata"][i]) for i in I[0]]
61
-
62
- # Generate answer with source references
63
- def generate_answer(context, question):
64
- context_text = "\n\n".join(
65
- f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]"
66
- for chunk, meta in context
67
- )
68
- prompt = f"""You are a helpful assistant. Use the context below to answer the question.
69
- Include the source references (file name and page number) in your answer.
70
-
71
- Context:
72
- {context_text}
73
-
74
- Question:
75
- {question}
76
-
77
- Answer (with sources):"""
78
-
79
- response = groq_client.chat.completions.create(
80
- model=model,
81
- messages=[{"role": "user", "content": prompt}],
82
- temperature=0.2
83
- )
84
- return response.choices[0].message.content
85
-
86
- # Chat function for ChatInterface
87
- def chatbot_interface_fn(message, history):
88
- if not state["document_chunks"]:
89
- return "⚠️ Please upload PDF files first."
90
- context = retrieve_chunks(message)
91
- return generate_answer(context, message)
92
-
93
- # Gradio UI
94
- with gr.Blocks(title="RAG Chatbot") as demo:
95
- gr.Markdown("# πŸ“š Enhanced RAG Chatbot\nUpload books and chat naturally!")
96
-
97
- with gr.Row():
98
- pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="πŸ“‚ Upload PDFs")
99
- upload_btn = gr.Button("Upload & Process PDFs")
100
- status = gr.Textbox(label="Status", interactive=False)
101
-
102
- upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])
103
-
104
- gr.ChatInterface(
105
- fn=chatbot_interface_fn,
106
- chatbot=gr.Chatbot(height=400, type="messages"),
107
- textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
108
- title="πŸ“– PDF Chat",
109
- description="Ask questions based on uploaded PDF content.",
110
- submit_btn="Send"
111
- )
112
-
113
- if __name__ == "__main__":
114
- demo.launch()
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import os
4
+ from sentence_transformers import SentenceTransformer
5
+ import numpy as np
6
+ import faiss
7
+ from groq import Groq
8
+
9
# Load API key from environment variable.
# Fail fast at import time so the app never starts without credentials.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("❌ GROQ_API_KEY environment variable not set.")

# Initialize Groq client
groq_client = Groq(api_key=api_key)
model = "llama3-8b-8192"  # Groq-hosted Llama 3 8B chat model

# Sentence-embedding model used for both document chunks and user queries.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Global state
# Mutable module-level store shared by the upload and chat handlers:
#   document_chunks - list of extracted page texts (str)
#   metadata        - parallel list of {"file": name, "page": number} dicts
#   index           - FAISS IndexFlatL2 over chunk embeddings (None until built)
#   embeddings      - raw embedding matrix, kept after indexing
state = {
    "document_chunks": [],
    "metadata": [],
    "index": None,
    "embeddings": None
}
27
+
28
# Extract text from PDF using file path
def extract_text_from_pdf(file_path):
    """Extract non-empty page texts from the PDF at *file_path*.

    Args:
        file_path: Path to a PDF file on disk.

    Returns:
        A list of ``{"text": str, "page": int}`` dicts, one per page that
        contains any text; pages are numbered from 1.
    """
    texts = []
    # Use the document as a context manager so the file handle is closed
    # even if a page raises mid-iteration (original leaked the handle).
    with fitz.open(file_path) as doc:
        for i, page in enumerate(doc):
            text = page.get_text().strip()
            if text:
                texts.append({"text": text, "page": i + 1})
    return texts
37
+
38
# Process PDFs
def process_pdfs(files):
    """Extract text from the uploaded PDFs and (re)build the FAISS index.

    Args:
        files: List of Gradio file objects (each exposes a ``.name`` path),
            or None when nothing was uploaded.

    Returns:
        A human-readable status string for display in the UI.
    """
    state["document_chunks"] = []
    state["metadata"] = []

    # Guard: clicking the button with no files selected passes None/[].
    if not files:
        state["index"] = None
        state["embeddings"] = None
        return "⚠️ Please select at least one PDF file first."

    for file in files:
        file_name = os.path.basename(file.name)
        for chunk in extract_text_from_pdf(file.name):
            state["document_chunks"].append(chunk['text'])
            state["metadata"].append({"file": file_name, "page": chunk['page']})

    # Guard: scanned/image-only PDFs can yield zero text chunks, and
    # embeddings.shape[1] would fail on an empty batch.
    if not state["document_chunks"]:
        state["index"] = None
        state["embeddings"] = None
        return "⚠️ No extractable text found in the uploaded PDF(s)."

    embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    state["index"] = index
    state["embeddings"] = embeddings

    return "βœ… Book(s) loaded successfully!"
58
+
59
# Retrieve top chunks
def retrieve_chunks(question, top_k=3):
    """Return up to *top_k* ``(chunk_text, metadata)`` pairs most similar
    to *question*, or an empty list when no index has been built yet.
    """
    index = state["index"]
    if index is None:
        return []
    # Never ask FAISS for more neighbours than there are chunks; otherwise
    # it pads results with -1, and chunks[-1] would silently return the
    # wrong chunk below.
    top_k = min(top_k, len(state["document_chunks"]))
    if top_k <= 0:
        return []
    q_embedding = embedder.encode([question])
    D, I = index.search(np.array(q_embedding), top_k)
    # Skip any -1 padding defensively.
    return [
        (state["document_chunks"][i], state["metadata"][i])
        for i in I[0]
        if i >= 0
    ]
66
+
67
# Generate answer with source references
def generate_answer(context, question):
    """Ask the LLM to answer *question* using the retrieved *context*.

    Args:
        context: Iterable of ``(chunk_text, metadata)`` pairs, as produced
            by ``retrieve_chunks``.
        question: The user's question.

    Returns:
        The model's answer text, which is prompted to cite file/page sources.
    """
    cited_chunks = []
    for chunk, meta in context:
        cited_chunks.append(
            f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]"
        )
    context_text = "\n\n".join(cited_chunks)

    prompt = f"""You are a helpful assistant. Use the context below to answer the question.
Include the source references (file name and page number) in your answer.

Context:
{context_text}

Question:
{question}

Answer (with sources):"""

    # Low temperature keeps answers grounded in the supplied context.
    response = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )
    return response.choices[0].message.content
90
+
91
# Chat function for ChatInterface
def chatbot_interface_fn(message, history):
    """Answer *message* from the indexed PDFs (*history* is unused)."""
    # Nothing indexed yet -> prompt the user to upload instead of answering.
    if not state["document_chunks"]:
        return "⚠️ Please upload PDF files first."
    relevant = retrieve_chunks(message)
    return generate_answer(relevant, message)
97
+
98
# Gradio UI
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# πŸ“š Enhanced RAG Chatbot\nUpload books and chat naturally!")

    # Upload row: file picker, processing trigger, and read-only status box.
    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="πŸ“‚ Upload PDFs")
        upload_btn = gr.Button("Upload & Process PDFs")
        status = gr.Textbox(label="Status", interactive=False)

    # Clicking the button extracts text and builds the vector index;
    # process_pdfs returns the status string shown in `status`.
    upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])

    # Chat widget; each message is answered by chatbot_interface_fn via RAG.
    gr.ChatInterface(
        fn=chatbot_interface_fn,
        chatbot=gr.Chatbot(height=400, type="messages"),
        textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
        title="πŸ“– PDF Chat",
        description="Ask questions based on uploaded PDF content.",
        submit_btn="Send"
    )
117
+
118
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()