# Source: Hugging Face Space by Zohaib366 — commit 97c6b1f ("added api key")
import gradio as gr
import fitz # PyMuPDF
import os
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from groq import Groq
# --- Module-level initialisation (runs once at import time) ---

# Initialize Groq client; the API key must come from the environment.
key = os.getenv("GROQ_API_KEY")
if not key:
    # Fail fast at startup rather than on the first chat request.
    raise ValueError("No API key found")
groq_client = Groq(api_key=key)
model = "llama3-8b-8192"  # Groq-hosted chat model used for answer generation

# Embedding model shared by document indexing and query encoding.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Global state
# Shared across Gradio callbacks; assumes a single-process app — TODO confirm
# this Space is not run with multiple workers.
state = {
    "document_chunks": [],  # list[str] — one entry per non-empty PDF page
    "metadata": [],         # list[dict] — {"file": ..., "page": ...} per chunk
    "index": None,          # FAISS IndexFlatL2 over the chunk embeddings
    "embeddings": None      # chunk embedding matrix returned by the embedder
}
# Extract text from PDF using file path
def extract_text_from_pdf(file_path):
    """Extract per-page text from a PDF.

    Args:
        file_path: Filesystem path to the PDF.

    Returns:
        A list of ``{"text": <stripped page text>, "page": <1-based page
        number>}`` dicts, one per page that contains non-empty text.
    """
    texts = []
    # Use the document as a context manager so the file handle is always
    # released (the original opened the document and never closed it).
    with fitz.open(file_path) as doc:
        for i, page in enumerate(doc):
            text = page.get_text().strip()
            if text:  # skip blank / image-only pages
                texts.append({"text": text, "page": i + 1})
    return texts
# Process PDFs
def process_pdfs(files):
    """Extract, embed and index the uploaded PDFs into the global ``state``.

    Args:
        files: Gradio file objects (each exposing a ``.name`` path), or None.

    Returns:
        A human-readable status string for the UI.
    """
    if not files:
        # Nothing selected in the file picker.
        return "No files uploaded. Please select at least one PDF."
    state["document_chunks"] = []
    state["metadata"] = []
    for file in files:
        file_name = os.path.basename(file.name)
        for chunk in extract_text_from_pdf(file.name):
            state["document_chunks"].append(chunk['text'])
            state["metadata"].append({"file": file_name, "page": chunk['page']})
    if not state["document_chunks"]:
        # Guard: encoding an empty list would make `embeddings.shape[1]`
        # fail below. Also clear any stale index from a previous upload.
        state["index"] = None
        state["embeddings"] = None
        return "No extractable text found in the uploaded PDF(s)."
    embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
    dim = embeddings.shape[1]
    # Exact L2 (flat) index — fine for the small corpora a Space handles.
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    state["index"] = index
    state["embeddings"] = embeddings
    return "βœ… Book(s) loaded successfully!"
# Retrieve top chunks
def retrieve_chunks(question, top_k=3):
    """Return the ``top_k`` most similar chunks for *question*.

    Args:
        question: Natural-language query.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        A list of ``(chunk_text, metadata)`` tuples, possibly empty when no
        index has been built yet or fewer chunks are available.
    """
    if state["index"] is None:  # explicit None check — no PDFs indexed yet
        return []
    q_embedding = embedder.encode([question])
    D, I = state["index"].search(np.array(q_embedding), top_k)
    # FAISS pads the result with id -1 when fewer than top_k vectors are
    # indexed; a raw -1 would silently select the *last* chunk via Python's
    # negative indexing, so filter those out.
    return [
        (state["document_chunks"][i], state["metadata"][i])
        for i in I[0]
        if i >= 0
    ]
# Generate answer with source references
def generate_answer(context, question):
    """Answer *question* with the Groq LLM, grounded in *context*.

    Args:
        context: Iterable of ``(chunk_text, metadata)`` pairs, as produced
            by ``retrieve_chunks``.
        question: The user's question.

    Returns:
        The model's answer text (expected to cite file name and page).
    """
    # Append a source tag to every chunk before joining them into one blob.
    annotated = []
    for chunk, meta in context:
        annotated.append(f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]")
    context_text = "\n\n".join(annotated)
    prompt = f"""You are a helpful assistant. Use the context below to answer the question.
Include the source references (file name and page number) in your answer.
Context:
{context_text}
Question:
{question}
Answer (with sources):"""
    # Low temperature keeps the answer close to the supplied context.
    completion = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )
    return completion.choices[0].message.content
# Chat function for ChatInterface
def chatbot_interface_fn(message, history):
    """Gradio ChatInterface callback: answer *message* from the indexed PDFs.

    Args:
        message: The user's chat message.
        history: Prior chat turns (unused; retrieval is per-message).

    Returns:
        The assistant's reply, or an upload reminder when nothing is indexed.
    """
    if state["document_chunks"]:
        relevant = retrieve_chunks(message)
        return generate_answer(relevant, message)
    # Nothing has been uploaded/processed yet.
    return "⚠️ Please upload PDF files first."
# Gradio UI — builds the app layout at import time and wires callbacks.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# πŸ“š Enhanced RAG Chatbot\nUpload books and chat naturally!")
    with gr.Row():
        # Multi-file PDF picker alongside the processing trigger button.
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="πŸ“‚ Upload PDFs")
        upload_btn = gr.Button("Upload & Process PDFs")
    # Read-only status line fed by process_pdfs' return value.
    status = gr.Textbox(label="Status", interactive=False)
    upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])
    # Chat panel backed by the RAG pipeline (retrieve + generate).
    gr.ChatInterface(
        fn=chatbot_interface_fn,
        chatbot=gr.Chatbot(height=400, type="messages"),
        textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
        title="πŸ“– PDF Chat",
        description="Ask questions based on uploaded PDF content.",
        submit_btn="Send"
    )
# Launch the web server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()