# Pharma-Chatbot / app.py
# (Hugging Face Space by asad9641 — commit 4162c75, "Update app.py")
# Patch cached_download import for compatibility with newer huggingface-hub:
# newer hub releases removed `cached_download`, which older dependencies
# (e.g. sentence-transformers) may still try to import at load time.
import sys
import types
# NOTE(review): `sys` and `types` are not used below — presumably leftovers
# from an earlier version of this shim; confirm before removing.
try:
    from huggingface_hub import cached_download
except ImportError:
    # Provide a no-op stand-in so downstream `from huggingface_hub import
    # cached_download` statements do not crash on import.
    import huggingface_hub
    huggingface_hub.cached_download = lambda *args, **kwargs: None
import os
import io
import requests
import pdfplumber
import numpy as np
import faiss
import gradio as gr
from sklearn.preprocessing import normalize
from sentence_transformers import SentenceTransformer
# =========================================================
# βœ… Global Variables
# =========================================================
# DOCS: list of {"text": chunk} dicts for the currently processed PDF.
DOCS = []
# FAISS_INDEX: flat L2 index over DOCS embeddings; None until a PDF is processed.
FAISS_INDEX = None
# Groq API key is injected via environment (HF Space secret); may be None.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# =========================================================
# βœ… Embedding Model Setup
# =========================================================
# Small, fast sentence-embedding model; downloaded once at startup.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# =========================================================
# βœ… Helper Functions
# =========================================================
def extract_text_from_pdf(file_bytes):
    """Extract the text of every page of a PDF given as raw bytes.

    Pages are joined with newlines; leading/trailing whitespace is stripped
    from the final result. Pages with no extractable text contribute an
    empty line.
    """
    page_texts = []
    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
        for page in pdf.pages:
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts).strip()
def chunk_text(text, chunk_size=700):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; an empty/whitespace-only input yields [].
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), chunk_size):
        chunks.append(" ".join(tokens[start:start + chunk_size]))
    return chunks
def embed_texts(texts):
    """Encode *texts* with the sentence-transformer and L2-normalize each row.

    Returns a float32 numpy array of shape (len(texts), dim), ready for FAISS.
    """
    vectors = embedder.encode(texts)
    vectors = normalize(vectors)
    return np.asarray(vectors).astype("float32")
def build_faiss_index(embeddings):
    """Build a flat (exact) L2 FAISS index over an (n, dim) float32 array."""
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index
def search_docs(query, k=4):
    """Return the text of the *k* indexed chunks most similar to *query*.

    Similarity is L2 distance over normalized embeddings. If no PDF has been
    processed yet, returns a single user-facing warning string instead.

    Fix: FAISS pads its result ids with -1 when *k* exceeds the number of
    stored vectors; the original indexed DOCS[-1] for those, silently
    duplicating the last chunk. Invalid ids are now filtered out.
    """
    global DOCS, FAISS_INDEX
    if not DOCS or FAISS_INDEX is None:
        return ["⚠️ Please upload and process a PDF first."]
    q_emb = embed_texts([query])
    D, I = FAISS_INDEX.search(q_emb, k)
    # Keep only valid ids (FAISS uses -1 as a "no result" placeholder).
    return [DOCS[i]["text"] for i in I[0] if 0 <= i < len(DOCS)]
# =========================================================
# βœ… GROQ API Chat Function
# =========================================================
def call_groq_chat(system_prompt, user_prompt):
    """Send a system+user prompt pair to Groq's OpenAI-compatible chat API.

    Returns the assistant's reply text on success; on failure returns a
    human-readable error string (this function never raises).
    """
    if not GROQ_API_KEY:
        return "⚠️ Missing GROQ_API_KEY. Please set it in Hugging Face Space secrets."
    endpoint = "https://api.groq.com/openai/v1/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-8b-instant",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.3,
    }
    try:
        response = requests.post(endpoint, headers=request_headers, json=payload, timeout=30)
        # Map the common failure codes to friendly messages; anything else
        # unexpected raises below and is caught by the blanket handler.
        friendly_errors = {
            401: "❌ Unauthorized: Invalid or missing Groq API key.",
            404: "❌ API endpoint or model not found.",
            429: "⚠️ Too many requests. Please try again later.",
        }
        if response.status_code in friendly_errors:
            return friendly_errors[response.status_code]
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"❌ Error contacting Groq API: {str(e)}"
# =========================================================
# βœ… Process PDF
# =========================================================
def process_pdf(file_obj):
    """Generator pipeline: read the uploaded PDF, chunk, embed, build the index.

    Yields human-readable status strings after each stage so Gradio can
    stream progress into the status textbox. Populates the module-level
    DOCS list and FAISS_INDEX on success.
    """
    global DOCS, FAISS_INDEX
    if file_obj is None:
        yield "⚠️ Please upload a PDF first."
        return
    try:
        yield "πŸ“₯ Reading PDF..."
        # Gradio may deliver the upload as a dict payload, a file-like
        # object, or a temp-file path, depending on version/config.
        if isinstance(file_obj, dict) and "data" in file_obj:
            raw = file_obj["data"]
        elif hasattr(file_obj, "read"):
            raw = file_obj.read()
        elif isinstance(file_obj, str) and os.path.exists(file_obj):
            with open(file_obj, "rb") as fh:
                raw = fh.read()
        else:
            raw = None
        if raw is None:
            yield f"❌ Unsupported file type: {type(file_obj)}"
            return
        yield "✏️ Extracting text..."
        text = extract_text_from_pdf(raw)
        if not text.strip():
            yield "⚠️ No extractable text found."
            return
        yield "πŸ“„ Splitting text into chunks..."
        chunks = chunk_text(text)
        yield "🧠 Creating embeddings..."
        DOCS = [{"text": chunk} for chunk in chunks]
        embs = embed_texts([doc["text"] for doc in DOCS])
        yield "πŸ“¦ Building FAISS index..."
        FAISS_INDEX = build_faiss_index(embs)
        yield f"βœ… Successfully processed {len(chunks)} chunks."
    except Exception as e:
        yield f"❌ Error processing PDF: {str(e)}"
# =========================================================
# βœ… Answer Question
# =========================================================
def answer_question(query, history):
    """Answer *query* from the indexed PDF and append the pair to chat history.

    Fixes:
    - After "Clear Chat" the chatbot component's value is None, so the
      original `history + [...]` / `history.append` raised TypeError;
      history is now coerced to a list first.
    - The "upload a PDF first" warning was placed in the *user* slot of the
      chat pair with an empty bot reply; it is now shown as the bot's
      response to the user's question.
    """
    history = history or []  # Gradio passes None after the chat is cleared
    if not DOCS or FAISS_INDEX is None:
        return history + [[query, "⚠️ Please upload and process a PDF first."]]
    related = search_docs(query)
    context = "\n\n".join(related)
    system_prompt = "You are a helpful assistant answering based on the provided document."
    user_prompt = f"Document context:\n{context}\n\nUser question: {query}"
    answer = call_groq_chat(system_prompt, user_prompt)
    history.append([query, answer])
    return history
# =========================================================
# βœ… UI Design (Modern Look)
# =========================================================
# Build the Gradio UI: upload/process row, status box, and a chat panel.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray"),
    css="""
    body {background: linear-gradient(135deg, #e3f2fd, #bbdefb);}
    .gradio-container {max-width: 900px !important; margin: auto;}
    .chatbox {height: 400px; overflow: auto; background: white; border-radius: 12px;
              box-shadow: 0 2px 10px rgba(0,0,0,0.1); padding: 10px;}
    .status-box {background: #f0f8ff; border-radius: 8px; padding: 10px; color: #333;}
    h1 {text-align:center; font-size: 2em; color: #0d47a1;}
    """
) as app:
    gr.Markdown("<h1>πŸ“˜ AI PDF Q&A Assistant</h1><p style='text-align:center;'>Powered by Groq + FAISS + Gradio</p>")
    with gr.Row():
        pdf_file = gr.File(label="πŸ“‚ Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("βš™οΈ Process PDF", variant="primary")
    status_box = gr.Textbox(label="πŸ“Š Status", elem_classes="status-box", interactive=False)
    # process_pdf is a generator, so each yielded status streams into status_box.
    process_btn.click(process_pdf, inputs=pdf_file, outputs=status_box)
    gr.Markdown("### πŸ’¬ Ask Questions About Your PDF")
    chatbot = gr.Chatbot(label="Chat", elem_classes="chatbox", bubble_full_width=False)
    query_box = gr.Textbox(label="Type your question here...")
    clear_btn = gr.Button("🧹 Clear Chat")
    # Submitting the textbox routes (question, current history) through answer_question.
    query_box.submit(answer_question, [query_box, chatbot], chatbot)
    # Clearing sets the chatbot value to None (answer_question must tolerate this).
    clear_btn.click(lambda: None, None, chatbot, queue=False)
# =========================================================
# βœ… Launch
# =========================================================
# Start the Gradio server when run as a script (HF Spaces executes app.py directly).
if __name__ == "__main__":
    app.launch()