# AI PDF Q&A Assistant — Hugging Face Space app.
# (Removed pasted Space status banner "Spaces: Sleeping" — page residue, not code.)
# Patch cached_download import for compatibility with newer huggingface-hub:
# the symbol was removed in recent releases, but downstream libraries
# (e.g. sentence-transformers) may still try to import it.
import sys
import types
try:
    from huggingface_hub import cached_download  # present in older versions
except ImportError:
    import huggingface_hub
    # Install a no-op stand-in so later `from huggingface_hub import
    # cached_download` statements resolve instead of crashing.
    # NOTE(review): returning None is only safe if nothing actually calls
    # this function at runtime — confirm.
    huggingface_hub.cached_download = lambda *args, **kwargs: None
| import os | |
| import io | |
| import requests | |
| import pdfplumber | |
| import numpy as np | |
| import faiss | |
| import gradio as gr | |
| from sklearn.preprocessing import normalize | |
| from sentence_transformers import SentenceTransformer | |
# =========================================================
# β Global Variables
# =========================================================
DOCS = []  # list of {"text": chunk} dicts for the currently processed PDF
FAISS_INDEX = None  # FAISS index over DOCS embeddings; None until a PDF is processed
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # supplied via Space secrets
# =========================================================
# β Embedding Model Setup
# =========================================================
# Sentence-embedding model shared by document chunks and queries.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# =========================================================
# β Helper Functions
# =========================================================
def extract_text_from_pdf(file_bytes):
    """Extract plain text from a PDF supplied as raw bytes.

    Pages with no extractable text contribute an empty line; the final
    result has surrounding whitespace stripped.
    """
    page_texts = []
    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
        for page in pdf.pages:
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts).strip()
def chunk_text(text, chunk_size=700):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words."""
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
def embed_texts(texts):
    """Encode *texts* with the global embedder and return L2-normalized
    float32 vectors, one row per input string."""
    vectors = normalize(embedder.encode(texts))
    return np.asarray(vectors).astype("float32")
def build_faiss_index(embeddings):
    """Build an exact (brute-force) L2 FAISS index over the rows of *embeddings*."""
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index
def search_docs(query, k=4):
    """Return up to *k* chunk texts most similar to *query*.

    Falls back to a single warning string when no PDF has been processed.

    Bug fix: FAISS pads its result labels with -1 when the index holds
    fewer than *k* vectors; the original code indexed DOCS[-1] for those
    sentinels and silently returned the last chunk again. We cap *k* at
    the number of documents and filter out any remaining -1 labels.
    """
    global DOCS, FAISS_INDEX
    if not DOCS or FAISS_INDEX is None:
        return ["β οΈ Please upload and process a PDF first."]
    q_emb = embed_texts([query])
    _, labels = FAISS_INDEX.search(q_emb, min(k, len(DOCS)))
    return [DOCS[i]["text"] for i in labels[0] if i >= 0]
# =========================================================
# β GROQ API Chat Function
# =========================================================
def call_groq_chat(system_prompt, user_prompt):
    """Send a two-message chat completion request to the Groq API.

    Returns the assistant's reply text, or a human-readable error string
    on any failure (missing key, HTTP error, network problem).
    """
    if not GROQ_API_KEY:
        return "β οΈ Missing GROQ_API_KEY. Please set it in Hugging Face Space secrets."
    payload = {
        "model": "llama-3.1-8b-instant",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.3,
    }
    try:
        resp = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {GROQ_API_KEY}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=30,
        )
        # Map the most common failure codes to friendly messages.
        friendly = {
            401: "β Unauthorized: Invalid or missing Groq API key.",
            404: "β API endpoint or model not found.",
            429: "β οΈ Too many requests. Please try again later.",
        }
        if resp.status_code in friendly:
            return friendly[resp.status_code]
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"β Error contacting Groq API: {str(e)}"
# =========================================================
# β Process PDF
# =========================================================
def process_pdf(file_obj):
    """Generator: turn an uploaded PDF into a searchable FAISS index.

    Yields human-readable progress/status strings for the Gradio status
    box. On success, populates the module-level DOCS and FAISS_INDEX.
    """
    global DOCS, FAISS_INDEX
    if file_obj is None:
        yield "β οΈ Please upload a PDF first."
        return
    try:
        yield "π₯ Reading PDF..."
        # Gradio may hand us a dict payload, a file-like object, or a path,
        # depending on version/configuration — normalize all three to bytes.
        if isinstance(file_obj, dict) and "data" in file_obj:
            raw = file_obj["data"]
        elif hasattr(file_obj, "read"):
            raw = file_obj.read()
        elif isinstance(file_obj, str) and os.path.exists(file_obj):
            with open(file_obj, "rb") as fh:
                raw = fh.read()
        else:
            raw = None
        if raw is None:
            yield f"β Unsupported file type: {type(file_obj)}"
            return
        yield "βοΈ Extracting text..."
        text = extract_text_from_pdf(raw)
        if not text.strip():
            yield "β οΈ No extractable text found."
            return
        yield "π Splitting text into chunks..."
        chunks = chunk_text(text)
        yield "π§ Creating embeddings..."
        DOCS = [{"text": c} for c in chunks]
        embs = embed_texts([d["text"] for d in DOCS])
        yield "π¦ Building FAISS index..."
        FAISS_INDEX = build_faiss_index(embs)
        yield f"β Successfully processed {len(chunks)} chunks."
    except Exception as e:
        yield f"β Error processing PDF: {str(e)}"
# =========================================================
# β Answer Question
# =========================================================
def answer_question(query, history):
    """Answer *query* from the indexed PDF and append the turn to *history*.

    *history* is the gr.Chatbot list of [user, assistant] pairs; the
    updated list is returned.

    Bug fixes: the "no PDF" warning used to be appended as the *user*
    message with an empty assistant reply — it is now paired with the
    user's question as the bot's answer. Also tolerates history=None,
    which Gradio can pass before the first exchange.
    """
    history = history or []
    if not DOCS or FAISS_INDEX is None:
        return history + [[query, "β οΈ Please upload and process a PDF first."]]
    related = search_docs(query)
    context = "\n\n".join(related)
    system_prompt = "You are a helpful assistant answering based on the provided document."
    user_prompt = f"Document context:\n{context}\n\nUser question: {query}"
    answer = call_groq_chat(system_prompt, user_prompt)
    history.append([query, answer])
    return history
# =========================================================
# β UI Design (Modern Look)
# =========================================================
# Gradio Blocks layout: upload/process row, status box, then the chat area.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray"),
    css="""
    body {background: linear-gradient(135deg, #e3f2fd, #bbdefb);}
    .gradio-container {max-width: 900px !important; margin: auto;}
    .chatbox {height: 400px; overflow: auto; background: white; border-radius: 12px;
    box-shadow: 0 2px 10px rgba(0,0,0,0.1); padding: 10px;}
    .status-box {background: #f0f8ff; border-radius: 8px; padding: 10px; color: #333;}
    h1 {text-align:center; font-size: 2em; color: #0d47a1;}
    """
) as app:
    gr.Markdown("<h1>π AI PDF Q&A Assistant</h1><p style='text-align:center;'>Powered by Groq + FAISS + Gradio</p>")
    with gr.Row():
        pdf_file = gr.File(label="π Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("βοΈ Process PDF", variant="primary")
    status_box = gr.Textbox(label="π Status", elem_classes="status-box", interactive=False)
    # process_pdf is a generator, so the status box streams progress messages.
    process_btn.click(process_pdf, inputs=pdf_file, outputs=status_box)
    gr.Markdown("### π¬ Ask Questions About Your PDF")
    chatbot = gr.Chatbot(label="Chat", elem_classes="chatbox", bubble_full_width=False)
    query_box = gr.Textbox(label="Type your question here...")
    clear_btn = gr.Button("π§Ή Clear Chat")
    # Enter in the textbox sends the question; the chatbot holds the history.
    query_box.submit(answer_question, [query_box, chatbot], chatbot)
    # Clearing bypasses the queue for an instant reset of the chat widget.
    clear_btn.click(lambda: None, None, chatbot, queue=False)
# =========================================================
# β Launch
# =========================================================
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    app.launch()