Spaces:

amarshiv86
/

doc-qa-rag

Sleeping

App Files Files Community

doc-qa-rag / app.py

amarshiv86

ci: deploy app.py

ef01804 verified about 2 months ago

raw

history blame contribute delete

7.04 kB

	import os
	import requests
	import gradio as gr
	from pypdf import PdfReader
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_chroma import Chroma
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_core.documents import Document

	# ── Config ────────────────────────────────────────────────────
	# Model name passed to HuggingFaceEmbeddings below.
	EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
	LLM_MODEL = "google/flan-t5-large" # free, no special permissions needed
	# Directory handed to Chroma as its persist_directory.
	CHROMA_DIR = "/tmp/chroma_db"
	# Optional API token read from the environment; when it is empty,
	# call_llm() sends its request without an Authorization header.
	HF_TOKEN = os.getenv("HF_TOKEN", "")
	# Endpoint call_llm() posts prompts to, derived from LLM_MODEL.
	HF_API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"

	# ── Embeddings ────────────────────────────────────────────────
	# The embedding model is built once at import time and shared by every
	# upload; the prints bracket the load so progress is visible on stdout.
	print("Loading embedding model...")
	embeddings = HuggingFaceEmbeddings(
	    model_name=EMBED_MODEL,
	    # Run the model on CPU.
	    model_kwargs={"device": "cpu"},
	    # Ask the encoder to normalize the embedding vectors it returns.
	    encode_kwargs={"normalize_embeddings": True},
	)
	print("Embeddings ready ✓")

	# ── State ─────────────────────────────────────────────────────
	# Module-level state shared by all handlers (rebound via `global`).
	# vectorstore: the Chroma store built by process_pdf(); None until a PDF
	# has been processed and again after clear_db().
	vectorstore = None
	# current_doc: base file name of the PDF that was indexed last, or None.
	current_doc = None

	# ── LLM call via HF Inference API (classic, no router) ────────
	def call_llm(prompt: str) -> str:
	    """Send ``prompt`` to the hosted LLM endpoint and return its text.

	    Args:
	        prompt: Full prompt text posted as the ``inputs`` field.

	    Returns:
	        The stripped ``generated_text`` of the first item when the endpoint
	        answers with a list of dicts, an empty string when that list is
	        empty or the field is missing/null, and ``str(result)`` for any
	        other JSON payload.

	    Raises:
	        requests.HTTPError: If the endpoint answers with an error status.
	        requests.RequestException: On connection problems or after the
	            30 second timeout.
	    """
	    # Only send an Authorization header when a token is configured.
	    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
	    payload = {
	        "inputs": prompt,
	        "parameters": {
	            "max_new_tokens": 256,
	            # NOTE(review): presumably has no effect while do_sample is
	            # False -- confirm against the endpoint's documentation.
	            "temperature": 0.3,
	            "do_sample": False,
	        },
	    }
	    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)
	    resp.raise_for_status()
	    result = resp.json()
	    if isinstance(result, list):
	        # An empty list used to raise IndexError on result[0].
	        if not result:
	            return ""
	        first = result[0]
	        if isinstance(first, dict):
	            # `or ""` also covers an explicit null generated_text, which
	            # used to raise AttributeError on .strip().
	            return str(first.get("generated_text") or "").strip()
	    # Anything else (a dict, a list of non-dicts, ...) is shown verbatim.
	    return str(result)

	# ── PDF processing ────────────────────────────────────────────
	def process_pdf(pdf_file) -> str:
	    """Extract, chunk and index an uploaded PDF, replacing any earlier one.

	    Args:
	        pdf_file: Value of the ``gr.File`` input: either an object whose
	            ``name`` attribute is the path of the uploaded file, or the
	            path itself. ``None`` when nothing was uploaded.

	    Returns:
	        A status message for the UI: a success line with page and chunk
	        counts, a hint when no file or no extractable text was found, or
	        an error line containing the exception text. Exceptions are never
	        propagated to the caller.
	    """
	    global vectorstore, current_doc
	    if pdf_file is None:
	        return "Please upload a PDF file."
	    try:
	        # Accept both a file-like upload object and a plain path string.
	        pdf_path = getattr(pdf_file, "name", pdf_file)
	        reader = PdfReader(pdf_path)
	        # `or ""` guards against pages for which no text comes back.
	        text = "".join(
	            (page.extract_text() or "") + "\n" for page in reader.pages
	        )
	        if not text.strip():
	            return "Could not extract text from this PDF."
	        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
	        chunks = splitter.split_text(text)
	        docs = [Document(page_content=c) for c in chunks]
	        # Chroma.from_documents adds to the default collection stored in
	        # CHROMA_DIR. Without dropping that collection first, chunks from
	        # previously uploaded PDFs stay retrievable and leak into answers
	        # about the new document.
	        Chroma(
	            embedding_function=embeddings,
	            persist_directory=CHROMA_DIR,
	        ).delete_collection()
	        # The old handle now points at a deleted collection; forget it so
	        # a failure below cannot leave a stale store behind.
	        vectorstore = None
	        current_doc = None
	        vectorstore = Chroma.from_documents(
	            documents=docs,
	            embedding=embeddings,
	            persist_directory=CHROMA_DIR,
	        )
	        current_doc = os.path.basename(pdf_path)
	        return f"✅ Processed {current_doc} — {len(reader.pages)} pages, {len(chunks)} chunks indexed. Ready!"
	    except Exception as e:
	        # UI boundary: report the failure as a status message instead of
	        # crashing the Gradio handler.
	        return f"❌ Error: {str(e)}"

	# ── RAG query ─────────────────────────────────────────────────
	def answer_question(question, history):
	    """Answer ``question`` from the indexed PDF and extend the chat history.

	    Args:
	        question: Text from the question box; ``None`` or whitespace-only
	            input is ignored.
	        history: Current chat history as a list of
	            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated
	            as an empty history.

	    Returns:
	        A ``(history, "")`` tuple: the history (extended by the user turn
	        and the assistant reply unless the question was blank) and an
	        empty string that clears the question box. Failures during
	        retrieval or generation are reported as the assistant reply
	        rather than raised.
	    """
	    # A None history used to raise TypeError on `history + [...]`.
	    history = history or []
	    if not question or not question.strip():
	        return history, ""

	    if vectorstore is None:
	        reply = "⚠️ Please upload a PDF first."
	    else:
	        try:
	            # Retrieve the three chunks most similar to the question.
	            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
	            relevant_docs = retriever.invoke(question)
	            context = "\n\n".join(d.page_content for d in relevant_docs)

	            prompt = f"""Answer the question based on the context below.
	If the answer is not in the context, say "I couldn't find that in the document."

	Context: {context}

	Question: {question}
	Answer:"""

	            reply = call_llm(prompt)
	            # flan-t5 returns only the answer, strip the prompt if echoed
	            if "Answer:" in reply:
	                reply = reply.split("Answer:")[-1].strip()
	        except Exception as e:
	            # UI boundary: surface the failure in the chat instead of
	            # crashing the Gradio handler.
	            reply = f"❌ Error: {str(e)}"

	    # Single place where a question/answer pair is appended.
	    history = history + [
	        {"role": "user", "content": question},
	        {"role": "assistant", "content": reply},
	    ]
	    return history, ""

	def clear_chat():
	    """Return values that empty the chat panel and the question box."""
	    empty_history = []
	    empty_question = ""
	    return empty_history, empty_question

	def clear_db():
	    """Forget the indexed document and reset the UI.

	    Besides resetting the module state, the persisted Chroma collection is
	    deleted. Otherwise its chunks would remain on disk and be mixed into
	    the index of the next uploaded PDF.

	    Returns:
	        A ``(status, history, question)`` tuple: the status message, an
	        empty chat history and an empty question box value.
	    """
	    global vectorstore, current_doc
	    if vectorstore is not None:
	        try:
	            vectorstore.delete_collection()
	        except Exception as e:
	            # Best effort: clearing the UI state must still succeed even
	            # if the on-disk collection cannot be removed.
	            print(f"Could not delete Chroma collection: {e}")
	    vectorstore = None
	    current_doc = None
	    return "🗑️ Document cleared.", [], ""

	# ── UI ────────────────────────────────────────────────────────
	# Two-column layout: upload controls and help text on the left, the chat
	# on the right. `demo` is the object launched by the __main__ guard.
	with gr.Blocks(title="Document Q&A · RAG") as demo:
	    gr.Markdown("""
	# 📄 Document Q&A — RAG Pipeline
	Upload a PDF and ask questions. Powered by LangChain + ChromaDB + Flan-T5.
	""")

	    with gr.Row():
	        # Left column: PDF upload, processing status and usage notes.
	        with gr.Column(scale=1):
	            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
	            upload_btn = gr.Button("→ Process PDF", variant="primary")
	            # Shows the status string returned by process_pdf / clear_db.
	            upload_status = gr.Markdown("Upload a PDF to get started.")
	            clear_btn = gr.Button("🗑️ Clear document", variant="secondary")
	            gr.Markdown("""
	### How it works
	1. Upload any PDF document
	2. Text is chunked and embedded into ChromaDB
	3. Your question retrieves the most relevant chunks
	4. Flan-T5 generates an answer from those chunks

	### Tips
	- Ask specific questions about the document
	- Works best with text-based PDFs
	- Try: "What is the main topic?"
	""")

	        # Right column (twice as wide): chat history and question input.
	        with gr.Column(scale=2):
	            # NOTE(review): the handlers return history as role/content
	            # dicts; confirm the installed Gradio version's Chatbot accepts
	            # that format by default (some releases need type="messages").
	            chatbot = gr.Chatbot(label="Chat", height=450)
	            question = gr.Textbox(
	                label="Ask a question about the document",
	                placeholder="e.g. What are the main conclusions?",
	                lines=2,
	            )
	            with gr.Row():
	                ask_btn = gr.Button("→ Ask", variant="primary")
	                clear_chat_btn = gr.Button("Clear chat")

	    # Event wiring. Both the Ask button and submitting the textbox run the
	    # same handler, which returns the new history and clears the textbox.
	    upload_btn.click(fn=process_pdf, inputs=pdf_input, outputs=upload_status)
	    ask_btn.click(fn=answer_question, inputs=[question, chatbot], outputs=[chatbot, question])
	    question.submit(fn=answer_question, inputs=[question, chatbot], outputs=[chatbot, question])
	    clear_chat_btn.click(fn=clear_chat, outputs=[chatbot, question])
	    # Clearing the document also resets the status line and the chat.
	    clear_btn.click(fn=clear_db, outputs=[upload_status, chatbot, question])

	    gr.Markdown("---\nPart of the [AI Engineer Portfolio](https://github.com/amarshiv86)")

	# Start the Gradio server only when the file is executed as a script;
	# it listens on all interfaces ("0.0.0.0") on port 7860.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860)