Spaces:

nwamgbowo
/

RAG-Assistant

Running

App Files Files Community

RAG-Assistant / src /streamlit_app.py

nwamgbowo

Update src/streamlit_app.py

8c26925 verified 43 minutes ago

raw

history blame contribute delete

14.4 kB


	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	"""
	build_and_deploy_nitda_rag.py

	Creates a Space-ready NITDA RAG project (Gradio app) and optionally uploads it to Hugging Face Spaces.

	Usage examples:
	# 1) Just create the project locally
	python build_and_deploy_nitda_rag.py --project nitda-rag

	# 2) Create + Deploy (requires HF_TOKEN env var with write access)
	export HF_TOKEN=hf_xxx_your_access_token
	python build_and_deploy_nitda_rag.py --project nitda-rag --space-id nwamgbowo/nitda-rag --deploy

	After deployment, open:
	https://huggingface.co/spaces/nwamgbowo/nitda-rag

	Then, in the app UI, click "Initialize (build index + load model)" and ask questions.
	"""

	import os
	import sys
	import argparse
	from pathlib import Path
	from textwrap import dedent

	# ----------------------------
	# File contents
	# ----------------------------
	APP_PY = dedent(r'''
	import os
	import time
	import shutil
	import traceback
	from typing import List

	import gradio as gr

	# Use LangChain community packages to avoid import drift
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import PyMuPDFLoader
	from langchain_community.embeddings import SentenceTransformerEmbeddings
	from langchain_community.vectorstores import Chroma

	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	import requests

	# -----------------------------
	# Config
	# -----------------------------
	# Folder inside the Space that holds the source PDFs.
	DOCS_DIR = "data"
	# Folder where the Chroma index is persisted.
	DB_DIR = "nitda_db"

	# Retrieval and chunking settings.
	TOP_K = 3
	CHUNK_SIZE = 1000
	CHUNK_OVERLAP = 150
	# Passed to llama.cpp as n_ctx for both models.
	CTX_LEN = 2048

	# Primary model: Mistral-7B (GPU recommended; CPU Spaces may OOM)
	PRIMARY_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
	PRIMARY_FILE = "mistral-7b-instruct-v0.2.Q6_K.gguf"
	PRIMARY_PARAMS = {
	    "n_ctx": CTX_LEN,
	    "n_threads": os.cpu_count() or 4,
	    "n_batch": 256,
	    # Set the LLM_N_GPU_LAYERS variable to a value > 0 on a GPU Space.
	    "n_gpu_layers": int(os.getenv("LLM_N_GPU_LAYERS", "0")),
	    "verbose": False,
	}

	# Fallback: TinyLlama (CPU-friendly, reliable on CPU Spaces)
	FALLBACK_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
	FALLBACK_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
	FALLBACK_PARAMS = {
	    "n_ctx": CTX_LEN,
	    "n_threads": os.cpu_count() or 4,
	    "n_batch": 128,
	    "n_gpu_layers": 0,
	    "verbose": False,
	}

	# Instruction placed in the [SYSTEM] slot of every prompt.
	SYSTEM_MESSAGE = (
	    "You are an AI assistant specialized in NITDA information retrieval. "
	    "Answer strictly from the provided context (official NITDA documents). "
	    "If the answer is not in the context, say you don't know."
	)

	# Prompt skeleton; filled with str.format(system=..., context=..., question=...).
	QNA_TEMPLATE = """[SYSTEM]
	{system}

	[CONTEXT]
	{context}

	[USER QUESTION]
	{question}

	[ASSISTANT]
	"""

	# -----------------------------
	# Auto-copy & seeding (STARTUP)
	# -----------------------------
	def list_pdfs(folder: str) -> List[str]:
	    """Return the PDF files directly inside *folder*, in sorted order.

	    The folder is created if it does not exist. Matching is case-insensitive
	    on the ``.pdf`` suffix. Only regular files are returned, so a directory
	    that happens to be named ``something.pdf`` is not handed to the PDF loader.

	    Args:
	        folder: Directory to scan (not recursive).

	    Returns:
	        Paths of the form ``os.path.join(folder, name)``, sorted by name so
	        that the result does not depend on the order ``os.listdir`` yields.
	    """
	    os.makedirs(folder, exist_ok=True)
	    candidates = (os.path.join(folder, name) for name in sorted(os.listdir(folder)))
	    return [
	        path
	        for path in candidates
	        if path.lower().endswith(".pdf") and os.path.isfile(path)
	    ]

	def seed_data_from_urls_if_empty():
	    """Download seed PDFs into data/ when it holds no PDFs yet.

	    Does nothing if data/ already contains at least one ``*.pdf`` entry or if
	    the SEED_PDF_URLS environment variable (comma-separated URLs) is unset
	    or empty. Each download is best-effort: a failure is printed and the
	    next URL is tried.

	    Returns:
	        Number of files successfully downloaded (0 when nothing was done).
	    """
	    from urllib.parse import unquote, urlsplit

	    os.makedirs(DOCS_DIR, exist_ok=True)
	    if any(name.lower().endswith(".pdf") for name in os.listdir(DOCS_DIR)):
	        return 0

	    urls = [u.strip() for u in os.getenv("SEED_PDF_URLS", "").split(",") if u.strip()]
	    count = 0
	    for url in urls:
	        # Use only the URL path (no query or fragment) and decode %xx escapes.
	        fname = os.path.basename(unquote(urlsplit(url).path)) or "document.pdf"
	        # Files without a .pdf suffix would be saved but never indexed.
	        if not fname.lower().endswith(".pdf"):
	            fname += ".pdf"
	        dst = os.path.join(DOCS_DIR, fname)
	        tmp = dst + ".part"
	        try:
	            # Stream to a temporary name so a failed transfer never leaves
	            # a truncated PDF behind, and large files are not held in memory.
	            with requests.get(url, timeout=120, stream=True) as r:
	                r.raise_for_status()
	                with open(tmp, "wb") as f:
	                    for chunk in r.iter_content(chunk_size=1 << 20):
	                        f.write(chunk)
	            os.replace(tmp, dst)
	        except (requests.RequestException, OSError) as e:
	            print(f"[seed] Failed to download {url}: {e}")
	            if os.path.exists(tmp):
	                os.remove(tmp)
	            continue
	        count += 1
	        print(f"[seed] Downloaded: {dst}")
	    return count

	def ensure_data_ready_and_reset_index_if_changed():
	    """Prepare data/ and drop the vector DB when its contents changed.

	    Steps:
	    - make sure data/ exists;
	    - copy every ``*.pdf`` found in the current directory into data/ unless
	      a file of that name is already there;
	    - call seed_data_from_urls_if_empty();
	    - if a file was copied or seeded, or the data/ listing differs from the
	      one taken at the start, delete nitda_db/ so the index is rebuilt.

	    Copy and delete failures are printed, not raised.
	    """
	    os.makedirs(DOCS_DIR, exist_ok=True)
	    listing_before = set(os.listdir(DOCS_DIR))

	    # Mirror root-level PDFs that data/ does not have yet.
	    copied = 0
	    root_pdfs = [name for name in os.listdir(".") if name.lower().endswith(".pdf")]
	    for name in root_pdfs:
	        src = os.path.join(".", name)
	        dst = os.path.join(DOCS_DIR, name)
	        if os.path.exists(dst):
	            continue
	        try:
	            shutil.copy2(src, dst)
	            copied += 1
	            print(f"[init] Copied root PDF → {dst}")
	        except Exception as e:
	            print(f"[init] Could not copy {src} to {dst}: {e}")

	    seeded = seed_data_from_urls_if_empty()

	    listing_after = set(os.listdir(DOCS_DIR))
	    changed = copied > 0 or seeded > 0 or listing_before != listing_after
	    if not (changed and os.path.isdir(DB_DIR)):
	        return

	    try:
	        shutil.rmtree(DB_DIR)
	        print(f"[init] Removed old vector DB at {DB_DIR}/ (changed data/: {copied} copied, {seeded} seeded)")
	    except Exception as e:
	        print(f"[init] Could not remove {DB_DIR}/: {e}")

	# Runs once, at import time of the generated app module.
	ensure_data_ready_and_reset_index_if_changed()

	# -----------------------------
	# Vector store builder/loader
	# -----------------------------
	def build_or_load_vectorstore():
	    """Return a Chroma store: reopen nitda_db/ if non-empty, else build it.

	    Building loads every PDF returned by list_pdfs(DOCS_DIR), splits the
	    documents with CHUNK_SIZE / CHUNK_OVERLAP, embeds the chunks and
	    persists the result to DB_DIR.

	    Raises:
	        FileNotFoundError: data/ contains no PDFs.
	        ValueError: the PDFs produced no text chunks.
	    """
	    def make_embeddings():
	        # Same embedding model for loading and for building.
	        return SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

	    index_exists = os.path.isdir(DB_DIR) and bool(os.listdir(DB_DIR))
	    if index_exists:
	        return Chroma(persist_directory=DB_DIR, embedding_function=make_embeddings())

	    pdf_paths = list_pdfs(DOCS_DIR)
	    if not pdf_paths:
	        raise FileNotFoundError(
	            f"No PDFs found in '{DOCS_DIR}'. Upload PDFs to the 'data/' folder, "
	            f"use the auto-copy (place PDFs in repo root), or set SEED_PDF_URLS."
	        )

	    # Load every PDF, then cut the loaded documents into chunks.
	    loaded = []
	    for path in pdf_paths:
	        loaded += PyMuPDFLoader(path).load()

	    chunker = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
	    chunks = chunker.split_documents(loaded)
	    if not chunks:
	        raise ValueError("No text chunks were generated from the PDFs. Are the files readable?")

	    # Embed the chunks and write the index to DB_DIR.
	    store = Chroma.from_documents(
	        documents=chunks,
	        embedding=make_embeddings(),
	        persist_directory=DB_DIR,
	    )
	    store.persist()
	    return store

	# -----------------------------
	# LLM loader (with fallback)
	# -----------------------------
	def load_llm():
	    """Download and load the LLM, preferring Mistral and falling back to TinyLlama.

	    When the environment variable USE_TINYLLAMA is "1" the fallback model is
	    loaded directly. Otherwise the primary model is tried first; if that
	    raises, a warning is printed and the fallback model is loaded instead.

	    Returns:
	        A ``Llama`` instance.
	    """
	    def fetch_and_load(repo_id, filename, params):
	        local_path = hf_hub_download(repo_id=repo_id, filename=filename)
	        return Llama(model_path=local_path, **params)

	    if os.getenv("USE_TINYLLAMA", "0") != "1":
	        try:
	            return fetch_and_load(PRIMARY_REPO, PRIMARY_FILE, PRIMARY_PARAMS)
	        except Exception as e:
	            print(f"[WARN] Primary model load failed: {e}. Falling back to TinyLlama.")
	            return fetch_and_load(FALLBACK_REPO, FALLBACK_FILE, FALLBACK_PARAMS)

	    return fetch_and_load(FALLBACK_REPO, FALLBACK_FILE, FALLBACK_PARAMS)

	def render_context(docs):
	    """Format retrieved documents as numbered, source-tagged text blocks.

	    Each document becomes ``[i] <source>[ (page <page>)]`` followed by its
	    ``page_content`` on the next line; blocks are separated by a blank line.
	    A missing "source" metadata entry is shown as ``document``, and the
	    page suffix is omitted when there is no "page" entry.
	    """
	    def label(doc):
	        info = doc.metadata or {}
	        source = info.get("source", "document")
	        page_no = info.get("page", None)
	        if page_no is None:
	            return f"{source}"
	        return f"{source} (page {page_no})"

	    return "\n\n".join(
	        f"[{n}] {label(doc)}\n{doc.page_content}" for n, doc in enumerate(docs, 1)
	    )

	def generate_answer(question, retriever, llm):
	    """Answer *question* from retrieved context using *llm*.

	    Args:
	        question: User text; ``None`` or whitespace-only counts as empty.
	        retriever: Object exposing ``get_relevant_documents(str)``.
	        llm: Callable accepting ``prompt=...`` plus sampling keyword
	            arguments and returning a mapping with a "choices" list.

	    Returns:
	        The model's stripped text, or a short human-readable message when the
	        question is empty, nothing relevant was retrieved, the model
	        produced no text, or an exception occurred (the message then
	        includes the traceback).
	    """
	    # Guard against None as well as blank input before touching the retriever.
	    cleaned = (question or "").strip()
	    if not cleaned:
	        return "Please enter a question."
	    try:
	        hits = retriever.get_relevant_documents(cleaned)
	        if not hits:
	            return "I couldn't find relevant context in the documents."
	        context = render_context(hits)
	        prompt = QNA_TEMPLATE.format(system=SYSTEM_MESSAGE, context=context, question=cleaned)

	        out = llm(
	            prompt=prompt,
	            max_tokens=512,
	            temperature=0.2,
	            top_p=0.95,
	            repeat_penalty=1.1,
	            # Stop before the model starts inventing another prompt section.
	            stop=["</s>", "[USER QUESTION]", "[SYSTEM]"],
	        )
	        # Tolerate a missing or empty "choices" list and a None "text" value.
	        choices = out.get("choices") or [{}]
	        text = (choices[0].get("text") or "").strip()
	        return text or "The model returned no text."
	    except Exception as e:
	        return f"Error generating answer:\n{e}\n\n{traceback.format_exc()}"

	# -----------------------------
	# Gradio App (lazy init)
	# -----------------------------
	# Process-wide cache. gr.State values are per browser session, so without
	# this every session (and every repeated click) would load its own model.
	_SHARED_RESOURCES = {}

	with gr.Blocks(title="NITDA RAG Assistant") as demo:
	    gr.Markdown("## NITDA RAG Assistant\nAsk questions based on official NITDA documents in the `data/` folder.")

	    retriever_state = gr.State(None)
	    llm_state = gr.State(None)

	    status = gr.Markdown("Status: Not initialized.")
	    init_btn = gr.Button("Initialize (build index + load model)")

	    def init_resources():
	        """Build/load the index and model once; return (retriever, llm, status text).

	        On failure the two state values are returned as None and the error
	        is shown in the status line instead of being lost.
	        """
	        t0 = time.time()
	        try:
	            if not _SHARED_RESOURCES:
	                vs = build_or_load_vectorstore()
	                retriever = vs.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})
	                llm = load_llm()
	                # Publish only after both parts loaded successfully.
	                _SHARED_RESOURCES.update(retriever=retriever, llm=llm)
	        except Exception as e:
	            print(traceback.format_exc())
	            return None, None, f"Status: Initialization failed: {e}"
	        dt = time.time() - t0
	        return _SHARED_RESOURCES["retriever"], _SHARED_RESOURCES["llm"], f"Status: Ready in {dt:.1f}s."

	    init_btn.click(fn=init_resources, inputs=None, outputs=[retriever_state, llm_state, status])

	    q = gr.Textbox(label="Your question", placeholder="Ask about NITDA...", lines=2)
	    a = gr.Markdown()
	    ask_btn = gr.Button("Ask")

	    def on_ask(question, retriever, llm):
	        """Answer the question, or ask the user to initialize first."""
	        if retriever is None or llm is None:
	            return "Please click Initialize (build index + load model) first."
	        return generate_answer(question, retriever, llm)

	    ask_btn.click(on_ask, inputs=[q, retriever_state, llm_state], outputs=[a])

	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860)
	''').strip() + "\n"

	# Content of the generated requirements.txt (written by write_project).
	# The "#" lines inside the literal are part of that file, not of this script.
	REQUIREMENTS_TXT = dedent(r'''
	# UI
	gradio==4.37.2

	# LLM runtime
	llama-cpp-python==0.2.60
	huggingface_hub==0.23.5

	# LangChain stable community integrations
	langchain==0.1.16
	langchain-community==0.0.34
	langchain-text-splitters==0.0.1

	# Vector DB + embeddings
	chromadb==0.4.24
	sentence-transformers==2.7.0

	# PDF loader
	pymupdf==1.23.26

	# Utils
	numpy==1.26.4
	pandas==2.1.4
	requests==2.32.3
	''').strip() + "\n"

	# Content of the generated runtime.txt (written by write_project).
	# NOTE(review): confirm the target platform actually reads runtime.txt.
	RUNTIME_TXT = "python-3.10\n"

	# Content of the generated data/README.md (written by write_project).
	DATA_README = dedent(r'''
	# Data folder

	Place your NITDA PDFs here. Example filenames:
	- NITDA-ACT-2007-2019-Edition1.pdf
	- Digital-Literacy-Framework.pdf
	- FrameworkAndGuidelinesForPublicInternetAccessPIA1.pdf
	- NATIONAL-REGULATORY-GUIDELINE-FOR-ELECTRONIC-INVOICING-IN-NIGERIA-2025.pdf
	''').strip() + "\n"


	def write_project(project_dir: Path):
	    """Write the generated project files under *project_dir*.

	    Creates the directory (and its ``data/`` subfolder) if needed, writes
	    app.py, requirements.txt, runtime.txt and data/README.md as UTF-8, then
	    prints the resolved project path followed by each written file.
	    """
	    # Relative path -> file content, in the order the files are reported.
	    generated = {
	        "app.py": APP_PY,
	        "requirements.txt": REQUIREMENTS_TXT,
	        "runtime.txt": RUNTIME_TXT,
	        "data/README.md": DATA_README,
	    }

	    project_dir.mkdir(parents=True, exist_ok=True)
	    for rel_path, content in generated.items():
	        target = project_dir / rel_path
	        target.parent.mkdir(parents=True, exist_ok=True)
	        target.write_text(content, encoding="utf-8")

	    print(f"✅ Wrote project to: {project_dir.resolve()}")
	    for rel_path in generated:
	        print(" -", project_dir / rel_path)

	def deploy_to_space(project_dir: Path, space_id: str, private: bool = False):
	    """Deploy the folder to a Hugging Face Space (SDK: Gradio).

	    Args:
	        project_dir: Local folder whose contents are uploaded.
	        space_id: Target Space, e.g. ``user/space-name``.
	        private: Create the Space as private when it does not exist yet.

	    Raises:
	        RuntimeError: the HF_TOKEN environment variable is not set.

	    Errors from the Hub (for example an invalid token) propagate from the
	    ``create_repo`` / ``upload_folder`` calls instead of being reported as
	    "Space exists".
	    """
	    from huggingface_hub import HfApi, create_repo, login

	    token = os.getenv("HF_TOKEN")
	    if not token:
	        raise RuntimeError("HF_TOKEN not set. Create a token at https://huggingface.co/settings/tokens and `export HF_TOKEN=...`")
	    login(token=token)

	    # exist_ok=True makes "already exists" a no-op while still surfacing
	    # real failures such as bad credentials or an invalid repo id.
	    create_repo(
	        repo_id=space_id,
	        repo_type="space",
	        space_sdk="gradio",
	        private=private,
	        exist_ok=True,
	    )
	    print(f"🆕 Space ready (created or already exists): {space_id}")

	    api = HfApi()
	    api.upload_folder(
	        folder_path=str(project_dir),
	        repo_id=space_id,
	        repo_type="space",
	        commit_message="Deploy NITDA RAG",
	        ignore_patterns=[".git", "__pycache__", ".ipynb_checkpoints"],
	    )
	    print(f"✅ Uploaded. Space: https://huggingface.co/spaces/{space_id}")
	    print(f" App URL: https://{space_id.replace('/', '-')}.hf.space")

	def main(argv=None):
	    """Parse CLI arguments, write the project, and optionally deploy it.

	    Args:
	        argv: Argument list to parse. ``None`` (the default) makes argparse
	            read ``sys.argv[1:]``, which is what the script entry point uses.

	    Exits with status 2 (via ``parser.error``) when ``--deploy`` is given
	    without ``--space-id``. The check runs before anything is written to
	    disk, so an invalid invocation has no side effects.
	    """
	    parser = argparse.ArgumentParser(description="Create and optionally deploy a NITDA RAG app to Hugging Face Spaces.")
	    parser.add_argument("--project", required=True, help="Local project directory to create (e.g., nitda-rag)")
	    parser.add_argument("--space-id", help="Hugging Face Space ID (e.g., nwamgbowo/nitda-rag)")
	    parser.add_argument("--deploy", action="store_true", help="Upload the project to the specified Space")
	    parser.add_argument("--private", action="store_true", help="Create the Space as private (default: public)")
	    args = parser.parse_args(argv)

	    # Validate the argument combination first; parser.error() exits with 2.
	    if args.deploy and not args.space_id:
	        parser.error("--deploy requires --space-id (e.g., --space-id nwamgbowo/nitda-rag)")

	    project_dir = Path(args.project).resolve()
	    write_project(project_dir)

	    if args.deploy:
	        deploy_to_space(project_dir, args.space_id, private=args.private)
	        print("\n🔔 After the Space is Running:")
	        print(" 1) Upload PDFs to the data/ folder (or rely on auto-copy from root / URL seeding).")
	        print(" 2) Click 'Initialize (build index + load model)'.")
	        print(" 3) Ask questions.")
	        print("\n💡 CPU Space tip: If Mistral fails to load, set Space Variable USE_TINYLLAMA=1 to force TinyLlama.\n")
	    else:
	        print("\n🚀 To run locally:")
	        print(f" cd {project_dir}")
	        print(" pip install -r requirements.txt")
	        print(" python app.py")
	        print("\n📌 Then open http://localhost:7860 and click 'Initialize (build index + load model)'.")
	        print("📂 Put your PDFs under the data/ folder (or in repo root; auto-copy will handle it).")

	if __name__ == "__main__":
	    main()