# Source: RAG-Assistant / src/streamlit_app.py
# Author avatar: nwamgbowo
# Commit: Update src/streamlit_app.py
# Revision: 8c26925 (verified)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
build_and_deploy_nitda_rag.py

Creates a Space-ready NITDA RAG project (Gradio app) and optionally uploads
it to Hugging Face Spaces.

Usage examples:

    # 1) Just create the project locally
    python build_and_deploy_nitda_rag.py --project nitda-rag

    # 2) Create + deploy (requires an HF_TOKEN env var with write access)
    export HF_TOKEN=hf_xxx_your_access_token
    python build_and_deploy_nitda_rag.py --project nitda-rag --space-id nwamgbowo/nitda-rag --deploy

After deployment, open:

    https://huggingface.co/spaces/nwamgbowo/nitda-rag

Then, in the app UI, click "Initialize (build index + load model)" and ask questions.
"""
import os
import sys
import argparse
from pathlib import Path
from textwrap import dedent
# ----------------------------
# File contents
# ----------------------------
# Complete source of the generated Space entry point; write_project() saves
# this text verbatim as app.py. It is a raw string so that backslash escapes
# such as "\n" reach the generated file unchanged and are interpreted there.
# The text must stay valid, correctly indented Python at column 0: dedent()
# only strips common leading whitespace and strip() trims the outer newlines.
APP_PY = dedent(r'''
import os
import time
import shutil
import traceback
from typing import List

import gradio as gr

# Use LangChain community packages to avoid import drift
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import requests

# -----------------------------
# Config
# -----------------------------
DOCS_DIR = "data"  # where PDFs live inside the Space
DB_DIR = "nitda_db"  # Chroma persistence directory
TOP_K = 3
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150
CTX_LEN = 2048

# Primary model: Mistral-7B (GPU recommended; CPU Spaces may OOM)
PRIMARY_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
PRIMARY_FILE = "mistral-7b-instruct-v0.2.Q6_K.gguf"
PRIMARY_PARAMS = dict(
    n_ctx=CTX_LEN,
    n_threads=os.cpu_count() or 4,
    n_batch=256,
    n_gpu_layers=int(os.getenv("LLM_N_GPU_LAYERS", "0")),  # set >0 on GPU Space
    verbose=False
)

# Fallback: TinyLlama (CPU-friendly, reliable on CPU Spaces)
FALLBACK_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FALLBACK_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
FALLBACK_PARAMS = dict(
    n_ctx=CTX_LEN,
    n_threads=os.cpu_count() or 4,
    n_batch=128,
    n_gpu_layers=0,
    verbose=False
)

SYSTEM_MESSAGE = (
    "You are an AI assistant specialized in NITDA information retrieval. "
    "Answer strictly from the provided context (official NITDA documents). "
    "If the answer is not in the context, say you don't know."
)

QNA_TEMPLATE = """[SYSTEM]
{system}
[CONTEXT]
{context}
[USER QUESTION]
{question}
[ASSISTANT]
"""


# -----------------------------
# Auto-copy & seeding (STARTUP)
# -----------------------------
def list_pdfs(folder: str):
    os.makedirs(folder, exist_ok=True)
    return [os.path.join(folder, f) for f in os.listdir(folder) if f.lower().endswith(".pdf")]


def seed_data_from_urls_if_empty():
    """
    If data/ has no PDFs and SEED_PDF_URLS is set (comma-separated URLs),
    download those PDFs into data/.
    """
    os.makedirs(DOCS_DIR, exist_ok=True)
    existing = [f for f in os.listdir(DOCS_DIR) if f.lower().endswith(".pdf")]
    if existing:
        return 0
    urls = os.getenv("SEED_PDF_URLS", "").strip()
    if not urls:
        return 0
    count = 0
    for url in [u.strip() for u in urls.split(",") if u.strip()]:
        try:
            fname = os.path.basename(url.split("?")[0]) or "document.pdf"
            dst = os.path.join(DOCS_DIR, fname)
            r = requests.get(url, timeout=120)
            r.raise_for_status()
            with open(dst, "wb") as f:
                f.write(r.content)
            count += 1
            print(f"[seed] Downloaded: {dst}")
        except Exception as e:
            print(f"[seed] Failed to download {url}: {e}")
    return count


def ensure_data_ready_and_reset_index_if_changed():
    """
    - Create data/
    - Copy PDFs from repo root into data/ if missing there
    - Optionally seed from URLs if data/ is empty
    - If anything changed, delete nitda_db/ to force reindex
    """
    os.makedirs(DOCS_DIR, exist_ok=True)
    before = set(os.listdir(DOCS_DIR))
    copied = 0
    # Copy *.pdf from root into data/
    for fname in os.listdir("."):
        if fname.lower().endswith(".pdf"):
            src = os.path.join(".", fname)
            dst = os.path.join(DOCS_DIR, fname)
            if not os.path.exists(dst):
                try:
                    shutil.copy2(src, dst)
                    copied += 1
                    print(f"[init] Copied root PDF → {dst}")
                except Exception as e:
                    print(f"[init] Could not copy {src} to {dst}: {e}")
    seeded = seed_data_from_urls_if_empty()
    after = set(os.listdir(DOCS_DIR))
    changed = (copied > 0) or (seeded > 0) or (before != after)
    if changed and os.path.isdir(DB_DIR):
        try:
            shutil.rmtree(DB_DIR)
            print(f"[init] Removed old vector DB at {DB_DIR}/ (changed data/: {copied} copied, {seeded} seeded)")
        except Exception as e:
            print(f"[init] Could not remove {DB_DIR}/: {e}")


# Call once on import (top-level)
ensure_data_ready_and_reset_index_if_changed()


# -----------------------------
# Vector store builder/loader
# -----------------------------
def build_or_load_vectorstore():
    """Load existing Chroma DB if present; else build from PDFs in data/."""
    # Use persisted DB if present
    if os.path.isdir(DB_DIR) and os.listdir(DB_DIR):
        embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        return Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
    pdfs = list_pdfs(DOCS_DIR)
    if not pdfs:
        raise FileNotFoundError(
            f"No PDFs found in '{DOCS_DIR}'. Upload PDFs to the 'data/' folder, "
            f"use the auto-copy (place PDFs in repo root), or set SEED_PDF_URLS."
        )
    # Load and chunk
    docs = []
    for p in pdfs:
        loader = PyMuPDFLoader(p)
        docs.extend(loader.load())
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_documents(docs)
    if not chunks:
        raise ValueError("No text chunks were generated from the PDFs. Are the files readable?")
    # Embed + persist
    embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vs = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=DB_DIR)
    vs.persist()
    return vs


# -----------------------------
# LLM loader (with fallback)
# -----------------------------
def load_llm():
    """
    Try to load primary (Mistral model). If it fails (OOM on CPU Space),
    fallback to TinyLlama automatically. You can force fallback by setting
    Space Variable USE_TINYLLAMA=1.
    """
    if os.getenv("USE_TINYLLAMA", "0") == "1":
        model_path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=model_path, **FALLBACK_PARAMS)
    try:
        model_path = hf_hub_download(repo_id=PRIMARY_REPO, filename=PRIMARY_FILE)
        return Llama(model_path=model_path, **PRIMARY_PARAMS)
    except Exception as e:
        print(f"[WARN] Primary model load failed: {e}. Falling back to TinyLlama.")
        model_path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=model_path, **FALLBACK_PARAMS)


def render_context(docs):
    parts = []
    for i, d in enumerate(docs, 1):
        meta = d.metadata or {}
        src = meta.get("source", "document")
        page = meta.get("page", None)
        tag = f"{src}" + (f" (page {page})" if page is not None else "")
        parts.append(f"[{i}] {tag}\n{d.page_content}")
    return "\n\n".join(parts)


def generate_answer(question, retriever, llm):
    if not question.strip():
        return "Please enter a question."
    try:
        hits = retriever.get_relevant_documents(question)
        if not hits:
            return "I couldn't find relevant context in the documents."
        context = render_context(hits)
        prompt = QNA_TEMPLATE.format(system=SYSTEM_MESSAGE, context=context, question=question.strip())
        out = llm(
            prompt=prompt,
            max_tokens=512,
            temperature=0.2,
            top_p=0.95,
            repeat_penalty=1.1,
            stop=["</s>", "[USER QUESTION]", "[SYSTEM]"]
        )
        return out.get("choices", [{}])[0].get("text", "").strip() or "The model returned no text."
    except Exception as e:
        return f"Error generating answer:\n{e}\n\n{traceback.format_exc()}"


# -----------------------------
# Gradio App (lazy init)
# -----------------------------
with gr.Blocks(title="NITDA RAG Assistant") as demo:
    gr.Markdown("## NITDA RAG Assistant\nAsk questions based on official NITDA documents in the `data/` folder.")
    retriever_state = gr.State(None)
    llm_state = gr.State(None)
    status = gr.Markdown("**Status:** Not initialized.")
    init_btn = gr.Button("Initialize (build index + load model)")

    def init_resources():
        t0 = time.time()
        vs = build_or_load_vectorstore()
        retriever = vs.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})
        llm = load_llm()
        dt = time.time() - t0
        return retriever, llm, f"**Status:** Ready in {dt:.1f}s."

    init_btn.click(fn=init_resources, inputs=None, outputs=[retriever_state, llm_state, status])

    q = gr.Textbox(label="Your question", placeholder="Ask about NITDA...", lines=2)
    a = gr.Markdown()
    ask_btn = gr.Button("Ask")

    def on_ask(question, retriever, llm):
        if retriever is None or llm is None:
            return "Please click **Initialize (build index + load model)** first."
        return generate_answer(question, retriever, llm)

    ask_btn.click(on_ask, inputs=[q, retriever_state, llm_state], outputs=[a])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
''').strip() + "\n"
# Pinned dependency list for the generated project; write_project() saves this
# text as requirements.txt. The "#" lines below are part of the file content.
REQUIREMENTS_TXT = dedent(r'''
# UI
gradio==4.37.2
# LLM runtime
llama-cpp-python==0.2.60
huggingface_hub==0.23.5
# LangChain stable community integrations
langchain==0.1.16
langchain-community==0.0.34
langchain-text-splitters==0.0.1
# Vector DB + embeddings
chromadb==0.4.24
sentence-transformers==2.7.0
# PDF loader
pymupdf==1.23.26
# Utils
numpy==1.26.4
pandas==2.1.4
requests==2.32.3
''').strip() + "\n"
# Single-line content that write_project() saves as runtime.txt.
RUNTIME_TXT = "python-3.10\n"
# Placeholder Markdown that write_project() saves as data/README.md, telling
# the user where to put the PDFs.
DATA_README = dedent(r'''
# Data folder
Place your NITDA PDFs here. Example filenames:
- NITDA-ACT-2007-2019-Edition1.pdf
- Digital-Literacy-Framework.pdf
- FrameworkAndGuidelinesForPublicInternetAccessPIA1.pdf
- NATIONAL-REGULATORY-GUIDELINE-FOR-ELECTRONIC-INVOICING-IN-NIGERIA-2025.pdf
''').strip() + "\n"
def write_project(project_dir: Path) -> None:
    """Write the generated project files under ``project_dir``.

    Creates ``project_dir`` (including missing parents) and a ``data/``
    sub-folder, then writes ``app.py``, ``requirements.txt``, ``runtime.txt``
    and ``data/README.md`` from the module-level templates, replacing any
    existing files of the same name. Finishes by printing the resolved
    project path and the list of files written.

    Args:
        project_dir: Directory that will hold the generated project.
    """
    # Relative path -> file content; insertion order is also the print order.
    generated = {
        "app.py": APP_PY,
        "requirements.txt": REQUIREMENTS_TXT,
        "runtime.txt": RUNTIME_TXT,
        "data/README.md": DATA_README,
    }
    project_dir.mkdir(parents=True, exist_ok=True)
    for rel_path, content in generated.items():
        target = project_dir / rel_path
        # Covers the data/ sub-folder; a no-op for files in the project root.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")

    print(f"✅ Wrote project to: {project_dir.resolve()}")
    for rel_path in generated:
        print(" -", project_dir / rel_path)
def deploy_to_space(project_dir: Path, space_id: str, private: bool = False) -> None:
    """Deploy the folder to a Hugging Face Space (SDK: Gradio).

    Reads the access token from the ``HF_TOKEN`` environment variable, logs
    in, tries to create the Space, and uploads the contents of
    ``project_dir`` to it.

    Args:
        project_dir: Local folder whose contents are uploaded.
        space_id: Target Space identifier, e.g. ``"user/space-name"``.
        private: Passed to ``create_repo`` when creating the Space.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is unset or empty.
    """
    # Check the credential first so a missing token is reported as such,
    # even on machines where huggingface_hub is not installed.
    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set. Create a token at https://huggingface.co/settings/tokens and `export HF_TOKEN=...`")

    # Imported lazily: only the --deploy path needs huggingface_hub.
    from huggingface_hub import HfApi, create_repo, login

    login(token=token)
    try:
        create_repo(repo_id=space_id, repo_type="space", space_sdk="gradio", private=private)
        print(f"🆕 Created Space: {space_id}")
    except Exception as e:
        # Best effort: report the failure and still attempt the upload,
        # which is what the message below tells the user.
        print(f"ℹ️ Space exists or cannot be created: {e}")

    api = HfApi()
    api.upload_folder(
        folder_path=str(project_dir),
        repo_id=space_id,
        repo_type="space",
        commit_message="Deploy NITDA RAG",
        ignore_patterns=[".git", "__pycache__", "*.ipynb_checkpoints*"],
    )
    print(f"✅ Uploaded. Space: https://huggingface.co/spaces/{space_id}")
    print(f" App URL: https://{space_id.replace('/', '-')}.hf.space")
def main():
    """Command-line entry point: create the project and optionally deploy it.

    Parses ``--project`` (required), ``--space-id``, ``--deploy`` and
    ``--private`` from the command line. The project is written to the
    resolved ``--project`` directory; with ``--deploy`` it is then uploaded to
    the Space named by ``--space-id``. Follow-up instructions are printed for
    whichever path was taken.

    Exits with status 2 when ``--deploy`` is given without ``--space-id``.
    """
    parser = argparse.ArgumentParser(description="Create and optionally deploy a NITDA RAG app to Hugging Face Spaces.")
    parser.add_argument("--project", required=True, help="Local project directory to create (e.g., nitda-rag)")
    parser.add_argument("--space-id", help="Hugging Face Space ID (e.g., nwamgbowo/nitda-rag)")
    parser.add_argument("--deploy", action="store_true", help="Upload the project to the specified Space")
    parser.add_argument("--private", action="store_true", help="Create the Space as private (default: public)")
    args = parser.parse_args()

    # Reject an unusable flag combination before anything is written to disk,
    # so a failed invocation leaves no half-finished project behind.
    if args.deploy and not args.space_id:
        print("❌ --deploy requires --space-id (e.g., --space-id nwamgbowo/nitda-rag)")
        sys.exit(2)

    project_dir = Path(args.project).resolve()
    write_project(project_dir)

    if args.deploy:
        deploy_to_space(project_dir, args.space_id, private=args.private)
        print("\n🔔 After the Space is Running:")
        print(" 1) Upload PDFs to the data/ folder (or rely on auto-copy from root / URL seeding).")
        print(" 2) Click 'Initialize (build index + load model)'.")
        print(" 3) Ask questions.")
        print("\n💡 CPU Space tip: If Mistral fails to load, set Space Variable USE_TINYLLAMA=1 to force TinyLlama.\n")
    else:
        print("\n🚀 To run locally:")
        print(f" cd {project_dir}")
        print(" pip install -r requirements.txt")
        print(" python app.py")
        print("\n📌 Then open http://localhost:7860 and click 'Initialize (build index + load model)'.")
        print("📂 Put your PDFs under the data/ folder (or in repo root; auto-copy will handle it).")


if __name__ == "__main__":
    main()