#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
build_and_deploy_nitda_rag.py

Creates a Space-ready NITDA RAG project (Gradio app) and optionally uploads it
to Hugging Face Spaces.

Usage examples:
  # 1) Just create the project locally
  python build_and_deploy_nitda_rag.py --project nitda-rag

  # 2) Create + Deploy (requires HF_TOKEN env var with write access)
  export HF_TOKEN=hf_xxx_your_access_token
  python build_and_deploy_nitda_rag.py --project nitda-rag --space-id nwamgbowo/nitda-rag --deploy

After deployment, open:
  https://huggingface.co/spaces/nwamgbowo/nitda-rag
Then, in the app UI, click "Initialize (build index + load model)" and ask questions.
"""

import os
import sys
import argparse
from pathlib import Path
from textwrap import dedent

# ----------------------------
# File contents
# ----------------------------
# APP_PY is the full source of the Gradio app written into the project as
# app.py.  It is kept as a raw triple-quoted literal so the generated file is
# byte-stable; compile-checking it is covered by the test suite.
APP_PY = dedent(r'''
import os
import time
import shutil
import traceback
from typing import List

import gradio as gr

# Use LangChain community packages to avoid import drift
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import requests

# -----------------------------
# Config
# -----------------------------
DOCS_DIR = "data"        # where PDFs live inside the Space
DB_DIR = "nitda_db"      # Chroma persistence directory
TOP_K = 3
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150
CTX_LEN = 2048

# Primary model: Mistral-7B (GPU recommended; CPU Spaces may OOM)
PRIMARY_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
PRIMARY_FILE = "mistral-7b-instruct-v0.2.Q6_K.gguf"
PRIMARY_PARAMS = dict(
    n_ctx=CTX_LEN,
    n_threads=os.cpu_count() or 4,
    n_batch=256,
    n_gpu_layers=int(os.getenv("LLM_N_GPU_LAYERS", "0")),  # set >0 on GPU Space
    verbose=False
)

# Fallback: TinyLlama (CPU-friendly, reliable on CPU Spaces)
FALLBACK_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FALLBACK_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
FALLBACK_PARAMS = dict(
    n_ctx=CTX_LEN,
    n_threads=os.cpu_count() or 4,
    n_batch=128,
    n_gpu_layers=0,
    verbose=False
)

SYSTEM_MESSAGE = (
    "You are an AI assistant specialized in NITDA information retrieval. "
    "Answer strictly from the provided context (official NITDA documents). "
    "If the answer is not in the context, say you don't know."
)

# NOTE(review): section headers on their own lines reconstructed from the
# collapsed source — confirm against the original template layout.
QNA_TEMPLATE = """[SYSTEM]
{system}

[CONTEXT]
{context}

[USER QUESTION]
{question}

[ASSISTANT]
"""


# -----------------------------
# Auto-copy & seeding (STARTUP)
# -----------------------------
def list_pdfs(folder: str):
    """Return full paths of all *.pdf files in `folder` (creating it if needed)."""
    os.makedirs(folder, exist_ok=True)
    return [os.path.join(folder, f) for f in os.listdir(folder) if f.lower().endswith(".pdf")]


def seed_data_from_urls_if_empty():
    """
    If data/ has no PDFs and SEED_PDF_URLS is set (comma-separated URLs),
    download those PDFs into data/.  Returns the number of files downloaded.
    """
    os.makedirs(DOCS_DIR, exist_ok=True)
    existing = [f for f in os.listdir(DOCS_DIR) if f.lower().endswith(".pdf")]
    if existing:
        return 0
    urls = os.getenv("SEED_PDF_URLS", "").strip()
    if not urls:
        return 0
    count = 0
    for url in [u.strip() for u in urls.split(",") if u.strip()]:
        try:
            # Strip any query string before deriving a local filename.
            fname = os.path.basename(url.split("?")[0]) or "document.pdf"
            dst = os.path.join(DOCS_DIR, fname)
            r = requests.get(url, timeout=120)
            r.raise_for_status()
            with open(dst, "wb") as f:
                f.write(r.content)
            count += 1
            print(f"[seed] Downloaded: {dst}")
        except Exception as e:
            # Best-effort seeding: a failed URL must not break startup.
            print(f"[seed] Failed to download {url}: {e}")
    return count


def ensure_data_ready_and_reset_index_if_changed():
    """
    - Create data/
    - Copy PDFs from repo root into data/ if missing there
    - Optionally seed from URLs if data/ is empty
    - If anything changed, delete nitda_db/ to force reindex
    """
    os.makedirs(DOCS_DIR, exist_ok=True)
    before = set(os.listdir(DOCS_DIR))
    copied = 0
    # Copy *.pdf from root into data/
    for fname in os.listdir("."):
        if fname.lower().endswith(".pdf"):
            src = os.path.join(".", fname)
            dst = os.path.join(DOCS_DIR, fname)
            if not os.path.exists(dst):
                try:
                    shutil.copy2(src, dst)
                    copied += 1
                    print(f"[init] Copied root PDF → {dst}")
                except Exception as e:
                    print(f"[init] Could not copy {src} to {dst}: {e}")
    seeded = seed_data_from_urls_if_empty()
    after = set(os.listdir(DOCS_DIR))
    changed = (copied > 0) or (seeded > 0) or (before != after)
    # A stale index silently answers from old documents, so drop it whenever
    # the data/ folder contents changed.
    if changed and os.path.isdir(DB_DIR):
        try:
            shutil.rmtree(DB_DIR)
            print(f"[init] Removed old vector DB at {DB_DIR}/ (changed data/: {copied} copied, {seeded} seeded)")
        except Exception as e:
            print(f"[init] Could not remove {DB_DIR}/: {e}")


# Call once on import (top-level)
ensure_data_ready_and_reset_index_if_changed()


# -----------------------------
# Vector store builder/loader
# -----------------------------
def build_or_load_vectorstore():
    """Load existing Chroma DB if present; else build from PDFs in data/."""
    # Use persisted DB if present
    if os.path.isdir(DB_DIR) and os.listdir(DB_DIR):
        embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        return Chroma(persist_directory=DB_DIR, embedding_function=embeddings)

    pdfs = list_pdfs(DOCS_DIR)
    if not pdfs:
        raise FileNotFoundError(
            f"No PDFs found in '{DOCS_DIR}'. Upload PDFs to the 'data/' folder, "
            f"use the auto-copy (place PDFs in repo root), or set SEED_PDF_URLS."
        )

    # Load and chunk
    docs = []
    for p in pdfs:
        loader = PyMuPDFLoader(p)
        docs.extend(loader.load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_documents(docs)
    if not chunks:
        raise ValueError("No text chunks were generated from the PDFs. Are the files readable?")

    # Embed + persist
    embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vs = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=DB_DIR)
    vs.persist()
    return vs


# -----------------------------
# LLM loader (with fallback)
# -----------------------------
def load_llm():
    """
    Try to load primary (Mistral model). If it fails (OOM on CPU Space),
    fallback to TinyLlama automatically. You can force fallback by setting
    Space Variable USE_TINYLLAMA=1.
    """
    if os.getenv("USE_TINYLLAMA", "0") == "1":
        model_path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=model_path, **FALLBACK_PARAMS)
    try:
        model_path = hf_hub_download(repo_id=PRIMARY_REPO, filename=PRIMARY_FILE)
        return Llama(model_path=model_path, **PRIMARY_PARAMS)
    except Exception as e:
        print(f"[WARN] Primary model load failed: {e}. Falling back to TinyLlama.")
        model_path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=model_path, **FALLBACK_PARAMS)


def render_context(docs):
    """Format retrieved documents as numbered, source-tagged context blocks."""
    parts = []
    for i, d in enumerate(docs, 1):
        meta = d.metadata or {}
        src = meta.get("source", "document")
        page = meta.get("page", None)
        tag = f"{src}" + (f" (page {page})" if page is not None else "")
        parts.append(f"[{i}] {tag}\n{d.page_content}")
    return "\n\n".join(parts)


def generate_answer(question, retriever, llm):
    """Retrieve context for `question` and generate a grounded answer with `llm`."""
    if not question.strip():
        return "Please enter a question."
    try:
        hits = retriever.get_relevant_documents(question)
        if not hits:
            return "I couldn't find relevant context in the documents."
        context = render_context(hits)
        prompt = QNA_TEMPLATE.format(system=SYSTEM_MESSAGE, context=context, question=question.strip())
        out = llm(
            prompt=prompt,
            max_tokens=512,
            temperature=0.2,
            top_p=0.95,
            repeat_penalty=1.1,
            # FIX: the stop list contained an empty string "", which matches at
            # offset 0 of any completion and aborts generation immediately
            # (every answer became "The model returned no text.").  Restored
            # the EOS token "</s>" that was evidently stripped.
            stop=["</s>", "[USER QUESTION]", "[SYSTEM]"]
        )
        return out.get("choices", [{}])[0].get("text", "").strip() or "The model returned no text."
    except Exception as e:
        return f"Error generating answer:\n{e}\n\n{traceback.format_exc()}"


# -----------------------------
# Gradio App (lazy init)
# -----------------------------
with gr.Blocks(title="NITDA RAG Assistant") as demo:
    gr.Markdown("## NITDA RAG Assistant\nAsk questions based on official NITDA documents in the `data/` folder.")

    retriever_state = gr.State(None)
    llm_state = gr.State(None)

    status = gr.Markdown("**Status:** Not initialized.")
    init_btn = gr.Button("Initialize (build index + load model)")

    def init_resources():
        """Build/load the index and the LLM; return new state plus a status line."""
        t0 = time.time()
        vs = build_or_load_vectorstore()
        retriever = vs.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})
        llm = load_llm()
        dt = time.time() - t0
        return retriever, llm, f"**Status:** Ready in {dt:.1f}s."

    # Pass the function directly; the old `lambda: init_resources()` wrapper
    # added nothing.
    init_btn.click(fn=init_resources, inputs=None, outputs=[retriever_state, llm_state, status])

    q = gr.Textbox(label="Your question", placeholder="Ask about NITDA...", lines=2)
    a = gr.Markdown()
    ask_btn = gr.Button("Ask")

    def on_ask(question, retriever, llm):
        """Answer `question`, or prompt the user to initialize first."""
        if retriever is None or llm is None:
            return "Please click **Initialize (build index + load model)** first."
        return generate_answer(question, retriever, llm)

    ask_btn.click(on_ask, inputs=[q, retriever_state, llm_state], outputs=[a])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
''').strip() + "\n"

# Pinned dependency set for the generated Space.
REQUIREMENTS_TXT = dedent(r'''
# UI
gradio==4.37.2

# LLM runtime
llama-cpp-python==0.2.60
huggingface_hub==0.23.5

# LangChain stable community integrations
langchain==0.1.16
langchain-community==0.0.34
langchain-text-splitters==0.0.1

# Vector DB + embeddings
chromadb==0.4.24
sentence-transformers==2.7.0

# PDF loader
pymupdf==1.23.26

# Utils
numpy==1.26.4
pandas==2.1.4
requests==2.32.3
''').strip() + "\n"

# Pins the Space's Python runtime.
RUNTIME_TXT = "python-3.10\n"

# README placed inside the project's data/ folder.
DATA_README = dedent(r'''
# Data folder

Place your NITDA PDFs here.

Example filenames:
- NITDA-ACT-2007-2019-Edition1.pdf
- Digital-Literacy-Framework.pdf
- FrameworkAndGuidelinesForPublicInternetAccessPIA1.pdf
- NATIONAL-REGULATORY-GUIDELINE-FOR-ELECTRONIC-INVOICING-IN-NIGERIA-2025.pdf
''').strip() + "\n"


def write_project(project_dir: Path):
    """Write app.py, requirements.txt, runtime.txt and data/README.md into `project_dir`.

    Creates the directory tree as needed and prints the files written.
    """
    project_dir.mkdir(parents=True, exist_ok=True)
    (project_dir / "app.py").write_text(APP_PY, encoding="utf-8")
    (project_dir / "requirements.txt").write_text(REQUIREMENTS_TXT, encoding="utf-8")
    (project_dir / "runtime.txt").write_text(RUNTIME_TXT, encoding="utf-8")
    data_dir = project_dir / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    (data_dir / "README.md").write_text(DATA_README, encoding="utf-8")
    print(f"āœ… Wrote project to: {project_dir.resolve()}")
    for p in ["app.py", "requirements.txt", "runtime.txt", "data/README.md"]:
        print(" -", project_dir / p)


def deploy_to_space(project_dir: Path, space_id: str, private: bool = False):
    """Deploy the folder to a Hugging Face Space (SDK: Gradio). Requires HF_TOKEN env var.

    Raises RuntimeError when HF_TOKEN is missing.  Creation of an already
    existing Space is tolerated (logged, not fatal).
    """
    # Imported lazily so merely creating the project does not require the hub SDK.
    from huggingface_hub import HfApi, create_repo, login

    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError(
            "HF_TOKEN not set. Create a token at https://huggingface.co/settings/tokens and `export HF_TOKEN=...`"
        )
    login(token=token)
    try:
        create_repo(repo_id=space_id, repo_type="space", space_sdk="gradio", private=private)
        print(f"šŸ†• Created Space: {space_id}")
    except Exception as e:
        # create_repo raises if the Space already exists; deployment proceeds anyway.
        print(f"ā„¹ļø Space exists or cannot be created: {e}")

    api = HfApi()
    api.upload_folder(
        folder_path=str(project_dir),
        repo_id=space_id,
        repo_type="space",
        commit_message="Deploy NITDA RAG",
        ignore_patterns=[".git", "__pycache__", "*.ipynb_checkpoints*"],
    )
    print(f"āœ… Uploaded. Space: https://huggingface.co/spaces/{space_id}")
    print(f"   App URL: https://{space_id.replace('/', '-')}.hf.space")


def main():
    """CLI entry point: create the project locally and optionally deploy it."""
    parser = argparse.ArgumentParser(description="Create and optionally deploy a NITDA RAG app to Hugging Face Spaces.")
    parser.add_argument("--project", required=True, help="Local project directory to create (e.g., nitda-rag)")
    parser.add_argument("--space-id", help="Hugging Face Space ID (e.g., nwamgbowo/nitda-rag)")
    parser.add_argument("--deploy", action="store_true", help="Upload the project to the specified Space")
    parser.add_argument("--private", action="store_true", help="Create the Space as private (default: public)")
    args = parser.parse_args()

    project_dir = Path(args.project).resolve()
    write_project(project_dir)

    if args.deploy:
        if not args.space_id:
            print("āŒ --deploy requires --space-id (e.g., --space-id nwamgbowo/nitda-rag)")
            sys.exit(2)
        deploy_to_space(project_dir, args.space_id, private=args.private)
        print("\nšŸ”” After the Space is Running:")
        print(" 1) Upload PDFs to the data/ folder (or rely on auto-copy from root / URL seeding).")
        print(" 2) Click 'Initialize (build index + load model)'.")
        print(" 3) Ask questions.")
        print("\nšŸ’” CPU Space tip: If Mistral fails to load, set Space Variable USE_TINYLLAMA=1 to force TinyLlama.\n")
    else:
        print("\nšŸš€ To run locally:")
        print(f" cd {project_dir}")
        print(" pip install -r requirements.txt")
        print(" python app.py")
        print("\nšŸ“Œ Then open http://localhost:7860 and click 'Initialize (build index + load model)'.")
        print("šŸ“‚ Put your PDFs under the data/ folder (or in repo root; auto-copy will handle it).")


if __name__ == "__main__":
    main()