#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
build_and_deploy_nitda_rag.py

Creates a Space-ready NITDA RAG project (Gradio app) and optionally uploads it to Hugging Face Spaces.

Usage examples:
  # 1) Just create the project locally
  python build_and_deploy_nitda_rag.py --project nitda-rag

  # 2) Create + Deploy (requires HF_TOKEN env var with write access)
  export HF_TOKEN=hf_xxx_your_access_token
  python build_and_deploy_nitda_rag.py --project nitda-rag --space-id nwamgbowo/nitda-rag --deploy

After deployment, open:
  https://huggingface.co/spaces/nwamgbowo/nitda-rag

Then, in the app UI, click "Initialize (build index + load model)" and ask questions.
"""

import os
import sys
import argparse
from pathlib import Path
from textwrap import dedent

# ----------------------------
# File contents
# ----------------------------
APP_PY = dedent(r'''
import os
import time
import shutil
import traceback

import gradio as gr

# Use LangChain community packages to avoid import drift
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import requests

# -----------------------------
# Config
# -----------------------------
DOCS_DIR = "data"       # where PDFs live inside the Space
DB_DIR = "nitda_db"     # Chroma persistence directory

TOP_K = 3
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150
CTX_LEN = 2048

# Primary model: Mistral-7B (GPU recommended; CPU Spaces may OOM)
PRIMARY_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
PRIMARY_FILE = "mistral-7b-instruct-v0.2.Q6_K.gguf"
PRIMARY_PARAMS = dict(
    n_ctx=CTX_LEN,
    n_threads=os.cpu_count() or 4,
    n_batch=256,
    n_gpu_layers=int(os.getenv("LLM_N_GPU_LAYERS", "0")),  # set >0 on GPU Space
    verbose=False
)
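# GPU sketch (illustrative, not tuned): on a GPU Space, set the Space Variable
# LLM_N_GPU_LAYERS to offload layers; -1 offloads all of them (llama.cpp
# convention), which for Mistral-7B at Q6_K needs roughly 6-8 GB of VRAM.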

# Fallback: TinyLlama (CPU-friendly, reliable on CPU Spaces)
FALLBACK_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FALLBACK_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
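# Size note (approximate): the Q6_K Mistral GGUF is ~6 GB on disk, while the
# Q4_K_M TinyLlama GGUF is well under 1 GB, hence the CPU-Space fallback.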
FALLBACK_PARAMS = dict(
    n_ctx=CTX_LEN,
    n_threads=os.cpu_count() or 4,
    n_batch=128,
    n_gpu_layers=0,
    verbose=False
)

SYSTEM_MESSAGE = (
    "You are an AI assistant specialized in NITDA information retrieval. "
    "Answer strictly from the provided context (official NITDA documents). "
    "If the answer is not in the context, say you don't know."
)

QNA_TEMPLATE = """[SYSTEM]
{system}

[CONTEXT]
{context}

[USER QUESTION]
{question}

[ASSISTANT]
"""

# -----------------------------
# Auto-copy & seeding (STARTUP)
# -----------------------------
def list_pdfs(folder: str):
    os.makedirs(folder, exist_ok=True)
    return [os.path.join(folder, f) for f in os.listdir(folder) if f.lower().endswith(".pdf")]

def seed_data_from_urls_if_empty():
    """
    If data/ has no PDFs and SEED_PDF_URLS is set (comma-separated URLs),
    download those PDFs into data/.
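
    Example (hypothetical URLs), set as a Space Variable:
      SEED_PDF_URLS=https://example.org/nitda-act.pdf,https://example.org/guidelines.pdf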
    """
    os.makedirs(DOCS_DIR, exist_ok=True)
    existing = [f for f in os.listdir(DOCS_DIR) if f.lower().endswith(".pdf")]
    if existing:
        return 0

    urls = os.getenv("SEED_PDF_URLS", "").strip()
    if not urls:
        return 0

    count = 0
    for url in [u.strip() for u in urls.split(",") if u.strip()]:
        try:
            fname = os.path.basename(url.split("?")[0]) or "document.pdf"
            dst = os.path.join(DOCS_DIR, fname)
            r = requests.get(url, timeout=120)
            r.raise_for_status()
            with open(dst, "wb") as f:
                f.write(r.content)
            count += 1
            print(f"[seed] Downloaded: {dst}")
        except Exception as e:
            print(f"[seed] Failed to download {url}: {e}")
    return count

def ensure_data_ready_and_reset_index_if_changed():
    """
    - Create data/
    - Copy PDFs from repo root into data/ if missing there
    - Optionally seed from URLs if data/ is empty
    - If anything changed, delete nitda_db/ to force reindex
    """
    os.makedirs(DOCS_DIR, exist_ok=True)

    before = set(os.listdir(DOCS_DIR))
    copied = 0

    # Copy *.pdf from root into data/
    for fname in os.listdir("."):
        if fname.lower().endswith(".pdf"):
            src = os.path.join(".", fname)
            dst = os.path.join(DOCS_DIR, fname)
            if not os.path.exists(dst):
                try:
                    shutil.copy2(src, dst)
                    copied += 1
                    print(f"[init] Copied root PDF β†’ {dst}")
                except Exception as e:
                    print(f"[init] Could not copy {src} to {dst}: {e}")

    seeded = seed_data_from_urls_if_empty()

    after = set(os.listdir(DOCS_DIR))
    changed = (copied > 0) or (seeded > 0) or (before != after)

    if changed and os.path.isdir(DB_DIR):
        try:
            shutil.rmtree(DB_DIR)
            print(f"[init] Removed old vector DB at {DB_DIR}/ (changed data/: {copied} copied, {seeded} seeded)")
        except Exception as e:
            print(f"[init] Could not remove {DB_DIR}/: {e}")

# Call once on import (top-level)
ensure_data_ready_and_reset_index_if_changed()

# -----------------------------
# Vector store builder/loader
# -----------------------------
def build_or_load_vectorstore():
    """Load existing Chroma DB if present; else build from PDFs in data/."""
    # Use persisted DB if present
    if os.path.isdir(DB_DIR) and os.listdir(DB_DIR):
        embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        return Chroma(persist_directory=DB_DIR, embedding_function=embeddings)

    pdfs = list_pdfs(DOCS_DIR)
    if not pdfs:
        raise FileNotFoundError(
            f"No PDFs found in '{DOCS_DIR}'. Upload PDFs to the 'data/' folder, "
            f"use the auto-copy (place PDFs in repo root), or set SEED_PDF_URLS."
        )

    # Load and chunk
    docs = []
    for p in pdfs:
        loader = PyMuPDFLoader(p)
        docs.extend(loader.load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_documents(docs)

    if not chunks:
        raise ValueError("No text chunks were generated from the PDFs. Are the files readable?")

    # Embed + persist
    embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vs = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=DB_DIR)
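    # persist() should be a no-op on chromadb >= 0.4 (writes are automatic);
    # harmless to keep for compatibility with older chromadb clients.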
    vs.persist()
    return vs

# -----------------------------
# LLM loader (with fallback)
# -----------------------------
def load_llm():
    """
    Try to load the primary model (Mistral-7B). If it fails (e.g., OOM on a
    CPU Space), fall back to TinyLlama automatically. You can force the
    fallback by setting the Space Variable USE_TINYLLAMA=1.
    """
    if os.getenv("USE_TINYLLAMA", "0") == "1":
        model_path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=model_path, **FALLBACK_PARAMS)

    try:
        model_path = hf_hub_download(repo_id=PRIMARY_REPO, filename=PRIMARY_FILE)
        return Llama(model_path=model_path, **PRIMARY_PARAMS)
    except Exception as e:
        print(f"[WARN] Primary model load failed: {e}. Falling back to TinyLlama.")
        model_path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=model_path, **FALLBACK_PARAMS)

def render_context(docs):
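    # Builds numbered source blocks for the prompt, e.g. (illustrative):
    #   [1] data/NITDA-ACT-2007-2019-Edition1.pdf (page 3)
    #   <chunk text>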
    parts = []
    for i, d in enumerate(docs, 1):
        meta = d.metadata or {}
        src = meta.get("source", "document")
        page = meta.get("page", None)
        tag = f"{src}" + (f" (page {page})" if page is not None else "")
        parts.append(f"[{i}] {tag}\n{d.page_content}")
    return "\n\n".join(parts)

def generate_answer(question, retriever, llm):
    if not question.strip():
        return "Please enter a question."
    try:
        hits = retriever.get_relevant_documents(question)
        if not hits:
            return "I couldn't find relevant context in the documents."
        context = render_context(hits)
        prompt = QNA_TEMPLATE.format(system=SYSTEM_MESSAGE, context=context, question=question.strip())

        out = llm(
            prompt=prompt,
            max_tokens=512,
            temperature=0.2,
            top_p=0.95,
            repeat_penalty=1.1,
            stop=["</s>", "[USER QUESTION]", "[SYSTEM]"]
        )
        return out.get("choices", [{}])[0].get("text", "").strip() or "The model returned no text."
    except Exception as e:
        return f"Error generating answer:\n{e}\n\n{traceback.format_exc()}"

# -----------------------------
# Gradio App (lazy init)
# -----------------------------
with gr.Blocks(title="NITDA RAG Assistant") as demo:
    gr.Markdown("## NITDA RAG Assistant\nAsk questions based on official NITDA documents in the `data/` folder.")

    retriever_state = gr.State(None)
    llm_state = gr.State(None)
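    # gr.State holds the retriever/LLM as per-session Python objects, so the
    # heavy index build and model download run only when a user clicks Initialize.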

    status = gr.Markdown("**Status:** Not initialized.")
    init_btn = gr.Button("Initialize (build index + load model)")

    def init_resources():
        t0 = time.time()
        vs = build_or_load_vectorstore()
        retriever = vs.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})
        llm = load_llm()
        dt = time.time() - t0
        return retriever, llm, f"**Status:** Ready in {dt:.1f}s."

    init_btn.click(fn=init_resources, inputs=None, outputs=[retriever_state, llm_state, status])

    q = gr.Textbox(label="Your question", placeholder="Ask about NITDA...", lines=2)
    a = gr.Markdown()
    ask_btn = gr.Button("Ask")

    def on_ask(question, retriever, llm):
        if retriever is None or llm is None:
            return "Please click **Initialize (build index + load model)** first."
        return generate_answer(question, retriever, llm)

    ask_btn.click(on_ask, inputs=[q, retriever_state, llm_state], outputs=[a])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
''').strip() + "\n"

REQUIREMENTS_TXT = dedent(r'''
# UI
gradio==4.37.2

# LLM runtime
llama-cpp-python==0.2.60
huggingface_hub==0.23.5

# LangChain stable community integrations
langchain==0.1.16
langchain-community==0.0.34
langchain-text-splitters==0.0.1

# Vector DB + embeddings
chromadb==0.4.24
sentence-transformers==2.7.0

# PDF loader
pymupdf==1.23.26

# Utils
numpy==1.26.4
pandas==2.1.4
requests==2.32.3
''').strip() + "\n"
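# NOTE: the pins above are a mutually compatible set from the langchain 0.1.x
# era; if you bump one of the langchain packages, bump the others alongside it.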

RUNTIME_TXT = "python-3.10\n"

DATA_README = dedent(r'''
# Data folder

Place your NITDA PDFs here. Example filenames:
- NITDA-ACT-2007-2019-Edition1.pdf
- Digital-Literacy-Framework.pdf
- FrameworkAndGuidelinesForPublicInternetAccessPIA1.pdf
- NATIONAL-REGULATORY-GUIDELINE-FOR-ELECTRONIC-INVOICING-IN-NIGERIA-2025.pdf
''').strip() + "\n"


def write_project(project_dir: Path):
    project_dir.mkdir(parents=True, exist_ok=True)
    (project_dir / "app.py").write_text(APP_PY, encoding="utf-8")
    (project_dir / "requirements.txt").write_text(REQUIREMENTS_TXT, encoding="utf-8")
    (project_dir / "runtime.txt").write_text(RUNTIME_TXT, encoding="utf-8")
    data_dir = project_dir / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    (data_dir / "README.md").write_text(DATA_README, encoding="utf-8")
    print(f"βœ… Wrote project to: {project_dir.resolve()}")
    for p in ["app.py", "requirements.txt", "runtime.txt", "data/README.md"]:
        print("   -", project_dir / p)

def deploy_to_space(project_dir: Path, space_id: str, private: bool = False):
    """Deploy the folder to a Hugging Face Space (SDK: Gradio). Requires HF_TOKEN env var."""
    from huggingface_hub import HfApi, create_repo, login
    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set. Create a token at https://huggingface.co/settings/tokens and `export HF_TOKEN=...`")
    login(token=token)
    try:
        create_repo(repo_id=space_id, repo_type="space", space_sdk="gradio", private=private)
        print(f"πŸ†• Created Space: {space_id}")
    except Exception as e:
        print(f"ℹ️ Space exists or cannot be created: {e}")
    api = HfApi()
    api.upload_folder(
        folder_path=str(project_dir),
        repo_id=space_id,
        repo_type="space",
        commit_message="Deploy NITDA RAG",
        ignore_patterns=[".git", "__pycache__", "*.ipynb_checkpoints*"],
    )
    print(f"βœ… Uploaded. Space: https://huggingface.co/spaces/{space_id}")
    print(f"   App URL: https://{space_id.replace('/', '-')}.hf.space")

def main():
    parser = argparse.ArgumentParser(description="Create and optionally deploy a NITDA RAG app to Hugging Face Spaces.")
    parser.add_argument("--project", required=True, help="Local project directory to create (e.g., nitda-rag)")
    parser.add_argument("--space-id", help="Hugging Face Space ID (e.g., nwamgbowo/nitda-rag)")
    parser.add_argument("--deploy", action="store_true", help="Upload the project to the specified Space")
    parser.add_argument("--private", action="store_true", help="Create the Space as private (default: public)")
    args = parser.parse_args()

    project_dir = Path(args.project).resolve()
    write_project(project_dir)

    if args.deploy:
        if not args.space_id:
            print("❌ --deploy requires --space-id (e.g., --space-id nwamgbowo/nitda-rag)")
            sys.exit(2)
        deploy_to_space(project_dir, args.space_id, private=args.private)
        print("\nπŸ”” After the Space is Running:")
        print("   1) Upload PDFs to the data/ folder (or rely on auto-copy from root / URL seeding).")
        print("   2) Click 'Initialize (build index + load model)'.")
        print("   3) Ask questions.")
        print("\nπŸ’‘ CPU Space tip: If Mistral fails to load, set Space Variable USE_TINYLLAMA=1 to force TinyLlama.\n")
    else:
        print("\nπŸš€ To run locally:")
        print(f"   cd {project_dir}")
        print("   pip install -r requirements.txt")
        print("   python app.py")
        print("\nπŸ“Œ Then open http://localhost:7860 and click 'Initialize (build index + load model)'.")
        print("πŸ“‚ Put your PDFs under the data/ folder (or in repo root; auto-copy will handle it).")

if __name__ == "__main__":
    main()