nwamgbowo commited on
Commit
94f335b
·
verified ·
1 Parent(s): 26a7ff5

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +252 -33
src/streamlit_app.py CHANGED
@@ -1,40 +1,259 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
 
6
  """
7
- # Welcome to Streamlit!
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ))
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
 
 
3
 
4
  """
5
+ build_and_deploy_nitda_rag.py
6
 
7
+ Creates a Space-ready NITDA RAG project (Gradio app) and optionally uploads it to Hugging Face Spaces.
 
 
8
 
9
+ Usage examples:
10
+ # 1) Just create the project locally
11
+ python build_and_deploy_nitda_rag.py --project nitda-rag
12
+
13
+ # 2) Create + Deploy (requires HF_TOKEN env var with write access)
14
+ export HF_TOKEN=hf_xxx_your_access_token
15
+ python build_and_deploy_nitda_rag.py --project nitda-rag --space-id nwamgbowo/nitda-rag --deploy
16
+
17
+ After deployment, open:
18
+ https://huggingface.co/spaces/nwamgbowo/nitda-rag
19
+
20
+ Then, in the app UI, click "Initialize (build index + load model)" and ask questions.
21
  """
22
 
23
+ import os
24
+ import sys
25
+ import argparse
26
+ from pathlib import Path
27
+ from textwrap import dedent
28
+
29
+ # ----------------------------
30
+ # File contents
31
+ # ----------------------------
32
+ APP_PY = dedent(r'''
33
+ import os
34
+ import time
35
+ import traceback
36
+ from typing import List
37
+
38
+ import gradio as gr
39
+
40
+ # Use LangChain community packages to avoid import drift
41
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
42
+ from langchain_community.document_loaders import PyMuPDFLoader
43
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
44
+ from langchain_community.vectorstores import Chroma
45
+
46
+ from huggingface_hub import hf_hub_download
47
+ from llama_cpp import Llama
48
+
49
+ # -----------------------------
50
+ # Config
51
+ # -----------------------------
52
+ DOCS_DIR = "data" # where PDFs live inside the Space
53
+ DB_DIR = "nitda_db" # Chroma persistence directory
54
+
55
+ TOP_K = 3
56
+ CHUNK_SIZE = 1000
57
+ CHUNK_OVERLAP = 150
58
+ CTX_LEN = 2048
59
+
60
+ # Primary model: Mistral-7B (GPU recommended; CPU Spaces may OOM)
61
+ PRIMARY_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
62
+ PRIMARY_FILE = "mistral-7b-instruct-v0.2.Q6_K.gguf"
63
+ PRIMARY_PARAMS = dict(
64
+ n_ctx=CTX_LEN,
65
+ n_threads=os.cpu_count() or 4,
66
+ n_batch=256,
67
+ n_gpu_layers=int(os.getenv("LLM_N_GPU_LAYERS", "0")), # set >0 on GPU Space
68
+ verbose=False
69
+ )
70
+
71
+ # Fallback: TinyLlama (CPU-friendly, reliable on CPU Spaces)
72
+ FALLBACK_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
73
+ FALLBACK_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
74
+ FALLBACK_PARAMS = dict(
75
+ n_ctx=CTX_LEN,
76
+ n_threads=os.cpu_count() or 4,
77
+ n_batch=128,
78
+ n_gpu_layers=0,
79
+ verbose=False
80
+ )
81
+
82
+ SYSTEM_MESSAGE = (
83
+ "You are an AI assistant specialized in NITDA information retrieval. "
84
+ "Answer strictly from the provided context (official NITDA documents). "
85
+ "If the answer is not in the context, say you don't know."
86
+ )
87
+
88
+ QNA_TEMPLATE = """[SYSTEM]
89
+ {system}
90
+
91
+ [CONTEXT]
92
+ {context}
93
+
94
+ [USER QUESTION]
95
+ {question}
96
+
97
+ [ASSISTANT]
98
+ """
99
+
100
+ # -----------------------------
101
+ # Helpers
102
+ # -----------------------------
103
def list_pdfs(folder: str):
    """Return paths of all PDF files directly inside *folder*.

    The folder is created first if missing, so a fresh checkout yields an
    empty list instead of an OSError. Matching is case-insensitive on the
    ".pdf" suffix.
    """
    os.makedirs(folder, exist_ok=True)
    pdf_paths = []
    for entry in os.listdir(folder):
        if entry.lower().endswith(".pdf"):
            pdf_paths.append(os.path.join(folder, entry))
    return pdf_paths
106
+
107
def build_or_load_vectorstore():
    """Return a Chroma vector store, reusing a persisted index when possible.

    If DB_DIR already holds index files, open it directly. Otherwise split
    every PDF found in DOCS_DIR into overlapping chunks, embed them with
    MiniLM, and persist a new index into DB_DIR.

    Raises:
        FileNotFoundError: when no index exists and DOCS_DIR contains no PDFs.
    """
    if os.path.isdir(DB_DIR) and os.listdir(DB_DIR):
        # Fast path: an index persisted by a previous run is on disk.
        return Chroma(
            persist_directory=DB_DIR,
            embedding_function=SentenceTransformerEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            ),
        )

    pdf_paths = list_pdfs(DOCS_DIR)
    if not pdf_paths:
        raise FileNotFoundError(f"No PDFs found in '{DOCS_DIR}'. Upload your PDFs to the 'data/' folder.")

    # Load every page of every PDF before splitting.
    pages = []
    for path in pdf_paths:
        pages.extend(PyMuPDFLoader(path).load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        embedding=SentenceTransformerEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        ),
        persist_directory=DB_DIR,
    )
    store.persist()
    return store
129
+
130
def load_llm():
    """Load the GGUF chat model via llama-cpp.

    Tries the primary Mistral-7B build first and falls back to TinyLlama
    when loading fails (e.g. OOM on a CPU Space). Setting the Space
    variable USE_TINYLLAMA=1 forces the fallback from the start.
    """
    def _fallback():
        # TinyLlama is small enough to load reliably on CPU-only Spaces.
        path = hf_hub_download(repo_id=FALLBACK_REPO, filename=FALLBACK_FILE)
        return Llama(model_path=path, **FALLBACK_PARAMS)

    if os.getenv("USE_TINYLLAMA", "0") == "1":
        return _fallback()

    try:
        path = hf_hub_download(repo_id=PRIMARY_REPO, filename=PRIMARY_FILE)
        return Llama(model_path=path, **PRIMARY_PARAMS)
    except Exception as e:
        print(f"[WARN] Primary model load failed: {e}. Falling back to TinyLlama.")
        return _fallback()
147
+
148
def render_context(docs):
    """Format retrieved documents into a numbered, source-tagged context block.

    Each entry renders as "[n] source (page p)\\n<content>"; the page suffix
    is omitted when the loader supplied no page metadata, and a missing
    source falls back to the literal "document".
    """
    def _entry(index, doc):
        metadata = doc.metadata or {}
        source = metadata.get("source", "document")
        page = metadata.get("page")
        label = f"{source} (page {page})" if page is not None else f"{source}"
        return f"[{index}] {label}\n{doc.page_content}"

    return "\n\n".join(_entry(i, d) for i, d in enumerate(docs, 1))
157
+
158
def generate_answer(question, retriever, llm):
    """Answer *question* from context retrieved out of the vector store.

    Args:
        question: User question; None or blank input is rejected politely
            (fix: the original called ``question.strip()`` before the
            try-block, so a None question raised an uncaught AttributeError).
        retriever: LangChain retriever exposing ``get_relevant_documents``.
        llm: llama-cpp ``Llama`` instance used for completion.

    Returns:
        The model's answer text, or a human-readable status/error string —
        this function never raises, so the UI always gets something to show.
    """
    # Guard before the try-block: an empty question is user input to reject,
    # not an internal failure to report with a traceback.
    if not question or not question.strip():
        return "Please enter a question."
    try:
        hits = retriever.get_relevant_documents(question)
        if not hits:
            return "I couldn't find relevant context in the documents."
        context = render_context(hits)
        prompt = QNA_TEMPLATE.format(system=SYSTEM_MESSAGE, context=context, question=question.strip())

        out = llm(
            prompt=prompt,
            max_tokens=512,
            temperature=0.2,
            top_p=0.95,
            repeat_penalty=1.1,
            # Stop on the section markers so the model can't hallucinate a new turn.
            stop=["</s>", "[USER QUESTION]", "[SYSTEM]"]
        )
        text = out.get("choices", [{}])[0].get("text", "").strip()
        return text or "The model returned no text."
    except Exception as e:
        # Surface the full traceback in the UI instead of crashing the app.
        return f"Error generating answer:\n{e}\n\n{traceback.format_exc()}"
179
+
180
+ # -----------------------------
181
+ # Gradio App (lazy init)
182
+ # -----------------------------
183
with gr.Blocks(title="NITDA RAG Assistant") as demo:
    gr.Markdown("## NITDA RAG Assistant\nAsk questions based on official NITDA documents in the `data/` folder.")

    # Heavy resources are created lazily on button click and kept in
    # per-session state so the Space boots fast.
    retriever_state = gr.State(None)
    llm_state = gr.State(None)

    status = gr.Markdown("**Status:** Not initialized.")
    init_btn = gr.Button("Initialize (build index + load model)")

    def init_resources():
        """Build/load the vector index and the LLM; report elapsed time."""
        started = time.time()
        store = build_or_load_vectorstore()
        retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})
        llm = load_llm()
        elapsed = time.time() - started
        return retriever, llm, f"**Status:** Ready in {elapsed:.1f}s."

    init_btn.click(fn=init_resources, inputs=None, outputs=[retriever_state, llm_state, status])

    q = gr.Textbox(label="Your question", placeholder="Ask about NITDA...", lines=2)
    a = gr.Markdown()
    ask_btn = gr.Button("Ask")

    def on_ask(question, retriever, llm):
        """Refuse to answer until both retriever and model are initialized."""
        if retriever is None or llm is None:
            return "Please click **Initialize (build index + load model)** first."
        return generate_answer(question, retriever, llm)

    ask_btn.click(on_ask, inputs=[q, retriever_state, llm_state], outputs=[a])

if __name__ == "__main__":
    # 0.0.0.0:7860 is the host/port Hugging Face Spaces routes traffic to.
    demo.launch(server_name="0.0.0.0", server_port=7860)
215
+ ''').strip() + "\n"
216
+
217
+ REQUIREMENTS_TXT = dedent(r'''
218
+ # UI
219
+ gradio==4.37.2
220
+
221
+ # LLM runtime
222
+ llama-cpp-python==0.2.60
223
+ huggingface_hub==0.23.5
224
+
225
+ # LangChain stable community integrations
226
+ langchain==0.1.16
227
+ langchain-community==0.0.34
228
+ langchain-text-splitters==0.0.1
229
+
230
+ # Vector DB + embeddings
231
+ chromadb==0.4.24
232
+ sentence-transformers==2.7.0
233
+
234
+ # PDF loader
235
+ pymupdf==1.23.26
236
+
237
+ # Utils
238
+ numpy==1.26.4
239
+ pandas==2.1.4
240
+ ''').strip() + "\n"
241
+
242
+ RUNTIME_TXT = "python-3.10\n"
243
+
244
+ DATA_README = dedent(r'''
245
+ # Data folder
246
+
247
+
248
+ Place your NITDA PDFs here, or pass them to the deploy script with `--pdf` flags, for example:
249
+
250
+ python build_and_deploy_nitda_rag.py \
251
+ --space-id nwamgbowo/nitda-rag \
252
+ --pdf "/path/to/NITDA-ACT-2007-2019-Edition1.pdf" \
253
+ --pdf "/path/to/Digital-Literacy-Framework.pdf" \
254
+ --pdf "/path/to/FrameworkAndGuidelinesForPublicInternetAccessPIA1.pdf" \
255
+ --pdf "/path/to/NATIONAL-REGULATORY-GUIDELINE-FOR-ELECTRONIC-INVOICING-IN-NIGERIA-2025.pdf"
256
+
257
+
258
+ ''').strip() + "\n"
259
  ))