ajayinsac commited on
Commit
0b055a7
·
verified ·
1 Parent(s): 22f921e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +674 -0
app.py ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ VMware On-Prem → Azure Local Migration Assistant (Gradio)
4
+
5
+ Features
6
+ - FAQ / approach Q&A with trusted-source citations (links)
7
+ - Upload & index PDF/DOCX/TXT (session-local)
8
+ - Lightweight RAG (TF-IDF over chunks)
9
+ - Design/Runbook auto-review with rubric (0–5) + gaps + fixes
10
+ - All Hugging Face Spaces friendly (no share=True, no GPU deps, no external APIs)
11
+
12
+ Author: you
13
+ """
14
+
15
+ import os
16
+ import io
17
+ import re
18
+ import json
19
+ import time
20
+ from typing import List, Tuple, Dict, Any
21
+
22
+ import gradio as gr
23
+ from sklearn.feature_extraction.text import TfidfVectorizer
24
+ from sklearn.metrics.pairwise import cosine_similarity
25
+
26
+ # -------- Optional, small footprint parsers --------
27
+ # PDF
28
+ try:
29
+ from pypdf import PdfReader
30
+ except Exception:
31
+ PdfReader = None
32
+
33
+ # DOCX
34
+ try:
35
+ import docx
36
+ except Exception:
37
+ docx = None
38
+
39
+
40
# =========================
# Trusted Sources (Allowlist)
# =========================
# (display name, URL) pairs. Rendered as a link list on the
# "Trusted Sources & Ontology" tab, and looked up by exact display name in
# list_refs() to build the citation links appended to answers.
TRUSTED_SOURCES = [
    # Microsoft Learn / Docs
    ("Azure VMware Solution (AVS)", "https://learn.microsoft.com/azure/azure-vmware/"),
    ("Azure Migrate", "https://learn.microsoft.com/azure/migrate/"),
    ("Azure Stack HCI / Azure Local", "https://learn.microsoft.com/azure-stack/"),
    ("Cloud Adoption Framework (CAF)", "https://learn.microsoft.com/azure/cloud-adoption-framework/"),
    ("Azure Well-Architected Framework (WAF)", "https://learn.microsoft.com/azure/well-architected/"),
    # VMware
    ("VMware HCX Docs", "https://docs.vmware.com/en/VMware-HCX/"),
    ("VMware vSphere Docs", "https://docs.vmware.com/en/VMware-vSphere/index.html"),
    # Security & Compliance
    ("NIST SP 800-53", "https://csrc.nist.gov/publications/sp800-53"),
    ("FedRAMP Baselines", "https://www.fedramp.gov/"),
    ("IRS Publication 1075 (FTI)", "https://www.irs.gov/pub/irs-pdf/p1075.pdf"),
]
58
+
59
# =========================
# Ontology (Domains/Subdomains)
# =========================
# Knowledge taxonomy (domain -> subdomain labels) rendered on the
# "Trusted Sources & Ontology" tab. Purely informational: it is not consulted
# by the scoring heuristics or the retrieval index.
ONTOLOGY = {
    "Assessment": ["Inventory", "Dependencies", "Performance", "Criticality", "Readiness"],
    "Architecture": ["Landing Zone", "Azure Local Footprint", "AVS", "Environments"],
    "Networking": ["ExpressRoute", "VPN", "IP Plan", "DNS", "Load Balancing", "Private Link", "HCX Network"],
    "Identity": ["Entra ID", "AD DS", "PIM", "MFA", "RBAC", "Break-Glass"],
    "Migration": ["HCX", "Azure Migrate", "Cutover", "Rollback", "Data Sync"],
    "Data": ["Storage", "Backup", "Snapshots", "Immutability", "Residency"],
    "Security": ["Defender", "Sentinel", "Policy", "Purview", "Key Vault"],
    "DR": ["ASR", "Failover", "RTO/RPO", "Runbooks", "Tests"],
    "Ops": ["Monitor", "Log Analytics", "Patching", "Change Mgmt", "ITIL"],
    "Cost": ["Right-Sizing", "Reservations", "Tagging", "Budgets"],
    "Program": ["RAID", "Comms", "Training", "RACI", "Gates"],
    "Troubleshooting": ["HCX Failures", "DNS Drift", "Identity Tokens", "Latency"],
}
76
+
77
# =========================
# Heuristic Design Checks (keywords → rubric mapping)
# =========================
# Pillar name -> config consumed by score_text_against_checks():
#   "keywords": phrases counted case-insensitively toward the pillar's
#               coverage; more hits -> higher 0–5 rubric score.
#   "weight":   per-pillar weight. NOTE(review): not currently read by
#               score_text_against_checks — every pillar contributes equally
#               to the overall average.
#   "controls": compliance control IDs (security pillar only); informational,
#               not used in scoring.
CHECKS = {
    "security": {
        "weight": 1.0,
        "keywords": [
            "Defender for Cloud", "Microsoft Defender", "Sentinel", "Key Vault", "encryption",
            "TLS", "KMS", "HSM", "Just-In-Time", "JIT", "PIM", "MFA", "Conditional Access",
            "Azure Policy", "Purview", "classification", "DLP", "RBAC", "least privilege"
        ],
        "controls": ["NIST-AC-2", "NIST-SC-13", "IRS1075 §9.3"]
    },
    "reliability": {
        "weight": 1.0,
        "keywords": [
            "Availability Zone", "zonal", "ASR", "Site Recovery", "backup", "failover",
            "failback", "DR drill", "runbook", "immutable", "soft delete", "RTO", "RPO"
        ],
    },
    "performance": {
        "weight": 1.0,
        "keywords": [
            "right-size", "IOPS", "latency", "throughput", "benchmark", "autoscale",
            "SKU", "Managed Disks", "Premium SSD", "Ultra", "Standard SSD"
        ],
    },
    "operations": {
        "weight": 1.0,
        "keywords": [
            "Azure Monitor", "Log Analytics", "alerts", "workbooks", "patch", "change management",
            "incident", "problem", "request", "ITIL", "configuration drift"
        ],
    },
    "cost": {
        "weight": 1.0,
        "keywords": [
            "reservation", "Reserved Instances", "Savings Plan", "spot",
            "tagging", "chargeback", "showback", "budget", "cost anomaly"
        ],
    },
    "networking": {
        "weight": 1.0,
        "keywords": [
            "ExpressRoute", "ER", "VPN", "BGP", "MTU", "NSG", "ASG", "UDR", "Private Link",
            "DNS", "DHCP", "load balancer", "hub and spoke", "landing zone network"
        ],
    },
    "identity": {
        "weight": 1.0,
        "keywords": [
            "Entra ID", "Azure AD", "Active Directory", "domain trust", "AADDS",
            "Conditional Access", "PIM", "break-glass", "least privilege"
        ],
    },
    "migration": {
        "weight": 1.0,
        "keywords": [
            "HCX", "vMotion", "RAV", "Azure Migrate", "replication", "Mobility Group",
            "cutover", "rollback", "pilot", "wave"
        ],
    },
    "architecture": {
        "weight": 1.0,
        "keywords": [
            "Landing Zone", "hub", "spoke", "policy", "RBAC", "naming",
            "AVS", "Azure Local", "Azure Stack HCI", "Local Zone"
        ],
    },
}
147
+
148
# =========================
# FAQ seeds (concise, cite trusted links)
# =========================
# Canned Q&A entries. answer_faq_or_approach() treats a seed as matching when
# the first three words of its "q" all occur in the user's question.
# "refs" are display names resolved against TRUSTED_SOURCES by list_refs().
FAQ_SEEDS = [
    {
        "q": "How do we migrate VMware workloads to Azure Local?",
        "a": (
            "Typical paths are **Azure VMware Solution (AVS)** with **HCX** (bulk/RAV/vMotion) or "
            "**Azure Migrate** for discovery, assessment, and server/db/web migration. "
            "Establish a governed **Landing Zone** (hub/spoke, Policy, RBAC), plan ExpressRoute/VPN, "
            "pilot a few VMs, then cut over in waves with rollback plans. "
            "See AVS, Azure Migrate, and CAF for prescriptive guidance."
        ),
        "refs": ["Azure VMware Solution (AVS)", "Azure Migrate", "Cloud Adoption Framework (CAF)"]
    },
    {
        "q": "What downtime should we expect?",
        "a": (
            "Depends on method and app architecture. **HCX vMotion** can provide minimal downtime; "
            "**HCX RAV** and **bulk migration** usually require short cutover windows. "
            "Always pilot, measure replication lag, and agree on a timeboxed backout."
        ),
        "refs": ["VMware HCX Docs"]
    },
    {
        "q": "How do we meet IRS Pub 1075 and NIST controls?",
        "a": (
            "Map design controls to frameworks: enforce least privilege (RBAC/PIM/MFA), "
            "encrypt at rest/in transit (Key Vault/HSM, TLS), centralize telemetry (Sentinel), "
            "and document evidence (policies, runbooks, DR tests). Use CAF/WAF security pillars."
        ),
        "refs": ["IRS Publication 1075 (FTI)", "NIST SP 800-53", "Azure Well-Architected Framework (WAF)"]
    },
    {
        "q": "ExpressRoute or VPN?",
        "a": (
            "**ExpressRoute** is preferred for predictable performance and private connectivity; "
            "VPN is fine for initial testing or lower-throughput needs. Many designs use both "
            "for redundancy and phased cutover."
        ),
        "refs": ["Cloud Adoption Framework (CAF)"]
    },
]
191
+
192
+ # =========================
193
+ # Utilities: text extraction & chunking
194
+ # =========================
195
def extract_text_from_pdf(fileobj: io.BytesIO) -> str:
    """Extract plain text from a PDF byte stream.

    Returns "" when pypdf is not installed (PdfReader is None) or when the
    stream cannot be parsed — parsing problems are deliberately swallowed so a
    single bad upload never breaks indexing.
    """
    if PdfReader is None:
        return ""
    try:
        # Join per-page text; pages with no extractable text yield "".
        return "\n".join(
            (page.extract_text() or "") for page in PdfReader(fileobj).pages
        )
    except Exception:
        return ""
207
+
208
def extract_text_from_docx(fileobj: io.BytesIO) -> str:
    """Extract paragraph text from a DOCX byte stream.

    Returns "" when python-docx is not installed (docx is None) or when the
    stream cannot be parsed — best-effort, never raises.
    """
    if docx is None:
        return ""
    try:
        parsed = docx.Document(fileobj)
        return "\n".join(paragraph.text for paragraph in parsed.paragraphs)
    except Exception:
        return ""
216
+
217
def extract_text_from_txt(fileobj: io.BytesIO) -> str:
    """Decode a text byte stream as UTF-8, dropping undecodable bytes.

    Returns "" if the stream cannot be read at all — best-effort, never raises.
    """
    try:
        raw = fileobj.read()
        return raw.decode("utf-8", errors="ignore")
    except Exception:
        return ""
222
+
223
def read_file_to_text(file: gr.File) -> Tuple[str, str]:
    """
    Read one uploaded file and extract its plain text.

    Accepts either a Gradio file wrapper exposing a ``.name`` path attribute or
    a plain filesystem path string (newer Gradio versions pass paths directly).
    Returns ``(text, basename)``; ``text`` is "" for missing/unreadable files
    or unsupported extensions. Supported extensions: .pdf, .docx, .txt.

    Fixes vs. original: uses os.path.splitext instead of ``name.split(".")[-1]``
    (which returned the whole filename when there was no dot), tolerates a
    falsy/absent path instead of crashing in ``open()``, and handles plain
    string uploads.
    """
    if file is None:
        return "", ""
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not path:
        # Mirrors the original's "uploaded" placeholder name for nameless files.
        return "", "uploaded"
    name = os.path.basename(path)
    try:
        with open(path, "rb") as f:
            raw = f.read()
    except OSError:
        return "", name
    # splitext keeps the final extension only; "" when the name has no dot.
    ext = os.path.splitext(name)[1].lower().lstrip(".")
    bio = io.BytesIO(raw)
    if ext == "pdf":
        return extract_text_from_pdf(bio), name
    if ext == "docx":
        return extract_text_from_docx(bio), name
    if ext == "txt":
        return extract_text_from_txt(bio), name
    return "", name
243
+
244
def chunk_text(text: str, max_len: int = 900, overlap: int = 120) -> List[str]:
    """
    Split *text* into overlapping character windows.

    Whitespace runs are collapsed to single spaces first. Each chunk is at most
    *max_len* characters, and consecutive chunks share *overlap* characters so
    content cut at a boundary still appears whole in some chunk.

    Returns [] for empty/whitespace-only input.

    Fix vs. original: the original stepped ``i = j - overlap`` even after the
    window reached the end of the text; since ``i`` then stayed below
    ``len(text)`` forever, the tail chunk was appended in an infinite loop for
    any non-empty input. We now stop as soon as a window reaches the end.
    """
    text = re.sub(r"\s+", " ", text).strip()
    if not text:
        return []
    if overlap >= max_len:
        # Guarantee forward progress even with pathological arguments.
        overlap = max_len - 1 if max_len > 1 else 0
    chunks: List[str] = []
    n = len(text)
    i = 0
    while i < n:
        j = min(i + max_len, n)
        chunks.append(text[i:j])
        if j >= n:
            # Tail reached; stepping back by `overlap` would never terminate.
            break
        i = j - overlap
    return chunks
263
+
264
+ # =========================
265
+ # RAG Index (session-scoped)
266
+ # =========================
267
def build_index(files: List[gr.File]) -> Tuple[Any, Any, Any]:
    """
    Build a TF-IDF index over chunks of all uploaded documents.

    Returns ``(vectorizer, matrix, corpus)`` where *corpus* is a list of dicts
    with keys "text" (full chunk), "file" (source basename), and "snippet"
    (120-char preview). Returns ``(None, None, None)`` when no files are given
    or no text could be extracted.

    Fix vs. original: reuses the basename already returned by
    read_file_to_text instead of re-deriving it from ``f.name`` (duplicated
    logic that also assumed a file-object upload).
    """
    if not files:
        return None, None, None
    corpus: List[Dict[str, str]] = []
    for f in files:
        txt, fname = read_file_to_text(f)
        if not txt.strip():
            continue  # unparseable file: skip rather than abort the index
        for chunk in chunk_text(txt):
            corpus.append({
                "text": chunk,
                "file": fname,
                "snippet": chunk[:120] + ("..." if len(chunk) > 120 else ""),
            })
    if not corpus:
        return None, None, None

    vectorizer = TfidfVectorizer(stop_words="english", max_features=25000)
    X = vectorizer.fit_transform([entry["text"] for entry in corpus])
    return vectorizer, X, corpus
290
+
291
def retrieve_answer(
    query: str,
    vectorizer: Any,
    matrix: Any,
    corpus: List[Dict[str, str]],
    k: int = 4
) -> Tuple[str, List[Dict[str, str]]]:
    """
    Rank corpus chunks against *query* and format the top-*k* as an answer.

    Returns ``(markdown_answer, snippets)`` where each snippet carries the
    source file, cosine-similarity relevance, and a 500-char excerpt.
    Returns ``("", [])`` when the query is blank or no index has been built.
    """
    if not query or vectorizer is None or matrix is None or not corpus:
        return "", []
    scores = cosine_similarity(vectorizer.transform([query]), matrix).ravel()
    ranked = scores.argsort()[::-1][:k]
    snippets = [
        {
            "file": corpus[idx]["file"],
            "relevance": float(scores[idx]),
            "excerpt": corpus[idx]["text"][:500]
            + ("..." if len(corpus[idx]["text"]) > 500 else ""),
        }
        for idx in ranked
    ]
    # Simple synthesis: bullet list of the top excerpts + a follow-up hint.
    parts = ["Here are the most relevant excerpts from your uploaded documents:\n"]
    for snip in snippets:
        parts.append(f"- **{snip['file']}** (relevance {snip['relevance']:.2f}): {snip['excerpt']}\n")
    parts.append("Tip: Ask a follow-up like “Summarize the cutover plan” or “List missing security controls.”")
    return "\n".join(parts), snippets
320
+
321
+ # =========================
322
+ # Design / Runbook Auto-Review
323
+ # =========================
324
def score_text_against_checks(text: str) -> Tuple[Dict[str, float], List[Dict[str, str]]]:
    """
    Score *text* against each pillar in CHECKS and collect gap findings.

    Scoring: a pillar's score is ``5 * (0.3 + 0.7 * keyword_coverage)`` capped
    at 5.0 — a 1.5 floor rising with the fraction of that pillar's keywords
    present (case-insensitive substring match). The returned dict maps each
    pillar to its score plus an "overall" average; *gaps* is a list of
    ``{id, severity, desc, fix}`` findings, prefixed with a SUMMARY entry when
    the overall score is below 3.5.

    Fix vs. original: the ExpressRoute absence check used the raw substring
    ``"er "``, which matches ordinary words ("other ", "server ") and silently
    suppressed NET-ER-001; it now requires the standalone word "ER". A
    redundant ``.lower()`` on a literal was also removed.
    """
    text_low = text.lower()

    def absent(*phrases: str) -> bool:
        # True when none of the phrases appear anywhere in the document.
        return all(p not in text_low for p in phrases)

    # Gap heuristics per pillar: (trigger predicate, finding). Evaluated while
    # iterating CHECKS so findings keep the original pillar ordering.
    gap_rules: Dict[str, List[Tuple[Any, Dict[str, str]]]] = {
        "networking": [
            # \ber\b: the abbreviation "ER" as a whole word, not "...er ".
            (lambda: absent("expressroute") and not re.search(r"\ber\b", text_low),
             {"id": "NET-ER-001", "severity": "High",
              "desc": "ExpressRoute (ER) not referenced; consider ER for predictable private connectivity.",
              "fix": "Design dual ER circuits with diverse POPs; fall back to VPN during pilot."}),
            (lambda: absent("dns"),
             {"id": "NET-DNS-002", "severity": "Med",
              "desc": "DNS plan not mentioned; risk of name resolution drift post-cutover.",
              "fix": "Document forwarders/zones, conditional forwarding, and DNS cutover sequencing."}),
            (lambda: absent("mtu") and "hcx" in text_low,
             {"id": "NET-MTU-003", "severity": "Med",
              "desc": "HCX present but MTU tuning not referenced.",
              "fix": "Validate path MTU for HCX tunnels; align NSX/physical network settings."}),
        ],
        "identity": [
            (lambda: absent("pim"),
             {"id": "ID-PIM-004", "severity": "Med",
              "desc": "No mention of Privileged Identity Management (PIM).",
              "fix": "Enable PIM for admin roles; require approvals/justification; enforce MFA."}),
            (lambda: absent("break-glass"),
             {"id": "ID-BG-005", "severity": "Low",
              "desc": "No break-glass account reference.",
              "fix": "Create monitored break-glass accounts with strong controls and regular review."}),
        ],
        "security": [
            (lambda: absent("key vault", "hsm"),
             {"id": "SEC-KEY-006", "severity": "High",
              "desc": "Key management not described.",
              "fix": "Use Azure Key Vault (HSM-backed if needed); rotate secrets/keys; restrict access via RBAC."}),
            (lambda: absent("sentinel"),
             {"id": "SEC-SIEM-007", "severity": "Med",
              "desc": "SIEM not referenced.",
              "fix": "Onboard to Microsoft Sentinel; define data connectors and incident processes."}),
            (lambda: absent("policy"),
             {"id": "SEC-POL-008", "severity": "Med",
              "desc": "Azure Policy governance not mentioned.",
              "fix": "Attach ALZ policies/initiatives for guardrails (encryption, tags, allowed locations, SKUs)."}),
        ],
        "reliability": [
            (lambda: absent("asr", "site recovery"),
             {"id": "REL-ASR-009", "severity": "Med",
              "desc": "No DR replication tool referenced.",
              "fix": "Use Azure Site Recovery (ASR) or HCX DR for failover/failback; schedule DR drills."}),
            (lambda: absent("backup", "recovery services vault"),
             {"id": "REL-BKP-010", "severity": "High",
              "desc": "Backup strategy not captured.",
              "fix": "Configure Azure Backup with immutable storage and soft delete; test restores."}),
            (lambda: ("rto" not in text_low) or ("rpo" not in text_low),
             {"id": "REL-RTORPO-011", "severity": "Med",
              "desc": "RTO/RPO targets not documented.",
              "fix": "Define business-aligned RTO/RPO and validate during pilot/cutover."}),
        ],
        "architecture": [
            (lambda: absent("landing zone", "landing-zone"),
             {"id": "ARC-ALZ-012", "severity": "High",
              "desc": "Azure Landing Zone baseline not referenced.",
              "fix": "Adopt ALZ (hub/spoke, Policy, RBAC, logging) before migration waves."}),
        ],
        "migration": [
            (lambda: absent("rollback", "backout"),
             {"id": "MIG-ROLL-013", "severity": "High",
              "desc": "Rollback/backout path not documented.",
              "fix": "Document clear backout steps and timebox for each wave; test in pilot."}),
            (lambda: absent("pilot"),
             {"id": "MIG-PILOT-014", "severity": "Med",
              "desc": "No pilot mentioned.",
              "fix": "Execute a pilot with representative workloads; capture metrics and lessons."}),
        ],
        "cost": [
            (lambda: absent("tag"),
             {"id": "COST-TAG-015", "severity": "Med",
              "desc": "Tagging strategy absent (owner, env, app).",
              "fix": "Enforce tags via Policy; enable showback/chargeback and budgets."}),
        ],
    }

    pillar_scores: Dict[str, float] = {}
    gaps: List[Dict[str, str]] = []

    for pillar, cfg in CHECKS.items():
        kws = cfg["keywords"]
        hits = sum(1 for kw in kws if kw.lower() in text_low)
        coverage = hits / max(1, len(kws))
        # Baseline 1.5, rising to 5.0 at full keyword coverage.
        pillar_scores[pillar] = round(min(5.0, 5.0 * (0.3 + 0.7 * coverage)), 2)
        for triggered, finding in gap_rules.get(pillar, []):
            if triggered():
                gaps.append(dict(finding))

    # Overall score = unweighted average of pillar scores.
    if pillar_scores:
        overall = round(sum(pillar_scores.values()) / len(pillar_scores), 2)
    else:
        overall = 0.0

    # Insert an overall summary as the first "gap" entry if overall < 3.5.
    if overall < 3.5:
        gaps.insert(0, {
            "id": "SUMMARY",
            "severity": "Info",
            "desc": f"Overall score is {overall}. Focus first on High-severity gaps.",
            "fix": "Prioritize ER/DNS/Backup/ALZ/PIM/Key Vault where missing; re-run the check after updates."
        })

    return {"overall": overall, **pillar_scores}, gaps
480
+
481
def review_uploaded_docs(files: List[gr.File]) -> Tuple[str, Dict[str, Any], List[List[str]]]:
    """
    Run the heuristic design review across all uploaded documents.

    Returns:
        - a Markdown summary (scores + top recommendations),
        - a JSON-serializable result dict (timestamp, files, scores, gaps),
        - table rows ``[id, severity, description, fix]`` for the gaps grid.
    """
    if not files:
        return "Please upload at least one PDF/DOCX/TXT.", {}, []

    texts: List[str] = []
    names: List[str] = []
    for f in files:
        txt, _fname = read_file_to_text(f)
        if txt.strip():
            texts.append(txt)
            names.append(os.path.basename(f.name))
    if not texts:
        return "Could not parse text from the provided files.", {}, []

    scores, gaps = score_text_against_checks("\n\n".join(texts))

    # Assemble the Markdown report from parts, then join once.
    parts = ["### Design/Runbook Review\n"]
    parts.append(f"**Files analyzed:** {', '.join(names)}\n\n")
    parts.append(f"**Overall Score:** {scores['overall']} / 5.0\n\n")
    parts.append("**Per-Pillar Scores:**\n\n")
    for pillar, value in scores.items():
        if pillar != "overall":
            parts.append(f"- **{pillar.capitalize()}**: {value}\n")
    parts.append("\n**Top Recommendations:**\n")
    for gap in gaps[:6]:
        parts.append(f"- ({gap['severity']}) **{gap['id']}** — {gap['desc']} → _{gap['fix']}_\n")
    md = "".join(parts)

    result_json = {
        "timestamp": int(time.time()),
        "files": names,
        "scores": scores,
        "gaps": gaps
    }
    table_rows = [[gap["id"], gap["severity"], gap["desc"], gap["fix"]] for gap in gaps]
    return md, result_json, table_rows
525
+
526
+ # =========================
527
+ # Q&A Logic
528
+ # =========================
529
def list_refs(ref_names: List[str]) -> str:
    """Render the named TRUSTED_SOURCES entries as " | "-separated Markdown links.

    Names without a matching allow-list entry are silently skipped.
    """
    # First occurrence wins, matching the original's hit[0] behavior.
    lookup: Dict[str, str] = {}
    for src_name, url in TRUSTED_SOURCES:
        lookup.setdefault(src_name, url)
    return " | ".join(
        f"[{nm}]({lookup[nm]})" for nm in ref_names if nm in lookup
    )
536
+
537
def answer_faq_or_approach(
    question: str,
    use_uploaded_docs: bool,
    vectorizer: Any,
    matrix: Any,
    corpus: List[Dict[str, str]]
) -> str:
    """
    Answer a user question, trying three strategies in order:

    1. Seeded FAQ match — the first three words of a seed question must all
       occur (case-insensitively) in the user's question.
    2. RAG over the uploaded-document index, when enabled and built.
    3. A generic step-by-step migration approach with citations.
    """
    q = (question or "").strip()
    if not q:
        return "Please enter a question."

    q_low = q.lower()
    # Strategy 1: seeded FAQs (very light semantic: lead-word containment).
    for seed in FAQ_SEEDS:
        lead_words = re.findall(r"\w+", seed["q"])[:3]
        if all(word.lower() in q_low for word in lead_words):
            return f"{seed['a']}\n\n**Trusted sources:** {list_refs(seed['refs'])}"

    # Strategy 2: retrieval over the session index, if the user opted in.
    index_ready = vectorizer is not None and matrix is not None and bool(corpus)
    if use_uploaded_docs and index_ready:
        rag_answer, _snips = retrieve_answer(q, vectorizer, matrix, corpus, k=4)
        if rag_answer.strip():
            # Always append trusted sources list for user orientation.
            refs = list_refs(["Azure VMware Solution (AVS)", "Azure Migrate", "Cloud Adoption Framework (CAF)"])
            return f"{rag_answer}\n\n**Trusted sources:** {refs}"

    # Strategy 3: fallback generic approach with citations.
    generic = (
        "**Suggested approach:**\n"
        "1) Confirm **Landing Zone** (hub/spoke, Policy, RBAC, logging).\n"
        "2) Establish **ExpressRoute/VPN** and DNS plans; validate MTU if using **HCX**.\n"
        "3) Run **Azure Migrate** discovery/assessment; classify (rehost/refactor/modernize).\n"
        "4) Pilot 2–3 VMs; choose **HCX (bulk/RAV/vMotion)** or **Azure Migrate** for cutover.\n"
        "5) Define **RTO/RPO**, backup, and **ASR**/DR drills; document rollback.\n"
        "6) Onboard to **Defender/Sentinel**, enforce **Key Vault** and **PIM/MFA**.\n"
        "7) Optimize cost (right-size, reservations) and tag everything.\n"
    )
    refs = list_refs([
        "Azure VMware Solution (AVS)",
        "Azure Migrate",
        "Cloud Adoption Framework (CAF)",
        "Azure Well-Architected Framework (WAF)",
        "VMware HCX Docs"
    ])
    return f"{generic}\n**Trusted sources:** {refs}"
580
+
581
+ # =========================
582
+ # Gradio UI
583
+ # =========================
584
# =========================
# Gradio UI
# =========================
# Three tabs: free-form Q&A, document upload/review, and a static reference tab.
# The TF-IDF index lives in per-session gr.State so uploads never leak between users.
with gr.Blocks(title="VMware → Azure Local Migration Assistant") as demo:
    gr.Markdown(
        "# VMware On-Prem → Azure Local Migration Assistant\n"
        "Ask questions, upload migration/design documents for review, and get recommendations.\n"
        "_Sources: Microsoft Learn/Docs, VMware Docs, NIST, IRS Pub 1075 (linked below)._"
    )

    # Session state for RAG: populated by on_build_index, consumed by the Q&A handler.
    st_vectorizer = gr.State(None)
    st_matrix = gr.State(None)
    st_corpus = gr.State(None)

    with gr.Tabs():
        with gr.Tab("Ask Anything"):
            with gr.Row():
                question = gr.Textbox(
                    label="Your question (FAQs, approach, troubleshooting)",
                    placeholder="e.g., How do I plan a pilot with HCX RAV and ensure minimal downtime?"
                )
                use_docs = gr.Checkbox(label="Also search my uploaded documents (if any)", value=True)
            ask_btn = gr.Button("Answer")
            answer_box = gr.Markdown()

        with gr.Tab("Upload & Review Design"):
            gr.Markdown("Upload **PDF / DOCX / TXT** (multiple allowed). Then build the index and/or run a review.")
            files = gr.File(file_count="multiple", file_types=[".pdf", ".docx", ".txt"], label="Upload documents")
            with gr.Row():
                build_btn = gr.Button("Build/Refresh Search Index")
                review_btn = gr.Button("Run Design/Runbook Review")
            index_info = gr.Markdown()
            review_md = gr.Markdown()
            review_json = gr.JSON()
            gaps_table = gr.Dataframe(
                headers=["Gap ID", "Severity", "Description", "Fix"],
                datatype=["str", "str", "str", "str"],
                interactive=False,
                label="Gaps & Recommendations"
            )

        with gr.Tab("Trusted Sources & Ontology"):
            gr.Markdown("### Trusted / Authoritative Sources (Allow-list)")
            # Render links
            links_md = "\n".join([f"- [{nm}]({url})" for nm, url in TRUSTED_SOURCES])
            gr.Markdown(links_md)

            gr.Markdown("### Knowledge Taxonomy (Domains → Subdomains)")
            onto_str = ""
            for dom, subs in ONTOLOGY.items():
                onto_str += f"- **{dom}**: {', '.join(subs)}\n"
            gr.Markdown(onto_str)

            gr.Markdown(
                "### Notes\n"
                "- This app does **not** call external APIs. Use the links above for deep-dives into official guidance.\n"
                "- Design checks are heuristic; always validate against your Architecture Board and security teams."
            )

    # ====== Wiring ======
    def on_build_index(files_list):
        # Build the TF-IDF index and stash it in session state; on failure,
        # only the status message is updated and the state slots are cleared.
        vec, X, cor = build_index(files_list)
        if vec is None:
            return (gr.update(value="No text could be extracted. Make sure files are PDF/DOCX/TXT."),
                    None, None, None)
        msg = f"Indexed {len(cor)} chunks from {len(files_list)} file(s). You can now toggle 'Also search my uploaded documents' in the Ask Anything tab."
        return msg, vec, X, cor

    build_btn.click(
        on_build_index,
        inputs=[files],
        outputs=[index_info, st_vectorizer, st_matrix, st_corpus]
    )

    def on_review(files_list):
        # Thin pass-through so the click handler matches the 3 output widgets.
        md, js, table = review_uploaded_docs(files_list)
        return md, js, table

    review_btn.click(
        on_review,
        inputs=[files],
        outputs=[review_md, review_json, gaps_table]
    )

    # Q&A reads the session-state index directly; no wrapper needed.
    ask_btn.click(
        answer_faq_or_approach,
        inputs=[question, use_docs, st_vectorizer, st_matrix, st_corpus],
        outputs=[answer_box]
    )
671
+
672
# Standard HF Spaces entrypoint
if __name__ == "__main__":
    # Bind to all interfaces so the Spaces proxy can reach the app; the port
    # comes from the PORT env var when set (7860 otherwise).
    port = int(os.getenv("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)