Spaces:

ajayinsac
/

VMware2AzureLocal

Sleeping

App Files Community

ajayinsac committed on Sep 11, 2025

Commit

5038afa

verified ·

1 Parent(s): 6660a36

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -282

app.py CHANGED Viewed

@@ -5,7 +5,7 @@
 VMware On-Prem → Azure Local Migration Assistant (Gradio)
 - Upload design/migration docs (PDF/DOCX/TXT/MD).
 - Ask questions; get reliable, detailed, and relevant answers.
-- Intent-aware (definitions, how-tos, comparisons, plans), topic-aware (sdn/migration/dr/security/cost).
 - No external APIs. No scikit-learn.
 Run locally:
@@ -18,7 +18,7 @@ import io
 import re
 import math
 from typing import List, Tuple, Dict, Any
-from collections import Counter, defaultdict
 import gradio as gr
@@ -44,7 +44,7 @@ TRUSTED_SOURCES: List[Tuple[str, str]] = [
     # Core guidance
     ("Cloud Adoption Framework (CAF)", "https://learn.microsoft.com/azure/cloud-adoption-framework/"),
     ("Azure Well-Architected Framework (WAF)", "https://learn.microsoft.com/azure/architecture/framework/"),
-    # Networking
     ("Azure Virtual Network", "https://learn.microsoft.com/azure/virtual-network/"),
     ("Azure SDN concepts (HCI)", "https://learn.microsoft.com/azure-stack/hci/concepts/software-defined-networking"),
     ("Azure Arc (overview)", "https://learn.microsoft.com/azure/azure-arc/"),
@@ -110,7 +110,7 @@ def list_refs(ref_names: List[str]) -> str:
 # =========================
-# Intent & Topic detection
 # =========================
 _DEF_RE = re.compile(r"^\s*(what\s+is|what's|define|explain|tell\s+me\s+about)\b", re.I)
@@ -149,7 +149,7 @@ def topic_refs(topic: str) -> List[str]:
 # =========================
-# Tiny TF-IDF Index (no sklearn)
 # =========================
 class TinyTfidfIndex:
@@ -208,44 +208,7 @@ class TinyTfidfIndex:
 # =========================
-# Rubric (used to tailor RAG summaries)
-# =========================
-CHECKS = [
-    {"id": "landing_zone",  "desc": "Landing zone defined.",              "fix": "Use CAF blueprints.",                                     "keywords": ["landing", "hub", "spoke", "policy", "rbac"], "pillar": "governance"},
-    {"id": "connectivity",  "desc": "Connectivity planned.",              "fix": "Verify ER/VPN, DNS, MTU.",                                "keywords": ["expressroute", "vpn", "dns", "mtu", "hcx"],   "pillar": "networking"},
-    {"id": "migrate_tooling","desc": "Tooling chosen.",                   "fix": "Run Azure Migrate discovery.",                            "keywords": ["migrate", "discovery", "assessment", "hcx"],  "pillar": "operations"},
-    {"id": "security",      "desc": "Security configured.",               "fix": "Enable Key Vault, Defender, Sentinel, MFA.",             "keywords": ["vault", "defender", "sentinel", "mfa", "identity"], "pillar": "security"},
-    {"id": "dr_backup",     "desc": "Backups/DR defined.",                "fix": "Set RTO/RPO; test ASR.",                                 "keywords": ["backup", "rto", "rpo", "dr", "asr"],          "pillar": "reliability"},
-    {"id": "cost",          "desc": "Cost optimization.",                 "fix": "Use reservations, rightsizing, tags.",                   "keywords": ["cost", "reservation", "savings", "tag"],      "pillar": "cost"},
-]
-def score_text_against_checks(text: str) -> Tuple[Dict[str, float], List[Dict[str, str]]]:
-    toks = set(tokenize(text))
-    scores = defaultdict(float)
-    gaps = []
-    for chk in CHECKS:
-        matched = any(kw in toks for kw in chk["keywords"])
-        if matched:
-            scores["overall"] += 1.0
-            scores[chk["pillar"]] += 1.0
-        else:
-            gaps.append({
-                "id": chk["id"],
-                "desc": chk["desc"],
-                "fix": chk["fix"],
-                "severity": "high" if chk["pillar"] in ("security", "reliability") else "medium",
-            })
-    max_possible = float(len(CHECKS))
-    scores["overall"] = round(5.0 * (scores["overall"] / max_possible), 2)
-    for k in list(scores.keys()):
-        if k != "overall":
-            scores[k] = round(scores[k], 2)
-    return scores, gaps
-# =========================
-# File parsing
 # =========================
 def read_pdf_bytes(b: bytes) -> str:
@@ -296,214 +259,163 @@ def parse_file(file_obj: Dict[str, Any]) -> Dict[str, str]:
 # =========================
-# Helpers for composing detailed answers
 # =========================
 def _extract_subject_from_question(q: str) -> str:
-    """
-    Pulls the likely subject (e.g., 'Azure SDN') from 'what is/define/explain ...' questions.
-    Simple heuristic: remove leading interrogatives and trailing punctuation.
-    """
-    s = re.sub(r"^\s*(what\s+is|what's|define|explain|tell\s+me\s+about)\s+", "", q, flags=re.I).strip()
     s = re.sub(r"[?.!]+$", "", s).strip()
-    # Trim leading 'an', 'a', 'the'
     s = re.sub(r"^(an?|the)\s+", "", s, flags=re.I)
-    # Capitalize first letter of each word heuristically
-    return " ".join(w.capitalize() if w.isalpha() else w for w in s.split()) or "the topic"
-def _extract_key_points(text: str, max_points: int = 6) -> List[str]:
-    parts = re.split(r"(?<=[.!?])\s+", (text or "").strip())
-    points = []
-    for p in parts:
-        p = p.strip()
-        if 40 <= len(p) <= 300 and p not in points:
-            points.append(p)
-        if len(points) >= max_points:
-            break
-    return points
-def _topic_steps(topic: str) -> List[str]:
-    if topic == "sdn":
-        return [
-            "Define VNets/subnets and segmentation policy.",
-            "Automate configuration (ARM/Bicep/Terraform/GitOps).",
-            "Harden east–west flows with policy-based filtering.",
-            "Plan ingress/egress with load balancers and gateways.",
-            "Integrate with RBAC, logging, and change control.",
-        ]
-    if topic == "migration":
-        return [
-            "Establish governed landing zone (Policy, RBAC, logging).",
-            "Connect networks (ExpressRoute/VPN), validate DNS/MTU.",
-            "Discover/assess with Azure Migrate; classify apps.",
-            "Pilot 2–3 VMs; choose HCX or Azure Migrate cutover.",
-            "Migrate in waves; document rollback and success criteria.",
-        ]
-    if topic == "dr":
-        return [
-            "Define business RTO/RPO per workload.",
-            "Enable ASR where applicable; set up replication.",
-            "Run planned/unplanned failover drills; validate runbooks.",
-            "Harden backups (immutability, soft-delete).",
-            "Document recovery steps and responsibilities.",
-        ]
-    if topic == "security":
-        return [
-            "Centralize secrets in Key Vault; enable RBAC/PIM/MFA.",
-            "Enable Defender for Cloud and configure policies.",
-            "Collect/monitor logs; set alerts and playbooks.",
-            "Segment networks; restrict lateral movement.",
-            "Review identity hygiene and conditional access.",
-        ]
-    if topic == "cost":
-        return [
-            "Right-size compute/storage based on metrics.",
-            "Use reservations or Savings Plans where stable.",
-            "Automate tagging for showback/chargeback.",
-            "Schedule shutdowns for non-prod.",
-            "Monitor cost anomalies and budgets.",
-        ]
-    return [
-        "Clarify objective, constraints, and success criteria.",
-        "Assess current state and dependencies.",
-        "Choose an MVP approach; pilot and iterate.",
-        "Define rollout plan, rollback, and verification.",
-        "Measure results and continuously improve.",
-    ]
-def _compose_definition(subject: str, topic: str) -> str:
     """
-    Produces a clear, detailed definition for 'define/what is' questions
-    using the detected subject and topic to pick references.
     """
-    refs = list_refs(topic_refs(topic))
-    # Definition scaffold tailored to topic, but generic enough for any subject.
-    md = [
-        f"### {subject} — Detailed overview",
-        f"**Definition:** {subject} is a service/technology that centralizes control through software and policy so you can create, operate, and secure resources consistently across environments.",
-        "",
-        "**Why it matters:**",
-        "- Reduces manual configuration and errors with automation and governance.",
-        "- Improves security through consistent, policy-driven controls.",
-        "- Accelerates delivery with repeatable, programmable workflows.",
-        "",
-        "**Core capabilities:**",
-    ]
-    if topic == "sdn":
-        md += [
-            "- Programmatic virtual networking (VNets, subnets, routing).",
-            "- Microsegmentation and traffic filtering for east–west security.",
-            "- Software load balancing and gateway services for connectivity.",
-            "- Hybrid consistency across Azure and Azure Local (Azure Stack HCI).",
-        ]
-    elif topic == "migration":
-        md += [
-            "- Discovery and assessment of on-prem workloads.",
-            "- Replication, cutover orchestration (e.g., HCX or Azure Migrate).",
-            "- Wave-based moves with rollback and validation.",
-            "- Governance hooks for tagging, RBAC, policy.",
         ]
-    elif topic == "dr":
-        md += [
-            "- Replication and recovery planning (RPO/RTO).",
-            "- Failover/failback workflows and runbooks.",
-            "- Testing and non-disruptive drills.",
-            "- Integration with backup immutability and soft-delete.",
         ]
-    elif topic == "security":
-        md += [
-            "- Posture management, policy, and workload protections.",
-            "- Identity controls (RBAC, PIM/MFA), secrets management.",
-            "- Detection and response (alerts, analytics, playbooks).",
-            "- Compliance reporting and governance integration.",
         ]
-    elif topic == "cost":
-        md += [
-            "- Visibility into spend and resource utilization.",
-            "- Budgeting, alerts, and anomaly detection.",
-            "- Rightsizing and purchase optimizations (Reservations/Savings Plans).",
-            "- Tagging for showback/chargeback and accountability.",
-        ]
-    else:
-        md += [
-            "- Policy-driven management and automation.",
-            "- Consistent APIs/CLI/portal and GitOps-friendly workflows.",
-            "- Observability (logs/metrics) and compliance integration.",
         ]
-    md += [
-        "",
-        "**How it works (high-level):**",
-        "- A control plane applies intent (configuration/policy) to managed resources.",
-        "- Agents/providers translate intent into concrete changes.",
-        "- Telemetry feeds monitoring and governance for continuous improvement.",
-        "",
-        "**Common use cases:**",
-        "- Standardized environments across dev/test/prod.",
-        "- Stronger security posture via segmentation and least-privilege.",
-        "- Hybrid scenarios spanning Azure and Azure Local where relevant.",
-        "",
-        f"**Trusted sources:** {refs}",
     ]
-    return "\n".join(md)
-def _compose_detailed_from_snippets(query: str, snippets: List[Dict[str, str]], topic: str) -> str:
-    combined = "\n\n".join([s.get("excerpt", "") for s in snippets])
-    scores, gaps = score_text_against_checks(combined)
-    points = _extract_key_points(combined, max_points=6)
-    refs = list_refs(topic_refs(topic))
-    md = [
-        "### Answer (detailed)",
-        f"**Your question:** {query}",
-        "",
-        "**Executive summary:**",
     ]
-    if points:
-        for p in points:
-            md.append(f"- {p}")
-    else:
-        md.append("- Based on your documents, here is a structured plan and key considerations.")
-    md += ["", "#### Recommended steps"]
-    for step in _topic_steps(topic):
-        md.append(f"- {step}")
-    md += ["", "#### Supporting excerpts"]
-    for s in snippets:
-        md.append(f"- **{s['file']}** (relevance {s['relevance']:.2f}): {s['excerpt']}")
     md += ["", f"**Trusted sources:** {refs}"]
     return "\n".join(md)
-def _compose_topic_fallback(query: str, topic: str, intent: str) -> str:
-    # Use a topic-relevant fallback, with more detail than a plain template.
-    if intent == "define":
-        subject = _extract_subject_from_question(query)
-        return _compose_definition(subject, topic)
     refs = list_refs(topic_refs(topic))
-    headline = {
-        "sdn": "Azure SDN — Overview",
-        "migration": "Azure VMware / Azure Migrate — Overview",
-        "dr": "Azure Site Recovery (DR) — Overview",
-        "security": "Security & Governance in Azure — Overview",
-        "cost": "Cost Optimization in Azure — Overview",
-        "general": "Overview",
-    }[topic]
-    md = [
-        f"### {headline}",
-        f"**Your question:** {query}",
-        "",
-        "**Key points:**",
-    ]
-    for step in _topic_steps(topic):
-        md.append(f"- {step}")
-    md += [
-        "",
-        f"**Trusted sources:** {refs}",
-    ]
     return "\n".join(md)
@@ -511,13 +423,7 @@ def _compose_topic_fallback(query: str, topic: str, intent: str) -> str:
 # Main Answer Function
 # =========================
-def answer_faq_or_approach_detailed(
-    question: str,
-    use_uploaded_docs: bool,
-    index_obj: Any,
-    _matrix_unused: Any,
-    corpus: List[Dict[str, str]]
-) -> str:
     q = (question or "").strip()
     if not q:
         return "Please enter a question."
@@ -525,20 +431,24 @@ def answer_faq_or_approach_detailed(
     intent = detect_intent(q)
     topic = detect_topic(q)
-    # 1) Restrict FAQ route to migration-like queries only (prevents hijacking)
-    q_tokens = set(tokenize(q))
-    for item in FAQ_SEEDS:
-        seed_tokens = set(tokenize(item["q"]))
-        if not ({"migrate", "migration", "hcx", "avs"} & q_tokens):
-            continue
-        if seed_tokens and (len(seed_tokens & q_tokens) / float(len(seed_tokens))) >= 0.5:
-            return (
-                "### Answer (detailed)\n"
-                f"{item['a']}\n\n"
-                f"**Trusted sources:** {list_refs(item.get('refs', []))}"
-            )
-    # 2) Use uploaded docs (RAG) → detailed synthesized answer
     if use_uploaded_docs and index_obj is not None and corpus:
         top = index_obj.query(q, k=6)
         snippets = []
@@ -547,35 +457,24 @@ def answer_faq_or_approach_detailed(
             excerpt = (item["text"] or "").strip()
             if len(excerpt) > 700:
                 excerpt = excerpt[:700] + "..."
-            snippets.append({
-                "file": item["file"],
-                "relevance": float(sim),
-                "excerpt": excerpt
-            })
         if snippets:
-            return _compose_detailed_from_snippets(q, snippets, topic)
-    # 3) Intent-aware fallback (especially for definitions like "What is Azure SDN?")
-    if intent == "define":
-        subject = _extract_subject_from_question(q)
-        return _compose_definition(subject, topic)
-    # 4) Topic-aware fallback for other intents
-    return _compose_topic_fallback(q, topic, intent)
 # =========================
-# Build Index
 # =========================
 def build_index(files: List[Dict[str, Any]]):
     if not files:
         return None, None, [], "No files uploaded yet."
-    corpus: List[Dict[str, str]] = []
-    for f in files:
-        rec = parse_file(f)
-        if rec["text"]:
-            corpus.append(rec)
     if not corpus:
         return None, None, [], "No text extracted."
     tokenized = [tokenize(c["text"]) for c in corpus]
@@ -595,16 +494,12 @@ with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) a
         "- Click **Build Index**\n"
         "- Ask a question. Answers are **detailed** and **topic-relevant**\n"
     )
     with gr.Row():
         with gr.Column(scale=2):
             file_in = gr.Files(label="Upload docs", file_count="multiple", type="filepath")
             index_status = gr.Markdown("No index yet.")
-            st_index = gr.State()
-            st_matrix = gr.State()
-            st_corpus = gr.State()
             build_btn = gr.Button("Build Index", variant="primary")
         with gr.Column(scale=3):
             question = gr.Textbox(
                 label="Ask a question",
@@ -614,7 +509,6 @@ with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) a
             ask_btn = gr.Button("Ask", variant="primary")
             answer_box = gr.Markdown("")
-    # Convert gr.Files (paths) to expected dicts
     def _collect_files(paths: List[str]):
         out = []
         for p in paths or []:
@@ -630,11 +524,7 @@ with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) a
         files = _collect_files(files_paths)
         return build_index(files)
-    build_btn.click(
-        _build,
-        inputs=[file_in],
-        outputs=[index_status, st_index, st_matrix, st_corpus]
-    )
     ask_btn.click(
         answer_faq_or_approach_detailed,

 VMware On-Prem → Azure Local Migration Assistant (Gradio)
 - Upload design/migration docs (PDF/DOCX/TXT/MD).
 - Ask questions; get reliable, detailed, and relevant answers.
+- Intent-aware (definitions | how-to | plans | comparisons) with topic-aware details.
 - No external APIs. No scikit-learn.
 Run locally:
 import re
 import math
 from typing import List, Tuple, Dict, Any
+from collections import Counter
 import gradio as gr
     # Core guidance
     ("Cloud Adoption Framework (CAF)", "https://learn.microsoft.com/azure/cloud-adoption-framework/"),
     ("Azure Well-Architected Framework (WAF)", "https://learn.microsoft.com/azure/architecture/framework/"),
+    # Networking / SDN (used when question is about SDN)
     ("Azure Virtual Network", "https://learn.microsoft.com/azure/virtual-network/"),
     ("Azure SDN concepts (HCI)", "https://learn.microsoft.com/azure-stack/hci/concepts/software-defined-networking"),
     ("Azure Arc (overview)", "https://learn.microsoft.com/azure/azure-arc/"),
 # =========================
+# Intent & topic detection
 # =========================
 _DEF_RE = re.compile(r"^\s*(what\s+is|what's|define|explain|tell\s+me\s+about)\b", re.I)
 # =========================
+# Tiny TF-IDF Index
 # =========================
 class TinyTfidfIndex:
 # =========================
+# File Parsing
 # =========================
 def read_pdf_bytes(b: bytes) -> str:
 # =========================
+# Strong definition composer (for “what is …”)
 # =========================
+_DEF_RE_LEAD = re.compile(r"^\s*(what\s+is|what's|define|explain|tell\s+me\s+about)\s+", re.I)
 def _extract_subject_from_question(q: str) -> str:
+    s = _DEF_RE_LEAD.sub("", q).strip()
     s = re.sub(r"[?.!]+$", "", s).strip()
     s = re.sub(r"^(an?|the)\s+", "", s, flags=re.I)
+    return s if s else "the topic"
+def _definition_for_subject(subject: str, topic: str) -> Tuple[str, List[str], List[str], List[str], List[str], List[str]]:
     """
+    Returns: (definition, capabilities[], how[], best_practices[], use_cases[], refs_list)
+    Provides a specific definition for SDN; otherwise a generic but detailed scaffold using the subject.
     """
+    # SDN-specific, as per your example (paraphrased, not reused verbatim for all topics)
+    if topic == "sdn" or "sdn" in subject.lower():
+        definition = (
+            f"{subject} is Microsoft's implementation of software-defined networking: "
+            "a model that shifts network control into software so you can centrally design, automate, "
+            "and protect virtual networks across Azure and Azure Local (Azure Stack HCI). "
+            "By separating the control plane from underlying hardware, it enables programmability and "
+            "policy-driven management of components such as virtual networks, subnets, firewalls/ACLs, "
+            "load balancers, and gateways—well-suited for dynamic cloud and hybrid environments."
+        )
+        capabilities = [
+            "Programmatic creation of VNets, subnets, routing, and address spaces.",
+            "Micro-segmentation and policy enforcement for east–west traffic.",
+            "Software load balancing and gateway services for app connectivity.",
+            "Consistency across Azure and Azure Local (Azure Stack HCI) via Azure Arc.",
         ]
+        how = [
+            "A centralized control plane applies intent (network topology and policies) to host virtual switches.",
+            "Agents/controllers translate intent into concrete configuration on each host.",
+            "Telemetry and logs feed monitoring, governance, and troubleshooting workflows.",
         ]
+        best = [
+            "Use Infrastructure-as-Code (Bicep/Terraform) and GitOps to standardize changes.",
+            "Apply least-privilege and RBAC; review segmentation policies regularly.",
+            "Integrate with logging/monitoring; alert on drift and policy violations.",
         ]
+        uses = [
+            "Rapidly provisioning isolated app environments and tiers.",
+            "Zero-trust segmentation between workloads and environments.",
+            "Hybrid designs spanning Azure and Azure Local with consistent constructs.",
         ]
+        refs_list = topic_refs("sdn")
+        return definition, capabilities, how, best, uses, refs_list
+    # Generic detailed definition for other subjects
+    sub = subject.strip()
+    definition = (
+        f"{sub} is a service/technology that centralizes control through software and policy so teams can "
+        f"create, operate, and secure resources consistently across environments."
+    )
+    capabilities = [
+        "Automation and policy-driven configuration to reduce manual effort and errors.",
+        "Governance integration (RBAC, tagging, policy) for consistency and compliance.",
+        "Observability hooks (logs/metrics) for reliability and performance tuning.",
     ]
+    how = [
+        "A control plane captures intent (configuration/policies) and applies it to managed resources.",
+        "Providers/agents on the platform translate intent into changes at runtime.",
+        "Feedback loops via telemetry inform continuous improvement.",
     ]
+    best = [
+        "Adopt Infrastructure-as-Code and peer reviews for change control.",
+        "Define tagging, RBAC roles, and policy baselines early.",
+        "Pilot in a non-prod environment before broad rollout.",
+    ]
+    uses = [
+        "Faster, repeatable environment provisioning.",
+        "Improved security posture through standardized controls.",
+        "Hybrid scenarios requiring consistent management across sites.",
+    ]
+    refs_list = topic_refs(detect_topic(sub))
+    return definition, capabilities, how, best, uses, refs_list
+def _compose_definition_markdown(query: str, subject: str, topic: str) -> str:
+    definition, capabilities, how, best, uses, refs_list = _definition_for_subject(subject, topic)
+    refs = list_refs(refs_list)
+    md = [f"### {subject} — Detailed definition",
+          f"**Your question:** {query}", "",
+          f"**Definition:** {definition}", "",
+          "**Key capabilities:**"]
+    md += [f"- {c}" for c in capabilities]
+    md += ["", "**How it works:**"]
+    md += [f"- {h}" for h in how]
+    md += ["", "**Best practices:**"]
+    md += [f"- {b}" for b in best]
+    md += ["", "**Common use cases:**"]
+    md += [f"- {u}" for u in uses]
     md += ["", f"**Trusted sources:** {refs}"]
     return "\n".join(md)
+# =========================
+# RAG: build a detailed answer from uploaded docs
+# =========================
+def _extract_points(text: str, max_points: int = 6) -> List[str]:
+    parts = re.split(r"(?<=[.!?])\s+", (text or "").strip())
+    pts = []
+    for p in parts:
+        p = p.strip()
+        if 40 <= len(p) <= 280 and p not in pts:
+            pts.append(p)
+        if len(pts) >= max_points:
+            break
+    return pts
+def _compose_rag_answer(query: str, snippets: List[str], topic: str) -> str:
+    combined = " ".join(snippets)
+    points = _extract_points(combined, max_points=6)
     refs = list_refs(topic_refs(topic))
+    md = ["### Answer (detailed)", f"**Your question:** {query}", ""]
+    if points:
+        md += ["**Executive summary:**"] + [f"- {p}" for p in points]
+    else:
+        md += ["**Executive summary:**", "- Here are key considerations synthesized from your documents."]
+    # Add a short topic-aware checklist
+    checklist = {
+        "sdn": [
+            "Define VNets/subnets and segmentation policy.",
+            "Automate with IaC (Bicep/Terraform) and GitOps.",
+            "Harden east–west traffic with micro-segmentation.",
+            "Plan ingress/egress with LBs and gateways."
+        ],
+        "migration": [
+            "Establish landing zone (Policy, RBAC, logging).",
+            "Connect networks (ER/VPN), validate DNS/MTU.",
+            "Discover/assess with Azure Migrate; pilot a few VMs.",
+            "Choose HCX or Azure Migrate for cutover; migrate in waves."
+        ],
+        "dr": [
+            "Define RTO/RPO; choose replication targets.",
+            "Run planned/unplanned failover drills.",
+            "Ensure immutable backups and soft-delete."
+        ],
+        "security": [
+            "Enable RBAC/PIM/MFA and Key Vault.",
+            "Turn on Defender for Cloud; set policies and alerts.",
+            "Collect logs; restrict lateral movement."
+        ],
+        "cost": [
+            "Right-size; use Reservations/Savings Plans.",
+            "Tag resources; set budgets/alerts.",
+            "Automate non-prod shutdowns."
+        ],
+        "general": [
+            "Clarify objectives and constraints.",
+            "Pilot changes; define rollback and verification."
+        ]
+    }.get(topic, ["Clarify objectives and constraints.", "Pilot changes; define rollback and verification."])
+    md += ["", "**Recommended steps:**"] + [f"- {s}" for s in checklist]
+    md += ["", f"**Trusted sources:** {refs}"]
     return "\n".join(md)
 # Main Answer Function
 # =========================
+def answer_faq_or_approach_detailed(question: str, use_uploaded_docs: bool, index_obj: Any, _matrix_unused: Any, corpus: List[Dict[str,str]]) -> str:
     q = (question or "").strip()
     if not q:
         return "Please enter a question."
     intent = detect_intent(q)
     topic = detect_topic(q)
+    # A) Definitions: build a strong, subject-specific definition (e.g., "What is Azure SDN?")
+    if intent == "define":
+        subject = _extract_subject_from_question(q)
+        return _compose_definition_markdown(q, subject, topic)
+    # B) Migration FAQs (only if the question is migration-like to avoid hijacking)
+    q_tokens = set(tokenize(q))
+    if {"migrate", "migration", "hcx", "avs"} & q_tokens:
+        for item in FAQ_SEEDS:
+            seed_tokens = set(tokenize(item["q"]))
+            if seed_tokens and (len(seed_tokens & q_tokens) / float(len(seed_tokens))) >= 0.5:
+                return (
+                    "### Answer (detailed)\n"
+                    f"{item['a']}\n\n"
+                    f"**Trusted sources:** {list_refs(item.get('refs', []))}"
+                )
+    # C) RAG over uploaded docs → detailed synthesized answer
     if use_uploaded_docs and index_obj is not None and corpus:
         top = index_obj.query(q, k=6)
         snippets = []
             excerpt = (item["text"] or "").strip()
             if len(excerpt) > 700:
                 excerpt = excerpt[:700] + "..."
+            if excerpt:
+                snippets.append(excerpt)
         if snippets:
+            return _compose_rag_answer(q, snippets, topic)
+    # D) Topic-aware fallback (short but relevant)
+    subject = _extract_subject_from_question(q) if intent in {"how", "plan", "compare"} else q
+    return _compose_definition_markdown(q, subject, topic)
 # =========================
+# Index Builder
 # =========================
 def build_index(files: List[Dict[str, Any]]):
     if not files:
         return None, None, [], "No files uploaded yet."
+    corpus = [parse_file(f) for f in files if parse_file(f)["text"]]
     if not corpus:
         return None, None, [], "No text extracted."
     tokenized = [tokenize(c["text"]) for c in corpus]
         "- Click **Build Index**\n"
         "- Ask a question. Answers are **detailed** and **topic-relevant**\n"
     )
     with gr.Row():
         with gr.Column(scale=2):
             file_in = gr.Files(label="Upload docs", file_count="multiple", type="filepath")
             index_status = gr.Markdown("No index yet.")
+            st_index = gr.State(); st_matrix = gr.State(); st_corpus = gr.State()
             build_btn = gr.Button("Build Index", variant="primary")
         with gr.Column(scale=3):
             question = gr.Textbox(
                 label="Ask a question",
             ask_btn = gr.Button("Ask", variant="primary")
             answer_box = gr.Markdown("")
     def _collect_files(paths: List[str]):
         out = []
         for p in paths or []:
         files = _collect_files(files_paths)
         return build_index(files)
+    build_btn.click(_build, inputs=[file_in], outputs=[index_status, st_index, st_matrix, st_corpus])
     ask_btn.click(
         answer_faq_or_approach_detailed,