Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,10 +5,11 @@
|
|
| 5 |
VMware On-Prem → Azure Local Migration Assistant (Gradio)
|
| 6 |
- No external API calls. No scikit-learn.
|
| 7 |
- Upload design/migration docs (PDF/DOCX/TXT/MD).
|
| 8 |
-
- Ask questions; get RELIABLE, DETAILED answers:
|
| 9 |
-
•
|
| 10 |
-
•
|
| 11 |
-
•
|
|
|
|
| 12 |
Run locally:
|
| 13 |
pip install gradio PyPDF2 python-docx
|
| 14 |
python app.py
|
|
@@ -18,7 +19,7 @@ import os
|
|
| 18 |
import io
|
| 19 |
import re
|
| 20 |
import math
|
| 21 |
-
from typing import List, Tuple, Dict, Any
|
| 22 |
from collections import Counter, defaultdict
|
| 23 |
|
| 24 |
import gradio as gr
|
|
@@ -42,14 +43,24 @@ except Exception:
|
|
| 42 |
# =========================
|
| 43 |
|
| 44 |
TRUSTED_SOURCES: List[Tuple[str, str]] = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
("Azure Arc (overview)", "https://learn.microsoft.com/azure/azure-arc/"),
|
| 46 |
("Azure Stack HCI (Azure Local)", "https://learn.microsoft.com/azure-stack/hci/"),
|
| 47 |
-
("Azure
|
|
|
|
| 48 |
("Azure VMware Solution (AVS)", "https://learn.microsoft.com/azure/azure-vmware/"),
|
| 49 |
("Azure Migrate", "https://learn.microsoft.com/azure/migrate/"),
|
| 50 |
-
("Cloud Adoption Framework (CAF)", "https://learn.microsoft.com/azure/cloud-adoption-framework/"),
|
| 51 |
-
("Azure Well-Architected Framework (WAF)", "https://learn.microsoft.com/azure/architecture/framework/"),
|
| 52 |
("VMware HCX Docs", "https://docs.vmware.com/en/VMware-HCX/index.html"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
]
|
| 54 |
|
| 55 |
FAQ_SEEDS: List[Dict[str, Any]] = [
|
|
@@ -77,7 +88,7 @@ FAQ_SEEDS: List[Dict[str, Any]] = [
|
|
| 77 |
"Define RTO/RPO per app. Use immutable backups and soft-delete. "
|
| 78 |
"Leverage ASR for DR where appropriate, run failover drills, and document rollback."
|
| 79 |
),
|
| 80 |
-
"refs": ["Azure
|
| 81 |
},
|
| 82 |
]
|
| 83 |
|
|
@@ -100,6 +111,41 @@ def list_refs(ref_names: List[str]) -> str:
|
|
| 100 |
return " | ".join(links) if links else ""
|
| 101 |
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
# =========================
|
| 104 |
# Tiny TF-IDF Index (no sklearn)
|
| 105 |
# =========================
|
|
@@ -160,7 +206,7 @@ class TinyTfidfIndex:
|
|
| 160 |
|
| 161 |
|
| 162 |
# =========================
|
| 163 |
-
# Rubric for RAG
|
| 164 |
# =========================
|
| 165 |
|
| 166 |
CHECKS = [
|
|
@@ -196,118 +242,6 @@ def score_text_against_checks(text: str) -> Tuple[Dict[str, float], List[Dict[st
|
|
| 196 |
return scores, gaps
|
| 197 |
|
| 198 |
|
| 199 |
-
# =========================
|
| 200 |
-
# Built-in Concept KB (for definitional questions)
|
| 201 |
-
# =========================
|
| 202 |
-
|
| 203 |
-
class Concept:
|
| 204 |
-
def __init__(self, name: str, aliases: List[str], builder):
|
| 205 |
-
self.name = name
|
| 206 |
-
self.aliases = [tokenize(a) for a in aliases]
|
| 207 |
-
self.builder = builder # function(query:str)->str
|
| 208 |
-
|
| 209 |
-
def _kb_ans_azure_sdn(_: str) -> str:
|
| 210 |
-
refs = list_refs(["Azure SDN concepts (HCI)", "Azure Arc (overview)", "Azure Stack HCI (Azure Local)"])
|
| 211 |
-
return (
|
| 212 |
-
"### Azure SDN — What it is and why it matters\n"
|
| 213 |
-
"**Definition:** Azure SDN is Microsoft's software-defined networking stack that centralizes network control in software, "
|
| 214 |
-
"decoupling policy and management from physical hardware. It lets you programmatically create and secure virtual networks, "
|
| 215 |
-
"subnets, microsegmentation (ACL/NSG-like policies), load balancers and gateways across Azure and Azure Local (Azure Stack HCI) environments.\n\n"
|
| 216 |
-
"**Key capabilities**\n"
|
| 217 |
-
"- Central, policy-driven control plane for virtual networking resources.\n"
|
| 218 |
-
"- Automation & GitOps-friendly configuration for repeatable environments.\n"
|
| 219 |
-
"- Microsegmentation and traffic filtering for east–west security.\n"
|
| 220 |
-
"- Software load balancing and gateway services for app connectivity.\n"
|
| 221 |
-
"- Consistent constructs across cloud and on-prem (with Azure Local).\n\n"
|
| 222 |
-
"**How it works (high level)**\n"
|
| 223 |
-
"- A software control plane programs host virtual switches and network functions.\n"
|
| 224 |
-
"- Network intent (VNets, subnets, policies) is applied consistently across hosts.\n"
|
| 225 |
-
"- Integrates with Azure identity/management for RBAC and governance.\n\n"
|
| 226 |
-
"**Common use cases**\n"
|
| 227 |
-
"- Rapidly provisioning isolated app environments.\n"
|
| 228 |
-
"- Enforcing zero-trust style segmentation between tiers.\n"
|
| 229 |
-
"- Hybrid apps spanning Azure and Azure Local.\n\n"
|
| 230 |
-
f"**Trusted sources:** {refs}"
|
| 231 |
-
)
|
| 232 |
-
|
| 233 |
-
def _kb_ans_arc_enabled_sdn(_: str) -> str:
|
| 234 |
-
refs = list_refs(["Azure SDN concepts (HCI)", "Azure Arc (overview)", "Azure Stack HCI (Azure Local)"])
|
| 235 |
-
return (
|
| 236 |
-
"### Azure Arc-enabled SDN — Definition & details\n"
|
| 237 |
-
"**Definition:** Azure Arc-enabled SDN brings Azure's software-defined networking to on-premises Azure Local (Azure Stack HCI) clusters, "
|
| 238 |
-
"managed through Azure Arc. It decouples network control from hardware so you can centrally define, automate, and secure "
|
| 239 |
-
"virtual networks, subnets, and policies in your datacenter using Azure-consistent tools.\n\n"
|
| 240 |
-
"**Why it matters**\n"
|
| 241 |
-
"- Gives you Azure-like VNet constructs and policy management on-prem.\n"
|
| 242 |
-
"- Enables consistent security and segmentation across hybrid estates.\n"
|
| 243 |
-
"- Supports rapid, software-driven changes without touching physical fabric.\n\n"
|
| 244 |
-
"**Key capabilities**\n"
|
| 245 |
-
"- Create/modify on-prem VNets, subnets, and routing policies from Azure.\n"
|
| 246 |
-
"- Apply microsegmentation rules (policy/ACL-style) for east–west security.\n"
|
| 247 |
-
"- Software load balancing and gateway services for north–south/east–west flows.\n"
|
| 248 |
-
"- Integration with Azure RBAC, tagging, and governance for change control.\n\n"
|
| 249 |
-
"**Core components (conceptual)**\n"
|
| 250 |
-
"- **Arc resource bridge & agents** — connect your HCI cluster to Azure control.\n"
|
| 251 |
-
"- **SDN controller & host agents** — program the Hyper-V vSwitch and network functions.\n"
|
| 252 |
-
"- **Azure portal/CLI/GitOps** — define intent (VNets, subnets, policies) and deploy.\n\n"
|
| 253 |
-
"**Prerequisites (typical)**\n"
|
| 254 |
-
"- Azure Local (Azure Stack HCI) cluster connected to Azure Arc.\n"
|
| 255 |
-
"- Arc resource bridge onboarded; network requirements met.\n"
|
| 256 |
-
"- Appropriate RBAC roles to manage networking resources.\n\n"
|
| 257 |
-
"**Use cases**\n"
|
| 258 |
-
"- Host Azure-consistent app networks on-prem for data locality/regulatory needs.\n"
|
| 259 |
-
"- Hybrid deployments with identical network constructs across Azure and HCI.\n"
|
| 260 |
-
"- Rapid rollout of segmented networks for dev/test/prod without hardware changes.\n\n"
|
| 261 |
-
"**Notes & limitations (high level)**\n"
|
| 262 |
-
"- Physical underlay still matters (IP design, routing, bandwidth, HA).\n"
|
| 263 |
-
"- Feature parity with public Azure services may vary; validate per release.\n\n"
|
| 264 |
-
f"**Trusted sources:** {refs}"
|
| 265 |
-
)
|
| 266 |
-
|
| 267 |
-
KB_CONCEPTS: List[Concept] = [
|
| 268 |
-
Concept(
|
| 269 |
-
name="azure sdn",
|
| 270 |
-
aliases=[
|
| 271 |
-
"azure sdn",
|
| 272 |
-
"software defined networking azure",
|
| 273 |
-
"sdn in azure",
|
| 274 |
-
"azure local sdn",
|
| 275 |
-
"azure stack hci sdn",
|
| 276 |
-
],
|
| 277 |
-
builder=_kb_ans_azure_sdn,
|
| 278 |
-
),
|
| 279 |
-
Concept(
|
| 280 |
-
name="azure arc enabled sdn",
|
| 281 |
-
aliases=[
|
| 282 |
-
"azure arc enabled sdn",
|
| 283 |
-
"azure arc-enabled sdn",
|
| 284 |
-
"arc enabled sdn",
|
| 285 |
-
"arc-enabled sdn",
|
| 286 |
-
"arc sdn",
|
| 287 |
-
"azure local arc sdn",
|
| 288 |
-
"azure stack hci arc sdn",
|
| 289 |
-
],
|
| 290 |
-
builder=_kb_ans_arc_enabled_sdn,
|
| 291 |
-
),
|
| 292 |
-
]
|
| 293 |
-
|
| 294 |
-
def lookup_concept(query: str) -> Optional[Concept]:
|
| 295 |
-
q_tokens = set(tokenize(query))
|
| 296 |
-
best: Optional[Concept] = None
|
| 297 |
-
best_score = 0.0
|
| 298 |
-
for c in KB_CONCEPTS:
|
| 299 |
-
for alias_tokens in c.aliases:
|
| 300 |
-
if not alias_tokens:
|
| 301 |
-
continue
|
| 302 |
-
overlap = len(q_tokens & set(alias_tokens))
|
| 303 |
-
score = overlap / float(len(set(alias_tokens)))
|
| 304 |
-
if score > best_score:
|
| 305 |
-
best_score = score
|
| 306 |
-
best = c
|
| 307 |
-
# threshold: intentional but tolerant
|
| 308 |
-
return best if best_score >= 0.5 else None
|
| 309 |
-
|
| 310 |
-
|
| 311 |
# =========================
|
| 312 |
# File Parsing
|
| 313 |
# =========================
|
|
@@ -360,31 +294,146 @@ def parse_file(file_obj: Dict[str, Any]) -> Dict[str, str]:
|
|
| 360 |
|
| 361 |
|
| 362 |
# =========================
|
| 363 |
-
#
|
| 364 |
# =========================
|
| 365 |
|
| 366 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
combined = "\n\n".join([s.get("excerpt", "") for s in snippets])
|
| 368 |
scores, gaps = score_text_against_checks(combined)
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
f"**Your question:** {query}
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
for s in snippets:
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
|
| 386 |
# =========================
|
| 387 |
-
# Main Answer Function
|
| 388 |
# =========================
|
| 389 |
|
| 390 |
def answer_faq_or_approach_detailed(
|
|
@@ -398,15 +447,15 @@ def answer_faq_or_approach_detailed(
|
|
| 398 |
if not q:
|
| 399 |
return "Please enter a question."
|
| 400 |
|
| 401 |
-
|
| 402 |
-
concept = lookup_concept(q)
|
| 403 |
-
if concept is not None:
|
| 404 |
-
return concept.builder(q)
|
| 405 |
|
| 406 |
-
# 1) Seeded FAQs → detailed plan
|
| 407 |
q_tokens = set(tokenize(q))
|
| 408 |
for item in FAQ_SEEDS:
|
| 409 |
seed_tokens = set(tokenize(item["q"]))
|
|
|
|
|
|
|
|
|
|
| 410 |
if seed_tokens and (len(seed_tokens & q_tokens) / float(len(seed_tokens))) >= 0.5:
|
| 411 |
return (
|
| 412 |
"### Answer (detailed)\n"
|
|
@@ -429,20 +478,10 @@ def answer_faq_or_approach_detailed(
|
|
| 429 |
"excerpt": excerpt
|
| 430 |
})
|
| 431 |
if snippets:
|
| 432 |
-
return _compose_detailed_from_snippets(q, snippets)
|
| 433 |
-
|
| 434 |
-
# 3)
|
| 435 |
-
|
| 436 |
-
return (
|
| 437 |
-
"### Answer (detailed)\n"
|
| 438 |
-
"I couldn't match a specific concept or supporting excerpts, so here's a structured overview you can refine:\n\n"
|
| 439 |
-
"**Definition:** Describe what the service/feature is, what problems it solves, and where it runs (Azure / Azure Local).\n\n"
|
| 440 |
-
"**Key capabilities:** automation, policy-driven control, security segmentation, connectivity services.\n\n"
|
| 441 |
-
"**How it works:** control plane programs host/network functions; policies applied consistently; integrates with RBAC/governance.\n\n"
|
| 442 |
-
"**Prerequisites:** identity/RBAC, connectivity to Azure (for Arc), supported host/cluster versions.\n\n"
|
| 443 |
-
"**Use cases:** hybrid deployments, zero-trust segmentation, rapid environment provisioning.\n\n"
|
| 444 |
-
f"**Trusted sources:** {refs}"
|
| 445 |
-
)
|
| 446 |
|
| 447 |
|
| 448 |
# =========================
|
|
@@ -472,7 +511,9 @@ def build_index(files: List[Dict[str, Any]]):
|
|
| 472 |
with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) as demo:
|
| 473 |
gr.Markdown(
|
| 474 |
"## VMware On-Prem → Azure Local Migration Assistant\n"
|
| 475 |
-
"Upload documents
|
|
|
|
|
|
|
| 476 |
)
|
| 477 |
|
| 478 |
with gr.Row():
|
|
@@ -485,7 +526,7 @@ with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) a
|
|
| 485 |
build_btn = gr.Button("Build Index", variant="primary")
|
| 486 |
|
| 487 |
with gr.Column(scale=3):
|
| 488 |
-
question = gr.Textbox(label="Ask a question", placeholder="e.g., What is Azure Arc-enabled SDN
|
| 489 |
use_docs = gr.Checkbox(label="Use uploaded docs (RAG)", value=True)
|
| 490 |
ask_btn = gr.Button("Ask", variant="primary")
|
| 491 |
answer_box = gr.Markdown("")
|
|
|
|
| 5 |
VMware On-Prem → Azure Local Migration Assistant (Gradio)
|
| 6 |
- No external API calls. No scikit-learn.
|
| 7 |
- Upload design/migration docs (PDF/DOCX/TXT/MD).
|
| 8 |
+
- Ask questions; get RELIABLE, DETAILED, and RELEVANT answers:
|
| 9 |
+
• RAG on uploaded docs (excerpts + topic-tailored structure)
|
| 10 |
+
• Seeded FAQs (for migration flows)
|
| 11 |
+
• Topic-aware fallbacks (no more SDN leakage into unrelated topics)
|
| 12 |
+
|
| 13 |
Run locally:
|
| 14 |
pip install gradio PyPDF2 python-docx
|
| 15 |
python app.py
|
|
|
|
| 19 |
import io
|
| 20 |
import re
|
| 21 |
import math
|
| 22 |
+
from typing import List, Tuple, Dict, Any
|
| 23 |
from collections import Counter, defaultdict
|
| 24 |
|
| 25 |
import gradio as gr
|
|
|
|
| 43 |
# =========================
|
| 44 |
|
| 45 |
# (display name, documentation URL) pairs, grouped by subject area.
TRUSTED_SOURCES: List[Tuple[str, str]] = [
    # Core Azure landing/ops
    (
        "Cloud Adoption Framework (CAF)",
        "https://learn.microsoft.com/azure/cloud-adoption-framework/",
    ),
    (
        "Azure Well-Architected Framework (WAF)",
        "https://learn.microsoft.com/azure/architecture/framework/",
    ),
    # Networking / SDN (used ONLY when topic == 'sdn')
    (
        "Azure SDN concepts (HCI)",
        "https://learn.microsoft.com/azure-stack/hci/concepts/software-defined-networking",
    ),
    (
        "Azure Arc (overview)",
        "https://learn.microsoft.com/azure/azure-arc/",
    ),
    (
        "Azure Stack HCI (Azure Local)",
        "https://learn.microsoft.com/azure-stack/hci/",
    ),
    (
        "Azure Virtual Network",
        "https://learn.microsoft.com/azure/virtual-network/",
    ),
    # Migration
    (
        "Azure VMware Solution (AVS)",
        "https://learn.microsoft.com/azure/azure-vmware/",
    ),
    (
        "Azure Migrate",
        "https://learn.microsoft.com/azure/migrate/",
    ),
    (
        "VMware HCX Docs",
        "https://docs.vmware.com/en/VMware-HCX/index.html",
    ),
    # DR
    (
        "Azure Site Recovery (ASR)",
        "https://learn.microsoft.com/azure/site-recovery/",
    ),
    # Security
    (
        "Microsoft Defender for Cloud",
        "https://learn.microsoft.com/azure/defender-for-cloud/",
    ),
    # Cost
    (
        "Azure Cost Management",
        "https://learn.microsoft.com/azure/cost-management-billing/",
    ),
]
|
| 65 |
|
| 66 |
FAQ_SEEDS: List[Dict[str, Any]] = [
|
|
|
|
| 88 |
"Define RTO/RPO per app. Use immutable backups and soft-delete. "
|
| 89 |
"Leverage ASR for DR where appropriate, run failover drills, and document rollback."
|
| 90 |
),
|
| 91 |
+
"refs": ["Azure Site Recovery (ASR)"],
|
| 92 |
},
|
| 93 |
]
|
| 94 |
|
|
|
|
| 111 |
return " | ".join(links) if links else ""
|
| 112 |
|
| 113 |
|
| 114 |
+
# =========================
|
| 115 |
+
# Topic detection (keeps answers relevant)
|
| 116 |
+
# =========================
|
| 117 |
+
|
| 118 |
+
def detect_topic(q: str) -> str:
    """Classify a question into a coarse topic label.

    The question is tokenized and compared against a fixed keyword set per
    topic. Rules are tried in the order listed below and the first topic
    whose keywords share at least one token with the question wins.

    Returns one of: 'sdn', 'migration', 'dr', 'security', 'cost', 'general'.
    'general' is returned when no rule matches.
    """
    words = set(tokenize(q))

    # Order is significant: an earlier rule takes precedence over later ones.
    # NOTE(review): whether tokenize() can ever yield a hyphenated token such
    # as "software-defined" is not visible from this block — confirm.
    rules = (
        ("sdn", {"sdn", "software-defined", "softwaredefined"}),
        ("migration", {"migrate", "migration", "hcx", "avs", "vmotion", "cutover"}),
        ("dr", {"dr", "disaster", "asr", "rto", "rpo", "failover"}),
        ("security", {"defender", "sentinel", "pim", "mfa", "vault", "identity", "entra"}),
        ("cost", {"cost", "reservation", "savings", "rightsizing", "tagging"}),
    )
    for label, keywords in rules:
        if not keywords.isdisjoint(words):
            return label
    return "general"
|
| 134 |
+
|
| 135 |
+
def topic_refs(topic: str) -> List[str]:
    """Return the reference names to cite for *topic*.

    Recognised topics are 'sdn', 'migration', 'dr', 'security' and 'cost';
    any other value gets the general CAF/WAF pair. A new list is returned on
    every call, so callers may modify it freely.
    """
    general = (
        "Cloud Adoption Framework (CAF)",
        "Azure Well-Architected Framework (WAF)",
    )
    by_topic = {
        "sdn": (
            "Azure SDN concepts (HCI)",
            "Azure Arc (overview)",
            "Azure Stack HCI (Azure Local)",
            "Azure Virtual Network",
        ),
        "migration": (
            "Azure Migrate",
            "Azure VMware Solution (AVS)",
            "VMware HCX Docs",
            "Cloud Adoption Framework (CAF)",
        ),
        "dr": (
            "Azure Site Recovery (ASR)",
            "Azure Well-Architected Framework (WAF)",
        ),
        "security": (
            "Microsoft Defender for Cloud",
            "Azure Well-Architected Framework (WAF)",
        ),
        "cost": (
            "Azure Cost Management",
            "Azure Well-Architected Framework (WAF)",
        ),
    }
    return list(by_topic.get(topic, general))
|
| 147 |
+
|
| 148 |
+
|
| 149 |
# =========================
|
| 150 |
# Tiny TF-IDF Index (no sklearn)
|
| 151 |
# =========================
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
# =========================
|
| 209 |
+
# Rubric for tailoring RAG output
|
| 210 |
# =========================
|
| 211 |
|
| 212 |
CHECKS = [
|
|
|
|
| 242 |
return scores, gaps
|
| 243 |
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
# =========================
|
| 246 |
# File Parsing
|
| 247 |
# =========================
|
|
|
|
| 294 |
|
| 295 |
|
| 296 |
# =========================
|
| 297 |
+
# Helpers for composing detailed answers
|
| 298 |
# =========================
|
| 299 |
|
| 300 |
+
def _extract_key_points(text: str, max_points: int = 6) -> List[str]:
|
| 301 |
+
# naive sentence splitter
|
| 302 |
+
parts = re.split(r"(?<=[.!?])\s+", text.strip())
|
| 303 |
+
points = []
|
| 304 |
+
for p in parts:
|
| 305 |
+
p = p.strip()
|
| 306 |
+
if 30 <= len(p) <= 300 and p not in points:
|
| 307 |
+
points.append(p)
|
| 308 |
+
if len(points) >= max_points:
|
| 309 |
+
break
|
| 310 |
+
return points
|
| 311 |
+
|
| 312 |
+
def _topic_steps(topic: str) -> List[str]:
|
| 313 |
+
if topic == "sdn":
|
| 314 |
+
return [
|
| 315 |
+
"Define VNets/subnets and segmentation policy.",
|
| 316 |
+
"Automate configuration (ARM/Bicep/Terraform/GitOps).",
|
| 317 |
+
"Harden east–west flows with policy-based filtering.",
|
| 318 |
+
"Plan ingress/egress with load balancers and gateways.",
|
| 319 |
+
"Integrate with RBAC, logging, and change control.",
|
| 320 |
+
]
|
| 321 |
+
if topic == "migration":
|
| 322 |
+
return [
|
| 323 |
+
"Establish governed landing zone (Policy, RBAC, logging).",
|
| 324 |
+
"Connect networks (ExpressRoute/VPN), validate DNS/MTU.",
|
| 325 |
+
"Discover/assess with Azure Migrate; classify apps.",
|
| 326 |
+
"Pilot 2–3 VMs; choose HCX or Azure Migrate cutover.",
|
| 327 |
+
"Migrate in waves; document rollback and success criteria.",
|
| 328 |
+
]
|
| 329 |
+
if topic == "dr":
|
| 330 |
+
return [
|
| 331 |
+
"Define business RTO/RPO per workload.",
|
| 332 |
+
"Enable ASR where applicable; set up replication.",
|
| 333 |
+
"Run planned/unplanned failover drills; validate runbooks.",
|
| 334 |
+
"Harden backups (immutability, soft-delete).",
|
| 335 |
+
"Document recovery steps and responsibilities.",
|
| 336 |
+
]
|
| 337 |
+
if topic == "security":
|
| 338 |
+
return [
|
| 339 |
+
"Centralize secrets in Key Vault; enable RBAC/PIM/MFA.",
|
| 340 |
+
"Enable Defender for Cloud and configure policies.",
|
| 341 |
+
"Collect/monitor logs; set alerts and playbooks.",
|
| 342 |
+
"Segment networks; restrict lateral movement.",
|
| 343 |
+
"Review identity hygiene and conditional access.",
|
| 344 |
+
]
|
| 345 |
+
if topic == "cost":
|
| 346 |
+
return [
|
| 347 |
+
"Right-size compute/storage based on metrics.",
|
| 348 |
+
"Use reservations or Savings Plans where stable.",
|
| 349 |
+
"Automate tagging for showback/chargeback.",
|
| 350 |
+
"Schedule shutdowns for non-prod.",
|
| 351 |
+
"Monitor cost anomalies and budgets.",
|
| 352 |
+
]
|
| 353 |
+
return [
|
| 354 |
+
"Clarify objective, constraints, and success criteria.",
|
| 355 |
+
"Assess current state and dependencies.",
|
| 356 |
+
"Choose the minimal viable approach first; pilot.",
|
| 357 |
+
"Define rollout plan, rollback, and verification.",
|
| 358 |
+
"Measure results and iterate.",
|
| 359 |
+
]
|
| 360 |
+
|
| 361 |
+
def _compose_detailed_from_snippets(query: str, snippets: List[Dict[str, Any]], topic: str) -> str:
    """Build the Markdown answer for a question that has supporting excerpts.

    The output has four parts, in order: an executive summary made of key
    sentences pulled from the excerpts (or a stock line when none qualify),
    the recommended steps for *topic*, the excerpts themselves, and the
    trusted-source links for *topic*.

    Args:
        query: The user's question, echoed back in the answer.
        snippets: One dict per excerpt. 'file' and 'relevance' are required
            ('relevance' must be a number because it is formatted with
            ``:.2f``); 'excerpt' is required for the excerpt listing.
        topic: Topic label used to choose steps and references.

    Returns:
        The answer as a single newline-joined Markdown string.

    Raises:
        KeyError: If a snippet lacks 'file', 'relevance' or 'excerpt'.
    """
    combined = "\n\n".join(s.get("excerpt", "") for s in snippets)
    # NOTE(review): the rubric result is computed but never used in the
    # output. The call is kept because its body is not visible from here;
    # drop it once confirmed to be side-effect free.
    _scores, _gaps = score_text_against_checks(combined)
    points = _extract_key_points(combined, max_points=6)
    refs = list_refs(topic_refs(topic))

    md = [
        "### Answer (detailed)",
        f"**Your question:** {query}",
        "",
        "**Executive summary:**",
    ]
    if points:
        md.extend(f"- {p}" for p in points)
    else:
        md.append("- Based on your documents, here is a structured plan and key considerations.")

    md += ["", "#### Recommended steps"]
    md.extend(f"- {step}" for step in _topic_steps(topic))

    md += ["", "#### Supporting excerpts"]
    md.extend(
        f"- **{s['file']}** (relevance {s['relevance']:.2f}): {s['excerpt']}"
        for s in snippets
    )

    md += ["", f"**Trusted sources:** {refs}"]

    return "\n".join(md)
|
| 399 |
+
|
| 400 |
+
def _compose_topic_fallback(query: str, topic: str) -> str:
    """Build a generic Markdown overview when nothing more specific matched.

    The output contains a topic headline, the echoed question, a stock
    definition prompt, the recommended steps for *topic*, fixed caveats, and
    the trusted-source links for *topic*.

    Args:
        query: The user's question, echoed back in the answer.
        topic: Topic label used to choose headline, steps and references.
            An unrecognised label is treated like 'general'.

    Returns:
        The overview as a single newline-joined Markdown string.
    """
    refs = list_refs(topic_refs(topic))
    headlines = {
        "sdn": "Azure SDN — Overview",
        "migration": "Azure VMware / Azure Migrate — Overview",
        "dr": "Azure Site Recovery (DR) — Overview",
        "security": "Security & Governance in Azure — Overview",
        "cost": "Cost Optimization in Azure — Overview",
        "general": "Overview",
    }
    # Fall back to the general headline instead of raising KeyError, so an
    # unknown topic behaves the same here as in topic_refs/_topic_steps.
    headline = headlines.get(topic, headlines["general"])

    md = [
        f"### {headline}",
        f"**Your question:** {query}",
        "",
        "**Definition/Context:**",
        "- What it is, the problem it solves, and where it runs (Azure / Azure Local).",
        "",
        "**Key capabilities / success factors:**",
    ]
    md.extend(f"- {step}" for step in _topic_steps(topic))

    md += [
        "",
        "**Notes & caveats:**",
        "- Validate limits and prerequisites for your environment.",
        "- Align with governance and security baselines.",
        "- Pilot before broad rollout.",
        "",
        f"**Trusted sources:** {refs}",
    ]
    return "\n".join(md)
|
| 433 |
|
| 434 |
|
| 435 |
# =========================
|
| 436 |
+
# Main Answer Function (no SDN bias)
|
| 437 |
# =========================
|
| 438 |
|
| 439 |
def answer_faq_or_approach_detailed(
|
|
|
|
| 447 |
if not q:
|
| 448 |
return "Please enter a question."
|
| 449 |
|
| 450 |
+
topic = detect_topic(q)
|
|
|
|
|
|
|
|
|
|
| 451 |
|
| 452 |
+
# 1) Seeded FAQs → detailed plan (only for migration-like questions)
|
| 453 |
q_tokens = set(tokenize(q))
|
| 454 |
for item in FAQ_SEEDS:
|
| 455 |
seed_tokens = set(tokenize(item["q"]))
|
| 456 |
+
# require at least one migration-specific token to avoid hijacking definitional questions
|
| 457 |
+
if not ({"migrate", "migration", "hcx", "avs"} & q_tokens):
|
| 458 |
+
continue
|
| 459 |
if seed_tokens and (len(seed_tokens & q_tokens) / float(len(seed_tokens))) >= 0.5:
|
| 460 |
return (
|
| 461 |
"### Answer (detailed)\n"
|
|
|
|
| 478 |
"excerpt": excerpt
|
| 479 |
})
|
| 480 |
if snippets:
|
| 481 |
+
return _compose_detailed_from_snippets(q, snippets, topic)
|
| 482 |
+
|
| 483 |
+
# 3) Topic-aware fallback (no SDN unless you asked about SDN)
|
| 484 |
+
return _compose_topic_fallback(q, topic)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
|
| 486 |
|
| 487 |
# =========================
|
|
|
|
| 511 |
with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) as demo:
|
| 512 |
gr.Markdown(
|
| 513 |
"## VMware On-Prem → Azure Local Migration Assistant\n"
|
| 514 |
+
"- Upload documents (PDF/DOCX/TXT/MD)\n"
|
| 515 |
+
"- Click **Build Index**\n"
|
| 516 |
+
"- Ask a question. Answers are **detailed** and **topic-relevant**\n"
|
| 517 |
)
|
| 518 |
|
| 519 |
with gr.Row():
|
|
|
|
| 526 |
build_btn = gr.Button("Build Index", variant="primary")
|
| 527 |
|
| 528 |
with gr.Column(scale=3):
|
| 529 |
+
question = gr.Textbox(label="Ask a question", placeholder="e.g., What is Azure Arc-enabled SDN? or What's the best way to minimize downtime for our AVS migration?")
|
| 530 |
use_docs = gr.Checkbox(label="Use uploaded docs (RAG)", value=True)
|
| 531 |
ask_btn = gr.Button("Ask", variant="primary")
|
| 532 |
answer_box = gr.Markdown("")
|