ajayinsac committed on
Commit
e5a16a2
·
verified ·
1 Parent(s): bdd12dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -160
app.py CHANGED
@@ -5,10 +5,11 @@
5
  VMware On-Prem → Azure Local Migration Assistant (Gradio)
6
  - No external API calls. No scikit-learn.
7
  - Upload design/migration docs (PDF/DOCX/TXT/MD).
8
- - Ask questions; get RELIABLE, DETAILED answers:
9
- Concept KB (for definitions like “What is Azure Arc-enabled SDN?”)
10
- RAG on uploaded docs (excerpts + gaps/fixes)
11
- Seeded FAQs (migration flows)
 
12
  Run locally:
13
  pip install gradio PyPDF2 python-docx
14
  python app.py
@@ -18,7 +19,7 @@ import os
18
  import io
19
  import re
20
  import math
21
- from typing import List, Tuple, Dict, Any, Optional
22
  from collections import Counter, defaultdict
23
 
24
  import gradio as gr
@@ -42,14 +43,24 @@ except Exception:
42
  # =========================
43
 
44
  TRUSTED_SOURCES: List[Tuple[str, str]] = [
 
 
 
 
 
45
  ("Azure Arc (overview)", "https://learn.microsoft.com/azure/azure-arc/"),
46
  ("Azure Stack HCI (Azure Local)", "https://learn.microsoft.com/azure-stack/hci/"),
47
- ("Azure SDN concepts (HCI)", "https://learn.microsoft.com/azure-stack/hci/concepts/software-defined-networking"),
 
48
  ("Azure VMware Solution (AVS)", "https://learn.microsoft.com/azure/azure-vmware/"),
49
  ("Azure Migrate", "https://learn.microsoft.com/azure/migrate/"),
50
- ("Cloud Adoption Framework (CAF)", "https://learn.microsoft.com/azure/cloud-adoption-framework/"),
51
- ("Azure Well-Architected Framework (WAF)", "https://learn.microsoft.com/azure/architecture/framework/"),
52
  ("VMware HCX Docs", "https://docs.vmware.com/en/VMware-HCX/index.html"),
 
 
 
 
 
 
53
  ]
54
 
55
  FAQ_SEEDS: List[Dict[str, Any]] = [
@@ -77,7 +88,7 @@ FAQ_SEEDS: List[Dict[str, Any]] = [
77
  "Define RTO/RPO per app. Use immutable backups and soft-delete. "
78
  "Leverage ASR for DR where appropriate, run failover drills, and document rollback."
79
  ),
80
- "refs": ["Azure Well-Architected Framework (WAF)"],
81
  },
82
  ]
83
 
@@ -100,6 +111,41 @@ def list_refs(ref_names: List[str]) -> str:
100
  return " | ".join(links) if links else ""
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  # =========================
104
  # Tiny TF-IDF Index (no sklearn)
105
  # =========================
@@ -160,7 +206,7 @@ class TinyTfidfIndex:
160
 
161
 
162
  # =========================
163
- # Rubric for RAG-tailoring
164
  # =========================
165
 
166
  CHECKS = [
@@ -196,118 +242,6 @@ def score_text_against_checks(text: str) -> Tuple[Dict[str, float], List[Dict[st
196
  return scores, gaps
197
 
198
 
199
- # =========================
200
- # Built-in Concept KB (for definitional questions)
201
- # =========================
202
-
203
- class Concept:
204
- def __init__(self, name: str, aliases: List[str], builder):
205
- self.name = name
206
- self.aliases = [tokenize(a) for a in aliases]
207
- self.builder = builder # function(query:str)->str
208
-
209
- def _kb_ans_azure_sdn(_: str) -> str:
210
- refs = list_refs(["Azure SDN concepts (HCI)", "Azure Arc (overview)", "Azure Stack HCI (Azure Local)"])
211
- return (
212
- "### Azure SDN — What it is and why it matters\n"
213
- "**Definition:** Azure SDN is Microsoft's software-defined networking stack that centralizes network control in software, "
214
- "decoupling policy and management from physical hardware. It lets you programmatically create and secure virtual networks, "
215
- "subnets, microsegmentation (ACL/NSG-like policies), load balancers and gateways across Azure and Azure Local (Azure Stack HCI) environments.\n\n"
216
- "**Key capabilities**\n"
217
- "- Central, policy-driven control plane for virtual networking resources.\n"
218
- "- Automation & GitOps-friendly configuration for repeatable environments.\n"
219
- "- Microsegmentation and traffic filtering for east–west security.\n"
220
- "- Software load balancing and gateway services for app connectivity.\n"
221
- "- Consistent constructs across cloud and on-prem (with Azure Local).\n\n"
222
- "**How it works (high level)**\n"
223
- "- A software control plane programs host virtual switches and network functions.\n"
224
- "- Network intent (VNets, subnets, policies) is applied consistently across hosts.\n"
225
- "- Integrates with Azure identity/management for RBAC and governance.\n\n"
226
- "**Common use cases**\n"
227
- "- Rapidly provisioning isolated app environments.\n"
228
- "- Enforcing zero-trust style segmentation between tiers.\n"
229
- "- Hybrid apps spanning Azure and Azure Local.\n\n"
230
- f"**Trusted sources:** {refs}"
231
- )
232
-
233
- def _kb_ans_arc_enabled_sdn(_: str) -> str:
234
- refs = list_refs(["Azure SDN concepts (HCI)", "Azure Arc (overview)", "Azure Stack HCI (Azure Local)"])
235
- return (
236
- "### Azure Arc-enabled SDN — Definition & details\n"
237
- "**Definition:** Azure Arc-enabled SDN brings Azure's software-defined networking to on-premises Azure Local (Azure Stack HCI) clusters, "
238
- "managed through Azure Arc. It decouples network control from hardware so you can centrally define, automate, and secure "
239
- "virtual networks, subnets, and policies in your datacenter using Azure-consistent tools.\n\n"
240
- "**Why it matters**\n"
241
- "- Gives you Azure-like VNet constructs and policy management on-prem.\n"
242
- "- Enables consistent security and segmentation across hybrid estates.\n"
243
- "- Supports rapid, software-driven changes without touching physical fabric.\n\n"
244
- "**Key capabilities**\n"
245
- "- Create/modify on-prem VNets, subnets, and routing policies from Azure.\n"
246
- "- Apply microsegmentation rules (policy/ACL-style) for east–west security.\n"
247
- "- Software load balancing and gateway services for north–south/east–west flows.\n"
248
- "- Integration with Azure RBAC, tagging, and governance for change control.\n\n"
249
- "**Core components (conceptual)**\n"
250
- "- **Arc resource bridge & agents** — connect your HCI cluster to Azure control.\n"
251
- "- **SDN controller & host agents** — program the Hyper-V vSwitch and network functions.\n"
252
- "- **Azure portal/CLI/GitOps** — define intent (VNets, subnets, policies) and deploy.\n\n"
253
- "**Prerequisites (typical)**\n"
254
- "- Azure Local (Azure Stack HCI) cluster connected to Azure Arc.\n"
255
- "- Arc resource bridge onboarded; network requirements met.\n"
256
- "- Appropriate RBAC roles to manage networking resources.\n\n"
257
- "**Use cases**\n"
258
- "- Host Azure-consistent app networks on-prem for data locality/regulatory needs.\n"
259
- "- Hybrid deployments with identical network constructs across Azure and HCI.\n"
260
- "- Rapid rollout of segmented networks for dev/test/prod without hardware changes.\n\n"
261
- "**Notes & limitations (high level)**\n"
262
- "- Physical underlay still matters (IP design, routing, bandwidth, HA).\n"
263
- "- Feature parity with public Azure services may vary; validate per release.\n\n"
264
- f"**Trusted sources:** {refs}"
265
- )
266
-
267
- KB_CONCEPTS: List[Concept] = [
268
- Concept(
269
- name="azure sdn",
270
- aliases=[
271
- "azure sdn",
272
- "software defined networking azure",
273
- "sdn in azure",
274
- "azure local sdn",
275
- "azure stack hci sdn",
276
- ],
277
- builder=_kb_ans_azure_sdn,
278
- ),
279
- Concept(
280
- name="azure arc enabled sdn",
281
- aliases=[
282
- "azure arc enabled sdn",
283
- "azure arc-enabled sdn",
284
- "arc enabled sdn",
285
- "arc-enabled sdn",
286
- "arc sdn",
287
- "azure local arc sdn",
288
- "azure stack hci arc sdn",
289
- ],
290
- builder=_kb_ans_arc_enabled_sdn,
291
- ),
292
- ]
293
-
294
- def lookup_concept(query: str) -> Optional[Concept]:
295
- q_tokens = set(tokenize(query))
296
- best: Optional[Concept] = None
297
- best_score = 0.0
298
- for c in KB_CONCEPTS:
299
- for alias_tokens in c.aliases:
300
- if not alias_tokens:
301
- continue
302
- overlap = len(q_tokens & set(alias_tokens))
303
- score = overlap / float(len(set(alias_tokens)))
304
- if score > best_score:
305
- best_score = score
306
- best = c
307
- # threshold: intentional but tolerant
308
- return best if best_score >= 0.5 else None
309
-
310
-
311
  # =========================
312
  # File Parsing
313
  # =========================
@@ -360,31 +294,146 @@ def parse_file(file_obj: Dict[str, Any]) -> Dict[str, str]:
360
 
361
 
362
  # =========================
363
- # Detailed Answer Composer (for RAG path)
364
  # =========================
365
 
366
- def _compose_detailed_from_snippets(query: str, snippets: List[Dict[str, str]]) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  combined = "\n\n".join([s.get("excerpt", "") for s in snippets])
368
  scores, gaps = score_text_against_checks(combined)
369
- def _mk_gaps(glist):
370
- return "\n".join([f"- ({g['severity']}) {g['id']}: {g['fix']}" for g in glist]) or "- No major issues detected."
371
- refs = list_refs([s[0] for s in TRUSTED_SOURCES])
372
- details = (
373
- f"### Answer (detailed)\n"
374
- f"**Your question:** {query}\n\n"
375
- f"**Summary:** Migration planning must cover landing zone, connectivity, tooling, security, DR, and cost.\n\n"
376
- f"#### Scores\nOverall: {scores.get('overall', 0)}/5.0\n\n"
377
- f"#### Gaps & Fixes\n{_mk_gaps(gaps)}\n\n"
378
- f"#### Supporting Excerpts\n"
379
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  for s in snippets:
381
- details += f"- {s['file']} (rel {s['relevance']:.2f}): {s['excerpt']}\n"
382
- details += f"\n**Trusted sources:** {refs}"
383
- return details
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
 
386
  # =========================
387
- # Main Answer Function
388
  # =========================
389
 
390
  def answer_faq_or_approach_detailed(
@@ -398,15 +447,15 @@ def answer_faq_or_approach_detailed(
398
  if not q:
399
  return "Please enter a question."
400
 
401
- # 0) Concept KB for definitional questions (e.g., "What is Azure Arc-enabled SDN?")
402
- concept = lookup_concept(q)
403
- if concept is not None:
404
- return concept.builder(q)
405
 
406
- # 1) Seeded FAQs → detailed plan when relevant (>=50% overlap with seed)
407
  q_tokens = set(tokenize(q))
408
  for item in FAQ_SEEDS:
409
  seed_tokens = set(tokenize(item["q"]))
 
 
 
410
  if seed_tokens and (len(seed_tokens & q_tokens) / float(len(seed_tokens))) >= 0.5:
411
  return (
412
  "### Answer (detailed)\n"
@@ -429,20 +478,10 @@ def answer_faq_or_approach_detailed(
429
  "excerpt": excerpt
430
  })
431
  if snippets:
432
- return _compose_detailed_from_snippets(q, snippets)
433
-
434
- # 3) Fallback (no docs) generic, but structured overview (not migration-only)
435
- refs = list_refs(["Azure Arc (overview)", "Azure Stack HCI (Azure Local)", "Azure SDN concepts (HCI)"])
436
- return (
437
- "### Answer (detailed)\n"
438
- "I couldn't match a specific concept or supporting excerpts, so here's a structured overview you can refine:\n\n"
439
- "**Definition:** Describe what the service/feature is, what problems it solves, and where it runs (Azure / Azure Local).\n\n"
440
- "**Key capabilities:** automation, policy-driven control, security segmentation, connectivity services.\n\n"
441
- "**How it works:** control plane programs host/network functions; policies applied consistently; integrates with RBAC/governance.\n\n"
442
- "**Prerequisites:** identity/RBAC, connectivity to Azure (for Arc), supported host/cluster versions.\n\n"
443
- "**Use cases:** hybrid deployments, zero-trust segmentation, rapid environment provisioning.\n\n"
444
- f"**Trusted sources:** {refs}"
445
- )
446
 
447
 
448
  # =========================
@@ -472,7 +511,9 @@ def build_index(files: List[Dict[str, Any]]):
472
  with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) as demo:
473
  gr.Markdown(
474
  "## VMware On-Prem → Azure Local Migration Assistant\n"
475
- "Upload documents and ask questions. Detailed answers will be provided."
 
 
476
  )
477
 
478
  with gr.Row():
@@ -485,7 +526,7 @@ with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) a
485
  build_btn = gr.Button("Build Index", variant="primary")
486
 
487
  with gr.Column(scale=3):
488
- question = gr.Textbox(label="Ask a question", placeholder="e.g., What is Azure Arc-enabled SDN, and why would I use it?")
489
  use_docs = gr.Checkbox(label="Use uploaded docs (RAG)", value=True)
490
  ask_btn = gr.Button("Ask", variant="primary")
491
  answer_box = gr.Markdown("")
 
5
  VMware On-Prem → Azure Local Migration Assistant (Gradio)
6
  - No external API calls. No scikit-learn.
7
  - Upload design/migration docs (PDF/DOCX/TXT/MD).
8
+ - Ask questions; get RELIABLE, DETAILED, and RELEVANT answers:
9
+ RAG on uploaded docs (excerpts + topic-tailored structure)
10
+ Seeded FAQs (for migration flows)
11
+ Topic-aware fallbacks (no more SDN leakage into unrelated topics)
12
+
13
  Run locally:
14
  pip install gradio PyPDF2 python-docx
15
  python app.py
 
19
  import io
20
  import re
21
  import math
22
+ from typing import List, Tuple, Dict, Any
23
  from collections import Counter, defaultdict
24
 
25
  import gradio as gr
 
43
  # =========================
44
 
45
  TRUSTED_SOURCES: List[Tuple[str, str]] = [
46
+ # Core Azure landing/ops
47
+ ("Cloud Adoption Framework (CAF)", "https://learn.microsoft.com/azure/cloud-adoption-framework/"),
48
+ ("Azure Well-Architected Framework (WAF)", "https://learn.microsoft.com/azure/architecture/framework/"),
49
+ # Networking / SDN (used ONLY when topic == 'sdn')
50
+ ("Azure SDN concepts (HCI)", "https://learn.microsoft.com/azure-stack/hci/concepts/software-defined-networking"),
51
  ("Azure Arc (overview)", "https://learn.microsoft.com/azure/azure-arc/"),
52
  ("Azure Stack HCI (Azure Local)", "https://learn.microsoft.com/azure-stack/hci/"),
53
+ ("Azure Virtual Network", "https://learn.microsoft.com/azure/virtual-network/"),
54
+ # Migration
55
  ("Azure VMware Solution (AVS)", "https://learn.microsoft.com/azure/azure-vmware/"),
56
  ("Azure Migrate", "https://learn.microsoft.com/azure/migrate/"),
 
 
57
  ("VMware HCX Docs", "https://docs.vmware.com/en/VMware-HCX/index.html"),
58
+ # DR
59
+ ("Azure Site Recovery (ASR)", "https://learn.microsoft.com/azure/site-recovery/"),
60
+ # Security
61
+ ("Microsoft Defender for Cloud", "https://learn.microsoft.com/azure/defender-for-cloud/"),
62
+ # Cost
63
+ ("Azure Cost Management", "https://learn.microsoft.com/azure/cost-management-billing/"),
64
  ]
65
 
66
  FAQ_SEEDS: List[Dict[str, Any]] = [
 
88
  "Define RTO/RPO per app. Use immutable backups and soft-delete. "
89
  "Leverage ASR for DR where appropriate, run failover drills, and document rollback."
90
  ),
91
+ "refs": ["Azure Site Recovery (ASR)"],
92
  },
93
  ]
94
 
 
111
  return " | ".join(links) if links else ""
112
 
113
 
114
+ # =========================
115
+ # Topic detection (keeps answers relevant)
116
+ # =========================
117
+
118
def detect_topic(q: str) -> str:
    """Classify a question into a coarse topic label by keyword lookup.

    The question is tokenized with the module's ``tokenize`` helper and the
    resulting token set is compared against one keyword group per topic, in a
    fixed priority order. The first group that shares a token with the
    question determines the result.

    Returns one of: 'sdn', 'migration', 'dr', 'security', 'cost', 'general'.
    """
    question_tokens = set(tokenize(q))

    # Order matters: earlier entries win when a question matches several groups.
    keyword_groups = (
        ("sdn", {"sdn", "software-defined", "softwaredefined"}),
        ("migration", {"migrate", "migration", "hcx", "avs", "vmotion", "cutover"}),
        ("dr", {"dr", "disaster", "asr", "rto", "rpo", "failover"}),
        ("security", {"defender", "sentinel", "pim", "mfa", "vault", "identity", "entra"}),
        ("cost", {"cost", "reservation", "savings", "rightsizing", "tagging"}),
    )
    for label, keywords in keyword_groups:
        if not keywords.isdisjoint(question_tokens):
            return label
    return "general"
134
+
135
def topic_refs(topic: str) -> List[str]:
    """Return the reference names to cite for a given topic label.

    Recognised labels are 'sdn', 'migration', 'dr', 'security' and 'cost'.
    Any other value (including 'general') yields the CAF + WAF pair.
    A fresh list is built on every call.
    """
    caf = "Cloud Adoption Framework (CAF)"
    waf = "Azure Well-Architected Framework (WAF)"
    refs_by_topic = {
        "sdn": [
            "Azure SDN concepts (HCI)",
            "Azure Arc (overview)",
            "Azure Stack HCI (Azure Local)",
            "Azure Virtual Network",
        ],
        "migration": [
            "Azure Migrate",
            "Azure VMware Solution (AVS)",
            "VMware HCX Docs",
            caf,
        ],
        "dr": ["Azure Site Recovery (ASR)", waf],
        "security": ["Microsoft Defender for Cloud", waf],
        "cost": ["Azure Cost Management", waf],
    }
    return refs_by_topic.get(topic, [caf, waf])
147
+
148
+
149
  # =========================
150
  # Tiny TF-IDF Index (no sklearn)
151
  # =========================
 
206
 
207
 
208
  # =========================
209
+ # Rubric for tailoring RAG output
210
  # =========================
211
 
212
  CHECKS = [
 
242
  return scores, gaps
243
 
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  # =========================
246
  # File Parsing
247
  # =========================
 
294
 
295
 
296
  # =========================
297
+ # Helpers for composing detailed answers
298
  # =========================
299
 
300
+ def _extract_key_points(text: str, max_points: int = 6) -> List[str]:
301
+ # naive sentence splitter
302
+ parts = re.split(r"(?<=[.!?])\s+", text.strip())
303
+ points = []
304
+ for p in parts:
305
+ p = p.strip()
306
+ if 30 <= len(p) <= 300 and p not in points:
307
+ points.append(p)
308
+ if len(points) >= max_points:
309
+ break
310
+ return points
311
+
312
+ def _topic_steps(topic: str) -> List[str]:
313
+ if topic == "sdn":
314
+ return [
315
+ "Define VNets/subnets and segmentation policy.",
316
+ "Automate configuration (ARM/Bicep/Terraform/GitOps).",
317
+ "Harden east–west flows with policy-based filtering.",
318
+ "Plan ingress/egress with load balancers and gateways.",
319
+ "Integrate with RBAC, logging, and change control.",
320
+ ]
321
+ if topic == "migration":
322
+ return [
323
+ "Establish governed landing zone (Policy, RBAC, logging).",
324
+ "Connect networks (ExpressRoute/VPN), validate DNS/MTU.",
325
+ "Discover/assess with Azure Migrate; classify apps.",
326
+ "Pilot 2–3 VMs; choose HCX or Azure Migrate cutover.",
327
+ "Migrate in waves; document rollback and success criteria.",
328
+ ]
329
+ if topic == "dr":
330
+ return [
331
+ "Define business RTO/RPO per workload.",
332
+ "Enable ASR where applicable; set up replication.",
333
+ "Run planned/unplanned failover drills; validate runbooks.",
334
+ "Harden backups (immutability, soft-delete).",
335
+ "Document recovery steps and responsibilities.",
336
+ ]
337
+ if topic == "security":
338
+ return [
339
+ "Centralize secrets in Key Vault; enable RBAC/PIM/MFA.",
340
+ "Enable Defender for Cloud and configure policies.",
341
+ "Collect/monitor logs; set alerts and playbooks.",
342
+ "Segment networks; restrict lateral movement.",
343
+ "Review identity hygiene and conditional access.",
344
+ ]
345
+ if topic == "cost":
346
+ return [
347
+ "Right-size compute/storage based on metrics.",
348
+ "Use reservations or Savings Plans where stable.",
349
+ "Automate tagging for showback/chargeback.",
350
+ "Schedule shutdowns for non-prod.",
351
+ "Monitor cost anomalies and budgets.",
352
+ ]
353
+ return [
354
+ "Clarify objective, constraints, and success criteria.",
355
+ "Assess current state and dependencies.",
356
+ "Choose the minimal viable approach first; pilot.",
357
+ "Define rollout plan, rollback, and verification.",
358
+ "Measure results and iterate.",
359
+ ]
360
+
361
def _compose_detailed_from_snippets(query: str, snippets: List[Dict[str, str]], topic: str) -> str:
    """Build the Markdown answer for the document-backed (RAG) path.

    The answer has four parts: an executive summary made of key sentences
    pulled from the snippet excerpts, the recommended steps for ``topic``,
    the supporting excerpts themselves, and the topic's trusted sources.

    Args:
        query: The user's question, echoed at the top of the answer.
        snippets: Retrieved snippets; each is read for 'file', 'relevance'
            and 'excerpt'.
        topic: Topic label used to select steps and references.

    Returns:
        The answer as a single newline-joined Markdown string.
    """
    excerpts_text = "\n\n".join(item.get("excerpt", "") for item in snippets)

    # NOTE(review): the rubric result is not used anywhere in this composer;
    # the call is kept so behaviour stays exactly as before.
    _scores, _gaps = score_text_against_checks(excerpts_text)

    key_points = _extract_key_points(excerpts_text, max_points=6)
    source_links = list_refs(topic_refs(topic))

    # Fall back to a fixed line when no sentence qualified as a key point.
    summary_lines = [f"- {point}" for point in key_points] or [
        "- Based on your documents, here is a structured plan and key considerations."
    ]
    step_lines = [f"- {step}" for step in _topic_steps(topic)]
    excerpt_lines = [
        f"- **{item['file']}** (relevance {item['relevance']:.2f}): {item['excerpt']}"
        for item in snippets
    ]

    return "\n".join(
        [
            "### Answer (detailed)",
            f"**Your question:** {query}",
            "",
            "**Executive summary:**",
            *summary_lines,
            "",
            "#### Recommended steps",
            *step_lines,
            "",
            "#### Supporting excerpts",
            *excerpt_lines,
            "",
            f"**Trusted sources:** {source_links}",
        ]
    )
399
+
400
def _compose_topic_fallback(query: str, topic: str) -> str:
    """Build a generic, topic-scoped Markdown overview when nothing else matched.

    The overview echoes the question, gives a definition/context placeholder,
    lists the steps for ``topic`` as key capabilities, adds fixed caveats and
    ends with the topic's trusted sources.

    Args:
        query: The user's question, echoed near the top.
        topic: Topic label. Labels without a dedicated headline use the
            generic "Overview" headline.

    Returns:
        The overview as a single newline-joined Markdown string.
    """
    refs = list_refs(topic_refs(topic))

    headlines = {
        "sdn": "Azure SDN — Overview",
        "migration": "Azure VMware / Azure Migrate — Overview",
        "dr": "Azure Site Recovery (DR) — Overview",
        "security": "Security & Governance in Azure — Overview",
        "cost": "Cost Optimization in Azure — Overview",
        "general": "Overview",
    }
    # topic_refs() and _topic_steps() both default gracefully for labels they
    # do not know; do the same here instead of raising KeyError.
    headline = headlines.get(topic, "Overview")

    md = [
        f"### {headline}",
        f"**Your question:** {query}",
        "",
        "**Definition/Context:**",
        "- What it is, the problem it solves, and where it runs (Azure / Azure Local).",
        "",
        "**Key capabilities / success factors:**",
    ]
    md.extend(f"- {step}" for step in _topic_steps(topic))
    md += [
        "",
        "**Notes & caveats:**",
        "- Validate limits and prerequisites for your environment.",
        "- Align with governance and security baselines.",
        "- Pilot before broad rollout.",
        "",
        f"**Trusted sources:** {refs}",
    ]
    return "\n".join(md)
433
 
434
 
435
  # =========================
436
+ # Main Answer Function (no SDN bias)
437
  # =========================
438
 
439
  def answer_faq_or_approach_detailed(
 
447
  if not q:
448
  return "Please enter a question."
449
 
450
+ topic = detect_topic(q)
 
 
 
451
 
452
+ # 1) Seeded FAQs → detailed plan (only for migration-like questions)
453
  q_tokens = set(tokenize(q))
454
  for item in FAQ_SEEDS:
455
  seed_tokens = set(tokenize(item["q"]))
456
+ # require at least one migration-specific token to avoid hijacking definitional questions
457
+ if not ({"migrate", "migration", "hcx", "avs"} & q_tokens):
458
+ continue
459
  if seed_tokens and (len(seed_tokens & q_tokens) / float(len(seed_tokens))) >= 0.5:
460
  return (
461
  "### Answer (detailed)\n"
 
478
  "excerpt": excerpt
479
  })
480
  if snippets:
481
+ return _compose_detailed_from_snippets(q, snippets, topic)
482
+
483
+ # 3) Topic-aware fallback (no SDN unless you asked about SDN)
484
+ return _compose_topic_fallback(q, topic)
 
 
 
 
 
 
 
 
 
 
485
 
486
 
487
  # =========================
 
511
  with gr.Blocks(title="VMware → Azure Migration Assistant", fill_height=True) as demo:
512
  gr.Markdown(
513
  "## VMware On-Prem → Azure Local Migration Assistant\n"
514
+ "- Upload documents (PDF/DOCX/TXT/MD)\n"
515
+ "- Click **Build Index**\n"
516
+ "- Ask a question. Answers are **detailed** and **topic-relevant**\n"
517
  )
518
 
519
  with gr.Row():
 
526
  build_btn = gr.Button("Build Index", variant="primary")
527
 
528
  with gr.Column(scale=3):
529
+ question = gr.Textbox(label="Ask a question", placeholder="e.g., What is Azure Arc-enabled SDN? or What's the best way to minimize downtime for our AVS migration?")
530
  use_docs = gr.Checkbox(label="Use uploaded docs (RAG)", value=True)
531
  ask_btn = gr.Button("Ask", variant="primary")
532
  answer_box = gr.Markdown("")