Pete Dunn committed on
Commit
a81f011
·
1 Parent(s): cb489da

Tighten package-sensitive legacy fallback paths

Browse files
backend/app/knowledgebase/core.py CHANGED
@@ -38612,6 +38612,9 @@ class UnifiedKnowledgebaseCore:
38612
  return str(catalog_row.get("display_name") or model_text)
38613
  return str(model_text or "")
38614
 
 
 
 
38615
  def _fallback_recommendation_label(model_text: str, *, include_manufacturer: bool = False) -> str:
38616
  display = _fallback_display(model_text)
38617
  if not include_manufacturer:
@@ -38769,8 +38772,35 @@ class UnifiedKnowledgebaseCore:
38769
  token in query.normalized_message
38770
  for token in ("which replacement path is cleaner", "cleaner path", "cleaner for")
38771
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
38772
  suppress_family_branding = bool(match.get("_family_collapsed") or analysis.get("_family_safe") or family_safe_note)
38773
  has_only_lifecycle_fallback = fallback_source or ((not primary_rows) and (not backup_rows) and bool(lifecycle_fallback_texts or legacy_5g or legacy_4g))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38774
  resolved_subject_label = _norm(
38775
  match.get("_canonical_resolved_label")
38776
  or _as_dict(analysis.get("product")).get("_canonical_resolved_label")
@@ -38803,9 +38833,24 @@ class UnifiedKnowledgebaseCore:
38803
  f"If that normalization is wrong, stop here and send the exact device label before using these recommendations.",
38804
  "",
38805
  ]
38806
- )
38807
  if family_safe_note:
38808
  lines.extend([family_safe_note, ""])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38809
  if (not bool(query.current_only)) or any(
38810
  token in query.normalized_message
38811
  for token in ("legacy", "older", "old", "historical", "similar")
@@ -38977,13 +39022,13 @@ class UnifiedKnowledgebaseCore:
38977
  lines.append("Fallback note: the lifecycle rows below are sourced migration hints, not workbook-ready replacement lanes, so feature fit still needs validation before quoting.")
38978
  lines.append("")
38979
  lines.append("Fallback recommendations:")
38980
- if lifecycle_fallback_texts:
38981
  lines.append(
38982
- f"- Best sourced current path: `{_fallback_display(lifecycle_fallback_texts[0])}`. {_fallback_lineage_note(lifecycle_fallback_texts[0], include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(lifecycle_fallback_texts[0])}."
38983
  )
38984
- if legacy_5g:
38985
  lines.append(f"- Best sourced 5G path: `{_fallback_display(legacy_5g)}`. {_fallback_lineage_note(legacy_5g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_5g)}.")
38986
- if legacy_4g:
38987
  lines.append(f"- Best sourced 4G bridge: `{_fallback_display(legacy_4g)}`. {_fallback_lineage_note(legacy_4g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_4g, bridge_mode=True)}.")
38988
  lines.append("")
38989
  lines.extend(
@@ -39054,7 +39099,7 @@ class UnifiedKnowledgebaseCore:
39054
  lines.append(
39055
  f"| {lane_label} | {_md_cell(_display_name(row))} | {_md_cell(combined_note)} |"
39056
  )
39057
- else:
39058
  lines.append(
39059
  f"| {'Sourced same-manufacturer backup' if fallback_source else 'Same-manufacturer backup'} | None listed | {_md_cell(_same_brand_empty_note())} |"
39060
  )
@@ -39071,19 +39116,19 @@ class UnifiedKnowledgebaseCore:
39071
  lines.append(
39072
  f"| {'Sourced cross-vendor backup' if fallback_source else 'Cross-vendor backup'} | None listed | {_md_cell(_backup_empty_note())} |"
39073
  )
39074
- if show_sourced_fallback_rows and lifecycle_fallback_texts:
39075
  lines.append(
39076
- f"| Sourced lifecycle fallback | {_md_cell(_fallback_display(lifecycle_fallback_texts[0]))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(lifecycle_fallback_texts[0], include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(lifecycle_fallback_texts[0]))} |"
39077
  )
39078
- elif show_sourced_fallback_rows and (legacy_5g or legacy_4g):
39079
- if legacy_5g and not emitted_requested_5g_path:
39080
  legacy_5g_label = "Sourced lifecycle 5G fallback"
39081
  if asks_5g_replacement:
39082
  legacy_5g_label = "Requested 5G same-brand path" if _fallback_is_same_vendor_5g(legacy_5g) else "Requested 5G path"
39083
  lines.append(
39084
  f"| {legacy_5g_label} | {_md_cell(_fallback_recommendation_label(legacy_5g, include_manufacturer=False))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_5g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_5g))} |"
39085
  )
39086
- if legacy_4g and not requested_5g_priority:
39087
  lines.append(
39088
  f"| Sourced lifecycle 4G bridge | {_md_cell(_fallback_display(legacy_4g))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_4g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_4g, bridge_mode=True))} |"
39089
  )
 
38612
  return str(catalog_row.get("display_name") or model_text)
38613
  return str(model_text or "")
38614
 
38615
+ def _fallback_choice_key(model_text: str) -> str:
38616
+ return _compact_model(_fallback_display(model_text) or model_text)
38617
+
38618
  def _fallback_recommendation_label(model_text: str, *, include_manufacturer: bool = False) -> str:
38619
  display = _fallback_display(model_text)
38620
  if not include_manufacturer:
 
38772
  token in query.normalized_message
38773
  for token in ("which replacement path is cleaner", "cleaner path", "cleaner for")
38774
  )
38775
+ asks_package_confirmation = any(
38776
+ token in query.normalized_message
38777
+ for token in (
38778
+ "exact variant",
38779
+ "exact sku",
38780
+ "exact package",
38781
+ "sku/package",
38782
+ "variant/package",
38783
+ "package still matters",
38784
+ "variant still matters",
38785
+ "sku still matters",
38786
+ )
38787
+ )
38788
  suppress_family_branding = bool(match.get("_family_collapsed") or analysis.get("_family_safe") or family_safe_note)
38789
  has_only_lifecycle_fallback = fallback_source or ((not primary_rows) and (not backup_rows) and bool(lifecycle_fallback_texts or legacy_5g or legacy_4g))
38790
+ lifecycle_fallback_primary = lifecycle_fallback_texts[0] if lifecycle_fallback_texts else ""
38791
+ lifecycle_fallback_primary_key = _fallback_choice_key(lifecycle_fallback_primary)
38792
+ legacy_5g_key = _fallback_choice_key(legacy_5g)
38793
+ legacy_4g_key = _fallback_choice_key(legacy_4g)
38794
+ show_legacy_5g_recommendation = bool(legacy_5g) and (
38795
+ asks_5g_replacement or (not lifecycle_fallback_primary and not primary_rows and not same_manufacturer_backup_rows)
38796
+ )
38797
+ if show_legacy_5g_recommendation and legacy_5g_key and legacy_5g_key == lifecycle_fallback_primary_key:
38798
+ show_legacy_5g_recommendation = False
38799
+ show_legacy_4g_recommendation = bool(legacy_4g)
38800
+ if show_legacy_4g_recommendation and legacy_4g_key and legacy_4g_key == lifecycle_fallback_primary_key:
38801
+ show_legacy_4g_recommendation = False
38802
+ if show_legacy_4g_recommendation and legacy_4g_key and legacy_4g_key == legacy_5g_key:
38803
+ show_legacy_4g_recommendation = False
38804
  resolved_subject_label = _norm(
38805
  match.get("_canonical_resolved_label")
38806
  or _as_dict(analysis.get("product")).get("_canonical_resolved_label")
 
38833
  f"If that normalization is wrong, stop here and send the exact device label before using these recommendations.",
38834
  "",
38835
  ]
38836
+ )
38837
  if family_safe_note:
38838
  lines.extend([family_safe_note, ""])
38839
+ if asks_package_confirmation:
38840
+ if suppress_family_branding or has_only_lifecycle_fallback:
38841
+ lines.extend(
38842
+ [
38843
+ "Exact variant/package still matters: yes. This answer stays family-safe, so confirm the final SKU/package before quote lock.",
38844
+ "",
38845
+ ]
38846
+ )
38847
+ else:
38848
+ lines.extend(
38849
+ [
38850
+ "Exact variant/package still matters: not for the replacement lane itself, but confirm the final SKU/package before quoting feature-sensitive details.",
38851
+ "",
38852
+ ]
38853
+ )
38854
  if (not bool(query.current_only)) or any(
38855
  token in query.normalized_message
38856
  for token in ("legacy", "older", "old", "historical", "similar")
 
39022
  lines.append("Fallback note: the lifecycle rows below are sourced migration hints, not workbook-ready replacement lanes, so feature fit still needs validation before quoting.")
39023
  lines.append("")
39024
  lines.append("Fallback recommendations:")
39025
+ if lifecycle_fallback_primary:
39026
  lines.append(
39027
+ f"- Best sourced current path: `{_fallback_display(lifecycle_fallback_primary)}`. {_fallback_lineage_note(lifecycle_fallback_primary, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(lifecycle_fallback_primary)}."
39028
  )
39029
+ if show_legacy_5g_recommendation:
39030
  lines.append(f"- Best sourced 5G path: `{_fallback_display(legacy_5g)}`. {_fallback_lineage_note(legacy_5g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_5g)}.")
39031
+ if show_legacy_4g_recommendation:
39032
  lines.append(f"- Best sourced 4G bridge: `{_fallback_display(legacy_4g)}`. {_fallback_lineage_note(legacy_4g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_4g, bridge_mode=True)}.")
39033
  lines.append("")
39034
  lines.extend(
 
39099
  lines.append(
39100
  f"| {lane_label} | {_md_cell(_display_name(row))} | {_md_cell(combined_note)} |"
39101
  )
39102
+ elif not (show_sourced_fallback_rows and (lifecycle_fallback_primary or show_legacy_5g_recommendation or show_legacy_4g_recommendation)):
39103
  lines.append(
39104
  f"| {'Sourced same-manufacturer backup' if fallback_source else 'Same-manufacturer backup'} | None listed | {_md_cell(_same_brand_empty_note())} |"
39105
  )
 
39116
  lines.append(
39117
  f"| {'Sourced cross-vendor backup' if fallback_source else 'Cross-vendor backup'} | None listed | {_md_cell(_backup_empty_note())} |"
39118
  )
39119
+ if show_sourced_fallback_rows and lifecycle_fallback_primary:
39120
  lines.append(
39121
+ f"| Sourced lifecycle fallback | {_md_cell(_fallback_display(lifecycle_fallback_primary))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(lifecycle_fallback_primary, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(lifecycle_fallback_primary))} |"
39122
  )
39123
+ elif show_sourced_fallback_rows and (show_legacy_5g_recommendation or show_legacy_4g_recommendation):
39124
+ if show_legacy_5g_recommendation and not emitted_requested_5g_path:
39125
  legacy_5g_label = "Sourced lifecycle 5G fallback"
39126
  if asks_5g_replacement:
39127
  legacy_5g_label = "Requested 5G same-brand path" if _fallback_is_same_vendor_5g(legacy_5g) else "Requested 5G path"
39128
  lines.append(
39129
  f"| {legacy_5g_label} | {_md_cell(_fallback_recommendation_label(legacy_5g, include_manufacturer=False))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_5g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_5g))} |"
39130
  )
39131
+ if show_legacy_4g_recommendation and not requested_5g_priority:
39132
  lines.append(
39133
  f"| Sourced lifecycle 4G bridge | {_md_cell(_fallback_display(legacy_4g))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_4g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_4g, bridge_mode=True))} |"
39134
  )
backend/app/test_unified_kb_core.py CHANGED
@@ -11677,10 +11677,76 @@ def test_unified_kb_mbr1400_fallback_replacement_prefers_current_fallback_before
11677
  assistant = str(out.get("assistant") or "")
11678
  assert out["meta"]["retrieval_mode"] == "deterministic_router_workbook_replacements"
11679
  assert "1. `Same-brand path` -> `E300`" in assistant
11680
- assert "Best sourced 5G path: `E400`" in assistant
 
11681
  assert "1. `Same-brand path` -> `E400`" not in assistant
11682
  assert "| Sourced same-manufacturer path | E300 |" in assistant
11683
  assert "| Same manufacturer | None workbook-ready |" not in assistant
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11684
 
11685
 
11686
  def test_unified_kb_rut950_replacement_drops_na_backup_and_uses_primary_only_note(tmp_path: Path, monkeypatch) -> None: # type: ignore[no-untyped-def]
 
11677
  assistant = str(out.get("assistant") or "")
11678
  assert out["meta"]["retrieval_mode"] == "deterministic_router_workbook_replacements"
11679
  assert "1. `Same-brand path` -> `E300`" in assistant
11680
+ assert "Best sourced 5G path: `E400`" not in assistant
11681
+ assert "Fallback recommendations:" in assistant
11682
  assert "1. `Same-brand path` -> `E400`" not in assistant
11683
  assert "| Sourced same-manufacturer path | E300 |" in assistant
11684
  assert "| Same manufacturer | None workbook-ready |" not in assistant
11685
+ assert "| Sourced same-manufacturer backup | None listed |" not in assistant
11686
+
11687
+
11688
+ def test_unified_kb_br1_mini_package_sensitive_fallback_answers_variant_question_cleanly(tmp_path: Path, monkeypatch) -> None: # type: ignore[no-untyped-def]
11689
+ core = _build_core_with_loaded_workbook(tmp_path, router_core=_synthetic_workbook_router_core(tmp_path))
11690
+ workbook_core = core._rapid_router_intelligence_core()
11691
+ assert workbook_core is not None
11692
+ monkeypatch.setattr(
11693
+ core,
11694
+ "_router_workbook_resolve_replacement_analysis",
11695
+ lambda _workbook_core, manufacturer_text, product_text: {
11696
+ "ok": True,
11697
+ "resolution_mode": "family_alias_provisional",
11698
+ "analysis": {
11699
+ "match": {
11700
+ "product_key": "prod_peplink_br1_mini_family",
11701
+ "product_id": "BR1MINI",
11702
+ "display_name": "BR1 Mini",
11703
+ "manufacturer_group": "Peplink",
11704
+ "status_bucket": "End of Life",
11705
+ "_requested_label": "BR1 Mini",
11706
+ "_family_collapsed": True,
11707
+ },
11708
+ "replacements": {
11709
+ "primary_candidates": [],
11710
+ "primary_replacement": None,
11711
+ "same_manufacturer_backup_replacements": [],
11712
+ "backup_replacements": [],
11713
+ "historical_only_replacements": [],
11714
+ "review_blocked_count": 0,
11715
+ "candidate_count": 0,
11716
+ "no_replacement": False,
11717
+ },
11718
+ "_replacement_source_mode": "lifecycle_fallback",
11719
+ "_replacement_subject_lifecycle": {
11720
+ "events": [
11721
+ {"recommended_replacement_text": "MAX BR1 Mini 5G"},
11722
+ ]
11723
+ },
11724
+ "_replacement_legacy_lifecycle": {
11725
+ "rep5g": "MAX BR1 Mini 5G",
11726
+ "alt4g": "BR1 Mini LTEA",
11727
+ },
11728
+ "review_required": True,
11729
+ "manual_review_reasons": [],
11730
+ "_family_safe_note": "Requested `BR1 Mini` matched a workbook family row; exact variant still needs confirmation.",
11731
+ },
11732
+ },
11733
+ )
11734
+
11735
+ out = core.handle_message(
11736
+ "Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters.",
11737
+ {},
11738
+ mode="router_lifecycle",
11739
+ audience="auto",
11740
+ show_citations=True,
11741
+ )
11742
+
11743
+ assistant = str(out.get("assistant") or "")
11744
+ assert out["meta"]["retrieval_mode"] == "deterministic_router_workbook_replacements"
11745
+ assert "Exact variant/package still matters: yes." in assistant
11746
+ assert "Best sourced current path:" in assistant
11747
+ assert "Best sourced 5G path:" not in assistant
11748
+ assert "Best sourced 4G bridge:" in assistant
11749
+ assert "| Sourced same-manufacturer backup | None listed |" not in assistant
11750
 
11751
 
11752
  def test_unified_kb_rut950_replacement_drops_na_backup_and_uses_primary_only_note(tmp_path: Path, monkeypatch) -> None: # type: ignore[no-untyped-def]
docs/testing/router_regression_packs/README.md CHANGED
@@ -161,3 +161,42 @@ Scored outputs will land next to the chosen `--out-csv` path:
161
  How to use the expectations file:
162
  - Treat `router_compare_family_edge_expected.json` as the behavior snapshot for these compares.
163
  - If the scorer passes but the answer still collapses models, drifts to sparse alias rows, or hides behind generic snippet noise instead of a family-safe compare or conservative clarification, treat that as a real regression.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  How to use the expectations file:
162
  - Treat `router_compare_family_edge_expected.json` as the behavior snapshot for these compares.
163
  - If the scorer passes but the answer still collapses models, drifts to sparse alias rows, or hides behind generic snippet noise instead of a family-safe compare or conservative clarification, treat that as a real regression.
164
+
165
+ ### Router Package-Sensitive Legacy Edge Pack
166
+
167
+ Files:
168
+ - `router_package_sensitive_legacy_edge_pack.csv`
169
+ - `router_package_sensitive_legacy_edge_expected.json`
170
+
171
+ Purpose:
172
+ - Track legacy or package-sensitive replacement prompts where the app can drift from workbook-backed current paths, overstate bundle-level certainty, or fall back into generic lifecycle/details wording.
173
+ - Re-run them against the hosted canary without perturbing the broader release-gate packs.
174
+
175
+ Current legacy/package-sensitive targets:
176
+ - `BR1 Mini`
177
+ - `MBR1400`
178
+ - `CBA850`
179
+ - `CBA250`
180
+ - `AER1650`
181
+ - `C1101-4P`
182
+
183
+ Recommended hosted canary run:
184
+
185
+ ```bash
186
+ cd backend
187
+ python3 scripts/run_router_canary_ab_eval_shard.py \
188
+ --in-csv ../docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_pack.csv \
189
+ --out-csv /tmp/router_package_sensitive_legacy_edge_results.csv \
190
+ --base-url https://crazycrazypete-masters-four-tab-openai-canary.hf.space \
191
+ --auth-env-file ../frontend/.env.e2e \
192
+ --expected-git-sha <live-canary-sha> \
193
+ --score-after
194
+ ```
195
+
196
+ Scored outputs will land next to the chosen `--out-csv` path:
197
+ - `/tmp/router_package_sensitive_legacy_edge_results_scored.csv`
198
+ - `/tmp/router_package_sensitive_legacy_edge_results_scored_summary.json`
199
+
200
+ How to use the expectations file:
201
+ - Treat `router_package_sensitive_legacy_edge_expected.json` as the behavior snapshot for this package-sensitive legacy wave.
202
+ - A conservative family-safe answer is acceptable when the workbook still requires exact package confirmation, but it should not invent bundle-level certainty or hide the current workbook-backed path.
docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_expected.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "package_legacy_001": {
3
+ "models": ["BR1 Mini"],
4
+ "expected_behavior": "package_sensitive_same_brand_replacement_with_variant_caution",
5
+ "must_not": [
6
+ "pretend one BR1 Mini package is universally correct",
7
+ "generic lifecycle-only answer with no replacement guidance"
8
+ ],
9
+ "notes": [
10
+ "A current workbook-backed path is acceptable if package caution stays visible.",
11
+ "A conservative clarify-first answer is acceptable if the workbook is still too variant-sensitive."
12
+ ]
13
+ },
14
+ "package_legacy_002": {
15
+ "models": ["MBR1400"],
16
+ "expected_behavior": "primary_same_brand_fallback_e300_before_5g_lane",
17
+ "must_not": [
18
+ "E400 outranking E300 as the primary same-brand path",
19
+ "contradictory same-brand-none-workbook-ready wording"
20
+ ],
21
+ "notes": [
22
+ "This is a replacement-ordering guardrail.",
23
+ "Backup path can still exist, but E300 should stay primary."
24
+ ]
25
+ },
26
+ "package_legacy_003": {
27
+ "models": ["CBA850"],
28
+ "expected_behavior": "current_family_safe_same_brand_upgrade_with_package_caution",
29
+ "must_not": [
30
+ "false exact-bundle certainty",
31
+ "historical-only replacement presented as the current path"
32
+ ],
33
+ "notes": [
34
+ "Family-safe replacement guidance is acceptable here.",
35
+ "The answer should say exact package/SKU can still matter."
36
+ ]
37
+ },
38
+ "package_legacy_004": {
39
+ "models": ["CBA250"],
40
+ "expected_behavior": "current_path_s700_not_historical_r980",
41
+ "must_not": [
42
+ "R980 displacing S700 as the current workbook-backed target",
43
+ "unclear current-vs-historical ordering"
44
+ ],
45
+ "notes": [
46
+ "The answer should keep the current target explicit.",
47
+ "Historical paths may still be mentioned if clearly labeled."
48
+ ]
49
+ },
50
+ "package_legacy_005": {
51
+ "models": ["AER1650"],
52
+ "expected_behavior": "replacement_lane_surfaces_e300_primary",
53
+ "must_not": [
54
+ "generic lifecycle/details answer with no replacement path",
55
+ "E400-only drift"
56
+ ],
57
+ "notes": [
58
+ "This stays a replacement-lane routing guardrail.",
59
+ "The answer should remain workbook-backed."
60
+ ]
61
+ },
62
+ "package_legacy_006": {
63
+ "models": ["C1101-4P"],
64
+ "expected_behavior": "same_brand_primary_row_c1121_4p",
65
+ "must_not": [
66
+ "generic details fallback",
67
+ "cross-vendor replacement suggestion"
68
+ ],
69
+ "notes": [
70
+ "The answer should keep the same-brand workbook primary row visible.",
71
+ "Current-path framing should be explicit."
72
+ ]
73
+ }
74
+ }
docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_pack.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ exported_at,global_index,shard_id,shard_case_index,case_id,family,subfamily,difficulty,mode_hint,audience,setup_kind,run_readiness,setup_summary,api_endpoint,api_method,api_payload_template_json,conversation_steps_json,prompt,judge_focus,notes,run_status,http_status,request_id,latency_ms,response_assistant,response_sources_json,response_files_json,response_meta_json,response_state_json,response_error,judge_model,judge_fact_score,judge_instruction_score,judge_coverage_score,judge_readability_score,judge_safety_score,judge_overall_score,judge_grade,judge_pass,judge_issues_json,judge_rationale
2
+ 2026-04-02T02:12:00+00:00,1,package_sensitive_legacy_pack,1,package_legacy_001,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters."", ""mode"": null, ""request_id"": ""router-package-pack-001"", ""show_citations"": true, ""state"": {}}","[""Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters.""]","Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters.","Verify the answer stays conservative for BR1 Mini package-sensitive ambiguity, surfaces current workbook-backed replacement guidance if available, and explicitly says whether exact package confirmation still matters.","Package-sensitive Peplink replacement probe.",,,,,,,,,,,,,,,,,,
3
+ 2026-04-02T02:12:00+00:00,2,package_sensitive_legacy_pack,2,package_legacy_002,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""What is the primary same-brand replacement and backup path for MBR1400, workbook-only?"", ""mode"": null, ""request_id"": ""router-package-pack-002"", ""show_citations"": true, ""state"": {}}","[""What is the primary same-brand replacement and backup path for MBR1400, workbook-only?""]","What is the primary same-brand replacement and backup path for MBR1400, workbook-only?","Verify the answer keeps E300 as the primary same-brand fallback path, avoids contradictory E400 drift, and stays workbook-backed.","Legacy Cradlepoint replacement-ordering probe.",,,,,,,,,,,,,,,,,,
4
+ 2026-04-02T02:12:00+00:00,3,package_sensitive_legacy_pack,3,package_legacy_003,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the current same-brand upgrade path for CBA850, and tell me if package/SKU confirmation still matters."", ""mode"": null, ""request_id"": ""router-package-pack-003"", ""show_citations"": true, ""state"": {}}","[""Give the current same-brand upgrade path for CBA850, and tell me if package/SKU confirmation still matters.""]","Give the current same-brand upgrade path for CBA850, and tell me if package/SKU confirmation still matters.","Verify the answer surfaces the current same-brand CBA850 family path conservatively and keeps package/SKU caution visible instead of pretending one exact bundle is universal.","Package-sensitive CBA850 family replacement probe.",,,,,,,,,,,,,,,,,,
5
+ 2026-04-02T02:12:00+00:00,4,package_sensitive_legacy_pack,4,package_legacy_004,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the replacement path for CBA250 and tell me whether the current workbook-backed target is still current."", ""mode"": null, ""request_id"": ""router-package-pack-004"", ""show_citations"": true, ""state"": {}}","[""Give the replacement path for CBA250 and tell me whether the current workbook-backed target is still current.""]","Give the replacement path for CBA250 and tell me whether the current workbook-backed target is still current.","Verify the answer keeps S700 as the current path, distinguishes it from historical paths, and does not let R980 displace the current workbook-backed recommendation.","Current-vs-historical replacement ordering probe.",,,,,,,,,,,,,,,,,,
6
+ 2026-04-02T02:12:00+00:00,5,package_sensitive_legacy_pack,5,package_legacy_005,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the same-brand and backup upgrade path for AER1650, workbook-only."", ""mode"": null, ""request_id"": ""router-package-pack-005"", ""show_citations"": true, ""state"": {}}","[""Give the same-brand and backup upgrade path for AER1650, workbook-only.""]","Give the same-brand and backup upgrade path for AER1650, workbook-only.","Verify the answer stays in the replacement lane and surfaces the workbook-backed E300 primary path instead of falling back into generic lifecycle/details copy.","Legacy Cradlepoint replacement routing probe.",,,,,,,,,,,,,,,,,,
7
+ 2026-04-02T02:12:00+00:00,6,package_sensitive_legacy_pack,6,package_legacy_006,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""What is the same-brand upgrade path for C1101-4P, and is the workbook primary row current?"", ""mode"": null, ""request_id"": ""router-package-pack-006"", ""show_citations"": true, ""state"": {}}","[""What is the same-brand upgrade path for C1101-4P, and is the workbook primary row current?""]","What is the same-brand upgrade path for C1101-4P, and is the workbook primary row current?","Verify the answer surfaces the workbook-backed C1121-4P path directly, stays same-brand, and avoids collapsing into generic details/lifecycle fallback.","Legacy Cisco replacement-lane probe.",,,,,,,,,,,,,,,,,,