Pete Dunn committed on
Commit ·
a81f011
1
Parent(s): cb489da
Tighten package-sensitive legacy fallback paths
Browse files- backend/app/knowledgebase/core.py +56 -11
- backend/app/test_unified_kb_core.py +67 -1
- docs/testing/router_regression_packs/README.md +39 -0
- docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_expected.json +74 -0
- docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_pack.csv +7 -0
backend/app/knowledgebase/core.py
CHANGED
|
@@ -38612,6 +38612,9 @@ class UnifiedKnowledgebaseCore:
|
|
| 38612 |
return str(catalog_row.get("display_name") or model_text)
|
| 38613 |
return str(model_text or "")
|
| 38614 |
|
|
|
|
|
|
|
|
|
|
| 38615 |
def _fallback_recommendation_label(model_text: str, *, include_manufacturer: bool = False) -> str:
|
| 38616 |
display = _fallback_display(model_text)
|
| 38617 |
if not include_manufacturer:
|
|
@@ -38769,8 +38772,35 @@ class UnifiedKnowledgebaseCore:
|
|
| 38769 |
token in query.normalized_message
|
| 38770 |
for token in ("which replacement path is cleaner", "cleaner path", "cleaner for")
|
| 38771 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38772 |
suppress_family_branding = bool(match.get("_family_collapsed") or analysis.get("_family_safe") or family_safe_note)
|
| 38773 |
has_only_lifecycle_fallback = fallback_source or ((not primary_rows) and (not backup_rows) and bool(lifecycle_fallback_texts or legacy_5g or legacy_4g))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38774 |
resolved_subject_label = _norm(
|
| 38775 |
match.get("_canonical_resolved_label")
|
| 38776 |
or _as_dict(analysis.get("product")).get("_canonical_resolved_label")
|
|
@@ -38803,9 +38833,24 @@ class UnifiedKnowledgebaseCore:
|
|
| 38803 |
f"If that normalization is wrong, stop here and send the exact device label before using these recommendations.",
|
| 38804 |
"",
|
| 38805 |
]
|
| 38806 |
-
|
| 38807 |
if family_safe_note:
|
| 38808 |
lines.extend([family_safe_note, ""])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38809 |
if (not bool(query.current_only)) or any(
|
| 38810 |
token in query.normalized_message
|
| 38811 |
for token in ("legacy", "older", "old", "historical", "similar")
|
|
@@ -38977,13 +39022,13 @@ class UnifiedKnowledgebaseCore:
|
|
| 38977 |
lines.append("Fallback note: the lifecycle rows below are sourced migration hints, not workbook-ready replacement lanes, so feature fit still needs validation before quoting.")
|
| 38978 |
lines.append("")
|
| 38979 |
lines.append("Fallback recommendations:")
|
| 38980 |
-
if
|
| 38981 |
lines.append(
|
| 38982 |
-
f"- Best sourced current path: `{_fallback_display(
|
| 38983 |
)
|
| 38984 |
-
if
|
| 38985 |
lines.append(f"- Best sourced 5G path: `{_fallback_display(legacy_5g)}`. {_fallback_lineage_note(legacy_5g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_5g)}.")
|
| 38986 |
-
if
|
| 38987 |
lines.append(f"- Best sourced 4G bridge: `{_fallback_display(legacy_4g)}`. {_fallback_lineage_note(legacy_4g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_4g, bridge_mode=True)}.")
|
| 38988 |
lines.append("")
|
| 38989 |
lines.extend(
|
|
@@ -39054,7 +39099,7 @@ class UnifiedKnowledgebaseCore:
|
|
| 39054 |
lines.append(
|
| 39055 |
f"| {lane_label} | {_md_cell(_display_name(row))} | {_md_cell(combined_note)} |"
|
| 39056 |
)
|
| 39057 |
-
|
| 39058 |
lines.append(
|
| 39059 |
f"| {'Sourced same-manufacturer backup' if fallback_source else 'Same-manufacturer backup'} | None listed | {_md_cell(_same_brand_empty_note())} |"
|
| 39060 |
)
|
|
@@ -39071,19 +39116,19 @@ class UnifiedKnowledgebaseCore:
|
|
| 39071 |
lines.append(
|
| 39072 |
f"| {'Sourced cross-vendor backup' if fallback_source else 'Cross-vendor backup'} | None listed | {_md_cell(_backup_empty_note())} |"
|
| 39073 |
)
|
| 39074 |
-
if show_sourced_fallback_rows and
|
| 39075 |
lines.append(
|
| 39076 |
-
f"| Sourced lifecycle fallback | {_md_cell(_fallback_display(
|
| 39077 |
)
|
| 39078 |
-
elif show_sourced_fallback_rows and (
|
| 39079 |
-
if
|
| 39080 |
legacy_5g_label = "Sourced lifecycle 5G fallback"
|
| 39081 |
if asks_5g_replacement:
|
| 39082 |
legacy_5g_label = "Requested 5G same-brand path" if _fallback_is_same_vendor_5g(legacy_5g) else "Requested 5G path"
|
| 39083 |
lines.append(
|
| 39084 |
f"| {legacy_5g_label} | {_md_cell(_fallback_recommendation_label(legacy_5g, include_manufacturer=False))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_5g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_5g))} |"
|
| 39085 |
)
|
| 39086 |
-
if
|
| 39087 |
lines.append(
|
| 39088 |
f"| Sourced lifecycle 4G bridge | {_md_cell(_fallback_display(legacy_4g))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_4g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_4g, bridge_mode=True))} |"
|
| 39089 |
)
|
|
|
|
| 38612 |
return str(catalog_row.get("display_name") or model_text)
|
| 38613 |
return str(model_text or "")
|
| 38614 |
|
| 38615 |
+
def _fallback_choice_key(model_text: str) -> str:
|
| 38616 |
+
return _compact_model(_fallback_display(model_text) or model_text)
|
| 38617 |
+
|
| 38618 |
def _fallback_recommendation_label(model_text: str, *, include_manufacturer: bool = False) -> str:
|
| 38619 |
display = _fallback_display(model_text)
|
| 38620 |
if not include_manufacturer:
|
|
|
|
| 38772 |
token in query.normalized_message
|
| 38773 |
for token in ("which replacement path is cleaner", "cleaner path", "cleaner for")
|
| 38774 |
)
|
| 38775 |
+
asks_package_confirmation = any(
|
| 38776 |
+
token in query.normalized_message
|
| 38777 |
+
for token in (
|
| 38778 |
+
"exact variant",
|
| 38779 |
+
"exact sku",
|
| 38780 |
+
"exact package",
|
| 38781 |
+
"sku/package",
|
| 38782 |
+
"variant/package",
|
| 38783 |
+
"package still matters",
|
| 38784 |
+
"variant still matters",
|
| 38785 |
+
"sku still matters",
|
| 38786 |
+
)
|
| 38787 |
+
)
|
| 38788 |
suppress_family_branding = bool(match.get("_family_collapsed") or analysis.get("_family_safe") or family_safe_note)
|
| 38789 |
has_only_lifecycle_fallback = fallback_source or ((not primary_rows) and (not backup_rows) and bool(lifecycle_fallback_texts or legacy_5g or legacy_4g))
|
| 38790 |
+
lifecycle_fallback_primary = lifecycle_fallback_texts[0] if lifecycle_fallback_texts else ""
|
| 38791 |
+
lifecycle_fallback_primary_key = _fallback_choice_key(lifecycle_fallback_primary)
|
| 38792 |
+
legacy_5g_key = _fallback_choice_key(legacy_5g)
|
| 38793 |
+
legacy_4g_key = _fallback_choice_key(legacy_4g)
|
| 38794 |
+
show_legacy_5g_recommendation = bool(legacy_5g) and (
|
| 38795 |
+
asks_5g_replacement or (not lifecycle_fallback_primary and not primary_rows and not same_manufacturer_backup_rows)
|
| 38796 |
+
)
|
| 38797 |
+
if show_legacy_5g_recommendation and legacy_5g_key and legacy_5g_key == lifecycle_fallback_primary_key:
|
| 38798 |
+
show_legacy_5g_recommendation = False
|
| 38799 |
+
show_legacy_4g_recommendation = bool(legacy_4g)
|
| 38800 |
+
if show_legacy_4g_recommendation and legacy_4g_key and legacy_4g_key == lifecycle_fallback_primary_key:
|
| 38801 |
+
show_legacy_4g_recommendation = False
|
| 38802 |
+
if show_legacy_4g_recommendation and legacy_4g_key and legacy_4g_key == legacy_5g_key:
|
| 38803 |
+
show_legacy_4g_recommendation = False
|
| 38804 |
resolved_subject_label = _norm(
|
| 38805 |
match.get("_canonical_resolved_label")
|
| 38806 |
or _as_dict(analysis.get("product")).get("_canonical_resolved_label")
|
|
|
|
| 38833 |
f"If that normalization is wrong, stop here and send the exact device label before using these recommendations.",
|
| 38834 |
"",
|
| 38835 |
]
|
| 38836 |
+
)
|
| 38837 |
if family_safe_note:
|
| 38838 |
lines.extend([family_safe_note, ""])
|
| 38839 |
+
if asks_package_confirmation:
|
| 38840 |
+
if suppress_family_branding or has_only_lifecycle_fallback:
|
| 38841 |
+
lines.extend(
|
| 38842 |
+
[
|
| 38843 |
+
"Exact variant/package still matters: yes. This answer stays family-safe, so confirm the final SKU/package before quote lock.",
|
| 38844 |
+
"",
|
| 38845 |
+
]
|
| 38846 |
+
)
|
| 38847 |
+
else:
|
| 38848 |
+
lines.extend(
|
| 38849 |
+
[
|
| 38850 |
+
"Exact variant/package still matters: not for the replacement lane itself, but confirm the final SKU/package before quoting feature-sensitive details.",
|
| 38851 |
+
"",
|
| 38852 |
+
]
|
| 38853 |
+
)
|
| 38854 |
if (not bool(query.current_only)) or any(
|
| 38855 |
token in query.normalized_message
|
| 38856 |
for token in ("legacy", "older", "old", "historical", "similar")
|
|
|
|
| 39022 |
lines.append("Fallback note: the lifecycle rows below are sourced migration hints, not workbook-ready replacement lanes, so feature fit still needs validation before quoting.")
|
| 39023 |
lines.append("")
|
| 39024 |
lines.append("Fallback recommendations:")
|
| 39025 |
+
if lifecycle_fallback_primary:
|
| 39026 |
lines.append(
|
| 39027 |
+
f"- Best sourced current path: `{_fallback_display(lifecycle_fallback_primary)}`. {_fallback_lineage_note(lifecycle_fallback_primary, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(lifecycle_fallback_primary)}."
|
| 39028 |
)
|
| 39029 |
+
if show_legacy_5g_recommendation:
|
| 39030 |
lines.append(f"- Best sourced 5G path: `{_fallback_display(legacy_5g)}`. {_fallback_lineage_note(legacy_5g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_5g)}.")
|
| 39031 |
+
if show_legacy_4g_recommendation:
|
| 39032 |
lines.append(f"- Best sourced 4G bridge: `{_fallback_display(legacy_4g)}`. {_fallback_lineage_note(legacy_4g, include_manufacturer=False)}. Tradeoff: {_fallback_tradeoff(legacy_4g, bridge_mode=True)}.")
|
| 39033 |
lines.append("")
|
| 39034 |
lines.extend(
|
|
|
|
| 39099 |
lines.append(
|
| 39100 |
f"| {lane_label} | {_md_cell(_display_name(row))} | {_md_cell(combined_note)} |"
|
| 39101 |
)
|
| 39102 |
+
elif not (show_sourced_fallback_rows and (lifecycle_fallback_primary or show_legacy_5g_recommendation or show_legacy_4g_recommendation)):
|
| 39103 |
lines.append(
|
| 39104 |
f"| {'Sourced same-manufacturer backup' if fallback_source else 'Same-manufacturer backup'} | None listed | {_md_cell(_same_brand_empty_note())} |"
|
| 39105 |
)
|
|
|
|
| 39116 |
lines.append(
|
| 39117 |
f"| {'Sourced cross-vendor backup' if fallback_source else 'Cross-vendor backup'} | None listed | {_md_cell(_backup_empty_note())} |"
|
| 39118 |
)
|
| 39119 |
+
if show_sourced_fallback_rows and lifecycle_fallback_primary:
|
| 39120 |
lines.append(
|
| 39121 |
+
f"| Sourced lifecycle fallback | {_md_cell(_fallback_display(lifecycle_fallback_primary))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(lifecycle_fallback_primary, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(lifecycle_fallback_primary))} |"
|
| 39122 |
)
|
| 39123 |
+
elif show_sourced_fallback_rows and (show_legacy_5g_recommendation or show_legacy_4g_recommendation):
|
| 39124 |
+
if show_legacy_5g_recommendation and not emitted_requested_5g_path:
|
| 39125 |
legacy_5g_label = "Sourced lifecycle 5G fallback"
|
| 39126 |
if asks_5g_replacement:
|
| 39127 |
legacy_5g_label = "Requested 5G same-brand path" if _fallback_is_same_vendor_5g(legacy_5g) else "Requested 5G path"
|
| 39128 |
lines.append(
|
| 39129 |
f"| {legacy_5g_label} | {_md_cell(_fallback_recommendation_label(legacy_5g, include_manufacturer=False))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_5g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_5g))} |"
|
| 39130 |
)
|
| 39131 |
+
if show_legacy_4g_recommendation and not requested_5g_priority:
|
| 39132 |
lines.append(
|
| 39133 |
f"| Sourced lifecycle 4G bridge | {_md_cell(_fallback_display(legacy_4g))} | {_md_cell('Lifecycle mapping fallback only; ' + _fallback_lineage_note(legacy_4g, include_manufacturer=False) + '; tradeoff: ' + _fallback_tradeoff(legacy_4g, bridge_mode=True))} |"
|
| 39134 |
)
|
backend/app/test_unified_kb_core.py
CHANGED
|
@@ -11677,10 +11677,76 @@ def test_unified_kb_mbr1400_fallback_replacement_prefers_current_fallback_before
|
|
| 11677 |
assistant = str(out.get("assistant") or "")
|
| 11678 |
assert out["meta"]["retrieval_mode"] == "deterministic_router_workbook_replacements"
|
| 11679 |
assert "1. `Same-brand path` -> `E300`" in assistant
|
| 11680 |
-
assert "Best sourced 5G path: `E400`" in assistant
|
|
|
|
| 11681 |
assert "1. `Same-brand path` -> `E400`" not in assistant
|
| 11682 |
assert "| Sourced same-manufacturer path | E300 |" in assistant
|
| 11683 |
assert "| Same manufacturer | None workbook-ready |" not in assistant
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11684 |
|
| 11685 |
|
| 11686 |
def test_unified_kb_rut950_replacement_drops_na_backup_and_uses_primary_only_note(tmp_path: Path, monkeypatch) -> None: # type: ignore[no-untyped-def]
|
|
|
|
| 11677 |
assistant = str(out.get("assistant") or "")
|
| 11678 |
assert out["meta"]["retrieval_mode"] == "deterministic_router_workbook_replacements"
|
| 11679 |
assert "1. `Same-brand path` -> `E300`" in assistant
|
| 11680 |
+
assert "Best sourced 5G path: `E400`" not in assistant
|
| 11681 |
+
assert "Fallback recommendations:" in assistant
|
| 11682 |
assert "1. `Same-brand path` -> `E400`" not in assistant
|
| 11683 |
assert "| Sourced same-manufacturer path | E300 |" in assistant
|
| 11684 |
assert "| Same manufacturer | None workbook-ready |" not in assistant
|
| 11685 |
+
assert "| Sourced same-manufacturer backup | None listed |" not in assistant
|
| 11686 |
+
|
| 11687 |
+
|
| 11688 |
+
def test_unified_kb_br1_mini_package_sensitive_fallback_answers_variant_question_cleanly(tmp_path: Path, monkeypatch) -> None: # type: ignore[no-untyped-def]
|
| 11689 |
+
core = _build_core_with_loaded_workbook(tmp_path, router_core=_synthetic_workbook_router_core(tmp_path))
|
| 11690 |
+
workbook_core = core._rapid_router_intelligence_core()
|
| 11691 |
+
assert workbook_core is not None
|
| 11692 |
+
monkeypatch.setattr(
|
| 11693 |
+
core,
|
| 11694 |
+
"_router_workbook_resolve_replacement_analysis",
|
| 11695 |
+
lambda _workbook_core, manufacturer_text, product_text: {
|
| 11696 |
+
"ok": True,
|
| 11697 |
+
"resolution_mode": "family_alias_provisional",
|
| 11698 |
+
"analysis": {
|
| 11699 |
+
"match": {
|
| 11700 |
+
"product_key": "prod_peplink_br1_mini_family",
|
| 11701 |
+
"product_id": "BR1MINI",
|
| 11702 |
+
"display_name": "BR1 Mini",
|
| 11703 |
+
"manufacturer_group": "Peplink",
|
| 11704 |
+
"status_bucket": "End of Life",
|
| 11705 |
+
"_requested_label": "BR1 Mini",
|
| 11706 |
+
"_family_collapsed": True,
|
| 11707 |
+
},
|
| 11708 |
+
"replacements": {
|
| 11709 |
+
"primary_candidates": [],
|
| 11710 |
+
"primary_replacement": None,
|
| 11711 |
+
"same_manufacturer_backup_replacements": [],
|
| 11712 |
+
"backup_replacements": [],
|
| 11713 |
+
"historical_only_replacements": [],
|
| 11714 |
+
"review_blocked_count": 0,
|
| 11715 |
+
"candidate_count": 0,
|
| 11716 |
+
"no_replacement": False,
|
| 11717 |
+
},
|
| 11718 |
+
"_replacement_source_mode": "lifecycle_fallback",
|
| 11719 |
+
"_replacement_subject_lifecycle": {
|
| 11720 |
+
"events": [
|
| 11721 |
+
{"recommended_replacement_text": "MAX BR1 Mini 5G"},
|
| 11722 |
+
]
|
| 11723 |
+
},
|
| 11724 |
+
"_replacement_legacy_lifecycle": {
|
| 11725 |
+
"rep5g": "MAX BR1 Mini 5G",
|
| 11726 |
+
"alt4g": "BR1 Mini LTEA",
|
| 11727 |
+
},
|
| 11728 |
+
"review_required": True,
|
| 11729 |
+
"manual_review_reasons": [],
|
| 11730 |
+
"_family_safe_note": "Requested `BR1 Mini` matched a workbook family row; exact variant still needs confirmation.",
|
| 11731 |
+
},
|
| 11732 |
+
},
|
| 11733 |
+
)
|
| 11734 |
+
|
| 11735 |
+
out = core.handle_message(
|
| 11736 |
+
"Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters.",
|
| 11737 |
+
{},
|
| 11738 |
+
mode="router_lifecycle",
|
| 11739 |
+
audience="auto",
|
| 11740 |
+
show_citations=True,
|
| 11741 |
+
)
|
| 11742 |
+
|
| 11743 |
+
assistant = str(out.get("assistant") or "")
|
| 11744 |
+
assert out["meta"]["retrieval_mode"] == "deterministic_router_workbook_replacements"
|
| 11745 |
+
assert "Exact variant/package still matters: yes." in assistant
|
| 11746 |
+
assert "Best sourced current path:" in assistant
|
| 11747 |
+
assert "Best sourced 5G path:" not in assistant
|
| 11748 |
+
assert "Best sourced 4G bridge:" in assistant
|
| 11749 |
+
assert "| Sourced same-manufacturer backup | None listed |" not in assistant
|
| 11750 |
|
| 11751 |
|
| 11752 |
def test_unified_kb_rut950_replacement_drops_na_backup_and_uses_primary_only_note(tmp_path: Path, monkeypatch) -> None: # type: ignore[no-untyped-def]
|
docs/testing/router_regression_packs/README.md
CHANGED
|
@@ -161,3 +161,42 @@ Scored outputs will land next to the chosen `--out-csv` path:
|
|
| 161 |
How to use the expectations file:
|
| 162 |
- Treat `router_compare_family_edge_expected.json` as the behavior snapshot for these compares.
|
| 163 |
- If the scorer passes but the answer still collapses models, drifts to sparse alias rows, or hides behind generic snippet noise instead of a family-safe compare or conservative clarification, treat that as a real regression.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
How to use the expectations file:
|
| 162 |
- Treat `router_compare_family_edge_expected.json` as the behavior snapshot for these compares.
|
| 163 |
- If the scorer passes but the answer still collapses models, drifts to sparse alias rows, or hides behind generic snippet noise instead of a family-safe compare or conservative clarification, treat that as a real regression.
|
| 164 |
+
|
| 165 |
+
### Router Package-Sensitive Legacy Edge Pack
|
| 166 |
+
|
| 167 |
+
Files:
|
| 168 |
+
- `router_package_sensitive_legacy_edge_pack.csv`
|
| 169 |
+
- `router_package_sensitive_legacy_edge_expected.json`
|
| 170 |
+
|
| 171 |
+
Purpose:
|
| 172 |
+
- Track legacy or package-sensitive replacement prompts where the app can drift from workbook-backed current paths, overstate bundle-level certainty, or fall back into generic lifecycle/details wording.
|
| 173 |
+
- Re-run them against the hosted canary without perturbing the broader release-gate packs.
|
| 174 |
+
|
| 175 |
+
Current legacy/package-sensitive targets:
|
| 176 |
+
- `BR1 Mini`
|
| 177 |
+
- `MBR1400`
|
| 178 |
+
- `CBA850`
|
| 179 |
+
- `CBA250`
|
| 180 |
+
- `AER1650`
|
| 181 |
+
- `C1101-4P`
|
| 182 |
+
|
| 183 |
+
Recommended hosted canary run:
|
| 184 |
+
|
| 185 |
+
```bash
|
| 186 |
+
cd backend
|
| 187 |
+
python3 scripts/run_router_canary_ab_eval_shard.py \
|
| 188 |
+
--in-csv ../docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_pack.csv \
|
| 189 |
+
--out-csv /tmp/router_package_sensitive_legacy_edge_results.csv \
|
| 190 |
+
--base-url https://crazycrazypete-masters-four-tab-openai-canary.hf.space \
|
| 191 |
+
--auth-env-file ../frontend/.env.e2e \
|
| 192 |
+
--expected-git-sha <live-canary-sha> \
|
| 193 |
+
--score-after
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
Scored outputs will land next to the chosen `--out-csv` path:
|
| 197 |
+
- `/tmp/router_package_sensitive_legacy_edge_results_scored.csv`
|
| 198 |
+
- `/tmp/router_package_sensitive_legacy_edge_results_scored_summary.json`
|
| 199 |
+
|
| 200 |
+
How to use the expectations file:
|
| 201 |
+
- Treat `router_package_sensitive_legacy_edge_expected.json` as the behavior snapshot for this package-sensitive legacy wave.
|
| 202 |
+
- A conservative family-safe answer is acceptable when the workbook still requires exact package confirmation, but it should not invent bundle-level certainty or hide the current workbook-backed path.
|
docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_expected.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"package_legacy_001": {
|
| 3 |
+
"models": ["BR1 Mini"],
|
| 4 |
+
"expected_behavior": "package_sensitive_same_brand_replacement_with_variant_caution",
|
| 5 |
+
"must_not": [
|
| 6 |
+
"pretend one BR1 Mini package is universally correct",
|
| 7 |
+
"generic lifecycle-only answer with no replacement guidance"
|
| 8 |
+
],
|
| 9 |
+
"notes": [
|
| 10 |
+
"A current workbook-backed path is acceptable if package caution stays visible.",
|
| 11 |
+
"A conservative clarify-first answer is acceptable if the workbook is still too variant-sensitive."
|
| 12 |
+
]
|
| 13 |
+
},
|
| 14 |
+
"package_legacy_002": {
|
| 15 |
+
"models": ["MBR1400"],
|
| 16 |
+
"expected_behavior": "primary_same_brand_fallback_e300_before_5g_lane",
|
| 17 |
+
"must_not": [
|
| 18 |
+
"E400 outranking E300 as the primary same-brand path",
|
| 19 |
+
"contradictory same-brand-none-workbook-ready wording"
|
| 20 |
+
],
|
| 21 |
+
"notes": [
|
| 22 |
+
"This is a replacement-ordering guardrail.",
|
| 23 |
+
"Backup path can still exist, but E300 should stay primary."
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
"package_legacy_003": {
|
| 27 |
+
"models": ["CBA850"],
|
| 28 |
+
"expected_behavior": "current_family_safe_same_brand_upgrade_with_package_caution",
|
| 29 |
+
"must_not": [
|
| 30 |
+
"false exact-bundle certainty",
|
| 31 |
+
"historical-only replacement presented as the current path"
|
| 32 |
+
],
|
| 33 |
+
"notes": [
|
| 34 |
+
"Family-safe replacement guidance is acceptable here.",
|
| 35 |
+
"The answer should say exact package/SKU can still matter."
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
"package_legacy_004": {
|
| 39 |
+
"models": ["CBA250"],
|
| 40 |
+
"expected_behavior": "current_path_s700_not_historical_r980",
|
| 41 |
+
"must_not": [
|
| 42 |
+
"R980 displacing S700 as the current workbook-backed target",
|
| 43 |
+
"unclear current-vs-historical ordering"
|
| 44 |
+
],
|
| 45 |
+
"notes": [
|
| 46 |
+
"The answer should keep the current target explicit.",
|
| 47 |
+
"Historical paths may still be mentioned if clearly labeled."
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
"package_legacy_005": {
|
| 51 |
+
"models": ["AER1650"],
|
| 52 |
+
"expected_behavior": "replacement_lane_surfaces_e300_primary",
|
| 53 |
+
"must_not": [
|
| 54 |
+
"generic lifecycle/details answer with no replacement path",
|
| 55 |
+
"E400-only drift"
|
| 56 |
+
],
|
| 57 |
+
"notes": [
|
| 58 |
+
"This stays a replacement-lane routing guardrail.",
|
| 59 |
+
"The answer should remain workbook-backed."
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
"package_legacy_006": {
|
| 63 |
+
"models": ["C1101-4P"],
|
| 64 |
+
"expected_behavior": "same_brand_primary_row_c1121_4p",
|
| 65 |
+
"must_not": [
|
| 66 |
+
"generic details fallback",
|
| 67 |
+
"cross-vendor replacement suggestion"
|
| 68 |
+
],
|
| 69 |
+
"notes": [
|
| 70 |
+
"The answer should keep the same-brand workbook primary row visible.",
|
| 71 |
+
"Current-path framing should be explicit."
|
| 72 |
+
]
|
| 73 |
+
}
|
| 74 |
+
}
|
docs/testing/router_regression_packs/router_package_sensitive_legacy_edge_pack.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
exported_at,global_index,shard_id,shard_case_index,case_id,family,subfamily,difficulty,mode_hint,audience,setup_kind,run_readiness,setup_summary,api_endpoint,api_method,api_payload_template_json,conversation_steps_json,prompt,judge_focus,notes,run_status,http_status,request_id,latency_ms,response_assistant,response_sources_json,response_files_json,response_meta_json,response_state_json,response_error,judge_model,judge_fact_score,judge_instruction_score,judge_coverage_score,judge_readability_score,judge_safety_score,judge_overall_score,judge_grade,judge_pass,judge_issues_json,judge_rationale
|
| 2 |
+
2026-04-02T02:12:00+00:00,1,package_sensitive_legacy_pack,1,package_legacy_001,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters."", ""mode"": null, ""request_id"": ""router-package-pack-001"", ""show_citations"": true, ""state"": {}}","[""Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters.""]","Give the same-brand and backup upgrade path for BR1 Mini, and say if the exact variant/package still matters.","Verify the answer stays conservative for BR1 Mini package-sensitive ambiguity, surfaces current workbook-backed replacement guidance if available, and explicitly says whether exact package confirmation still matters.","Package-sensitive Peplink replacement probe.",,,,,,,,,,,,,,,,,,
|
| 3 |
+
2026-04-02T02:12:00+00:00,2,package_sensitive_legacy_pack,2,package_legacy_002,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""What is the primary same-brand replacement and backup path for MBR1400, workbook-only?"", ""mode"": null, ""request_id"": ""router-package-pack-002"", ""show_citations"": true, ""state"": {}}","[""What is the primary same-brand replacement and backup path for MBR1400, workbook-only?""]","What is the primary same-brand replacement and backup path for MBR1400, workbook-only?","Verify the answer keeps E300 as the primary same-brand fallback path, avoids contradictory E400 drift, and stays workbook-backed.","Legacy Cradlepoint replacement-ordering probe.",,,,,,,,,,,,,,,,,,
|
| 4 |
+
2026-04-02T02:12:00+00:00,3,package_sensitive_legacy_pack,3,package_legacy_003,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the current same-brand upgrade path for CBA850, and tell me if package/SKU confirmation still matters."", ""mode"": null, ""request_id"": ""router-package-pack-003"", ""show_citations"": true, ""state"": {}}","[""Give the current same-brand upgrade path for CBA850, and tell me if package/SKU confirmation still matters.""]","Give the current same-brand upgrade path for CBA850, and tell me if package/SKU confirmation still matters.","Verify the answer surfaces the current same-brand CBA850 family path conservatively and keeps package/SKU caution visible instead of pretending one exact bundle is universal.","Package-sensitive CBA850 family replacement probe.",,,,,,,,,,,,,,,,,,
|
| 5 |
+
2026-04-02T02:12:00+00:00,4,package_sensitive_legacy_pack,4,package_legacy_004,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the replacement path for CBA250 and tell me whether the current workbook-backed target is still current."", ""mode"": null, ""request_id"": ""router-package-pack-004"", ""show_citations"": true, ""state"": {}}","[""Give the replacement path for CBA250 and tell me whether the current workbook-backed target is still current.""]","Give the replacement path for CBA250 and tell me whether the current workbook-backed target is still current.","Verify the answer keeps S700 as the current path, distinguishes it from historical paths, and does not let R980 displace the current workbook-backed recommendation.","Current-vs-historical replacement ordering probe.",,,,,,,,,,,,,,,,,,
|
| 6 |
+
2026-04-02T02:12:00+00:00,5,package_sensitive_legacy_pack,5,package_legacy_005,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""Give the same-brand and backup upgrade path for AER1650, workbook-only."", ""mode"": null, ""request_id"": ""router-package-pack-005"", ""show_citations"": true, ""state"": {}}","[""Give the same-brand and backup upgrade path for AER1650, workbook-only.""]","Give the same-brand and backup upgrade path for AER1650, workbook-only.","Verify the answer stays in the replacement lane and surfaces the workbook-backed E300 primary path instead of falling back into generic lifecycle/details copy.","Legacy AirLink replacement routing probe.",,,,,,,,,,,,,,,,,,
|
| 7 |
+
2026-04-02T02:12:00+00:00,6,package_sensitive_legacy_pack,6,package_legacy_006,router_lifecycle,package_sensitive_replacement,moderate,auto,auto,none,ready,Direct single-turn case. No pre-seeded KB state required.,/api/knowledgebase/message,POST,"{""audience"": ""auto"", ""message"": ""What is the same-brand upgrade path for C1101-4P, and is the workbook primary row current?"", ""mode"": null, ""request_id"": ""router-package-pack-006"", ""show_citations"": true, ""state"": {}}","[""What is the same-brand upgrade path for C1101-4P, and is the workbook primary row current?""]","What is the same-brand upgrade path for C1101-4P, and is the workbook primary row current?","Verify the answer surfaces the workbook-backed C1121-4P path directly, stays same-brand, and avoids collapsing into generic details/lifecycle fallback.","Legacy Cisco replacement-lane probe.",,,,,,,,,,,,,,,,,,
|