| from __future__ import annotations |
|
|
| import numpy as np |
| import streamlit as st |
|
|
| from src.analytics import policy_at_threshold, policy_curve |
| from src.charts import policy_curve_chart |
| from src.formatting import fmt_int, fmt_pct |
| from src.ui import callout, section_title |
|
|
|
|
class PolicySimulatorViewMixin:
    """Offline review-policy simulator page.

    Mixin: the host class is expected to provide ``self.ctx.filtered_eval``,
    ``self.data.eval_runs`` and ``self._plot(chart, key)``, all of which are
    read by ``_page_policy_simulator``.
    """

    def _page_policy_simulator(self) -> None:
        """Render the policy simulator page.

        Draws three sliders for the curve range and step, one slider for the
        selected threshold, then the metrics for that threshold, the policy
        curve chart, and a closing "offline only" warning.
        """
        section_title("Policy Simulator", "Explore offline review thresholds using an evidence-strength proxy derived from retrieval-side evaluation signals.")
        c1, c2, c3 = st.columns(3)
        with c1:
            threshold_min = st.slider("Curve minimum threshold", 0.00, 0.90, 0.05, step=0.05, key="policy_min")
        with c2:
            threshold_max = st.slider("Curve maximum threshold", 0.10, 1.00, 0.95, step=0.05, key="policy_max")
        with c3:
            threshold_step = st.select_slider("Curve step", options=[0.01, 0.02, 0.05, 0.10], value=0.05, key="policy_step")

        # The two range sliders overlap (min goes up to 0.90, max down to 0.10),
        # so an inverted or empty range is possible; widen it by one step.
        if threshold_max <= threshold_min:
            threshold_max = min(1.0, threshold_min + threshold_step)
            callout("warn", "Threshold range adjusted", "Maximum threshold must be greater than minimum threshold.")

        # Prefer 0.55 as the initial selection, clamped into the active range.
        threshold_default = min(max(0.55, threshold_min), threshold_max)
        threshold = st.slider("Selected evidence-strength threshold", threshold_min, threshold_max, threshold_default, step=threshold_step, key="policy_selected")

        thresholds = self._threshold_grid(threshold_min, threshold_max, threshold_step)
        # NOTE(review): reference_df is self.data.eval_runs, presumably the
        # unfiltered runs used as normalization anchors -- confirm in src.analytics.
        curve = policy_curve(self.ctx.filtered_eval, thresholds=thresholds, reference_df=self.data.eval_runs)
        selected_policy = policy_at_threshold(self.ctx.filtered_eval, threshold, reference_df=self.data.eval_runs)
        self._render_policy_metrics(selected_policy)
        self._plot(policy_curve_chart(curve), "policy_curve")
        callout(
            "warn",
            "Offline policy only",
            "This simulator uses evaluation-time signals and full-dataset normalization anchors. It is not a deployable production gate without live-available features and fresh validation.",
        )

    @staticmethod
    def _threshold_grid(threshold_min: float, threshold_max: float, threshold_step: float) -> np.ndarray:
        """Return evenly spaced thresholds from ``threshold_min`` up to ``threshold_max``.

        The grid starts at ``threshold_min`` and advances by ``threshold_step``.
        ``threshold_max`` is included when it lies on the grid, and no value
        above ``threshold_max`` is ever produced. Values are rounded to four
        decimals. An empty array is returned when ``threshold_max`` is below
        ``threshold_min``.
        """
        # Count the points explicitly instead of using a float stop in
        # np.arange: with a half-step padded stop, a range such as
        # 0.05..0.90 step 0.10 puts the stop exactly on the next grid point,
        # and rounding error alone decides whether 0.95 (> max) is emitted.
        steps = (threshold_max - threshold_min) / threshold_step
        # The small tolerance keeps an exactly reachable maximum whose
        # quotient came out as e.g. 17.999999999999996.
        count = int(np.floor(steps + 1e-9)) + 1
        return np.round(threshold_min + threshold_step * np.arange(count), 4)

    @staticmethod
    def _render_policy_metrics(selected_policy: dict[str, float]) -> None:
        """Show the selected policy's headline numbers as four metric tiles.

        Keys missing from ``selected_policy`` are passed to the formatter as
        NaN. How NaN is displayed is up to ``fmt_pct`` / ``fmt_int``.
        """
        cols = st.columns(4)
        specs = [
            ("Auto-approve rate", fmt_pct(selected_policy.get("auto_approve_rate", np.nan))),
            ("Review queue", fmt_int(selected_policy.get("review_queue_size", np.nan))),
            ("Auto correct rate", fmt_pct(selected_policy.get("auto_correct_rate", np.nan))),
            ("Risk captured in review", fmt_pct(selected_policy.get("risk_captured_in_review", np.nan))),
        ]
        # One tile per column, in the order listed above.
        for col, (label, value) in zip(cols, specs, strict=False):
            with col:
                st.metric(label, value)
|
|
|
|