Spaces:

tarekmasryo
/

rag-qa-command-cente

Running

rag-qa-command-cente / src /views /policy_simulator.py

Tarek Masryo

chore: update project files

6bef416 about 1 month ago

2.85 kB

	from __future__ import annotations

	import numpy as np
	import streamlit as st

	from src.analytics import policy_at_threshold, policy_curve
	from src.charts import policy_curve_chart
	from src.formatting import fmt_int, fmt_pct
	from src.ui import callout, section_title


	class PolicySimulatorViewMixin:
	    """Offline review-policy simulator page.

	    Mixin providing the "Policy Simulator" Streamlit view. The page method
	    reads ``self.ctx.filtered_eval`` and ``self.data.eval_runs`` and calls
	    ``self._plot``, so the host class is expected to supply those members.
	    """

	    def _page_policy_simulator(self) -> None:
	        """Render threshold controls, the selected-policy metrics and the policy curve."""
	        section_title("Policy Simulator", "Explore offline review thresholds using an evidence-strength proxy derived from retrieval-side evaluation signals.")

	        # Three side-by-side controls define the threshold grid of the curve.
	        c1, c2, c3 = st.columns(3)
	        with c1:
	            threshold_min = st.slider("Curve minimum threshold", 0.00, 0.90, 0.05, step=0.05, key="policy_min")
	        with c2:
	            threshold_max = st.slider("Curve maximum threshold", 0.10, 1.00, 0.95, step=0.05, key="policy_max")
	        with c3:
	            threshold_step = st.select_slider("Curve step", options=[0.01, 0.02, 0.05, 0.10], value=0.05, key="policy_step")

	        # The two range sliders are independent, so an empty or inverted range
	        # is possible; widen it to one step (capped at 1.0) and tell the user.
	        if threshold_max <= threshold_min:
	            threshold_max = min(1.0, threshold_min + threshold_step)
	            callout("warn", "Threshold range adjusted", "Maximum threshold must be greater than minimum threshold.")

	        # Prefer 0.55 as the initial selection, clamped into the active range.
	        threshold_default = min(max(0.55, threshold_min), threshold_max)
	        threshold = st.slider("Selected evidence-strength threshold", threshold_min, threshold_max, threshold_default, step=threshold_step, key="policy_selected")

	        thresholds = self._threshold_grid(threshold_min, threshold_max, threshold_step)
	        curve = policy_curve(self.ctx.filtered_eval, thresholds=thresholds, reference_df=self.data.eval_runs)
	        selected_policy = policy_at_threshold(self.ctx.filtered_eval, threshold, reference_df=self.data.eval_runs)

	        self._render_policy_metrics(selected_policy)
	        self._plot(policy_curve_chart(curve), "policy_curve")
	        callout(
	            "warn",
	            "Offline policy only",
	            "This simulator uses evaluation-time signals and full-dataset normalization anchors. It is not a deployable production gate without live-available features and fresh validation.",
	        )

	    @staticmethod
	    def _threshold_grid(threshold_min: float, threshold_max: float, threshold_step: float) -> np.ndarray:
	        """Return evenly spaced thresholds covering ``[threshold_min, threshold_max]``.

	        The grid starts at ``threshold_min``, advances by ``threshold_step`` and
	        never goes past ``threshold_max``; ``threshold_max`` itself is included
	        when the range is a whole number of steps. Values are rounded to four
	        decimals.

	        The number of points is computed as an integer instead of relying on a
	        float-step ``np.arange`` with a padded stop: the length of such a range
	        depends on floating-point rounding, so it could emit one extra point
	        above ``threshold_max`` when the range ends half a step off the grid.
	        """
	        # The small tolerance keeps the end point when the range is an exact
	        # multiple of the step but the float division lands just below it.
	        whole_steps = int(np.floor((threshold_max - threshold_min) / threshold_step + 1e-9))
	        # Always keep at least the starting threshold.
	        point_count = max(whole_steps, 0) + 1
	        return np.round(threshold_min + threshold_step * np.arange(point_count), 4)

	    @staticmethod
	    def _render_policy_metrics(selected_policy: dict[str, float]) -> None:
	        """Show the four headline policy metrics, one ``st.metric`` per column.

	        Keys missing from ``selected_policy`` fall back to NaN, which is handed
	        to the formatting helper unchanged.
	        """
	        cols = st.columns(4)
	        specs = [
	            ("Auto-approve rate", fmt_pct(selected_policy.get("auto_approve_rate", np.nan))),
	            ("Review queue", fmt_int(selected_policy.get("review_queue_size", np.nan))),
	            ("Auto correct rate", fmt_pct(selected_policy.get("auto_correct_rate", np.nan))),
	            ("Risk captured in review", fmt_pct(selected_policy.get("risk_captured_in_review", np.nan))),
	        ]
	        for col, (label, value) in zip(cols, specs, strict=False):
	            with col:
	                st.metric(label, value)