# Source: Hugging Face Space by axelsirota, uploaded via huggingface_hub (commit c0c59bd).
"""
Metrics Explainer β€” AI for Product Managers
Interactive confusion matrix with $ cost of errors.
"""
import gradio as gr
import plotly.graph_objects as go
import numpy as np
# ── Pre-loaded Scenarios ──────────────────────────────────────────────────────
# Each scenario bundles a confusion matrix with per-outcome dollar values.
SCENARIOS = {
    "Custom": {"tp": 100, "fp": 20, "fn": 10, "tn": 870, "fp_cost": 50, "fn_cost": 5000, "tp_revenue": 0},
    "Fraud Detection": {"tp": 950, "fp": 407, "fn": 50, "tn": 98593, "fp_cost": 50, "fn_cost": 5000, "tp_revenue": 5000},
    "Cancer Screening": {"tp": 90, "fp": 150, "fn": 10, "tn": 9750, "fp_cost": 500, "fn_cost": 100000, "tp_revenue": 0},
    "Spam Filter": {"tp": 800, "fp": 5, "fn": 200, "tn": 9000, "fp_cost": 1000, "fn_cost": 0.10, "tp_revenue": 0},
    "Credit Approval": {"tp": 450, "fp": 50, "fn": 30, "tn": 470, "fp_cost": 500, "fn_cost": 5000, "tp_revenue": 200},
}


def load_scenario(name):
    """Return the named scenario's values in UI-widget order.

    Unknown names fall back to the "Custom" scenario. Order matches the
    Gradio outputs: (tp, fp, fn, tn, fp_cost, fn_cost, tp_revenue).
    """
    scenario = SCENARIOS.get(name, SCENARIOS["Custom"])
    field_order = ("tp", "fp", "fn", "tn", "fp_cost", "fn_cost", "tp_revenue")
    return tuple(scenario[field] for field in field_order)
def calculate_metrics(tp, fp, fn, tn, fp_cost, fn_cost, tp_revenue):
    """Turn confusion-matrix counts and per-outcome dollar values into visuals.

    Args:
        tp, fp, fn, tn: Confusion-matrix counts (coerced to int — Gradio
            sliders can deliver floats).
        fp_cost: Dollar cost of one false positive.
        fn_cost: Dollar cost of one false negative.
        tp_revenue: Dollar value captured by one true positive.

    Returns:
        (confusion_fig, metrics_fig, summary_markdown): two Plotly figures
        and a markdown string, matching the three Gradio outputs.
    """
    tp, fp, fn, tn = int(tp), int(fp), int(fn), int(tn)
    total = tp + fp + fn + tn

    # Core classification metrics; every ratio is guarded against a zero
    # denominator (an all-zero matrix yields all-zero metrics, not a crash).
    accuracy = (tp + tn) / total if total > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    # Business impact in dollars.
    total_fp_cost = fp * fp_cost
    total_fn_cost = fn * fn_cost
    total_saved = tp * tp_revenue
    total_error_cost = total_fp_cost + total_fn_cost
    net_impact = total_saved - total_error_cost

    def _money(amount):
        # Whole-dollar amounts keep the original "$1,234" form; fractional
        # amounts show cents (fixes the Spam Filter $0.10 FN cost, which
        # ",.0f" previously rendered as "$0").
        return f"${amount:,.0f}" if float(amount) == int(amount) else f"${amount:,.2f}"

    # Confusion-matrix heatmap. Cell text carries the counts; `shade` only
    # drives the colorscale (green-ish for TP/TN, red/orange for FP/FN).
    # The previous unused np.array copy of the matrix was dead code — removed.
    labels = [
        [f"TP: {tp}<br>Correctly caught", f"FP: {fp}<br>False alarm"],
        [f"FN: {fn}<br>Missed!", f"TN: {tn}<br>Correctly cleared"],
    ]
    shade = [[0.7, 0.2], [0.3, 0.6]]
    fig = go.Figure(data=go.Heatmap(
        z=shade,
        text=labels,
        texttemplate="%{text}",
        textfont={"size": 14},
        colorscale=[[0, "#fecaca"], [0.4, "#fed7aa"], [0.6, "#bbf7d0"], [1, "#bbf7d0"]],
        showscale=False,
        xgap=3, ygap=3,
    ))
    fig.update_layout(
        title="Confusion Matrix",
        xaxis=dict(tickvals=[0, 1], ticktext=["Predicted Positive", "Predicted Negative"], side="top"),
        yaxis=dict(tickvals=[0, 1], ticktext=["Actually Positive", "Actually Negative"]),
        height=350, width=450,
        margin=dict(l=20, r=20, t=80, b=20),
    )

    # Metrics bar chart; y-range extends to 1.15 so "outside" labels fit.
    metric_names = ["Accuracy", "Precision", "Recall", "F1", "Specificity"]
    metric_vals = [accuracy, precision, recall, f1, specificity]
    colors_bar = ["#6b7280", "#3b82f6", "#10b981", "#8b5cf6", "#f59e0b"]
    fig_metrics = go.Figure(go.Bar(
        x=metric_names, y=metric_vals,
        marker_color=colors_bar,
        text=[f"{v:.1%}" for v in metric_vals],
        textposition="outside",
    ))
    fig_metrics.update_layout(
        title="Model Metrics",
        yaxis=dict(range=[0, 1.15], tickformat=".0%"),
        height=350,
        margin=dict(l=20, r=20, t=50, b=30),
    )

    # Recommendation: a 10x asymmetry between error costs decides which metric
    # to optimize. (The per-branch rec_color assignments were dead code and
    # have been removed.)
    if fn_cost > fp_cost * 10:
        rec = "**Optimize for RECALL** — missed cases cost far more than false alarms."
    elif fp_cost > fn_cost * 10:
        rec = "**Optimize for PRECISION** — false alarms are the bigger cost."
    else:
        rec = "**Optimize for F1** — both error types have similar costs."

    # Markdown summary. Blank lines separate each table from the following
    # heading so markdown renderers close the table correctly.
    summary = f"""## Metrics Summary
| Metric | Value |
|--------|-------|
| Accuracy | {accuracy:.1%} |
| **Precision** | **{precision:.1%}** |
| **Recall** | **{recall:.1%}** |
| **F1 Score** | **{f1:.1%}** |
| Specificity | {specificity:.1%} |

## Business Impact
| Item | Amount |
|------|--------|
| False Positive Cost | {fp} x {_money(fp_cost)} = **{_money(total_fp_cost)}** |
| False Negative Cost | {fn} x {_money(fn_cost)} = **{_money(total_fn_cost)}** |
| Total Error Cost | **{_money(total_error_cost)}** |
| Value Captured (TP) | {tp} x {_money(tp_revenue)} = **{_money(total_saved)}** |
| **Net Impact** | **{_money(net_impact)}** |

## Recommendation
{rec}
"""
    return fig, fig_metrics, summary
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# NOTE: component creation order inside the Blocks/Row/Column contexts defines
# the on-page layout, so statement order here is load-bearing.
with gr.Blocks(title="Metrics Explainer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # Intro copy framing the tool for a PM audience.
    gr.Markdown(
        "# Metrics Explainer\n\n"
        "**PM Decision:** When your team reports 'accuracy is 95%,' ask: what's the cost "
        "of the 5% errors? This tool helps you translate technical metrics into dollar "
        "amounts stakeholders understand. Use it to decide whether to optimize for precision or recall.\n\n"
        "Adjust the confusion matrix and costs to see how metrics and business impact change. "
        "**Every metric is a business decision.**"
    )
    # Scenario picker; selecting one overwrites every input below via load_scenario.
    scenario_dd = gr.Dropdown(
        choices=list(SCENARIOS.keys()),
        value="Fraud Detection",
        label="Load Scenario"
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Confusion Matrix Counts")
            # Initial values mirror the "Fraud Detection" scenario selected above.
            tp = gr.Slider(0, 5000, value=950, step=1, label="True Positives (correctly caught)")
            fp = gr.Slider(0, 5000, value=407, step=1, label="False Positives (false alarms)")
            fn = gr.Slider(0, 5000, value=50, step=1, label="False Negatives (missed!)")
            tn = gr.Slider(0, 100000, value=98593, step=1, label="True Negatives (correctly cleared)")
        with gr.Column():
            gr.Markdown("### Business Costs ($)")
            fp_cost = gr.Number(value=50, label="Cost per False Positive ($)")
            fn_cost = gr.Number(value=5000, label="Cost per False Negative ($)")
            tp_revenue = gr.Number(value=5000, label="Revenue per True Positive ($)")
    calc_btn = gr.Button("Calculate", variant="primary")
    with gr.Row():
        cm_plot = gr.Plot(label="Confusion Matrix")
        metrics_plot = gr.Plot(label="Metrics")
    summary_md = gr.Markdown()
    # Wire events: dropdown populates the seven inputs; the button (and the
    # initial page load) run the full calculation into the three outputs.
    scenario_dd.change(load_scenario, [scenario_dd], [tp, fp, fn, tn, fp_cost, fn_cost, tp_revenue])
    inputs = [tp, fp, fn, tn, fp_cost, fn_cost, tp_revenue]
    outputs = [cm_plot, metrics_plot, summary_md]
    calc_btn.click(calculate_metrics, inputs, outputs)
    # Auto-calculate on load so the page never starts empty.
    demo.load(calculate_metrics, inputs, outputs)
    gr.Markdown(
        "---\n"
        "**PM Takeaway:** Always ask: what does a false positive cost vs a false negative? "
        "The answer determines which metric to optimize and how to set the threshold.\n\n"
        "*AI for Product Managers*"
    )

if __name__ == "__main__":
    demo.launch()