Spaces:

coredipper
/

operon-escalation-lab

Running

App Files Files Community

operon-escalation-lab / app.py

coredipper

fix: guard parents[2] for HF runtime

d70ca0a verified 2 days ago

raw

history blame contribute delete

11.1 kB

	"""
	Operon Escalation Lab -- Quality-Based Model Escalation
	========================================================

	Interactive demo of the adaptive immune layer:
	VerifierComponent evaluates output quality via a rubric, and
	WatcherComponent escalates from fast -> deep model when quality
	falls below threshold.

	Run locally: pip install gradio && python space-escalation-lab/app.py
	"""

	import sys
	from dataclasses import dataclass
	from pathlib import Path

	import gradio as gr

	# When run from a source checkout, put the directory three levels above this
	# file on sys.path so the local operon_ai package can be imported.
	try:
	    _repo_root = Path(__file__).resolve().parents[2]
	except IndexError:
	    # Running on HF — the path is too shallow; operon-ai is installed via
	    # requirements.txt instead.
	    pass
	else:
	    if str(_repo_root) not in sys.path:
	        sys.path.insert(0, str(_repo_root))

	from operon_ai import ATP_Store, MockProvider, Nucleus, SkillStage, skill_organism
	from operon_ai.patterns.verifier import VerifierComponent, VerifierConfig
	from operon_ai.patterns.watcher import WatcherComponent, WatcherConfig

	# ---------------------------------------------------------------------------
	# Scenario definitions
	# ---------------------------------------------------------------------------

	@dataclass
	class Scenario:
	    """A canned demo case: one task with a scripted fast and deep reply.

	    The demo rubric returns ``fast_quality`` whenever the scored output
	    equals ``fast_response``.
	    """

	    # Display name; also used as the scenario's key in the dropdown.
	    name: str
	    # Prompt handed to the organism.
	    task: str
	    # Scripted reply of the fast model.
	    fast_response: str
	    # Scripted reply of the deep model.
	    deep_response: str
	    # Expected quality of the fast response.
	    fast_quality: float
	    # One-line explanation shown in the UI.
	    description: str


	# Registry of demo scenarios, keyed by each scenario's own name so the key
	# and the ``name`` field cannot drift apart. Insertion order is the order
	# in which the entries are listed below.
	SCENARIOS = {
	    scenario.name: scenario
	    for scenario in (
	        # Fast reply scores well below typical thresholds.
	        Scenario(
	            name="Shallow bug fix",
	            task="Fix the login crash after session timeout",
	            fast_response="Add try/except around the login call.",
	            deep_response=(
	                "Root-cause analysis: the session token is not refreshed on 401 "
	                "retry. Fix: add token refresh in the retry interceptor with "
	                "exponential backoff. Added regression test for expired-token path."
	            ),
	            fast_quality=0.25,
	            description="Fast model produces a shallow patch; deep model finds root cause.",
	        ),
	        # Lowest-scoring fast reply of the three.
	        Scenario(
	            name="Vague summary",
	            task="Summarize the Q3 performance report",
	            fast_response="Performance was good in Q3.",
	            deep_response=(
	                "Q3 highlights: revenue up 12% YoY driven by enterprise segment "
	                "(+23%). Churn decreased from 4.1% to 3.2% after onboarding "
	                "redesign. Two risks: APAC pipeline softening (-8%) and delayed "
	                "SOC2 certification (ETA pushed to Q4)."
	            ),
	            fast_quality=0.15,
	            description="Fast model gives a vague one-liner; deep model gives structured detail.",
	        ),
	        # Fast reply is already good enough at most thresholds.
	        Scenario(
	            name="Adequate response",
	            task="List the three main HTTP status code categories",
	            fast_response=(
	                "1xx Informational, 2xx Success, 3xx Redirection, 4xx Client Error, "
	                "5xx Server Error. The three main categories are 2xx, 4xx, and 5xx."
	            ),
	            deep_response=(
	                "The three main HTTP status code categories are 2xx (Success), "
	                "4xx (Client Error), and 5xx (Server Error)."
	            ),
	            fast_quality=0.85,
	            description="Fast model gives a good enough answer. No escalation expected.",
	        ),
	    )
	}

	# ---------------------------------------------------------------------------
	# Core logic
	# ---------------------------------------------------------------------------

	def _badge(text, color):
	return (f'<span style="background:{color};color:white;padding:3px 10px;'
	f'border-radius:4px;font-size:0.85em;font-weight:600;">{text}</span>')


	def _card(title, content, border_color="#e5e7eb"):
	return (
	f'<div style="border:2px solid {border_color};border-radius:8px;'
	f'margin-bottom:12px;overflow:hidden;">'
	f'<div style="padding:8px 14px;background:{border_color}15;'
	f'border-bottom:1px solid {border_color};">'
	f'<span style="font-weight:700;">{title}</span></div>'
	f'<div style="padding:12px 14px;">{content}</div></div>'
	)


	def run_escalation(scenario_name, threshold):
	    """Run one scenario through a fast/deep organism and render the outcome as HTML.

	    Args:
	        scenario_name: Key into ``SCENARIOS``. An unknown key yields a short
	            prompt instead of a report.
	        threshold: Quality cut-off; coerced to ``float`` and handed to the
	            verifier as ``quality_low_threshold``.

	    Returns:
	        An HTML string made of cards: scenario summary, fast model output,
	        watcher decision (plus the deep output when escalated), final
	        output, and a signal trace when verifier signals were recorded.
	    """
	    # Function-scope import so the block does not depend on a file-level change.
	    from html import escape

	    scenario = SCENARIOS.get(scenario_name)
	    if scenario is None:
	        return "<p>Select a scenario.</p>"

	    threshold = float(threshold)

	    # Score the rubric gives any "respond" output other than the canned fast
	    # reply; also shown on the deep-output badge so the two cannot diverge.
	    deep_quality = 0.95

	    # The rubric is scripted: it recognises the canned fast response and
	    # returns that scenario's preset quality; it does not analyse the text.
	    def rubric(output: str, stage_name: str) -> float:
	        if stage_name != "respond":
	            return 0.8
	        if output == scenario.fast_response:
	            return scenario.fast_quality
	        return deep_quality

	    # Build organism: two mock-backed nuclei that replay the scripted replies.
	    fast = Nucleus(provider=MockProvider(responses={
	        "respond": scenario.fast_response,
	    }))
	    deep = Nucleus(provider=MockProvider(responses={
	        "respond": scenario.deep_response,
	    }))

	    watcher = WatcherComponent(config=WatcherConfig())
	    verifier = VerifierComponent(
	        rubric=rubric,
	        config=VerifierConfig(quality_low_threshold=threshold),
	    )

	    org = skill_organism(
	        stages=[
	            SkillStage(
	                name="respond",
	                role="Responder",
	                instructions="Respond to the task.",
	                mode="fixed",
	            ),
	        ],
	        fast_nucleus=fast,
	        deep_nucleus=deep,
	        budget=ATP_Store(budget=1000, silent=True),
	        components=[watcher, verifier],
	    )

	    result = org.run(scenario.task)

	    # Collect results. Keep the escalate interventions themselves so the
	    # reason shown later belongs to an escalation, not merely to whichever
	    # intervention happened to be recorded first.
	    escalations = [
	        i for i in watcher.interventions if i.kind.value == "escalate"
	    ]
	    escalated = bool(escalations)
	    respond_scores = [q for s, q in verifier.quality_scores if s == "respond"]
	    initial_quality = respond_scores[0] if respond_scores else 0.0
	    below = initial_quality < threshold

	    verifier_signals = [s for s in watcher.signals if s.source == "verifier"]

	    # Build HTML output. Free text is escaped before interpolation; badge
	    # markup produced by _badge is inserted as-is.
	    html_parts = []

	    # Scenario info
	    html_parts.append(_card(
	        f"Scenario: {escape(str(scenario.name))}",
	        f'<p style="color:#6b7280;">{escape(str(scenario.description))}</p>'
	        f'<p><b>Task:</b> {escape(str(scenario.task))}</p>'
	        f'<p><b>Threshold:</b> {threshold:.2f}</p>',
	        "#6366f1",
	    ))

	    # Fast model output
	    verdict_color = "#ef4444" if below else "#22c55e"
	    verdict_text = "Below threshold" if below else "Above threshold"
	    comparator = "&lt;" if below else "&gt;="  # HTML entities for < and >=
	    fast_badge = _badge(f"quality: {initial_quality:.2f}", verdict_color)
	    html_parts.append(_card(
	        f"Fast Model Output {fast_badge}",
	        f'<p style="font-family:monospace;white-space:pre-wrap;">'
	        f'{escape(str(scenario.fast_response))}</p>'
	        f'<p style="margin-top:8px;color:#6b7280;">'
	        f'{verdict_text} '
	        f'({initial_quality:.2f} {comparator} {threshold:.2f})</p>',
	        verdict_color,
	    ))

	    # Escalation decision
	    if escalated:
	        intv = escalations[0]
	        html_parts.append(_card(
	            f"Watcher Decision: {_badge('ESCALATE', '#f59e0b')}",
	            f'<p><b>Reason:</b> {escape(str(intv.reason))}</p>'
	            f'<p style="color:#6b7280;">Fast model quality ({initial_quality:.2f}) '
	            f'fell below threshold ({threshold:.2f}). '
	            f'Watcher escalated to deep model.</p>',
	            "#f59e0b",
	        ))

	        deep_badge = _badge(f"quality: {deep_quality:.2f}", "#22c55e")
	        html_parts.append(_card(
	            f"Deep Model Output {deep_badge}",
	            f'<p style="font-family:monospace;white-space:pre-wrap;">'
	            f'{escape(str(scenario.deep_response))}</p>',
	            "#22c55e",
	        ))
	    else:
	        html_parts.append(_card(
	            f"Watcher Decision: {_badge('NO ESCALATION', '#22c55e')}",
	            f'<p>Quality ({initial_quality:.2f}) met threshold ({threshold:.2f}). '
	            f'Fast model output accepted.</p>',
	            "#22c55e",
	        ))

	    # Final output
	    if escalated:
	        final_badge = _badge("ESCALATED", "#f59e0b")
	    else:
	        final_badge = _badge("DIRECT", "#22c55e")
	    html_parts.append(_card(
	        f"Final Output {final_badge}",
	        f'<p style="font-family:monospace;white-space:pre-wrap;font-weight:600;">'
	        f'{escape(str(result.final_output))}</p>',
	        "#3b82f6",
	    ))

	    # Signal trace: one table row per verifier signal seen by the watcher.
	    rows = []
	    for sig in verifier_signals:
	        q = sig.detail.get("quality", 0)
	        bt = sig.detail.get("below_threshold", False)
	        status = _badge("BELOW", "#ef4444") if bt else _badge("OK", "#22c55e")
	        rows.append(
	            f'<tr style="border-bottom:1px solid #f3f4f6;">'
	            f'<td style="padding:4px 8px;">{escape(str(sig.stage_name))}</td>'
	            f'<td style="padding:4px 8px;">{q:.2f}</td>'
	            f'<td style="padding:4px 8px;">{sig.value:.2f}</td>'
	            f'<td style="padding:4px 8px;">{status}</td></tr>')
	    sig_rows = "".join(rows)

	    if sig_rows:
	        html_parts.append(_card(
	            "Signal Trace",
	            '<table style="width:100%;border-collapse:collapse;">'
	            '<tr style="border-bottom:2px solid #e5e7eb;color:#6b7280;">'
	            '<th style="text-align:left;padding:4px 8px;">Stage</th>'
	            '<th style="text-align:left;padding:4px 8px;">Quality</th>'
	            '<th style="text-align:left;padding:4px 8px;">Severity</th>'
	            '<th style="text-align:left;padding:4px 8px;">Status</th></tr>'
	            f'{sig_rows}</table>',
	            "#8b5cf6",
	        ))

	    return "\n".join(html_parts)


	def load_scenario(name):
	    """Return the description of the scenario called *name*, or ``""`` if unknown."""
	    selected = SCENARIOS.get(name)
	    return selected.description if selected else ""


	# ---------------------------------------------------------------------------
	# Gradio UI
	# ---------------------------------------------------------------------------

	def build_app() -> gr.Blocks:
	    """Assemble the Gradio UI for the escalation demo and return it unlaunched.

	    Layout: intro text, a scenario dropdown with a Run button, the selected
	    scenario's description, a threshold slider, and an HTML results pane.
	    Clicking Run calls ``run_escalation``; changing the dropdown refreshes
	    the description through ``load_scenario``.
	    """
	    # Single source for the initial selection, so the dropdown value and the
	    # description shown beneath it always agree.
	    default_scenario = "Shallow bug fix"

	    with gr.Blocks(title="Operon Escalation Lab") as app:
	        gr.Markdown(
	            "# Operon Escalation Lab\n"
	            "Explore quality-based escalation: the VerifierComponent scores "
	            "each stage's output, and the WatcherComponent escalates from the "
	            "fast model to the deep model when quality drops below threshold.\n\n"
	            "Biological analogy: Innate immunity (Watcher) detects generic anomalies. "
	            "Adaptive immunity (Verifier) evaluates against a specific rubric.\n\n"
	            # A plain "|" separator: "\|" is not a valid Python string escape.
	            "[GitHub](https://github.com/coredipper/operon) | "
	            "[Paper](https://github.com/coredipper/operon/tree/main/article)")

	        with gr.Row():
	            scenario_dd = gr.Dropdown(
	                choices=list(SCENARIOS),
	                value=default_scenario,
	                label="Scenario", scale=2)
	            run_btn = gr.Button("Run", variant="primary", scale=1)

	        scenario_desc = gr.Markdown(load_scenario(default_scenario))

	        threshold_slider = gr.Slider(
	            minimum=0.1, maximum=0.95, value=0.5, step=0.05,
	            label="Quality Threshold (below this = escalate)")

	        gr.Markdown("### Results")
	        results_output = gr.HTML()

	        run_btn.click(
	            fn=run_escalation,
	            inputs=[scenario_dd, threshold_slider],
	            outputs=[results_output])
	        scenario_dd.change(
	            fn=load_scenario,
	            inputs=[scenario_dd],
	            outputs=[scenario_desc])

	    return app


	if __name__ == "__main__":
	    # Script entry point: build the UI, then serve it with the Soft theme.
	    app = build_app()
	    app.launch(
	        theme=gr.themes.Soft(),
	    )