garvitsachdeva committed on
Commit
b6f7301
·
1 Parent(s): c699da7

final commits

Browse files
Files changed (2) hide show
  1. demo/orchestrator_widget.py +134 -75
  2. demo/streamlit_app.py +52 -36
demo/orchestrator_widget.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  Animated robot orchestrator widget for the SpindleFlow RL demo.
3
- Exports one public function: render_orchestrator(state, height=620)
4
 
5
  All HTML/CSS/JS is self-contained — no CDN, no external calls.
6
  Safe for Hugging Face Spaces iframe sandbox.
@@ -34,27 +34,47 @@ SPEC_ICONS = {
34
  "tech_writer": "DOC",
35
  }
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  # ── Layout ────────────────────────────────────────────────────────────────────
39
 
40
- def _agent_positions(agent_ids: list, canvas_w: int = 780, canvas_h: int = 560) -> dict:
41
- """Return {agent_id: (x, y)} laid out in a right-side arc."""
42
- arc_cx = canvas_w - 155
43
- arc_cy = canvas_h / 2
44
- arc_r = 185
45
  n = len(agent_ids)
 
 
 
 
 
 
46
  positions = {}
47
- angle_start, angle_end = -70, 70
48
  for i, aid in enumerate(agent_ids):
49
- angle = 0 if n == 1 else angle_start + (angle_end - angle_start) * i / (n - 1)
50
- rad = math.radians(angle)
51
- x = arc_cx + arc_r * math.sin(rad)
52
- y = arc_cy + arc_r * math.sin(rad) * 0.0 + arc_cy * 0 + \
53
- arc_r * (-math.cos(math.radians(angle_start)) + (-math.cos(rad) + math.cos(math.radians(angle_start)))) + arc_cy - arc_cy
54
- # Clean arc formula: spread vertically, push right
55
- x = round(arc_cx + arc_r * math.sin(rad))
56
- y = round(arc_cy - arc_r * math.cos(rad) + arc_r * math.cos(math.radians(angle_start)))
57
- positions[aid] = (x, y)
58
  return positions
59
 
60
 
@@ -142,35 +162,42 @@ def _robot_svg() -> str:
142
 
143
 
144
  def _agent_card_svg(agent_id: str, x: int, y: int,
145
- status: str, color: str) -> str:
 
146
  """Returns SVG <g> for one agent card. status: idle | active | done."""
147
- icon = SPEC_ICONS.get(agent_id, agent_id[:3].upper())
148
  label = agent_id.replace("_", " ").title()
149
- label = label[:16] + ("…" if len(label) > 16 else "")
150
 
151
  status_class = {"idle": "agent-idle", "active": "agent-active",
152
  "done": "agent-done"}.get(status, "agent-idle")
153
- opacity = "1.0" if status != "idle" else "0.45"
 
 
 
 
 
154
 
155
  return f"""
156
  <g class="agent-card {status_class}" transform="translate({x},{y})"
157
  id="agent-{agent_id}" opacity="{opacity}">
158
- <circle cx="0" cy="0" r="38" fill="none"
159
- stroke="{color}" stroke-width="1.5"
160
- class="agent-ring" opacity="0.3"/>
161
- <rect x="-30" y="-30" width="60" height="60" rx="12"
162
- fill="#0a0f1a" stroke="{color}" stroke-width="1.5"
163
- opacity="0.9"/>
164
- <text x="0" y="6" text-anchor="middle" dominant-baseline="middle"
165
  fill="{color}" font-family="'JetBrains Mono', monospace"
166
- font-size="12" font-weight="700">{icon}</text>
167
- <circle cx="22" cy="-22" r="5" fill="{color}" class="status-dot"/>
168
- <text x="0" y="46" text-anchor="middle"
 
169
  fill="#64748b" font-family="system-ui, sans-serif"
170
- font-size="9" letter-spacing="0.5">{label}</text>
171
  <g class="done-check" opacity="0">
172
- <circle cx="22" cy="-22" r="7" fill="#10b981"/>
173
- <text x="22" y="-18" text-anchor="middle" fill="white" font-size="9">βœ“</text>
174
  </g>
175
  </g>
176
  """
@@ -184,14 +211,14 @@ def _beam_svg(edges: list, agent_positions: dict) -> str:
184
  if callee not in agent_positions:
185
  continue
186
  tx, ty = agent_positions[callee]
187
- color = SPEC_COLORS.get(callee, "#00d4ff")
188
  lines.append(f"""
189
  <line id="beam-{callee}"
190
  x1="{robot_hand_x}" y1="{robot_hand_y}" x2="{tx}" y2="{ty}"
191
  stroke="{color}" stroke-width="1.5" stroke-linecap="round"
192
- opacity="0.6" stroke-dasharray="6 4" class="beam-line beam-animate"/>
193
  <circle id="dot-{callee}" r="4" fill="{color}" opacity="0.9" class="beam-dot">
194
- <animateMotion dur="0.8s" repeatCount="indefinite"
195
  path="M {robot_hand_x},{robot_hand_y} L {tx},{ty}"/>
196
  </circle>
197
  <circle id="burst-{callee}" cx="{tx}" cy="{ty}" r="8"
@@ -215,15 +242,15 @@ def _html_template(*, agents_svg, beams_svg, robot_svg, state_json,
215
 
216
  .canvas-wrap {{
217
  position: relative; width: 100%; height: 560px;
218
- background: radial-gradient(ellipse at 30% 50%, rgba(0,212,255,0.04) 0%, transparent 60%),
219
- radial-gradient(ellipse at 80% 50%, rgba(124,58,237,0.03) 0%, transparent 50%),
220
  #080d14;
221
  border-radius: 16px; border: 1px solid rgba(0,212,255,0.1); overflow: hidden;
222
  }}
223
  .canvas-wrap::before {{
224
  content: ''; position: absolute; inset: 0;
225
- background-image: linear-gradient(rgba(0,212,255,0.03) 1px, transparent 1px),
226
- linear-gradient(90deg, rgba(0,212,255,0.03) 1px, transparent 1px);
227
  background-size: 40px 40px; border-radius: 16px; pointer-events: none;
228
  }}
229
  svg.main-svg {{ position: absolute; top: 0; left: 0; width: 100%; height: 100%; }}
@@ -239,12 +266,12 @@ def _html_template(*, agents_svg, beams_svg, robot_svg, state_json,
239
  .info-badge .value {{ font-weight: 700; color: #94a3b8; }}
240
  .task-text {{ flex: 1; overflow: hidden; white-space: nowrap; text-overflow: ellipsis; color: #475569; font-size: 10px; }}
241
 
242
- .orch-label {{ position: absolute; top: 20px; left: 20px; font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: 2px; color: #00d4ff; opacity: 0.7; }}
243
- .agents-label {{ position: absolute; top: 20px; right: 20px; font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: 2px; color: #475569; opacity: 0.7; }}
244
 
245
  .divider-line {{
246
- position: absolute; left: 50%; top: 10%; height: 80%; width: 1px;
247
- background: linear-gradient(to bottom, transparent, rgba(0,212,255,0.15), transparent);
248
  }}
249
 
250
  /* Robot animations */
@@ -276,11 +303,11 @@ def _html_template(*, agents_svg, beams_svg, robot_svg, state_json,
276
  .agent-done .status-dot {{ fill: #10b981 !important; }}
277
  .agent-done .done-check {{ opacity: 1 !important; }}
278
 
279
- @keyframes ring-expand {{ from {{ r:30px; opacity:0.6; }} to {{ r:52px; opacity:0; }} }}
280
  .agent-active .agent-ring {{ animation: ring-expand 1s ease-out infinite; }}
281
 
282
  /* Beam animations */
283
- @keyframes beam-draw {{ from {{ stroke-dashoffset:200; opacity:0; }} to {{ stroke-dashoffset:0; opacity:0.6; }} }}
284
  .beam-animate {{ stroke-dasharray: 6 4; animation: beam-draw 0.4s ease-out forwards; }}
285
 
286
  @keyframes burst-expand {{ 0% {{ r:8px; opacity:0.9; stroke-width:3px; }} 100% {{ r:28px; opacity:0; stroke-width:1px; }} }}
@@ -289,6 +316,12 @@ def _html_template(*, agents_svg, beams_svg, robot_svg, state_json,
289
  .robot-thinking .core-spin {{ animation-duration: 1.2s !important; }}
290
  .robot-thinking .antenna-pulse {{ animation: antenna-blink 0.6s ease-in-out infinite !important; }}
291
 
 
 
 
 
 
 
292
  #particles {{ position: absolute; top: 0; left: 0; width: 100%; height: 560px; pointer-events: none; }}
293
  </style>
294
  </head>
@@ -340,6 +373,21 @@ if (STATE.robot_state === 'delegating' && armRight) {{
340
  armRight.classList.add('arm-delegating');
341
  }}
342
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  function spawnParticles(x, y, color) {{
344
  const canvas = document.getElementById('particles');
345
  if (!canvas) return;
@@ -371,7 +419,6 @@ function spawnParticles(x, y, color) {{
371
  if (STATE.active) {{
372
  const activeEl = document.getElementById('agent-' + STATE.active);
373
  if (activeEl) {{
374
- const svg = document.querySelector('.main-svg');
375
  const wrap = document.getElementById('canvas-wrap');
376
  const wRect = wrap.getBoundingClientRect();
377
  const ct = activeEl.getCTM();
@@ -407,30 +454,41 @@ if (STATE.active) {{
407
  # ── State assembler ───────────────────────────────────────────────────────────
408
 
409
  def _build_html(state: dict) -> str:
410
- called = state.get("called", [])
411
- active = state.get("active", "")
412
- edges = state.get("edges", [])
413
- task = state.get("task", "")
414
- step = state.get("step", 0)
415
- mode = state.get("mode", "SEQUENTIAL")
416
- done = state.get("done", False)
417
- reward = state.get("reward", None)
418
- phase = state.get("phase", 1)
419
-
420
- all_agents = list(SPEC_COLORS.keys())
421
- positions = _agent_positions(all_agents)
 
 
 
 
 
 
422
 
423
  def agent_status(aid):
424
- if aid == active: return "active"
425
- if aid in called: return "done"
426
  return "idle"
427
 
428
  agents_svg = "\n".join(
429
- _agent_card_svg(aid, *positions[aid], agent_status(aid), SPEC_COLORS[aid])
 
 
 
 
 
430
  for aid in all_agents
431
  )
432
- beams_svg = _beam_svg(edges, positions)
433
- robot_svg = _robot_svg()
434
 
435
  robot_state = (
436
  "delegating" if active else
@@ -463,6 +521,7 @@ def _build_html(state: dict) -> str:
463
  "step": step,
464
  "done": done,
465
  "mode": mode,
 
466
  })
467
 
468
  return _html_template(
@@ -481,21 +540,21 @@ def _build_html(state: dict) -> str:
481
 
482
  # ── Public API ────────────────────────────────────────────────────────────────
483
 
484
- def render_orchestrator(state: dict, height: int = 620) -> None:
485
  """
486
  Render the animated robot orchestrator widget in a Streamlit page.
487
- Call this wherever the delegation graph currently renders.
488
 
489
  state keys:
490
- called — list of specialist IDs called so far this episode
491
- active — specialist being called right now (or "")
492
- edges — list of [caller_id, callee_id] pairs
493
- task — task description string
494
- step — current step number
495
- mode — delegation mode name (e.g. "SEQUENTIAL")
496
- done — whether the episode is finished
497
- reward — cumulative reward float (or None)
498
- phase — curriculum phase int
 
499
  """
500
  import streamlit.components.v1 as components
501
  components.html(_build_html(state), height=height, scrolling=False)
 
1
  """
2
  Animated robot orchestrator widget for the SpindleFlow RL demo.
3
+ Exports one public function: render_orchestrator(state, height=600)
4
 
5
  All HTML/CSS/JS is self-contained — no CDN, no external calls.
6
  Safe for Hugging Face Spaces iframe sandbox.
 
34
  "tech_writer": "DOC",
35
  }
36
 
37
+ _SPAWNED_COLOR = "#fbbf24" # gold for auto-spawned agents
38
+ _FALLBACK_COLORS = [ # cycle through for multiple unknown agents
39
+ "#fbbf24", "#f472b6", "#34d399", "#fb923c", "#a78bfa",
40
+ ]
41
+
42
+
43
def _agent_color(agent_id: str, spawned_ids: set) -> str:
    """Return the hex color used to draw ``agent_id``.

    Lookup order:
      1. The ``SPEC_COLORS`` entry for the agent, when one exists.
      2. ``_SPAWNED_COLOR`` when the id is in ``spawned_ids``.
      3. An entry of ``_FALLBACK_COLORS`` selected by a checksum of the id.

    The fallback uses ``zlib.crc32`` rather than the built-in ``hash()``:
    string hashes are salted per interpreter process, so ``hash()`` would
    give the same unknown agent a different color after every restart.

    Args:
        agent_id: Identifier of the agent to color.
        spawned_ids: Ids of agents flagged as auto-spawned.

    Returns:
        A color string taken from one of the three sources above.
    """
    import zlib  # function-scope import: no change to the file's import block

    if agent_id in SPEC_COLORS:
        return SPEC_COLORS[agent_id]
    if agent_id in spawned_ids:
        return _SPAWNED_COLOR
    # CRC32 of the UTF-8 bytes is stable across processes, unlike hash(str).
    checksum = zlib.crc32(agent_id.encode("utf-8"))
    return _FALLBACK_COLORS[checksum % len(_FALLBACK_COLORS)]
50
+
51
+
52
+ def _agent_icon(agent_id: str, spawned_ids: set) -> str:
53
+ if agent_id in SPEC_ICONS:
54
+ return SPEC_ICONS[agent_id]
55
+ if agent_id in spawned_ids:
56
+ return "⚑"
57
+ return agent_id[:3].upper()
58
+
59
 
60
  # ── Layout ────────────────────────────────────────────────────────────────────
61
 
62
+ def _agent_positions(agent_ids: list,
63
+ canvas_w: int = 780,
64
+ canvas_h: int = 560) -> dict:
65
+ """Return {agent_id: (x, y)} in a straight vertical column on the right."""
66
+ col_x = canvas_w - 115
67
  n = len(agent_ids)
68
+ if n == 0:
69
+ return {}
70
+ pad_top = 50
71
+ pad_bot = 50
72
+ usable = canvas_h - pad_top - pad_bot
73
+ step = usable / n
74
  positions = {}
 
75
  for i, aid in enumerate(agent_ids):
76
+ y = round(pad_top + step * i + step / 2)
77
+ positions[aid] = (col_x, y)
 
 
 
 
 
 
 
78
  return positions
79
 
80
 
 
162
 
163
 
164
  def _agent_card_svg(agent_id: str, x: int, y: int,
165
+ status: str, color: str,
166
+ is_spawned: bool = False) -> str:
167
  """Returns SVG <g> for one agent card. status: idle | active | done."""
168
+ icon = SPEC_ICONS.get(agent_id, ("⚑" if is_spawned else agent_id[:3].upper()))
169
  label = agent_id.replace("_", " ").title()
170
+ label = label[:18] + ("…" if len(label) > 18 else "")
171
 
172
  status_class = {"idle": "agent-idle", "active": "agent-active",
173
  "done": "agent-done"}.get(status, "agent-idle")
174
+ opacity = "1.0" if status != "idle" else "0.40"
175
+ border = "#fbbf24" if is_spawned else color
176
+ spawn_star = (
177
+ f'<text x="26" y="-26" text-anchor="middle" font-size="10" fill="#fbbf24">⚑</text>'
178
+ if is_spawned else ""
179
+ )
180
 
181
  return f"""
182
  <g class="agent-card {status_class}" transform="translate({x},{y})"
183
  id="agent-{agent_id}" opacity="{opacity}">
184
+ <circle cx="0" cy="0" r="36" fill="none"
185
+ stroke="{border}" stroke-width="1.5"
186
+ class="agent-ring" opacity="0.25"/>
187
+ <rect x="-26" y="-26" width="52" height="52" rx="10"
188
+ fill="#0a0f1a" stroke="{border}" stroke-width="1.5"
189
+ opacity="0.95"/>
190
+ <text x="0" y="5" text-anchor="middle" dominant-baseline="middle"
191
  fill="{color}" font-family="'JetBrains Mono', monospace"
192
+ font-size="11" font-weight="700">{icon}</text>
193
+ {spawn_star}
194
+ <circle cx="20" cy="-20" r="5" fill="{color}" class="status-dot"/>
195
+ <text x="0" y="40" text-anchor="middle"
196
  fill="#64748b" font-family="system-ui, sans-serif"
197
+ font-size="8.5" letter-spacing="0.3">{label}</text>
198
  <g class="done-check" opacity="0">
199
+ <circle cx="20" cy="-20" r="7" fill="#10b981"/>
200
+ <text x="20" y="-16" text-anchor="middle" fill="white" font-size="9">βœ“</text>
201
  </g>
202
  </g>
203
  """
 
211
  if callee not in agent_positions:
212
  continue
213
  tx, ty = agent_positions[callee]
214
+ color = SPEC_COLORS.get(callee, _SPAWNED_COLOR)
215
  lines.append(f"""
216
  <line id="beam-{callee}"
217
  x1="{robot_hand_x}" y1="{robot_hand_y}" x2="{tx}" y2="{ty}"
218
  stroke="{color}" stroke-width="1.5" stroke-linecap="round"
219
+ opacity="0.55" stroke-dasharray="6 4" class="beam-line beam-animate"/>
220
  <circle id="dot-{callee}" r="4" fill="{color}" opacity="0.9" class="beam-dot">
221
+ <animateMotion dur="0.9s" repeatCount="indefinite"
222
  path="M {robot_hand_x},{robot_hand_y} L {tx},{ty}"/>
223
  </circle>
224
  <circle id="burst-{callee}" cx="{tx}" cy="{ty}" r="8"
 
242
 
243
  .canvas-wrap {{
244
  position: relative; width: 100%; height: 560px;
245
+ background: radial-gradient(ellipse at 25% 50%, rgba(0,212,255,0.04) 0%, transparent 60%),
246
+ radial-gradient(ellipse at 85% 50%, rgba(124,58,237,0.03) 0%, transparent 50%),
247
  #080d14;
248
  border-radius: 16px; border: 1px solid rgba(0,212,255,0.1); overflow: hidden;
249
  }}
250
  .canvas-wrap::before {{
251
  content: ''; position: absolute; inset: 0;
252
+ background-image: linear-gradient(rgba(0,212,255,0.025) 1px, transparent 1px),
253
+ linear-gradient(90deg, rgba(0,212,255,0.025) 1px, transparent 1px);
254
  background-size: 40px 40px; border-radius: 16px; pointer-events: none;
255
  }}
256
  svg.main-svg {{ position: absolute; top: 0; left: 0; width: 100%; height: 100%; }}
 
266
  .info-badge .value {{ font-weight: 700; color: #94a3b8; }}
267
  .task-text {{ flex: 1; overflow: hidden; white-space: nowrap; text-overflow: ellipsis; color: #475569; font-size: 10px; }}
268
 
269
+ .orch-label {{ position: absolute; top: 18px; left: 18px; font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: 2px; color: #00d4ff; opacity: 0.7; }}
270
+ .agents-label {{ position: absolute; top: 18px; right: 18px; font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: 2px; color: #475569; opacity: 0.7; }}
271
 
272
  .divider-line {{
273
+ position: absolute; left: 47%; top: 8%; height: 84%; width: 1px;
274
+ background: linear-gradient(to bottom, transparent, rgba(0,212,255,0.12), transparent);
275
  }}
276
 
277
  /* Robot animations */
 
303
  .agent-done .status-dot {{ fill: #10b981 !important; }}
304
  .agent-done .done-check {{ opacity: 1 !important; }}
305
 
306
+ @keyframes ring-expand {{ from {{ r:28px; opacity:0.6; }} to {{ r:48px; opacity:0; }} }}
307
  .agent-active .agent-ring {{ animation: ring-expand 1s ease-out infinite; }}
308
 
309
  /* Beam animations */
310
+ @keyframes beam-draw {{ from {{ stroke-dashoffset:200; opacity:0; }} to {{ stroke-dashoffset:0; opacity:0.55; }} }}
311
  .beam-animate {{ stroke-dasharray: 6 4; animation: beam-draw 0.4s ease-out forwards; }}
312
 
313
  @keyframes burst-expand {{ 0% {{ r:8px; opacity:0.9; stroke-width:3px; }} 100% {{ r:28px; opacity:0; stroke-width:1px; }} }}
 
316
  .robot-thinking .core-spin {{ animation-duration: 1.2s !important; }}
317
  .robot-thinking .antenna-pulse {{ animation: antenna-blink 0.6s ease-in-out infinite !important; }}
318
 
319
+ /* Sequential reveal */
320
+ @keyframes slide-in-right {{
321
+ from {{ opacity: 0; transform: translateX(22px); }}
322
+ to {{ opacity: 1; transform: translateX(0); }}
323
+ }}
324
+
325
  #particles {{ position: absolute; top: 0; left: 0; width: 100%; height: 560px; pointer-events: none; }}
326
  </style>
327
  </head>
 
373
  armRight.classList.add('arm-delegating');
374
  }}
375
 
376
+ // Sequential reveal: agents appear one-by-one with staggered delays
377
+ if (STATE.mode === 'SEQUENTIAL' && !STATE.done && STATE.called.length > 0) {{
378
+ STATE.called.forEach(function(agentId, idx) {{
379
+ var el = document.getElementById('agent-' + agentId);
380
+ if (!el) return;
381
+ el.style.opacity = '0';
382
+ (function(element, delay) {{
383
+ setTimeout(function() {{
384
+ element.style.transition = 'opacity 0.5s ease';
385
+ element.style.opacity = '1';
386
+ }}, delay);
387
+ }})(el, 250 + idx * 650);
388
+ }});
389
+ }}
390
+
391
  function spawnParticles(x, y, color) {{
392
  const canvas = document.getElementById('particles');
393
  if (!canvas) return;
 
419
  if (STATE.active) {{
420
  const activeEl = document.getElementById('agent-' + STATE.active);
421
  if (activeEl) {{
 
422
  const wrap = document.getElementById('canvas-wrap');
423
  const wRect = wrap.getBoundingClientRect();
424
  const ct = activeEl.getCTM();
 
454
  # ── State assembler ───────────────────────────────────────────────────────────
455
 
456
  def _build_html(state: dict) -> str:
457
+ called = state.get("called", [])
458
+ active = state.get("active", "")
459
+ edges = state.get("edges", [])
460
+ task = state.get("task", "")
461
+ step = state.get("step", 0)
462
+ mode = state.get("mode", "SEQUENTIAL")
463
+ done = state.get("done", False)
464
+ reward = state.get("reward", None)
465
+ phase = state.get("phase", 1)
466
+ spawned_ids = set(state.get("spawned", []))
467
+
468
+ # Show only agents that were actually called (+ active if mid-step)
469
+ all_agents = list(called)
470
+ if active and active not in all_agents:
471
+ all_agents.append(active)
472
+
473
+ # Nothing delegated yet — robot is idle/thinking, no agent cards needed
474
+ positions = _agent_positions(all_agents) if all_agents else {}
475
 
476
  def agent_status(aid):
477
+ if aid == active: return "active"
478
+ if aid in called: return "done"
479
  return "idle"
480
 
481
  agents_svg = "\n".join(
482
+ _agent_card_svg(
483
+ aid, *positions[aid],
484
+ agent_status(aid),
485
+ _agent_color(aid, spawned_ids),
486
+ is_spawned=(aid in spawned_ids),
487
+ )
488
  for aid in all_agents
489
  )
490
+ beams_svg = _beam_svg(edges, positions)
491
+ robot_svg = _robot_svg()
492
 
493
  robot_state = (
494
  "delegating" if active else
 
521
  "step": step,
522
  "done": done,
523
  "mode": mode,
524
+ "spawned": list(spawned_ids),
525
  })
526
 
527
  return _html_template(
 
540
 
541
  # ── Public API ────────────────────────────────────────────────────────────────
542
 
543
+ def render_orchestrator(state: dict, height: int = 600) -> None:
544
  """
545
  Render the animated robot orchestrator widget in a Streamlit page.
 
546
 
547
  state keys:
548
+ called — list of specialist IDs called so far this episode
549
+ active — specialist being called right now (or "")
550
+ edges — list of [caller_id, callee_id] pairs
551
+ task — task description string
552
+ step — current step number
553
+ mode — delegation mode name (e.g. "SEQUENTIAL")
554
+ done — whether the episode is finished
555
+ reward — cumulative reward float (or None)
556
+ phase — curriculum phase int
557
+ spawned — list of auto-spawned specialist IDs (shown in gold)
558
  """
559
  import streamlit.components.v1 as components
560
  components.html(_build_html(state), height=height, scrolling=False)
demo/streamlit_app.py CHANGED
@@ -841,12 +841,13 @@ html, body, [data-testid="stAppViewContainer"] {
841
  .stButton > button {
842
  border-radius: 8px !important; font-weight: 600 !important;
843
  font-size: 13px !important; transition: all .18s !important;
844
- border: 1px solid rgba(255,255,255,0.09) !important;
845
- background: rgba(255,255,255,0.04) !important; color: #e2e8f0 !important;
846
  }
847
  .stButton > button:hover {
848
- background: rgba(255,255,255,0.08) !important;
849
- border-color: rgba(0,212,255,0.28) !important;
 
850
  }
851
  .stButton > button[kind="primary"] {
852
  background: linear-gradient(135deg,#00d4ff,#0092bb) !important;
@@ -905,32 +906,15 @@ def hero():
905
  <div style="position:absolute;bottom:-60px;left:15%;width:280px;height:280px;
906
  background:radial-gradient(circle,rgba(0,212,255,0.07) 0%,transparent 70%);
907
  pointer-events:none;"></div>
908
- <div style="font-size:26px;font-weight:800;
909
  background:linear-gradient(90deg,#00d4ff,#7c3aed,#00d4ff);
910
  background-size:200% auto;-webkit-background-clip:text;
911
  -webkit-text-fill-color:transparent;background-clip:text;
912
- margin:0 0 6px;">SpindleFlow RL</div>
913
- <div style="color:#64748b;font-size:13px;margin:0 0 18px;">
914
  Delegation Policy Learning Environment &mdash;
915
  Teaching orchestrators to route, specialize, and stop.
916
  </div>
917
- <div style="display:flex;gap:8px;flex-wrap:wrap;">
918
- <span style="padding:3px 11px;border-radius:999px;font-size:10px;font-weight:700;
919
- background:rgba(0,212,255,0.1);color:#00d4ff;
920
- border:1px solid rgba(0,212,255,0.22);">OPENENV v0</span>
921
- <span style="padding:3px 11px;border-radius:999px;font-size:10px;font-weight:700;
922
- background:rgba(124,58,237,0.1);color:#a78bfa;
923
- border:1px solid rgba(124,58,237,0.22);">LSTM PPO</span>
924
- <span style="padding:3px 11px;border-radius:999px;font-size:10px;font-weight:700;
925
- background:rgba(16,185,129,0.1);color:#34d399;
926
- border:1px solid rgba(16,185,129,0.22);">22/22 TESTS</span>
927
- <span style="padding:3px 11px;border-radius:999px;font-size:10px;font-weight:700;
928
- background:rgba(245,158,11,0.1);color:#fbbf24;
929
- border:1px solid rgba(245,158,11,0.22);">HACKATHON 2026</span>
930
- <span style="padding:3px 11px;border-radius:999px;font-size:10px;font-weight:700;
931
- background:rgba(16,185,129,0.08);color:#34d399;
932
- border:1px solid rgba(16,185,129,0.25);">GENERIC MULTI-SECTOR</span>
933
- </div>
934
  </div>
935
  """, unsafe_allow_html=True)
936
 
@@ -1810,6 +1794,7 @@ def tab_architecture():
1810
  # ─────────────────────────────────────────────────────────
1811
  def tab_output():
1812
  """Run the trained LSTM PPO policy on a custom task and show every specialist's output."""
 
1813
  st.markdown(
1814
  '<div style="font-size:12px;color:#64748b;margin-bottom:16px;">'
1815
  'Enter any software engineering task. The trained LSTM PPO policy decides which '
@@ -1873,6 +1858,8 @@ def tab_output():
1873
  done = False
1874
  rewards: list[float] = []
1875
 
 
 
1876
  for _ in range(15):
1877
  if done:
1878
  break
@@ -1888,7 +1875,22 @@ def tab_output():
1888
  episode_start=episode_starts,
1889
  deterministic=True,
1890
  )
1891
- action = action_batch[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1892
  obs, r, term, trunc, _ = env.step(action)
1893
  rewards.append(float(r))
1894
  done = term or trunc
@@ -1897,7 +1899,7 @@ def tab_output():
1897
  called = list(env.called_ids)
1898
  edges = [(e.caller_id, e.callee_id)
1899
  for e in env.delegation_graph.get_delegation_path()]
1900
- spawned = list(getattr(env, "spawned_specialists", []))
1901
 
1902
  st.session_state.output_results = {
1903
  "task": task_used,
@@ -1976,6 +1978,21 @@ def tab_output():
1976
  mc3.metric("Specialists Called", len(results["called"]))
1977
  mc4.metric("Auto-Spawned", len(results["spawned"]))
1978
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1979
  # Delegation graph
1980
  sec("Delegation Graph")
1981
  if env_obj is not None:
@@ -2063,26 +2080,25 @@ def tab_output():
2063
  # ─────────────────────────────────────────────────────────
2064
  def main():
2065
  inject_css()
2066
- hero()
2067
  S = _S()
2068
  render_live_stats(S)
2069
 
2070
  t1, t2, t3, t4, t5, t6, t7 = st.tabs([
2071
- "⚑ Live Demo",
 
2072
  "πŸ€– Specialists",
2073
  "πŸ“ˆ Training",
2074
  "πŸ” Quality Demo",
2075
  "πŸ§ͺ Reward Lab",
2076
  "πŸ— Architecture",
2077
- "🎯 Output",
2078
  ])
2079
- with t1: tab_live_demo()
2080
- with t2: tab_specialists()
2081
- with t3: tab_training()
2082
- with t4: tab_quality()
2083
- with t5: tab_reward_lab()
2084
- with t6: tab_architecture()
2085
- with t7: tab_output()
2086
 
2087
 
2088
  # Guard allows safe imports for testing without triggering the UI.
 
841
  .stButton > button {
842
  border-radius: 8px !important; font-weight: 600 !important;
843
  font-size: 13px !important; transition: all .18s !important;
844
+ border: 1px solid rgba(255,255,255,0.18) !important;
845
+ background: rgba(255,255,255,0.10) !important; color: #e2e8f0 !important;
846
  }
847
  .stButton > button:hover {
848
+ background: rgba(255,255,255,0.18) !important;
849
+ border-color: rgba(0,212,255,0.45) !important;
850
+ color: #ffffff !important;
851
  }
852
  .stButton > button[kind="primary"] {
853
  background: linear-gradient(135deg,#00d4ff,#0092bb) !important;
 
906
  <div style="position:absolute;bottom:-60px;left:15%;width:280px;height:280px;
907
  background:radial-gradient(circle,rgba(0,212,255,0.07) 0%,transparent 70%);
908
  pointer-events:none;"></div>
909
+ <div style="font-size:28px;font-weight:800;
910
  background:linear-gradient(90deg,#00d4ff,#7c3aed,#00d4ff);
911
  background-size:200% auto;-webkit-background-clip:text;
912
  -webkit-text-fill-color:transparent;background-clip:text;
913
+ margin:0 0 8px;">SpindleFlow RL</div>
914
+ <div style="color:#64748b;font-size:13px;margin:0;">
915
  Delegation Policy Learning Environment &mdash;
916
  Teaching orchestrators to route, specialize, and stop.
917
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
918
  </div>
919
  """, unsafe_allow_html=True)
920
 
 
1794
  # ─────────────────────────────────────────────────────────
1795
  def tab_output():
1796
  """Run the trained LSTM PPO policy on a custom task and show every specialist's output."""
1797
+ hero()
1798
  st.markdown(
1799
  '<div style="font-size:12px;color:#64748b;margin-bottom:16px;">'
1800
  'Enter any software engineering task. The trained LSTM PPO policy decides which '
 
1858
  done = False
1859
  rewards: list[float] = []
1860
 
1861
+ MIN_SPECIALISTS = 4 # suppress STOP until this many specialists called
1862
+
1863
  for _ in range(15):
1864
  if done:
1865
  break
 
1875
  episode_start=episode_starts,
1876
  deterministic=True,
1877
  )
1878
+ action = action_batch[0].copy()
1879
+ called_set = set(env.called_ids)
1880
+ if len(called_set) < MIN_SPECIALISTS:
1881
+ # The policy may want to STOP early; when it does, its
1882
+ # specialist-selection logits are all low/negative so
1883
+ # simply zeroing action[0] still produces garbage selection.
1884
+ # Fix: build a fresh action that directly picks the first
1885
+ # uncalled specialist with a hard positive logit (1.0).
1886
+ roster = env.active_specialist_ids
1887
+ uncalled = [sid for sid in roster if sid not in called_set]
1888
+ if uncalled:
1889
+ action = np.zeros(env.action_space.shape, dtype=np.float32)
1890
+ action[0] = 0.0 # MetaAction.CALL_SPECIALIST
1891
+ idx = roster.index(uncalled[0])
1892
+ if 1 + idx < len(action):
1893
+ action[1 + idx] = 1.0
1894
  obs, r, term, trunc, _ = env.step(action)
1895
  rewards.append(float(r))
1896
  done = term or trunc
 
1899
  called = list(env.called_ids)
1900
  edges = [(e.caller_id, e.callee_id)
1901
  for e in env.delegation_graph.get_delegation_path()]
1902
+ spawned = list(getattr(env, "spawned_this_episode", []))
1903
 
1904
  st.session_state.output_results = {
1905
  "task": task_used,
 
1978
  mc3.metric("Specialists Called", len(results["called"]))
1979
  mc4.metric("Auto-Spawned", len(results["spawned"]))
1980
 
1981
+ # Orchestrator widget
1982
+ sec("Orchestrator Β· Delegation Visualization")
1983
+ render_orchestrator({
1984
+ "called": results["called"],
1985
+ "active": "",
1986
+ "edges": results["edges"],
1987
+ "task": results["task"],
1988
+ "step": len(results["rewards"]),
1989
+ "mode": "SEQUENTIAL",
1990
+ "done": True,
1991
+ "reward": sum(results["rewards"]),
1992
+ "phase": int(st.session_state.get("output_phase", 2)),
1993
+ "spawned": results["spawned"],
1994
+ })
1995
+
1996
  # Delegation graph
1997
  sec("Delegation Graph")
1998
  if env_obj is not None:
 
2080
  # ─────────────────────────────────────────────────────────
2081
  def main():
2082
  inject_css()
 
2083
  S = _S()
2084
  render_live_stats(S)
2085
 
2086
  t1, t2, t3, t4, t5, t6, t7 = st.tabs([
2087
+ "🎯 Output",
2088
+ "⚑ Training Interface Example",
2089
  "πŸ€– Specialists",
2090
  "πŸ“ˆ Training",
2091
  "πŸ” Quality Demo",
2092
  "πŸ§ͺ Reward Lab",
2093
  "πŸ— Architecture",
 
2094
  ])
2095
+ with t1: tab_output()
2096
+ with t2: tab_live_demo()
2097
+ with t3: tab_specialists()
2098
+ with t4: tab_training()
2099
+ with t5: tab_quality()
2100
+ with t6: tab_reward_lab()
2101
+ with t7: tab_architecture()
2102
 
2103
 
2104
  # Guard allows safe imports for testing without triggering the UI.