seanpoyner committed on
Commit
daea45b
·
verified ·
1 Parent(s): 6cdce0d

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ demo.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:24.04
2
+ ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1
3
+ RUN apt-get update && apt-get install -y --no-install-recommends \
4
+ python3 python3-pip python3-venv ca-certificates && \
5
+ rm -rf /var/lib/apt/lists/*
6
+ RUN python3 -m venv /opt/venv
7
+ ENV PATH="/opt/venv/bin:$PATH"
8
+ WORKDIR /app
9
+ COPY requirements.txt smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl ./
10
+ RUN pip install --no-cache-dir -r requirements.txt \
11
+ ./smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl
12
+ COPY app.py demo.mp4 ./
13
+ COPY engine ./engine
14
+ COPY static ./static
15
+ # HF Docker Spaces run as uid 1000; let the agent write its workspace
16
+ RUN mkdir -p /app/.workspace && chmod -R 777 /app
17
+ ENV SMOLCODE_HOST=0.0.0.0 SMOLCODE_PORT=7860 HF_HOME=/tmp/hf
18
+ # Backend: full specialist matrix served from HAL via the public tunnel. Baked in
19
+ # (URL + "ollama" key are not secret) so it reaches the container reliably; swap
20
+ # this URL + rebuild to point at a durable endpoint for judging.
21
+ ENV SMALLCODE_PRESET=hal-matrix \
22
+ SMALLCODE_BASE_URL=https://collapse-snake-achieving-controversial.trycloudflare.com/v1 \
23
+ SMALLCODE_API_KEY=ollama
24
+ EXPOSE 7860
25
+ CMD ["python3", "app.py"]
README.md CHANGED
@@ -1,10 +1,96 @@
1
  ---
2
- title: Smolcode
3
- emoji: 👁
4
- colorFrom: green
5
  colorTo: indigo
6
  sdk: docker
 
7
  pinned: false
 
 
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: smolcode
3
+ emoji: 🤖
4
+ colorFrom: purple
5
  colorTo: indigo
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
+ license: apache-2.0
10
+ short_description: A tiny local model that writes code, runs it, and fixes it.
11
+ tags:
12
+ - build-small-hackathon
13
+ - agent
14
+ - code-generation
15
  ---
16
 
17
+
18
+ # smolcode 🤖
19
+
20
+ **A tiny local model that writes code, runs it, and fixes it — until it works.**
21
+
22
+ smolcode is an *agentic* coding assistant built for **small** language models. Instead of
23
+ autocompleting, it runs a **plan → write → execute → repair** loop: it writes a file, runs
24
+ it in a sandbox, reads the real error, and iterates until a test passes — on a model small
25
+ enough to run on your own machine (a ≤4B model on a laptop, scaling up to 32B on a
26
+ workstation). **No cloud APIs.**
27
+
28
+ Built for the [Hugging Face × Gradio **Build Small** Hackathon](https://huggingface.co/build-small-hackathon).
29
+
30
+ ## Why it's a "Build Small" entry
31
+ - **Agentic on a 3B model.** The loop — not the model size — does the work. A ≤4B model
32
+ drives tool calls reliably enough to write, run, and self-correct code.
33
+ - **Local-first & private.** Talks to any OpenAI-compatible endpoint (Ollama, llama.cpp).
34
+ Nothing leaves your machine.
35
+ - **Specialty routing.** A 2D router classifies tasks into 16 language/function
36
+ families and escalates within each family's fine-tuned ladder before falling back
37
+ to bigger Granite models.
38
+ - **Fine-tuned tiny coder.** We fine-tuned **Qwen2.5-Coder-1.5B** to emit native tool calls
39
+ so a ≤2B model can be the cheap entry tier — published at
40
+ [`seanpoyner/smolcode-coder-1.5b-tools`](https://huggingface.co/seanpoyner/smolcode-coder-1.5b-tools).
41
+ - **Rust core.** Agent loop, tool execution, and tracing run through
42
+ [**LiteForge**](https://github.com/seanpoyner/liteforge) and **smolcode-core**
43
+ (Rust/PyO3). Gradio is the (required) shell; the brain is Rust.
44
+
45
+ ## How to use this Space
46
+ 1. Type a coding task, e.g. *"write a function that validates an email and test it."*
47
+ 2. Watch the **agent trace** stream live: `write_file → run_python → (error) → fix → pass`.
48
+ 3. The **router** badge shows which tier solved it and whether it's **✓ verified**.
49
+ 4. Tick **⚡ fan out** and enter several lines to run independent tasks as **parallel subagents**.
50
+
51
+ ## Benchmark — the loop is the product
52
+ The agentic loop is what makes a tiny model useful. On the same HumanEval-style suite
53
+ (`bench/tasks.py`, 10 tasks, pass@1):
54
+
55
+ <!-- BENCH_TABLE_START -->
56
+ | System | Model | pass@1 |
57
+ |--------|-------|--------|
58
+ | single-shot | fine-tuned **1.5B** | 50% |
59
+ | **agentic loop** | fine-tuned **1.5B** | **70%** |
60
+ | single-shot | granite4.1:3b | 90% |
61
+
62
+ *The write→run→fix loop lifts the fine-tuned 1.5B from **50% → 70%** (+20 pts) — the
63
+ loop, not raw model size, does the work. A larger model (granite 3B) scores higher
64
+ single-shot, which is exactly why the router escalates only when the small tier can't
65
+ verify. Measured with `bench/run.py` on the hal backend.*
66
+ <!-- BENCH_TABLE_END -->
67
+
68
+ ## Under the hood
69
+ ```
70
+ Gradio UI → smolcode-core / LiteForge (Rust/PyO3) → OpenAI-compatible endpoint
71
+ specialty router + agent loop
72
+ tools: write_file, read_file, run_python, run_tests
73
+ served by Ollama / llama.cpp
74
+ ```
75
+
76
+ There's also a full terminal agent (`smolcode-cli`, a Rust ratatui TUI) and a
77
+ Replit/Lovable-style app builder (`smolbuilder.py`) on the same engine.
78
+
79
+ - **Code:** https://github.com/seanpoyner/smolcode
80
+ - **Model:** https://huggingface.co/seanpoyner/smolcode-coder-1.5b-tools
81
+ - **Engine:** https://github.com/seanpoyner/liteforge
82
+ - **App builder companion:** https://huggingface.co/spaces/seanpoyner/smolbuilder
83
+
84
+ ## Demo video
85
+ <video controls src="https://huggingface.co/spaces/seanpoyner/smolcode/resolve/main/demo.mp4"></video>
86
+
87
+ [▶️ Watch the demo](https://huggingface.co/spaces/seanpoyner/smolcode/resolve/main/demo.mp4) — the agent writes code, runs it, fixes the failing test, and shows the router tier that solved it.
88
+
89
+ ## Share
90
+ > Most coding tasks don't need a giant model. **smolcode** is a coding agent that runs entirely on a *small local model* — it writes the code, runs it, reads the real error, and fixes itself until tests pass. Fine-tuned **1.5B** coder; the router escalates a tier only when needed (all ≤32B). Less compute, same result.
91
+ >
92
+ > Built for the #BuildSmall hackathon with @huggingface + @Gradio. 🦀 Rust core.
93
+ > ▶️ https://huggingface.co/spaces/seanpoyner/smolcode
94
+ > #SmallModels #LocalAI #Gradio #BuildSmall
95
+
96
+ 📣 **Posted on LinkedIn:** https://www.linkedin.com/posts/sean-poyner_buildsmall-smallmodels-localai-share-7472421438109650944-bQGy/
app.py ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """smolcode — CLI-parity web UI over the Rust engine."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+
9
+ import gradio as gr
10
+
11
+ from engine import Router, load_preset
12
+ from engine.config import (
13
+ Preset,
14
+ Tier,
15
+ is_specialty_model,
16
+ parse_size_b,
17
+ specialist_sizes,
18
+ )
19
+ from engine.branding import SMOLCODE_CSS
20
+ from engine.gradio_shell import (
21
+ AppSessionState,
22
+ SlashResult,
23
+ UiSettings,
24
+ dispatch_slash,
25
+ parse_input,
26
+ )
27
+ from engine.preflight import list_models
28
+ from engine.router import RouteResult
29
+ from engine.rust_session import (
30
+ RustSession,
31
+ apply_settings,
32
+ get_session_chat,
33
+ git_status,
34
+ list_background_jobs,
35
+ load_rust_config,
36
+ parse_session_label,
37
+ session_choices,
38
+ workspace_paths,
39
+ AUTOCOMPLETE_FILE_LIMIT,
40
+ UI_FILE_LIMIT,
41
+ )
42
+ from engine.trace import build_trace, save_trace
43
+ from engine.themes import theme_at
44
+ from engine.web_tui import (
45
+ Transcript,
46
+ agent_choices,
47
+ cycle_agent,
48
+ cycle_mode,
49
+ cycle_model,
50
+ cycle_think,
51
+ header_bar_html,
52
+ help_overlay_html,
53
+ host_from_url,
54
+ ingest_agent_event,
55
+ parse_git_header,
56
+ render_picker_html,
57
+ render_sidebar_html,
58
+ shell_theme_html,
59
+ slash_commands,
60
+ status_bar_html,
61
+ theme_picker_items,
62
+ whichkey_overlay_html,
63
+ )
64
+
65
+ PRESET = load_preset()
66
+ _JS_HEAD = (Path(__file__).parent / "static" / "web_tui.js").read_text()
67
+
68
+
69
@dataclass
class WebUiState:
    """Mutable view state of the web TUI: sidebar, theme, overlay and picker."""

    # Sidebar: visibility, active view (toggled between "files" and "stats"
    # elsewhere in this module) and the selected row.
    sidebar_visible: bool = True
    sidebar_view: str = "files"
    sidebar_sel: int = 0
    # Index handed to theme_at()/shell_theme_html().
    theme_idx: int = 0
    # Active overlay: "" (none), "help", "whichkey" or "picker".
    overlay: str = ""
    # Picker contents; only rendered while overlay == "picker".
    picker_kind: str = ""
    picker_items: list[str] = field(default_factory=list)
    picker_sel: int = 0
    # Total workspace file count reported by workspace_paths().
    file_total: int = 0
    # Blocking startup model pick: stays True until the user chooses a model.
    needs_model_pick: bool = True
82
+
83
+
84
+ def _normalize_paths(files: list[str] | dict[str, str] | None) -> list[str]:
85
+ if not files:
86
+ return []
87
+ if isinstance(files, dict):
88
+ paths = sorted(files.keys())
89
+ else:
90
+ paths = sorted(files)
91
+ return paths[:UI_FILE_LIMIT]
92
+
93
+
94
def _cfg() -> dict:
    """Return the configuration mapping produced by load_rust_config()."""
    config = load_rust_config()
    return config
96
+
97
+
98
def _ensure_rust(app_state: AppSessionState, settings: UiSettings) -> RustSession:
    """Return the app's RustSession, creating it on first use.

    The current UI settings are applied to the session on every call, whether
    it was just created or already existed.
    """
    session = app_state.rust
    if session is None:
        session = RustSession(
            workspace=settings.workspace,
            agent=settings.agent,
            yolo=settings.yolo,
            # None for Auto selections, so the router chooses the model.
            model=_pinned_model(settings.model),
            base_url=_cfg().get("base_url"),
            approval_handler=app_state.approval.ask,
        )
        app_state.rust = session
    apply_settings(session, settings)
    return session
110
+
111
+
112
+ # --- curated model picker (Auto-first, <=32B, specialty fine-tunes collapsed) -------
113
+ # Each row is (label, model, think). model "auto"/"auto:<size>" are router pseudo-tags
114
+ # interpreted by engine/router.py + rust_session.apply_settings; think "off" means the
115
+ # router derives the level.
116
+ _AUTO_ENTRIES: list[tuple[str, str, str]] = [
117
+ ("Auto", "auto", "off"),
118
+ ("Auto · think low", "auto", "low"),
119
+ ("Auto · think high", "auto", "high"),
120
+ ("Auto · think xtra", "auto", "xtra"),
121
+ ]
122
+
123
+
124
def _model_entries() -> list[tuple[str, str, str]]:
    """Build every picker row as a (label, model, think) triple.

    Order: the fixed Auto rows, one "Auto · <SIZE>" row per size returned by
    specialist_sizes(PRESET), then concrete models (endpoint listing first,
    preset tier models after). Concrete models are de-duplicated, and those
    flagged by is_specialty_model() or sized above 32 by parse_size_b() are
    left out.
    """
    rows = [*_AUTO_ENTRIES]
    for size in specialist_sizes(PRESET):
        rows.append((f"Auto · {size.upper()}", f"auto:{size}", "off"))

    already_listed: set[str] = set()
    tier_models = [tier.model for tier in PRESET.tiers if tier.model]
    served_models = list_models(_cfg().get("base_url", PRESET.base_url))
    for tag in served_models + tier_models:
        if not tag or tag in already_listed:
            continue
        if is_specialty_model(tag) or parse_size_b(tag) > 32:
            continue
        already_listed.add(tag)
        rows.append((tag, tag, "off"))
    return rows
139
+
140
+
141
def _model_labels() -> list[str]:
    """Return just the label column of the model picker rows, in row order."""
    labels: list[str] = []
    for label, _model, _think in _model_entries():
        labels.append(label)
    return labels
143
+
144
+
145
def _label_to_selection(label: str) -> tuple[str, str] | None:
    """Look up the (model, think) pair of the first picker row labelled ``label``.

    Returns None when no row carries that label.
    """
    matches = (
        (model, think)
        for row_label, model, think in _model_entries()
        if row_label == label
    )
    return next(matches, None)
151
+
152
+
153
+ def _model_sel_index(settings: UiSettings) -> int:
154
+ """Row index matching the current (model, think); falls back to 0 (Auto)."""
155
+ entries = _model_entries()
156
+ cur_m = settings.model or "auto"
157
+ cur_t = settings.think or "off"
158
+ for i, (_l, m, t) in enumerate(entries): # exact (model, think) wins
159
+ if m == cur_m and t == cur_t:
160
+ return i
161
+ for i, (_l, m, _t) in enumerate(entries): # else first model match
162
+ if m == cur_m:
163
+ return i
164
+ return 0
165
+
166
+
167
+ def _selection_label(settings: UiSettings) -> str:
168
+ """Friendly label for the current selection (model chip in header/status)."""
169
+ entries = _model_entries()
170
+ return entries[_model_sel_index(settings)][0] if entries else "Auto"
171
+
172
+
173
+ def _pinned_model(model_sel: str | None) -> str | None:
174
+ """The concrete model tag to pin, or None for Auto/Auto·size (router-driven)."""
175
+ m = model_sel or ""
176
+ return None if (not m or m == "auto" or m.startswith("auto:")) else m
177
+
178
+
179
def _effective_preset(model_sel: str | None):
    """Translate a picker selection into a (preset, size_floor) pair.

    * "auto" (or nothing selected): the module preset, no size floor.
    * "auto:<size>": the module preset with ``<size>`` as the floor (None when
      the part after the colon is empty).
    * any other tag: a single-tier preset named "custom" for that tag, reusing
      the module preset's key, base URL and API key; no size floor.
    """
    choice = model_sel or "auto"
    if choice == "auto":
        return PRESET, None
    if choice.startswith("auto:"):
        _prefix, _sep, size = choice.partition(":")
        return PRESET, (size or None)
    pinned = Preset(
        key=PRESET.key,
        base_url=PRESET.base_url,
        api_key=PRESET.api_key,
        tiers=[Tier("custom", choice)],
    )
    return pinned, None
195
+
196
+
197
+ def _picker_items(kind: str, settings: UiSettings) -> list[str]:
198
+ if kind == "models":
199
+ return _model_labels()
200
+ if kind == "themes":
201
+ return theme_picker_items()
202
+ if kind == "agents":
203
+ return agent_choices()
204
+ if kind == "sessions":
205
+ return session_choices()
206
+ return []
207
+
208
+
209
+ def _picker_sel_for(kind: str, settings: UiSettings, ui: WebUiState, items: list[str]) -> int:
210
+ if not items:
211
+ return 0
212
+ if kind == "models":
213
+ return _model_sel_index(settings)
214
+ if kind == "themes":
215
+ name = theme_at(ui.theme_idx).name
216
+ return items.index(name) if name in items else 0
217
+ if kind == "agents":
218
+ cur = settings.agent if settings.mode != "plan" else "plan"
219
+ return items.index(cur) if cur in items else 0
220
+ return 0
221
+
222
+
223
def _header(settings: UiSettings, ui: WebUiState) -> str:
    """Render the header bar HTML: git branch/dirty flag, model, host and theme."""
    branch, dirty = parse_git_header(git_status(settings.workspace))
    model_label = _selection_label(settings)
    endpoint_host = host_from_url(_cfg().get("base_url", ""))
    theme_name = theme_at(ui.theme_idx).name
    return header_bar_html(
        git_branch=branch,
        git_dirty=dirty,
        model=model_label,
        host=endpoint_host,
        theme=theme_name,
    )
233
+
234
+
235
def _status(settings: UiSettings, app_state: AppSessionState, *, running: bool = False) -> str:
    """Render the status bar HTML.

    The session title is the first 8 characters of the Rust session id, or
    "new session" when no Rust session exists yet.
    """
    if app_state.rust:
        title = f"session {app_state.rust.session_id[:8]}"
    else:
        title = "new session"
    model_label = _selection_label(settings)
    return status_bar_html(settings, session_title=title, model=model_label, running=running)
242
+
243
+
244
def _sidebar_html(ui: WebUiState, settings: UiSettings, files: list[str], app_state: AppSessionState) -> str:
    """Render the sidebar HTML for the current view, files and session.

    The session id is "(none)" until a Rust session exists; the file total
    falls back to ``len(files)`` when ``ui.file_total`` is 0.
    """
    session_id = "(none)"
    if app_state.rust:
        session_id = app_state.rust.session_id
    total = ui.file_total or len(files)
    return render_sidebar_html(
        view=ui.sidebar_view,
        files=files,
        selected=ui.sidebar_sel,
        session_id=session_id,
        agent=settings.agent,
        file_total=total,
    )
254
+
255
+
256
+ def _overlay_html(ui: WebUiState) -> str:
257
+ if ui.overlay == "help":
258
+ return f'<div class="sc-overlay"><div class="sc-overlay-panel">{help_overlay_html()}</div></div>'
259
+ if ui.overlay == "whichkey":
260
+ return f'<div class="sc-overlay"><div class="sc-overlay-panel">{whichkey_overlay_html()}</div></div>'
261
+ if ui.overlay == "picker" and ui.picker_kind:
262
+ panel = render_picker_html(
263
+ ui.picker_kind,
264
+ ui.picker_items,
265
+ ui.picker_sel,
266
+ title=ui.picker_kind,
267
+ )
268
+ return f'<div class="sc-overlay"><div class="sc-overlay-panel">{panel}</div></div>'
269
+ return ""
270
+
271
+
272
def _js_boot_lines(settings: UiSettings, files: list[str]) -> str:
    """Build the JavaScript statements that seed the browser-side globals.

    Sets ``window.__smolcode_workspace``, ``window.__smolcode_commands`` and
    ``window.__smolcode_files`` (sorted, capped at AUTOCOMPLETE_FILE_LIMIT).

    The result is placed inside a ``<script>`` element by the callers, so each
    JSON value has ``<`` written as ``\\u003c``. Without that, a workspace path
    or file name containing ``</script>`` would end the script element early
    and be parsed as HTML. The escape decodes back to ``<`` in JavaScript, so
    the values seen by the page are unchanged.
    """

    def _script_safe(value) -> str:
        # In json.dumps output "<" can only occur inside string literals, where
        # the \u003c escape is equivalent.
        return json.dumps(value).replace("<", "\\u003c")

    cmds = slash_commands(settings.workspace)
    paths = sorted(files)[:AUTOCOMPLETE_FILE_LIMIT]
    return (
        f"window.__smolcode_workspace={_script_safe(settings.workspace)};"
        f"window.__smolcode_commands={_script_safe(cmds)};"
        f"window.__smolcode_files={_script_safe(paths)};"
    )
280
+
281
+
282
def _embed_js(settings: UiSettings, files: list[str]) -> str:
    """Wrap the boot statements from _js_boot_lines() in a ``<script>`` element."""
    boot = _js_boot_lines(settings, files)
    return "<script>" + boot + "</script>"
284
+
285
+
286
def _outputs(
    transcript: Transcript,
    app_state: AppSessionState,
    settings: UiSettings,
    ui: WebUiState,
    files: list[str],
    *,
    running: bool = False,
    trace_path: str | None = None,
):
    """Assemble the 15-item output tuple shared by the chat handlers.

    Positions, in order: transcript HTML, header HTML, status HTML, sidebar
    update, overlay update, theme HTML, approval-box visibility update,
    approval description, files, trace path, app state, settings, UI state,
    transcript object, and "" (which clears the editor).
    """
    overlay_markup = _overlay_html(ui)
    transcript_markup = transcript.render_html(running=running)
    header_markup = _header(settings, ui)
    status_markup = _status(settings, app_state, running=running)
    sidebar_update = gr.update(
        value=_sidebar_html(ui, settings, files, app_state),
        visible=ui.sidebar_visible,
    )
    # The overlay component is hidden whenever there is no markup to show.
    overlay_update = gr.update(value=overlay_markup, visible=bool(overlay_markup))
    theme_markup = shell_theme_html(ui.theme_idx)
    # The approval box is shown only while an approval description is pending.
    approval_update = gr.update(visible=bool(app_state.approval.pending_desc))
    approval_text = app_state.approval.pending_desc or ""
    return (
        transcript_markup,
        header_markup,
        status_markup,
        sidebar_update,
        overlay_update,
        theme_markup,
        approval_update,
        approval_text,
        files,
        trace_path,
        app_state,
        settings,
        ui,
        transcript,
        "",  # clear editor
    )
314
+
315
+
316
+ def _apply_slash_ui(sr: SlashResult, settings: UiSettings, ui: WebUiState, transcript: Transcript):
317
+ if sr.cycle_mode:
318
+ settings.mode = cycle_mode(settings.mode)
319
+ transcript.append_info(f"mode → {settings.mode}")
320
+ if sr.cycle_think:
321
+ settings.think = cycle_think(settings.think)
322
+ transcript.append_info(f"think → {settings.think}")
323
+ if sr.set_think:
324
+ settings.think = sr.set_think
325
+ transcript.append_info(f"think → {settings.think}")
326
+ if sr.toggle_sidebar:
327
+ ui.sidebar_visible = not ui.sidebar_visible
328
+ if sr.toggle_sidebar_view:
329
+ ui.sidebar_view = "stats" if ui.sidebar_view == "files" else "files"
330
+ if sr.show_help:
331
+ ui.overlay = "help"
332
+ if sr.show_whichkey:
333
+ ui.overlay = "whichkey"
334
+ if sr.open_picker:
335
+ ui.overlay = "picker"
336
+ ui.picker_kind = sr.open_picker
337
+ ui.picker_items = _picker_items(sr.open_picker, settings)
338
+ ui.picker_sel = _picker_sel_for(sr.open_picker, settings, ui, ui.picker_items)
339
+ transcript.append_info(f"picker → {sr.open_picker}")
340
+
341
+
342
async def _run_agent_turn(
    task: str,
    transcript: Transcript,
    app_state: AppSessionState,
    settings: UiSettings,
    ui: WebUiState,
    files: list[str],
):
    """Run one agent task, yielding an ``_outputs`` tuple per UI refresh.

    Flow:
      1. If the startup model pick is still pending, reopen the model picker,
         yield once and stop without running anything.
      2. Otherwise build a Router for the current selection, record the routed
         tier in the transcript and yield a "running" frame.
      3. Stream ``router.run_live``: feed raw events into the transcript, track
         the latest file list, and yield after every frame.
      4. If the run produced an agent result and was not cancelled, refresh
         background jobs, save the session and try to save a trace; then yield
         the final frame carrying the trace path (None if there is none).
    """
    # Blocking model pick: refuse to run until the user has chosen from the modal.
    if ui.needs_model_pick:
        ui.overlay = "picker"
        ui.picker_kind = "models"
        ui.picker_items = _model_labels()
        ui.picker_sel = _model_sel_index(settings)
        transcript.append_info("pick a model to start — Auto is recommended")
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    rust = _ensure_rust(app_state, settings)
    rust.clear_cancel()
    preset, size_floor = _effective_preset(settings.model)
    router = Router(
        preset=preset,
        approval_handler=app_state.approval.ask,
        workspace_dir=settings.workspace,
        think=settings.think,
        yolo=settings.yolo,
        agent=settings.agent,
        size_floor=size_floor,
    )
    ladder, start, _think = router._route(task)  # real routing for the badge
    transcript.append_user(task)
    transcript.append_info(f"routed to {ladder.tiers[start].name}")
    ui.overlay = ""
    yield _outputs(transcript, app_state, settings, ui, files, running=True)

    result: RouteResult | None = None
    # The cancel flag is checked on every frame; report the interruption only
    # once so the transcript does not fill with duplicate "interrupted" rows.
    interrupt_reported = False
    async for frame in router.run_live(task, rust_session=rust):
        if frame.raw_event:
            ingest_agent_event(transcript, frame.raw_event)
        if frame.files:
            files = _normalize_paths(frame.files)
        if frame.done and isinstance(frame.result, RouteResult):
            result = frame.result
        if rust.cancelled and not interrupt_reported:
            transcript.append_error("interrupted")
            interrupt_reported = True
        yield _outputs(transcript, app_state, settings, ui, files, running=not frame.done)

    trace_path = None
    if result and result.agent and not rust.cancelled:
        app_state.bg_jobs = list_background_jobs()
        rust.save()
        try:
            trace_path = str(save_trace(build_trace(
                result.agent, task, result.final,
                preset=PRESET.key, model=result.tier_model,
            )))
        except Exception as exc:
            # Saving the trace is best-effort and must not fail the turn, but
            # the user should be told why no trace is available.
            transcript.append_info(f"trace not saved: {exc}")
    yield _outputs(transcript, app_state, settings, ui, files, trace_path=trace_path)
402
+
403
+
404
async def respond(
    message: str,
    transcript: Transcript,
    app_state: AppSessionState,
    settings: UiSettings,
    ui: WebUiState,
    files: list[str],
):
    """Handle one editor submission and yield ``_outputs`` tuples.

    The stripped message is classified by parse_input() into a plain task, a
    slash command or a shell command:
      * blank input just re-renders;
      * a shell command runs through the Rust session and its output is shown;
      * "/search <q>" searches the transcript;
      * any other slash command goes through dispatch_slash(), which may also
        queue an agent task;
      * everything else runs as an agent task.
    """
    text = (message or "").strip()
    app_state.settings = settings

    if not text:
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    _task, slash, shell_cmd = parse_input(
        text,
        workspace_files=files,
        workspace=settings.workspace,
        rust=app_state.rust,
    )

    if shell_cmd:
        session = _ensure_rust(app_state, settings)
        shell_output = session.run_shell(shell_cmd)
        transcript.append_user(f"!{shell_cmd}")
        transcript.append_info(shell_output)
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    if not slash:
        # Plain task: hand it straight to the agent loop.
        async for frame in _run_agent_turn(_task, transcript, app_state, settings, ui, files):
            yield frame
        return

    if slash.startswith("/search "):
        query = slash.split(maxsplit=1)[1]
        hits = transcript.search(query)
        transcript.append_user(slash)
        if hits:
            transcript.append_info("\n".join(hits))
        else:
            transcript.append_info(f"no matches for '{query}'")
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    outcome = dispatch_slash(slash, app_state)
    _apply_slash_ui(outcome, settings, ui, transcript)
    if outcome.clear_chat:
        transcript.clear()
    if outcome.reply:
        transcript.append_user(slash)
        # Drop the markdown bold/code markers before showing the reply.
        plain_reply = outcome.reply.replace("**", "").replace("`", "")
        transcript.append_info(plain_reply)
    if outcome.queued_task:
        async for frame in _run_agent_turn(
            outcome.queued_task, transcript, app_state, settings, ui, files
        ):
            yield frame
        return
    yield _outputs(transcript, app_state, settings, ui, files, trace_path=outcome.download_path)
    return
460
+
461
+
462
def on_interrupt(app_state: AppSessionState):
    """Ask the active Rust session (if there is one) to cancel; return the state."""
    session = app_state.rust
    if session:
        session.request_cancel()
    return app_state
466
+
467
+
468
def on_clear(transcript: Transcript, ui: WebUiState):
    """Empty the transcript and reset the overlay/picker state.

    Returns ``(transcript, ui, "")``.
    """
    transcript.clear()
    ui.overlay, ui.picker_kind = "", ""
    ui.picker_items, ui.picker_sel = [], 0
    return transcript, ui, ""
475
+
476
+
477
def on_close_overlay(ui: WebUiState):
    """Reset the overlay/picker state and hide the overlay component."""
    ui.overlay, ui.picker_kind = "", ""
    ui.picker_items, ui.picker_sel = [], 0
    hidden_overlay = gr.update(value="", visible=False)
    return ui, hidden_overlay
483
+
484
+
485
def on_open_picker(kind: str, ui: WebUiState, settings: UiSettings):
    """Open the picker of the given kind and show the overlay.

    The rows come from _picker_items() and the highlighted row from
    _picker_sel_for().
    """
    rows = _picker_items(kind, settings)
    ui.overlay = "picker"
    ui.picker_kind = kind
    ui.picker_items = rows
    ui.picker_sel = _picker_sel_for(kind, settings, ui, rows)
    return ui, gr.update(value=_overlay_html(ui), visible=True)
492
+
493
+
494
def on_picker_nav(delta: int, ui: WebUiState):
    """Move the picker highlight by ``delta`` rows, clamped to the list bounds.

    With no picker rows the highlight is left as it is. The overlay is
    re-rendered either way and hidden when it renders empty.
    """
    rows = ui.picker_items
    if rows:
        last = len(rows) - 1
        ui.picker_sel = min(last, max(0, ui.picker_sel + delta))
    markup = _overlay_html(ui)
    return ui, gr.update(value=markup, visible=bool(markup))
499
+
500
+
501
def on_picker_select(
    pick_idx: str,
    ui: WebUiState,
    settings: UiSettings,
    app_state: AppSessionState,
    transcript: Transcript,
    files: list[str],
):
    """Apply the picker row the user chose, then close the picker.

    Args:
        pick_idx: row index as text; blank or non-numeric input falls back to
            the currently highlighted row. The index is clamped to the list.
        ui, settings, app_state, transcript: state objects, updated in place.
        files: accepted for the event signature; not used here.

    Effect by picker kind:
        * models: store the row's (model, think) and clear the startup
          model-pick requirement.
        * themes: switch to the theme whose name matches the row.
        * agents: set the agent, unless the mode is "plan".
        * sessions: load the stored session and rebuild the transcript from it.

    Returns:
        (transcript HTML, header HTML, status HTML, hidden overlay update,
        theme HTML, settings, ui, transcript, app_state).
    """
    try:
        idx = int(pick_idx) if pick_idx else ui.picker_sel
    except ValueError:
        idx = ui.picker_sel
    kind = ui.picker_kind
    items = ui.picker_items
    if items:
        idx = max(0, min(len(items) - 1, idx))
        item = items[idx]
        if kind == "models":
            sel = _label_to_selection(item)
            if sel:
                settings.model, settings.think = sel
                ui.needs_model_pick = False
                transcript.append_info(f"model → {item}")
        elif kind == "themes":
            # This branch used to call theme_names(), which this module never
            # imports, so picking a theme raised NameError. Resolve the index
            # with the imported theme_at() instead.
            # NOTE(review): assumes theme indices run 0..len(items)-1, i.e. the
            # picker lists one row per theme — confirm against engine.themes.
            for theme_idx in range(len(items)):
                if theme_at(theme_idx).name == item:
                    ui.theme_idx = theme_idx
                    transcript.append_info(f"theme → {item}")
                    break
        elif kind == "agents":
            if settings.mode != "plan":
                settings.agent = item
                transcript.append_info(f"agent → {item}")
        elif kind == "sessions":
            sid = parse_session_label(item)
            if sid:
                rust = RustSession(workspace=settings.workspace, agent=settings.agent, yolo=settings.yolo)
                if rust.load_session(sid):
                    app_state.rust = rust
                    transcript.clear()
                    transcript.from_stored_chat(get_session_chat(sid))
                    transcript.append_info(f"loaded session {sid[:8]}")
    # Always close the picker, whether or not the selection was applied.
    ui.overlay = ""
    ui.picker_kind = ""
    ui.picker_items = []
    ui.picker_sel = 0
    overlay_val = _overlay_html(ui)
    return (
        transcript.render_html(),
        _header(settings, ui),
        _status(settings, app_state),
        gr.update(value=overlay_val, visible=False),
        shell_theme_html(ui.theme_idx),
        settings,
        ui,
        transcript,
        app_state,
    )
557
+
558
+
559
def _cycle_outputs(
    settings: UiSettings,
    ui: WebUiState,
    app_state: AppSessionState,
    transcript: Transcript,
):
    """Build the 6-item output tuple shared by the ``on_cycle_*`` handlers.

    Order: settings, transcript, transcript HTML, header HTML, status HTML,
    theme HTML.
    """
    transcript_markup = transcript.render_html()
    header_markup = _header(settings, ui)
    status_markup = _status(settings, app_state)
    theme_markup = shell_theme_html(ui.theme_idx)
    return (settings, transcript, transcript_markup, header_markup, status_markup, theme_markup)
573
+
574
+
575
def on_toggle_sidebar(ui: WebUiState, settings: UiSettings, files: list[str], app_state: AppSessionState):
    """Flip sidebar visibility and return the re-rendered sidebar update."""
    now_visible = not ui.sidebar_visible
    ui.sidebar_visible = now_visible
    markup = _sidebar_html(ui, settings, files, app_state)
    return ui, gr.update(value=markup, visible=now_visible)
581
+
582
+
583
def on_toggle_sidebar_view(
    ui: WebUiState, settings: UiSettings, files: list[str], app_state: AppSessionState,
):
    """Switch the sidebar between its "files" and "stats" views and re-render it."""
    if ui.sidebar_view == "files":
        ui.sidebar_view = "stats"
    else:
        ui.sidebar_view = "files"
    markup = _sidebar_html(ui, settings, files, app_state)
    return ui, gr.update(value=markup)
588
+
589
+
590
def on_load(settings: UiSettings, app_state: AppSessionState, ui: WebUiState):
    """Populate the page on load.

    Reads the workspace file list, records the total on ``ui``, and opens the
    blocking model picker while the startup model pick is still pending.

    Returns:
        (sidebar HTML, paths, boot ``<script>`` markup, session-choices update,
        overlay update, ui).
    """
    paths, total = workspace_paths(settings.workspace)
    ui.file_total = total

    overlay_markup = ""
    if ui.needs_model_pick:  # blocking startup model picker
        ui.overlay = "picker"
        ui.picker_kind = "models"
        ui.picker_items = _model_labels()
        ui.picker_sel = _model_sel_index(settings)
        overlay_markup = _overlay_html(ui)

    sidebar_markup = _sidebar_html(ui, settings, paths, app_state)
    boot_script = _embed_js(settings, paths)
    sessions_update = gr.update(choices=session_choices())
    overlay_update = gr.update(value=overlay_markup, visible=bool(overlay_markup))
    return (sidebar_markup, paths, boot_script, sessions_update, overlay_update, ui)
608
+
609
+
610
def on_cycle_mode(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance to the next mode and keep the agent and yolo flag in step with it.

    Entering "plan" mode forces the "plan" agent; leaving it swaps a lingering
    "plan" agent back to "build". Yolo is on exactly when the mode is "auto".
    """
    settings.mode = cycle_mode(settings.mode)
    mode = settings.mode
    if mode == "plan":
        settings.agent = "plan"
    elif settings.agent == "plan":
        settings.agent = "build"
    settings.yolo = (mode == "auto")
    transcript.append_info(f"mode → {mode}")
    return _cycle_outputs(settings, ui, app_state, transcript)
619
+
620
+
621
def on_cycle_agent(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance to the next agent; a no-op (apart from re-rendering) in "plan" mode."""
    plan_locked = settings.mode == "plan"
    if not plan_locked:
        settings.agent = cycle_agent(settings.agent)
        transcript.append_info(f"agent → {settings.agent}")
    return _cycle_outputs(settings, ui, app_state, transcript)
626
+
627
+
628
def on_cycle_model(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance to the next model picker row and adopt its (model, think).

    A successful switch also clears the startup model-pick requirement.
    """
    next_label = cycle_model(_model_labels(), _selection_label(settings))
    selection = _label_to_selection(next_label)
    if selection:
        settings.model, settings.think = selection
        ui.needs_model_pick = False
        transcript.append_info(f"model → {next_label}")
    return _cycle_outputs(settings, ui, app_state, transcript)
637
+
638
+
639
def on_cycle_think(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance to the next think level and note the change in the transcript."""
    next_level = cycle_think(settings.think)
    settings.think = next_level
    transcript.append_info(f"think → {settings.think}")
    return _cycle_outputs(settings, ui, app_state, transcript)
643
+
644
+
645
def on_help(ui: WebUiState):
    """Switch the overlay to the help panel and show it."""
    ui.overlay = "help"
    markup = _overlay_html(ui)
    shown = gr.update(value=markup, visible=True)
    return ui, shown
649
+
650
+
651
def on_whichkey(ui: WebUiState):
    """Switch the overlay to the which-key panel and show it."""
    ui.overlay = "whichkey"
    markup = _overlay_html(ui)
    shown = gr.update(value=markup, visible=True)
    return ui, shown
655
+
656
+
657
def on_new_session():
    """Start over with fresh state and reopen the blocking model picker.

    The workspace comes from the SMALLCODE_WORKSPACE environment variable
    (default "."), and the model selection resets to "auto".

    Returns:
        (new Transcript, new AppSessionState, settings, ui, [], None,
        visible overlay update).
    """
    workspace = os.environ.get("SMALLCODE_WORKSPACE", ".")
    settings = UiSettings(workspace=workspace, model="auto")
    # needs_model_pick keeps its default of True, so tasks stay blocked until
    # the user picks a model again.
    ui = WebUiState(
        overlay="picker",
        picker_kind="models",
        picker_items=_model_labels(),
        picker_sel=_model_sel_index(settings),
    )
    fresh_transcript = Transcript()
    fresh_state = AppSessionState()
    overlay_update = gr.update(value=_overlay_html(ui), visible=True)
    return (fresh_transcript, fresh_state, settings, ui, [], None, overlay_update)
668
+
669
+
670
def on_approval(yes: bool, app_state: AppSessionState):
    """Forward the user's approve/deny answer, then hide and clear the approval box."""
    if app_state:
        app_state.approval.approve(yes)
    hidden_box = gr.update(visible=False)
    return hidden_box, ""
674
+
675
+
676
def on_session_pick(label: str, app_state: AppSessionState, settings: UiSettings):
    """Load the stored session named by ``label`` and rebuild its transcript.

    Returns ``(transcript, app_state)``. When the label does not parse to a
    session id, or the session fails to load, the transcript is empty and
    ``app_state`` is left unchanged.
    """
    session_id = parse_session_label(label or "")
    if session_id:
        candidate = RustSession(workspace=settings.workspace, agent=settings.agent, yolo=settings.yolo)
        if candidate.load_session(session_id):
            app_state.rust = candidate
            restored = Transcript()
            restored.from_stored_chat(get_session_chat(session_id))
            return restored, app_state
    return Transcript(), app_state
687
+
688
+
689
def build() -> gr.Blocks:
    """Construct the smolcode Gradio ``Blocks`` app and wire its event handlers.

    Creates the ``gr.State`` holders, the shell (header, sidebar, transcript,
    editor, approval box, status bar), the overlay, and a row of off-screen
    controls, then binds each control to its handler.

    NOTE(review): this listing was extracted from a diff view that stripped
    indentation. The nesting of the ``with`` blocks below (in particular where
    ``approval_box`` and ``status`` sit) is reconstructed — confirm against the
    original file before relying on it.

    Returns:
        The assembled ``gr.Blocks`` instance; launching is left to the caller.
    """
    default_ws = os.environ.get("SMALLCODE_WORKSPACE", ".")
    # Default selection is Auto (router-driven); the blocking startup modal lets the
    # user confirm or change it before the first task.
    settings = UiSettings(workspace=default_ws, model="auto")

    with gr.Blocks(
        css=SMOLCODE_CSS,
        title="smolcode",
        theme=gr.themes.Soft(primary_hue="purple", neutral_hue="slate"),
        head=f"<script>{_JS_HEAD}\n{_js_boot_lines(settings, [])}</script>",
        fill_height=True,
        fill_width=True,
    ) as demo:
        # State holders passed in and out of the handlers below.
        transcript = gr.State(Transcript())
        app_state = gr.State(AppSessionState(settings=settings))
        settings_state = gr.State(settings)
        ui_state = gr.State(WebUiState())
        files_state = gr.State([])
        trace_state = gr.State(None)

        with gr.Column(elem_classes="sc-tui-shell"):
            header = gr.HTML(_header(settings, WebUiState()))
            shell_theme = gr.HTML(shell_theme_html(0), visible=False)
            with gr.Row(elem_classes="sc-main-row"):
                sidebar = gr.HTML(
                    _sidebar_html(WebUiState(), settings, [], AppSessionState()),
                    elem_classes="sc-sidebar",
                    visible=True,
                )
                with gr.Column(elem_classes="sc-main-col"):
                    transcript_view = gr.HTML(Transcript().render_html())
                    with gr.Group(elem_classes="sc-editor-wrap"):
                        gr.HTML(
                            '<div class="sc-editor-hint">'
                            "Enter run · Shift+Enter newline · / commands · ctrl+x leader"
                            "</div>"
                        )
                        editor = gr.Textbox(
                            placeholder="type a task…",
                            lines=5,
                            max_lines=8,
                            show_label=False,
                            elem_id="sc-editor",
                            interactive=True,
                            autofocus=True,
                        )
                    # Starts hidden; both buttons route to on_approval, whose
                    # return value hides the box again and clears the text.
                    with gr.Group(visible=False) as approval_box:
                        approval_desc = gr.Markdown("", elem_classes="sc-approval")
                        with gr.Row():
                            gr.Button("Approve", variant="primary").click(
                                lambda s: on_approval(True, s), app_state, [approval_box, approval_desc])
                            gr.Button("Deny").click(
                                lambda s: on_approval(False, s), app_state, [approval_box, approval_desc])
            status = gr.HTML(_status(settings, AppSessionState()), elem_classes="sc-status-wrap")

        overlay = gr.HTML("", visible=False)
        js_boot = gr.HTML(_embed_js(settings, []), elem_classes=["sc-hidden-controls"])

        # Off-screen controls (visible=True so Gradio mounts them for JS shortcuts).
        _hid = ["sc-hidden-btn"]
        with gr.Row(elem_classes="sc-hidden-controls"):
            btn_submit = gr.Button("submit", elem_id="sc-submit", elem_classes=_hid)
            btn_clear = gr.Button("clear", elem_id="sc-clear", elem_classes=_hid)
            btn_interrupt = gr.Button("interrupt", elem_id="sc-interrupt", elem_classes=_hid)
            btn_toggle_sidebar = gr.Button("sidebar", elem_id="sc-toggle-sidebar", elem_classes=_hid)
            btn_toggle_view = gr.Button("view", elem_id="sc-toggle-sidebar-view", elem_classes=_hid)
            btn_cycle_mode = gr.Button("mode", elem_id="sc-cycle-mode", elem_classes=_hid)
            btn_cycle_agent = gr.Button("agent", elem_id="sc-cycle-agent", elem_classes=_hid)
            btn_cycle_model = gr.Button("model", elem_id="sc-cycle-model", elem_classes=_hid)
            btn_cycle_think = gr.Button("think", elem_id="sc-cycle-think", elem_classes=_hid)
            btn_help = gr.Button("help", elem_id="sc-help", elem_classes=_hid)
            btn_whichkey = gr.Button("wk", elem_id="sc-whichkey", elem_classes=_hid)
            btn_close = gr.Button("close", elem_id="sc-close-overlay", elem_classes=_hid)
            btn_new = gr.Button("new", elem_id="sc-new-session", elem_classes=_hid)
            btn_open_models = gr.Button("models", elem_id="sc-open-picker-models", elem_classes=_hid)
            btn_open_themes = gr.Button("themes", elem_id="sc-open-picker-themes", elem_classes=_hid)
            btn_open_agents = gr.Button("agents", elem_id="sc-open-picker-agents", elem_classes=_hid)
            btn_open_sessions = gr.Button("sessions", elem_id="sc-open-picker-sessions", elem_classes=_hid)
            btn_picker_up = gr.Button("up", elem_id="sc-picker-up", elem_classes=_hid)
            btn_picker_down = gr.Button("down", elem_id="sc-picker-down", elem_classes=_hid)
            btn_picker_confirm = gr.Button("confirm", elem_id="sc-picker-confirm", elem_classes=_hid)
            picker_pick = gr.Textbox("", elem_id="sc-picker-pick", elem_classes=_hid, show_label=False)
            session_pick = gr.Dropdown(choices=session_choices(), label="session", elem_id="sc-pick-sessions", elem_classes=_hid)
            trace_dl = gr.DownloadButton("trace", elem_classes=_hid)

        # Output lists: `out` for respond, `cycle_out` for the cycle handlers,
        # `picker_out` for picker confirmation.
        out = [
            transcript_view, header, status, sidebar,
            overlay, shell_theme, approval_box, approval_desc,
            files_state, trace_state, app_state, settings_state, ui_state, transcript, editor,
        ]

        cycle_out = [
            settings_state, transcript, transcript_view, header, status, shell_theme,
        ]
        picker_out = [
            transcript_view, header, status, overlay, shell_theme,
            settings_state, ui_state, transcript, app_state,
        ]

        # Both the hidden submit button and the editor's own submit event run
        # `respond`; afterwards the trace_state value is passed through to the
        # download button.
        respond_in = [editor, transcript, app_state, settings_state, ui_state, files_state]
        btn_submit.click(respond, respond_in, out).then(lambda p: p, trace_state, trace_dl)
        editor.submit(respond, respond_in, out).then(lambda p: p, trace_state, trace_dl)

        btn_clear.click(on_clear, [transcript, ui_state], [transcript, ui_state, editor])
        btn_interrupt.click(on_interrupt, app_state, app_state)
        btn_toggle_sidebar.click(
            on_toggle_sidebar, [ui_state, settings_state, files_state, app_state], [ui_state, sidebar])
        btn_toggle_view.click(
            on_toggle_sidebar_view,
            [ui_state, settings_state, files_state, app_state],
            [ui_state, sidebar],
        )
        btn_cycle_mode.click(
            on_cycle_mode, [settings_state, ui_state, app_state, transcript], cycle_out)
        btn_cycle_agent.click(
            on_cycle_agent, [settings_state, ui_state, app_state, transcript], cycle_out)
        btn_cycle_model.click(
            on_cycle_model, [settings_state, ui_state, app_state, transcript], cycle_out)
        btn_cycle_think.click(
            on_cycle_think, [settings_state, ui_state, app_state, transcript], cycle_out)
        btn_help.click(on_help, ui_state, [ui_state, overlay])
        btn_whichkey.click(on_whichkey, ui_state, [ui_state, overlay])
        btn_close.click(on_close_overlay, ui_state, [ui_state, overlay])
        btn_new.click(on_new_session, None, [transcript, app_state, settings_state, ui_state, files_state, trace_state, overlay])
        # The four "open picker" buttons share on_open_picker and differ only
        # in the picker kind string.
        btn_open_models.click(lambda ui, s: on_open_picker("models", ui, s), [ui_state, settings_state], [ui_state, overlay])
        btn_open_themes.click(lambda ui, s: on_open_picker("themes", ui, s), [ui_state, settings_state], [ui_state, overlay])
        btn_open_agents.click(lambda ui, s: on_open_picker("agents", ui, s), [ui_state, settings_state], [ui_state, overlay])
        btn_open_sessions.click(lambda ui, s: on_open_picker("sessions", ui, s), [ui_state, settings_state], [ui_state, overlay])
        # Up/down pass -1 / +1 to on_picker_nav.
        btn_picker_up.click(lambda ui: on_picker_nav(-1, ui), ui_state, [ui_state, overlay])
        btn_picker_down.click(lambda ui: on_picker_nav(1, ui), ui_state, [ui_state, overlay])
        btn_picker_confirm.click(
            on_picker_select,
            [picker_pick, ui_state, settings_state, app_state, transcript, files_state],
            picker_out,
        )
        session_pick.change(on_session_pick, [session_pick, app_state, settings_state], [transcript, app_state])

        # Runs on_load when the page loads, filling the sidebar, file list,
        # JS boot block, session dropdown, overlay and UI state.
        demo.load(
            on_load,
            [settings_state, app_state, ui_state],
            [sidebar, files_state, js_boot, session_pick, overlay, ui_state],
        )

    return demo
834
+
835
+
836
if __name__ == "__main__":
    from engine.preflight import preflight

    preflight(PRESET)
    host = os.environ.get("SMOLCODE_HOST", "127.0.0.1")
    # Hand the port and host to Gradio through its environment variables.
    os.environ.update({
        "GRADIO_SERVER_PORT": os.environ.get("SMOLCODE_PORT", "7860"),
        "GRADIO_SERVER_NAME": host,
    })
    # server_port=None: Gradio scans GRADIO_SERVER_PORT..+99 itself (skips
    # ghost 7860-7862).
    # ssr_mode=False: with SSR (the default on HF when Node is present) the page
    # renders before the custom web_tui.js applies the fixed-height layout, so
    # the file sidebar stays uncapped (grows forever, hides the bottom bar/model
    # picker). Client-side rendering applies the layout immediately.
    build().queue().launch(
        server_name=host,
        server_port=None,
        show_api=False,
        ssr_mode=False,
    )
demo.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d786d4033bd453a36291aeb17f5999f5ca579c9553762d25bf72770b5d37c165
3
+ size 5896625
engine/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """smolcode engine package."""
2
+ from .agent import SmallCodeAgent, Step
3
+ from .builder import BuildResult, WebBuilder
4
+ from .config import (
5
+ Preset,
6
+ SpecialistLadder,
7
+ SpecialistPreset,
8
+ Tier,
9
+ default_ui_model,
10
+ load_preset,
11
+ )
12
+ from .fanout import FanoutResult, fan_out, fan_out_live, summarize
13
+ from .preview import inline_app, preview_iframe
14
+ from .router import Router, RouteResult, classify_specialty, classify_tier
15
+ from .rust_session import RustSession, rust_available
16
+
17
+ __all__ = ["SmallCodeAgent", "Step", "Preset", "Tier", "load_preset", "default_ui_model",
18
+ "SpecialistLadder", "SpecialistPreset",
19
+ "Router", "RouteResult", "classify_tier", "classify_specialty",
20
+ "FanoutResult", "fan_out", "fan_out_live", "summarize",
21
+ "WebBuilder", "BuildResult", "inline_app", "preview_iframe",
22
+ "RustSession", "rust_available"]
engine/agent.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """smolcode agent engine — backed by the Rust smolcode_core agent loop."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import os
6
+ import tempfile
7
+ from collections.abc import Callable
8
+ from dataclasses import dataclass
9
+
10
+ from .config import Preset, load_preset
11
+ from .rust_session import RustRunResult, RustSession, rust_available
12
+ from .sandbox import Workspace
13
+ from .trace_collector import TraceCollector
14
+
15
+ # Legacy prompt kept for docs; Rust agent uses prompts.rs system prompts.
16
+ SYSTEM_PROMPT = """You are smolcode, a precise coding assistant running on a small local model."""
17
+
18
+
19
@dataclass
class Step:
    """One entry of an agent run, as built from a trace-collector event."""

    # Position of the step in the run.
    number: int
    # Event kind string copied from the trace event.
    kind: str
    # Event detail text copied from the trace event.
    detail: str
    # Optional token count; defaults to None when not supplied.
    total_tokens: int | None = None
25
+
26
+
27
class SmallCodeAgent:
    """Agent facade: uses the Rust engine when smolcode_core is installed.

    Wraps a ``RustSession`` (either supplied by the caller or created here) and
    exposes one-shot (``run``) and streaming (``run_live_turn``) execution,
    plus access to the trace steps and workspace files.
    """

    def __init__(
        self,
        preset: Preset | None = None,
        model: str | None = None,
        max_steps: int = 12,
        *,
        system_prompt: str | None = None,
        registry_builder: Callable | None = None,
        workspace: Workspace | None = None,
        name: str = "smolcode",
        agent: str = "build",
        profile: str = "full",
        yolo: bool = False,
        workspace_dir: str | None = None,
        approval_handler=None,
        rust_session: RustSession | None = None,
    ) -> None:
        """Resolve preset, model and workspace, then attach a Rust session.

        Args:
            preset: Preset to use; ``load_preset()`` is called when omitted.
            model: Model name; falls back to ``preset.default_model``.
            max_steps: Stored on the instance as ``max_steps``.
            system_prompt: Stored only; unused by Rust, kept for API compat.
            registry_builder: When given, the "web" profile is used and the
                ``check_app`` tool is registered on the session.
            workspace: Existing workspace; its ``root`` becomes the session path.
            name: Accepted for compatibility; not used in this constructor.
            agent: Agent name passed to ``RustSession``.
            profile: Profile name passed to ``RustSession`` unless a
                ``registry_builder`` forces "web".
            yolo: Passed to ``RustSession``.
            workspace_dir: Workspace path; falls back to ``SMALLCODE_WORKSPACE``
                and then to a fresh temporary directory.
            approval_handler: Passed to ``RustSession``.
            rust_session: Pre-built session to reuse instead of creating one.

        Raises:
            RuntimeError: If the ``smolcode_core`` engine is not available.
        """
        self.preset = preset or load_preset()
        self.model = model or self.preset.default_model
        self.max_steps = max_steps
        self._system_prompt = system_prompt  # unused by Rust; kept for API compat
        self._registry_builder = registry_builder
        self.hit_max_steps = False
        self.errored = False

        # Check for the engine BEFORE touching the filesystem. Previously the
        # temporary workspace was created first and then leaked whenever this
        # check raised.
        if not rust_available():
            raise RuntimeError(
                "smolcode_core required; install with maturin in smolcode-cli/crates/smolcode-py"
            )

        # Only a directory created here is owned (and later removed) by us.
        self._owns_workspace = False
        ws_path = workspace_dir or os.environ.get("SMALLCODE_WORKSPACE")
        if workspace is not None:
            ws_path = str(workspace.root)
        elif ws_path is None:
            ws_path = tempfile.mkdtemp(prefix="smallcode-")
            self._owns_workspace = True

        self.workspace = workspace or Workspace(root=ws_path)

        profile_name = profile
        if registry_builder is not None:
            profile_name = "web"

        if rust_session is not None:
            self._rust = rust_session
        else:
            self._rust = RustSession(
                workspace=ws_path,
                agent=agent,
                yolo=yolo,
                model=self.model,
                base_url=self.preset.base_url,
                api_key=self.preset.api_key,
                profile=profile_name,
                approval_handler=approval_handler,
            )
        self.trace_collector = self._rust.trace_collector

        if registry_builder is not None:
            self._register_web_tools()

    def _register_web_tools(self) -> None:
        """Register the ``check_app`` tool, bound to this agent's workspace and trace."""
        from .tools import check_app_impl

        ws = self.workspace
        collector = self.trace_collector

        def check_app(args: dict) -> dict:
            return check_app_impl(ws, collector, args)

        self._rust.register_tool("check_app", check_app)

    async def run(self, task: str, *, think: str | None = None, yolo: bool | None = None) -> tuple[str, list[Step]]:
        """Run one task to completion and return ``(final_text, steps)``.

        ``hit_max_steps`` and ``errored`` are reset first and then copied from
        the run result.
        """
        self.hit_max_steps = False
        self.errored = False
        result: RustRunResult = await self._rust.run(task, think=think, yolo=yolo)
        self.hit_max_steps = result.hit_max_steps
        self.errored = result.errored
        steps = self._steps_from_trace()
        return result.final, steps

    async def run_live_turn(
        self,
        task: str,
        *,
        think: str | None = None,
        yolo: bool | None = None,
        poll_interval: float = 0.35,
    ):
        """Async generator yielding LiveFrame snapshots during a Rust agent turn.

        Starts a turn, then polls the session for events. A snapshot frame is
        yielded whenever no event is pending (followed by a ``poll_interval``
        sleep) and after each ingested event. A final frame with ``done=True``
        and ``result=(final_text, steps)`` is yielded last.
        """
        from .live_run import LiveFrame

        self.hit_max_steps = False
        self.errored = False
        self.trace_collector.events.clear()
        self._rust.clear_cancel()
        self._rust._session.start_turn(task, think=think, yolo=yolo)
        final_text = ""
        done = False
        interrupted = False
        while not done:
            if self._rust.cancelled:
                interrupted = True
                done = True
                break
            # poll_event is run in a worker thread so the event loop stays free.
            ev = await asyncio.to_thread(self._rust._session.poll_event)
            if ev is None:
                yield LiveFrame(
                    events=self.trace_collector.snapshot(),
                    files=self.files(),
                )
                await asyncio.sleep(poll_interval)
                continue
            kind = ev.get("kind")
            if kind == "approval":
                # Approve by default when no handler is configured.
                approved = True
                if self._rust.approval_handler is not None:
                    approved = await self._rust.approval_handler(ev.get("desc", ""))
                self._rust._session.approve(approved)
                continue
            self._rust._ingest_event(ev)
            if kind == "final":
                final_text = ev.get("text", "")
            if kind == "done":
                done = True
            yield LiveFrame(
                events=self.trace_collector.snapshot(),
                files=self.files(),
                raw_event=ev,
            )
        if interrupted:
            final_text = final_text or "interrupted"
            self.errored = True
        # An interrupted turn is not recorded in the session history.
        if final_text and not interrupted:
            self._rust._session.record_turn(task, final_text)
        steps = self._steps_from_trace()
        yield LiveFrame(
            steps=steps,
            events=self.trace_collector.snapshot(),
            files=self.files(),
            done=True,
            result=(final_text, steps),
        )

    def _steps_from_trace(self) -> list[Step]:
        """Convert the collected trace events into ``Step`` objects, numbered from 0."""
        return [
            Step(number=i, kind=ev.kind, detail=ev.detail)
            for i, ev in enumerate(self.trace_collector.events)
        ]

    def current_steps(self) -> list[Step]:
        """Return the steps built from the events collected so far."""
        return self._steps_from_trace()

    def raw_history(self) -> list:
        """Return the same list as ``current_steps()``."""
        return self.current_steps()

    def files(self) -> dict[str, str]:
        """Return the file mapping reported by the Rust session."""
        return self._rust.files()

    @property
    def rust_session(self) -> RustSession:
        """The underlying ``RustSession``."""
        return self._rust

    def cleanup(self) -> None:
        """Clean up the workspace, but only when this agent created it."""
        # getattr keeps this safe on instances whose __init__ never ran.
        if getattr(self, "_owns_workspace", False):
            self.workspace.cleanup()
engine/branding.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared Hugging Face branding for smolcode Gradio UIs."""
2
+ from __future__ import annotations
3
+
4
+ from .themes import theme_css_vars
5
+
6
+ # Official HF icon (huggingface.co/front/assets/huggingface_logo-noborder.svg)
7
+ HF_LOGO_SVG = (
8
+ '<svg class="hf-logo" xmlns="http://www.w3.org/2000/svg" width="32" height="30" '
9
+ 'viewBox="0 0 95 88" fill="none" aria-label="Hugging Face">'
10
+ '<path fill="#FFD21E" d="M47.21 76.5a34.75 34.75 0 1 0 0-69.5 34.75 34.75 0 0 0 0 69.5Z" />'
11
+ '<path fill="#FF9D0B" d="M81.96 41.75a34.75 34.75 0 1 0-69.5 0 34.75 34.75 0 0 0 69.5 0Zm-73.5 0a38.75 38.75 0 1 1 77.5 0 38.75 38.75 0 0 1-77.5 0Z" />'
12
+ '<path fill="#3A3B45" d="M58.5 32.3c1.28.44 1.78 3.06 3.07 2.38a5 5 0 1 0-6.76-2.07c.61 1.15 2.55-.72 3.7-.32ZM34.95 32.3c-1.28.44-1.79 3.06-3.07 2.38a5 5 0 1 1 6.76-2.07c-.61 1.15-2.56-.72-3.7-.32Z" />'
13
+ '<path fill="#FF323D" d="M46.96 56.29c9.83 0 13-8.76 13-13.26 0-2.34-1.57-1.6-4.09-.36-2.33 1.15-5.46 2.74-8.9 2.74-7.19 0-13-6.88-13-2.38s3.16 13.26 13 13.26Z" />'
14
+ '<path fill="#3A3B45" fill-rule="evenodd" d="M39.43 54a8.7 8.7 0 0 1 5.3-4.49c.4-.12.81.57 1.24 1.28.4.68.82 1.37 1.24 1.37.45 0 .9-.68 1.33-1.35.45-.7.89-1.38 1.32-1.25a8.61 8.61 0 0 1 5 4.17c3.73-2.94 5.1-7.74 5.1-10.7 0-2.34-1.57-1.6-4.09-.36l-.14.07c-2.31 1.15-5.39 2.67-8.77 2.67s-6.45-1.52-8.77-2.67c-2.6-1.29-4.23-2.1-4.23.29 0 3.05 1.46 8.06 5.47 10.97Z" clip-rule="evenodd" />'
15
+ '<path fill="#FF9D0B" d="M70.71 37a3.25 3.25 0 1 0 0-6.5 3.25 3.25 0 0 0 0 6.5ZM24.21 37a3.25 3.25 0 1 0 0-6.5 3.25 3.25 0 0 0 0 6.5ZM17.52 48c-1.62 0-3.06.66-4.07 1.87a5.97 5.97 0 0 0-1.33 3.76 7.1 7.1 0 0 0-1.94-.3c-1.55 0-2.95.59-3.94 1.66a5.8 5.8 0 0 0-.8 7 5.3 5.3 0 0 0-1.79 2.82c-.24.9-.48 2.8.8 4.74a5.22 5.22 0 0 0-.37 5.02c1.02 2.32 3.57 4.14 8.52 6.1 3.07 1.22 5.89 2 5.91 2.01a44.33 44.33 0 0 0 10.93 1.6c5.86 0 10.05-1.8 12.46-5.34 3.88-5.69 3.33-10.9-1.7-15.92-2.77-2.78-4.62-6.87-5-7.77-.78-2.66-2.84-5.62-6.25-5.62a5.7 5.7 0 0 0-4.6 2.46c-1-1.26-1.98-2.25-2.86-2.82A7.4 7.4 0 0 0 17.52 48Zm0 4c.51 0 1.14.22 1.82.65 2.14 1.36 6.25 8.43 7.76 11.18.5.92 1.37 1.31 2.14 1.31 1.55 0 2.75-1.53.15-3.48-3.92-2.93-2.55-7.72-.68-8.01.08-.02.17-.02.24-.02 1.7 0 2.45 2.93 2.45 2.93s2.2 5.52 5.98 9.3c3.77 3.77 3.97 6.8 1.22 10.83-1.88 2.75-5.47 3.58-9.16 3.58-3.81 0-7.73-.9-9.92-1.46-.11-.03-13.45-3.8-11.76-7 .28-.54.75-.76 1.34-.76 2.38 0 6.7 3.54 8.57 3.54.41 0 .7-.17.83-.6.79-2.85-12.06-4.05-10.98-8.17.2-.73.71-1.02 1.44-1.02 3.14 0 10.2 5.53 11.68 5.53.11 0 .2-.03.24-.1.74-1.2.33-2.04-4.9-5.2-5.21-3.16-8.88-5.06-6.8-7.33.24-.26.58-.38 1-.38 3.17 0 10.66 6.82 10.66 6.82s2.02 2.1 3.25 2.1c.28 0 .52-.1.68-.38.86-1.46-8.06-8.22-8.56-11.01-.34-1.9.24-2.85 1.31-2.85Z" />'
16
+ '<path fill="#FFD21E" d="M38.6 76.69c2.75-4.04 2.55-7.07-1.22-10.84-3.78-3.77-5.98-9.3-5.98-9.3s-.82-3.2-2.69-2.9c-1.87.3-3.24 5.08.68 8.01 3.91 2.93-.78 4.92-2.29 2.17-1.5-2.75-5.62-9.82-7.76-11.18-2.13-1.35-3.63-.6-3.13 2.2.5 2.79 9.43 9.55 8.56 11-.87 1.47-3.93-1.71-3.93-1.71s-9.57-8.71-11.66-6.44c-2.08 2.27 1.59 4.17 6.8 7.33 5.23 3.16 5.64 4 4.9 5.2-.75 1.2-12.28-8.53-13.36-4.4-1.08 4.11 11.77 5.3 10.98 8.15-.8 2.85-9.06-5.38-10.74-2.18-1.7 3.21 11.65 6.98 11.76 7.01 4.3 1.12 15.25 3.49 19.08-2.12Z" />'
17
+ '<path fill="#FF9D0B" d="M77.4 48c1.62 0 3.07.66 4.07 1.87a5.97 5.97 0 0 1 1.33 3.76 7.1 7.1 0 0 1 1.95-.3c1.55 0 2.95.59 3.94 1.66a5.8 5.8 0 0 1 .8 7 5.3 5.3 0 0 1 1.78 2.82c.24.9.48 2.8-.8 4.74a5.22 5.22 0 0 1 .37 5.02c-1.02 2.32-3.57 4.14-8.51 6.1-3.08 1.22-5.9 2-5.92 2.01a44.33 44.33 0 0 1-10.93 1.6c-5.86 0-10.05-1.8-12.46-5.34-3.88-5.69-3.33-10.9 1.7-15.92 2.78-2.78 4.63-6.87 5.01-7.77.78-2.66 2.83-5.62 6.24-5.62a5.7 5.7 0 0 1 4.6 2.46c1-1.26 1.98-2.25 2.87-2.82A7.4 7.4 0 0 1 77.4 48Zm0 4c-.51 0-1.13.22-1.82.65-2.13 1.36-6.25 8.43-7.76 11.18a2.43 2.43 0 0 1-2.14 1.31c-1.54 0-2.75-1.53-.14-3.48 3.91-2.93 2.54-7.72.67-8.01a1.54 1.54 0 0 0-.24-.02c-1.7 0-2.45 2.93-2.45 2.93s-2.2 5.52-5.97 9.3c-3.78 3.77-3.98 6.8-1.22 10.83 1.87 2.75 5.47 3.58 9.15 3.58 3.82 0 7.73-.9 9.93-1.46.1-.03 13.45-3.8 11.76-7-.29-.54-.75-.76-1.34-.76-2.38 0-6.71 3.54-8.57 3.54-.42 0-.71-.17-.83-.6-.8-2.85 12.05-4.05 10.97-8.17-.19-.73-.7-1.02-1.44-1.02-3.14 0-10.2 5.53-11.68 5.53-.1 0-.19-.03-.23-.1-.74-1.2-.34-2.04 4.88-5.2 5.23-3.16 8.9-5.06 6.8-7.33-.23-.26-.57-.38-.98-.38-3.18 0-10.67 6.82-10.67 6.82s-2.02 2.1-3.24 2.1a.74.74 0 0 1-.68-.38c-.87-1.46 8.05-8.22 8.55-11.01.34-1.9-.24-2.85-1.31-2.85Z" />'
18
+ '<path fill="#FFD21E" d="M56.33 76.69c-2.75-4.04-2.56-7.07 1.22-10.84 3.77-3.77 5.97-9.3 5.97-9.3s.82-3.2 2.7-2.9c1.86.3 3.23 5.08-.68 8.01-3.92 2.93.78 4.92 2.28 2.17 1.51-2.75 5.63-9.82 7.76-11.18 2.13-1.35 3.64-.6 3.13 2.2-.5 2.79-9.42 9.55-8.55 11 .86 1.47 3.92-1.71 3.92-1.71s9.58-8.71 11.66-6.44c2.08 2.27-1.58 4.17-6.8 7.33-5.23 3.16-5.63 4-4.9 5.2.75 1.2 12.28-8.53 13.36-4.4 1.08 4.11-11.76 5.3-10.97 8.15.8 2.85 9.05-5.38 10.74-2.18 1.69 3.21-11.65 6.98-11.76 7.01-4.31 1.12-15.26 3.49-19.08-2.12Z" />'
19
+ '</svg>'
20
+ )
21
+
22
+ SMOLCODE_CSS = """
23
+ :root { --hf-yellow:#FFD21E; --sc-accent:#7c3aed; --sc-bg:#0b1020; --sc-panel:#111827;
24
+ --sc-border:#334155; --sc-fg:#e2e8f0; --sc-dim:#64748b; --sc-ok:#34d399; --sc-tool:#a78bfa; }
25
+ body, .gradio-container { background:var(--sc-bg) !important; color:var(--sc-fg) !important; }
26
+ /* Lock the whole page to the viewport so it can NEVER scroll; only inner panes scroll. */
27
+ html, body { height:100% !important; max-height:100vh !important; margin:0 !important;
28
+ overflow:hidden !important; }
29
+ gradio-app { display:block !important; height:100vh !important; max-height:100vh !important;
30
+ overflow:hidden !important; }
31
+ .gradio-container { max-width:100% !important; padding:0.5rem 1rem !important;
32
+ height:100vh !important; max-height:100vh !important; min-height:0 !important;
33
+ overflow:hidden !important; }
34
+ /* Every Gradio wrapper between the container and our shell must be height-locked, not auto. */
35
+ .gradio-container > .wrap, .gradio-container .contain,
36
+ main.fillable, main.app, .gradio-container > main {
37
+ height:100% !important; max-height:100% !important; min-height:0 !important;
38
+ overflow:hidden !important; }
39
+ /* The unnamed outer column Gradio injects around our shell column. */
40
+ main.fillable > .column, .contain > .column, .wrap > .column {
41
+ height:100% !important; max-height:100% !important; min-height:0 !important;
42
+ overflow:hidden !important; }
43
+ .sc-header { display:flex; align-items:center; gap:.75rem; margin-bottom:.25rem; }
44
+ .hf-logo { flex-shrink:0; }
45
+ .sc-title { font-weight:800; font-size:1.7rem; letter-spacing:-.02em; line-height:1.2; }
46
+ .sc-title .hf-accent, .hf-accent { color:var(--hf-yellow); }
47
+ .sc-badge { display:inline-block; padding:2px 10px; border-radius:999px;
48
+ background:#2a2410; color:var(--hf-yellow); border:1px solid rgba(255,210,30,.25);
49
+ font-size:.72rem; font-weight:600; margin-left:.4rem; vertical-align:middle; }
50
+ .sc-sub { color:#94a3b8; margin-top:.2rem; font-size:.9rem; }
51
+ .sc-tui-shell { display:flex !important; flex-direction:column; gap:.5rem;
52
+ height:100% !important; max-height:100% !important; min-height:0; overflow:hidden !important; }
53
+ .sc-header-bar { display:flex; align-items:center; gap:.85rem; padding:.5rem .75rem;
54
+ background:#1e293b; border-radius:6px; font-family:ui-monospace,monospace; font-size:.8rem;
55
+ flex-shrink:0; }
56
+ .sc-hbrand { font-weight:700; color:#0b1020; background:var(--sc-accent); padding:1px 8px;
57
+ border-radius:4px; }
58
+ .sc-hbrand .hf-accent { color:var(--hf-yellow); }
59
+ .sc-hgit { color:var(--sc-ok); }
60
+ .sc-hmodel { color:var(--sc-tool); font-weight:700; }
61
+ .sc-hhost { color:var(--sc-dim); }
62
+ .sc-htheme { color:var(--sc-dim); margin-left:auto; }
63
+ .sc-main-row { display:flex !important; flex-wrap:nowrap !important; align-items:stretch !important;
64
+ gap:.5rem !important; flex:1 !important; min-height:0 !important; overflow:hidden !important; }
65
+ .sc-main-row > .gr-html, .sc-main-row > .gr-column { min-height:0 !important; height:100% !important; }
66
+ .sc-sidebar { width:17rem !important; min-width:17rem !important; max-width:17rem !important;
67
+ flex-shrink:0 !important; height:100% !important; min-height:0 !important; overflow:hidden !important; }
68
+ .sc-sidebar > .html-container { padding:0 !important; height:100% !important; min-height:0 !important; }
69
+ .sc-sidebar-panel { height:100%; min-height:0; max-height:100%; display:flex; flex-direction:column;
70
+ background:var(--sc-panel); border:1px solid var(--sc-border); border-radius:8px;
71
+ font-family:ui-monospace,monospace; font-size:.78rem; overflow:hidden; }
72
+ .sc-sidebar-focused { border-color:var(--sc-accent); }
73
+ .sc-sidebar-title { padding:.35rem .55rem; color:var(--sc-accent); font-weight:700;
74
+ border-bottom:1px solid var(--sc-border); background:#0f172a; }
75
+ .sc-sidebar-body { flex:1 1 0%; min-height:0; height:100%;
76
+ max-height:calc(100vh - 5rem); overflow-y:auto; overflow-x:hidden;
77
+ padding:.25rem 0; line-height:1.35; }
78
+ .sc-sb-dir { color:var(--sc-accent); font-weight:700; padding:.1rem .45rem; white-space:nowrap; }
79
+ .sc-sb-file { display:flex; align-items:baseline; gap:.15rem; padding:.05rem .45rem;
80
+ color:var(--sc-fg); white-space:nowrap; }
81
+ .sc-sb-file:hover { background:#1e293b; }
82
+ .sc-sb-sel { background:var(--sc-ok); color:#0b1020; font-weight:700; }
83
+ .sc-sb-sel .sc-sb-glyph, .sc-sb-sel .sc-sb-name { color:#0b1020; }
84
+ .sc-sb-mark { display:inline-block; width:.85rem; text-align:center; }
85
+ .sc-sb-glyph { opacity:.6; }
86
+ .sc-sb-more { color:var(--sc-dim); font-style:italic; padding:.2rem .45rem; }
87
+ .sc-sb-empty, .sc-sb-stat { padding:.15rem .45rem; color:var(--sc-fg); }
88
+ .sc-sb-dim { color:var(--sc-dim); }
89
+ .sc-main-col { flex:1 !important; min-width:0 !important; min-height:0 !important;
90
+ height:100% !important; display:flex !important; flex-direction:column !important;
91
+ gap:.5rem !important; overflow:hidden !important; }
92
+ .sc-editor-wrap, .sc-editor-wrap .gr-group { overflow:visible !important; flex-shrink:0 !important; }
93
+ .sc-transcript-wrap { flex:1; min-height:0; overflow-y:auto; overflow-x:hidden;
94
+ background:#0f172a; border:1px solid var(--sc-border); border-radius:8px; padding:.5rem .65rem; }
95
+ .sc-transcript-inner { font-family:ui-monospace,monospace; font-size:.82rem; line-height:1.45; }
96
+ .sc-transcript-empty { color:var(--sc-dim); padding:1rem; font-family:ui-monospace,monospace; }
97
+ .sc-tline { margin:.15rem 0; }
98
+ .sc-tglyph { display:inline-block; width:1rem; }
99
+ .sc-editor-wrap { border:1px solid var(--sc-accent); border-radius:8px; padding:.25rem;
100
+ background:#0f172a; flex-shrink:0; min-height:9rem; overflow:visible !important; }
101
+ .sc-editor-wrap .block, #sc-editor { height:auto !important; min-height:7rem !important;
102
+ overflow:visible !important; }
103
+ .sc-editor-wrap label { display:flex !important; flex-direction:column; min-height:6.5rem; }
104
+ .sc-editor-wrap textarea, #sc-editor textarea, #sc-editor input,
105
+ [data-testid="textbox"] textarea, [data-testid="textbox"] input {
106
+ font-family:ui-monospace,monospace !important; font-size:.85rem !important;
107
+ background:#0f172a !important; color:var(--sc-fg) !important; border:none !important;
108
+ box-shadow:none !important; pointer-events:auto !important;
109
+ min-height:6.5rem !important; resize:vertical !important; }
110
+ #sc-editor { pointer-events:auto !important; }
111
+ .sc-editor-hint { font-size:.72rem; color:var(--sc-dim); padding:.2rem .4rem;
112
+ font-family:ui-monospace,monospace; }
113
+ .sc-status-wrap { flex-shrink:0; }
114
+ .sc-status-bar { display:flex; flex-wrap:wrap; gap:.35rem; padding:.4rem .5rem;
115
+ background:#1e293b; border-radius:6px; font-family:ui-monospace,monospace; font-size:.75rem; }
116
+ .sc-chip { padding:2px 8px; border-radius:4px; background:#334155; color:#e2e8f0; }
117
+ .sc-chip-brand { background:var(--sc-accent); color:#fff; font-weight:700; }
118
+ .sc-chip-mode { background:#2a2410; color:var(--hf-yellow); font-weight:600; }
119
+ .sc-chip-think { background:#422006; color:#fdba74; }
120
+ .sc-chip-run { background:#14532d; color:#86efac; }
121
+ .sc-chip-dim { color:#94a3b8; }
122
+ .sc-chip-model { color:#a78bfa; }
123
+ .sc-chip-clickable { cursor:pointer; border:none; font:inherit; font-family:inherit; font-size:inherit; }
124
+ .sc-chip-clickable:hover { filter:brightness(1.15); }
125
+ .sc-picker-title { color:var(--sc-accent); font-weight:700; margin-bottom:.5rem; }
126
+ .sc-picker-list { display:flex; flex-direction:column; gap:2px; max-height:280px; overflow-y:auto; }
127
+ .sc-picker-item { display:flex; gap:.35rem; align-items:baseline; width:100%; text-align:left;
128
+ padding:.25rem .4rem; background:transparent; border:none; color:var(--sc-fg);
129
+ font-family:ui-monospace,monospace; font-size:.85rem; cursor:pointer; border-radius:4px; }
130
+ .sc-picker-item:hover { background:#334155; }
131
+ .sc-picker-sel { background:var(--sc-accent); color:#fff; font-weight:700; }
132
+ .sc-picker-mark { display:inline-block; width:1rem; text-align:center; }
133
+ .sc-picker-hint { margin-top:.6rem; font-size:.72rem; color:var(--sc-dim); }
134
+ .sc-picker-empty { color:var(--sc-dim); font-style:italic; }
135
+ .sc-popup-item.sc-popup-sel { background:#334155; font-weight:700; }
136
+ .sc-overlay { position:fixed; inset:0; background:rgba(0,0,0,.55); z-index:9999;
137
+ display:flex; align-items:center; justify-content:center; pointer-events:auto; }
138
+ .sc-overlay-panel { background:#1e293b; border:1px solid #7c3aed; border-radius:10px;
139
+ padding:1rem 1.25rem; max-width:480px; font-family:ui-monospace,monospace; font-size:.85rem;
140
+ color:#e2e8f0; pointer-events:auto; }
141
+ .sc-popup { position:absolute; z-index:100; background:#1e293b; border:1px solid #7c3aed;
142
+ border-radius:6px; max-height:200px; overflow:auto; font-family:ui-monospace,monospace; font-size:.8rem; }
143
+ .sc-popup-item { padding:.25rem .5rem; cursor:pointer; color:#34d399; }
144
+ .sc-popup-item:hover { background:#334155; }
145
+ .sc-approval { padding:.75rem 1rem; border:1px solid rgba(124,58,237,.45);
146
+ border-radius:8px; background:#1e1b4b; margin:.5rem 0; font-size:.9rem; }
147
+ footer { display:none !important; }
148
+ .gradio-container .block, .gradio-container .form { background:transparent !important;
149
+ border:none !important; box-shadow:none !important; }
150
+ .gradio-container .gr-group { background:transparent !important; border:none !important; }
151
+ .gradio-container label { display:none !important; }
152
+ .sc-hidden-controls { position:fixed !important; left:-10000px !important; top:0 !important;
153
+ width:1px !important; height:1px !important; overflow:hidden !important; opacity:0 !important; }
154
+ .sc-hidden-btn, .sc-hidden-btn.block, #sc-submit, #sc-clear, #sc-interrupt, #sc-toggle-sidebar,
155
+ #sc-cycle-mode, #sc-cycle-agent, #sc-cycle-model, #sc-cycle-think, #sc-help, #sc-whichkey,
156
+ #sc-open-picker-models, #sc-open-picker-themes, #sc-open-picker-agents, #sc-open-picker-sessions,
157
+ #sc-picker-up, #sc-picker-down, #sc-picker-confirm, #sc-picker-pick {
158
+ position:fixed !important; left:-10000px !important; top:0 !important;
159
+ width:1px !important; height:1px !important; opacity:0 !important;
160
+ overflow:hidden !important; pointer-events:auto !important; }
161
+ """ + theme_css_vars()
162
+
163
+
164
def smolcode_header_html(*, preset: str, tier_badge: str, subtitle: str) -> str:
    """Build the branded header markup: logo, "smolcode" title with two badges
    (preset and tier), and a subtitle line.

    The arguments are interpolated into the markup as given.
    """
    badges = (
        f"<span class='sc-badge'>preset: {preset}</span>"
        f"<span class='sc-badge'>{tier_badge}</span>"
    )
    title_row = f"<div class='sc-title'>smol<span class='hf-accent'>code</span>{badges}</div>"
    subtitle_row = f"<div class='sc-sub'>{subtitle}</div>"
    pieces = [
        "<div class='sc-header'>",
        HF_LOGO_SVG,
        "<div>",
        title_row,
        subtitle_row,
        "</div></div>",
    ]
    return "".join(pieces)
engine/browser_runner.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Subprocess runner: check a model-built web app in a REAL headless browser.
2
+
3
+ Invoked as `python engine/browser_runner.py <app.html>` by
4
+ engine/browsercheck.py — never imported (keeps it free of the engine package /
5
+ liteforge, and isolates a browser crash from the Gradio process). It loads the
6
+ app wrapped in the EXACT same `srcdoc` + `sandbox` as the live preview
7
+ (engine/preview.py), so the verdict matches what the user sees, then clicks every
8
+ button and exercises the keyboard, and reports any uncaught JavaScript errors.
9
+
10
+ Browser: headless Firefox via Selenium + geckodriver. (Playwright's browser CDN
11
+ is firewalled in this environment; conda-forge Firefox is the reachable, rootless
12
+ real browser. The choice is invisible to callers — same JSON contract.)
13
+
14
+ We capture errors by injecting a tiny `window.onerror`/`unhandledrejection`
15
+ collector at the top of the framed document (so it catches errors during initial
16
+ script execution — the "script ran before its element / undefined function"
17
+ class), then read it back. That is the HARD failure signal.
18
+
19
+ Output: one JSON line {ok, errors, buttons, clicked}. Exit 3 only when the
20
+ browser itself can't run, so the caller can fall back to the jsdom checker.
21
+ """
22
+ import json
23
+ import os
24
+ import re
25
+ import sys
26
+ import tempfile
27
+
28
+ PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"
29
+
30
+ # Installed by the rootless conda-forge setup (see DEVELOPING.md). Overridable.
31
+ _BROWSER_PREFIX = os.environ.get(
32
+ "SMOLBUILDER_BROWSER_PREFIX",
33
+ os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".browser"))
34
+ _FIREFOX_BIN = os.path.join(_BROWSER_PREFIX, "bin", "FirefoxApp", "firefox")
35
+ _GECKODRIVER = os.path.join(_BROWSER_PREFIX, "bin", "geckodriver")
36
+
37
+ # Injected first inside the frame so it catches errors thrown during load.
38
+ _CAPTURE = ("<script>(function(){window.__errs=[];"
39
+ "window.addEventListener('error',function(e){try{__errs.push('uncaught: '+"
40
+ "((e.error&&e.error.message)||e.message||String(e)))}catch(_){}} ,true);"
41
+ "window.addEventListener('unhandledrejection',function(e){try{__errs.push("
42
+ "'rejection: '+((e.reason&&e.reason.message)||e.reason))}catch(_){}});})();</script>")
43
+
44
+
45
+ def _escape_srcdoc(doc: str) -> str:
46
+ return doc.replace("&", "&amp;").replace('"', "&quot;")
47
+
48
+
49
def _inject_capture(app_html: str) -> str:
    """Insert the error collector so it runs before the app's own scripts.

    The collector goes right after the opening ``<head>`` tag, else right after
    the opening ``<html>`` tag, else at the very start of the document.

    The tag name must be followed by whitespace, ``/`` or ``>``. Without that
    anchor ``<head[^>]*>`` also matches ``<header ...>``, which would place the
    collector in the middle of the body, after any earlier scripts had run.

    Args:
        app_html: The app's HTML document (or fragment).

    Returns:
        ``app_html`` with ``_CAPTURE`` spliced in at the chosen position.
    """
    for tag in ("head", "html"):
        m = re.search(rf"<{tag}(?=[\s/>])[^>]*>", app_html, re.I)
        if m:
            return app_html[:m.end()] + _CAPTURE + app_html[m.end():]
    return _CAPTURE + app_html
58
+
59
+
60
+ def _emit(obj: dict) -> None:
61
+ sys.stdout.write(json.dumps(obj) + "\n")
62
+
63
+
64
def main(path: str) -> int:
    """Load the app at ``path`` in headless Firefox and report uncaught JS errors.

    The app is wrapped in a sandboxed ``srcdoc`` iframe with the error collector
    injected. Up to 25 clickable elements are clicked, arrow/space ``keydown``
    events are dispatched, and the collected errors are read back.

    Args:
        path: Path to the app's HTML file (read as UTF-8).

    Returns:
        0 after emitting ``{ok, errors, buttons, clicked}``; 3 after emitting
        ``{"ok": None, "infra": ...}`` when Selenium, Firefox or geckodriver is
        unavailable or Firefox fails to launch.
    """
    try:
        from selenium import webdriver
        from selenium.webdriver.firefox.options import Options
        from selenium.webdriver.firefox.service import Service
        from selenium.webdriver.common.by import By
    except Exception as e:
        _emit({"ok": None, "infra": f"selenium import failed: {e}"})
        return 3

    if not (os.path.exists(_FIREFOX_BIN) and os.path.exists(_GECKODRIVER)):
        _emit({"ok": None, "infra": "firefox/geckodriver not installed"})
        return 3

    import shutil
    import time

    with open(path, encoding="utf-8") as f:
        app_html = f.read()

    host = ('<!doctype html><meta charset="utf-8"><body style="margin:0">'
            f'<iframe id="app" style="width:100%;height:600px;border:0" '
            f'sandbox="{PREVIEW_SANDBOX}" '
            f'srcdoc="{_escape_srcdoc(_inject_capture(app_html))}"></iframe>')

    errors: list[str] = []
    buttons = clicked = 0
    # The host page lives in a private temp dir that is removed on every exit
    # path, so repeated checks do not accumulate directories.
    host_dir = tempfile.mkdtemp(prefix="brhost-")
    try:
        host_path = os.path.join(host_dir, "host.html")
        with open(host_path, "w", encoding="utf-8") as f:
            f.write(host)

        opts = Options()
        opts.add_argument("-headless")
        opts.binary_location = _FIREFOX_BIN
        opts.set_preference("security.sandbox.content.level", 0)  # no userns in container
        svc = Service(executable_path=_GECKODRIVER,
                      log_output=os.path.join(tempfile.gettempdir(), "gecko.log"))

        try:
            driver = webdriver.Firefox(options=opts, service=svc)
        except Exception as e:
            _emit({"ok": None, "infra": f"firefox launch failed: {str(e)[:200]}"})
            return 3

        try:
            driver.set_page_load_timeout(20)
            driver.get("file://" + host_path)
            # Everything below runs inside the app's iframe.
            driver.switch_to.frame(driver.find_element(By.ID, "app"))
            time.sleep(0.3)  # let scripts settle
            els = driver.find_elements(
                By.CSS_SELECTOR, "button, [onclick], input[type=button], input[type=submit]")
            buttons = len(els)
            for el in els[:25]:
                try:
                    # Re-enable disabled controls so their handlers get exercised too.
                    driver.execute_script("arguments[0].disabled=false;", el)
                    el.click()
                    clicked += 1
                except Exception:
                    pass  # handler errors show up in __errs
            # Exercise keyboard handlers (canvas games etc.).
            try:
                driver.execute_script(
                    "['ArrowUp','ArrowDown','ArrowLeft','ArrowRight',' '].forEach(function(k){"
                    "var c={key:k,keyCode:k===' '?32:({ArrowUp:38,ArrowDown:40,ArrowLeft:37,ArrowRight:39}[k]),bubbles:true};"
                    "document.dispatchEvent(new KeyboardEvent('keydown',c));"
                    "window.dispatchEvent(new KeyboardEvent('keydown',c));});")
            except Exception:
                pass
            time.sleep(0.3)  # surface late/timer errors
            try:
                errors = driver.execute_script("return window.__errs || [];") or []
            except Exception:
                # Best effort: an unreadable collector is reported as "no errors".
                errors = []
        finally:
            try:
                driver.quit()
            except Exception:
                pass
    finally:
        shutil.rmtree(host_dir, ignore_errors=True)

    # Bound the report: at most 20 errors of at most 400 characters each.
    errors = [str(e)[:400] for e in errors][:20]
    _emit({"ok": len(errors) == 0, "errors": errors, "buttons": buttons, "clicked": clicked})
    return 0
142
+
143
+
144
+ if __name__ == "__main__":
145
+ sys.exit(main(sys.argv[1]))
engine/browsercheck.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Real-browser verification of model-built web apps, with a jsdom fallback.
2
+
3
+ The web equivalent of `run_python`, but faithful: it drives a REAL headless
4
+ browser (Firefox via Selenium, in engine/browser_runner.py as a subprocess) and
5
+ loads the app in the exact `srcdoc`/`sandbox` wrapper the live preview uses — so
6
+ the agent's verdict matches what the user actually sees. jsdom
7
+ (engine/webcheck.py) can't: it has a working localStorage and never applies the
8
+ sandbox, so it falsely passes apps that break in a browser (e.g. a notepad on a
9
+ `data:` opaque origin).
10
+
11
+ Same contract as webcheck.check_html — (True, []) / (False, [...]) / (None, [...]).
12
+ Fallback chain: real browser -> jsdom -> unverifiable. A browser that's missing,
13
+ slow, or crashes returns None internally and falls back rather than failing the
14
+ build (a flaky checker must never cause spurious model escalation).
15
+
16
+ The browser must be installed wherever this runs (rootless conda-forge Firefox —
17
+ see DEVELOPING.md); on a minimal image (e.g. the HF Space) it isn't, and we use
18
+ jsdom.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import functools
23
+ import json
24
+ import os
25
+ import subprocess
26
+ import sys
27
+ import tempfile
28
+ from pathlib import Path
29
+
30
+ from . import webcheck
31
+
32
+ # Real-browser runners, tried in order. Playwright/Chromium first (the reachable
33
+ # rootless browser in this devcontainer), then conda-forge Firefox/Selenium.
34
+ # Whichever launches first is cached for the life of the process. Both speak the
35
+ # same JSON contract, so the choice is invisible to callers.
36
+ _RUNNERS = [
37
+ Path(__file__).with_name("playwright_runner.py"),
38
+ Path(__file__).with_name("browser_runner.py"),
39
+ ]
40
+ _BROWSER_PREFIX = Path(os.environ.get(
41
+ "SMOLBUILDER_BROWSER_PREFIX",
42
+ str(Path(__file__).resolve().parent.parent / ".browser")))
43
+
44
+
45
def _child_env() -> dict:
    """Build the runner subprocess environment.

    Copies the current environment, puts ``<prefix>/lib`` at the front of
    ``LD_LIBRARY_PATH`` (keeping any existing value after it), and passes the
    browser prefix down as ``SMOLBUILDER_BROWSER_PREFIX``.
    """
    child = dict(os.environ)
    lib_dir = str(_BROWSER_PREFIX / "lib")
    inherited = child.get("LD_LIBRARY_PATH", "")
    if inherited:
        child["LD_LIBRARY_PATH"] = f"{lib_dir}:{inherited}"
    else:
        child["LD_LIBRARY_PATH"] = lib_dir
    child["SMOLBUILDER_BROWSER_PREFIX"] = str(_BROWSER_PREFIX)
    return child
53
+
54
+
55
@functools.lru_cache(maxsize=1)
def _active_runner() -> Path | None:
    """Return the first runner script whose browser launches, or None.

    Each existing script in ``_RUNNERS`` is tried in order against a one-button
    probe page with a 45-second budget. The answer is cached by ``lru_cache``,
    so the probe happens once per process.
    """
    probe_page = "<!doctype html><html><body><button>probe</button></body></html>"
    # Lazy filter: exists() is checked per script, just before that script is probed.
    present = (script for script in _RUNNERS if script.exists())
    for script in present:
        verdict, _ = _invoke(probe_page, 45, script)
        if verdict is not None:
            return script
    return None
67
+
68
+
69
def available() -> bool:
    """Report whether a real-browser runner was found by ``_active_runner``."""
    runner = _active_runner()
    return runner is not None
72
+
73
+
74
def check_html(html: str, timeout: int = 35) -> tuple[bool | None, list[str]]:
    """Check ``html`` in a real browser, falling back to jsdom.

    Args:
        html: The self-contained document to verify.
        timeout: Seconds allowed for the browser run; the jsdom fallback is
            capped at 20 seconds.

    Returns:
        ``(True, [])`` or ``(False, errors)`` from whichever checker ran, or
        ``(None, [reason])`` when neither checker is available.
    """
    runner = _active_runner()
    if runner is not None:
        verdict, problems = _invoke(html, timeout, runner)
        if verdict is not None:
            return verdict, problems
    # The browser was missing or could not run this time: try jsdom next.
    if not webcheck.available():
        return None, ["no runtime checker available (browser + jsdom both missing)"]
    return webcheck.check_html(html, timeout=min(timeout, 20))
84
+
85
+
86
def _invoke(html: str, timeout: int, runner: Path) -> tuple[bool | None, list[str]]:
    """Run one browser runner against ``html`` and parse its JSON verdict.

    Args:
        html: Document to check; written to a temporary ``.html`` file.
        timeout: Seconds allowed for the runner subprocess.
        runner: Path of the runner script, launched with the current interpreter.

    Returns:
        ``(ok, errors)`` from the last stdout line. ``(None, [])`` means the
        runner could not produce a verdict: it timed out, exited with code 3,
        printed nothing, printed something that is not a JSON object, or
        reported ``ok`` as null.
    """
    path: str | None = None
    try:
        # UTF-8 explicitly: the runner reads the file as UTF-8, and the locale
        # default may not be able to encode the app's text.
        with tempfile.NamedTemporaryFile(
                "w", suffix=".html", delete=False, encoding="utf-8") as f:
            path = f.name
            f.write(html)
        proc = subprocess.run(
            [sys.executable, str(runner), path],
            capture_output=True, text=True, timeout=timeout, env=_child_env())
    except subprocess.TimeoutExpired:
        return None, []
    finally:
        # Runs even if the write failed, so the delete=False file never leaks.
        if path is not None:
            Path(path).unlink(missing_ok=True)

    if proc.returncode == 3:
        return None, []
    lines = (proc.stdout or "").strip().splitlines()
    if not lines:
        return None, []
    try:
        data = json.loads(lines[-1])
    except json.JSONDecodeError:
        return None, []
    # Valid JSON that is not an object (or has no verdict) is not a result.
    if not isinstance(data, dict) or data.get("ok") is None:
        return None, []
    return bool(data.get("ok")), list(data.get("errors") or [])
engine/builder.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """smolbuilder — a Lovable/Replit-style web-app builder on a tiny local model.
2
+
3
+ Where `Router` (engine/router.py) answers one coding *task* per call with a
4
+ fresh workspace, `WebBuilder` is a **stateful session**: you describe a web app,
5
+ the agent builds a self-contained `index.html`, and then you keep talking to it
6
+ ("make it dark mode", "add a reset button") and it edits the *same* workspace.
7
+
8
+ First build uses the router's escalation idea — start small, and if the tiny
9
+ model can't produce a usable app, retry on the next-bigger model — but once a
10
+ tier succeeds we **lock onto that agent and its workspace** so every later turn
11
+ is a cheap incremental edit rather than a from-scratch rebuild.
12
+
13
+ The build is verified by rendering: did the agent leave a non-trivial HTML
14
+ entrypoint behind? Static apps have no `run_python` signal, so "it produced an
15
+ app you can preview" is the success criterion the UI also relies on.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from collections.abc import AsyncIterator
20
+ from dataclasses import dataclass, field
21
+
22
+ from .agent import SmallCodeAgent, Step
23
+ from .config import Preset, Tier, load_preset
24
+ from .live_run import LiveFrame
25
+ from .preview import find_entry, inline_app, preview_iframe
26
+ from .router import classify_tier
27
+ from .sandbox import Workspace
28
+ from .tools import build_web_registry
29
+ from .trace_collector import TraceEvent
30
+ from .ui_trace import merge_step_metadata
31
+ from . import browsercheck
32
+
33
+ BUILD_SYSTEM_PROMPT = """You are smolbuilder, a web app builder running on a small local model.
34
+
35
+ You build small, self-contained web apps that run directly in a browser — like a tiny Lovable or Replit.
36
+
37
+ Your workspace tools:
38
+ - write_file(path, content): create or overwrite a file.
39
+ - read_file(path): read a file back.
40
+ - list_files(): see what already exists.
41
+ - check_app(): run the current app in a headless browser — load index.html, execute its JavaScript, click every button — and report any errors.
42
+
43
+ Hard rules:
44
+ 1. The app's entrypoint is ALWAYS a single file named index.html, and it must start with <!doctype html><html> and include <head> and <body>.
45
+ 2. Put the CSS in a <style> tag and the JavaScript in a <script> tag INSIDE index.html. Prefer one self-contained file — it must run with no build step and no server.
46
+ 3. Put the <script> tag at the very END of <body>, AFTER the elements it uses (or wrap your code in window.addEventListener('DOMContentLoaded', ...)). If a script runs before its elements exist, document.getElementById returns null and every button silently breaks.
47
+ 4. Every button or interactive control must have a working handler that you actually wire up. Define functions before they are referenced.
48
+ 5. Vanilla HTML/CSS/JS only. Do not require a framework, npm, or a backend. You may load a library from a CDN with a full https:// URL only if it is truly needed.
49
+ 6. Make it look good by default: sensible layout, spacing, a coherent color palette, readable type. Mobile-friendly.
50
+
51
+ Method — follow it every time:
52
+ 1. Write a complete index.html in one write_file call.
53
+ 2. Call check_app() to test it.
54
+ 3. If check_app reports errors, read them, fix index.html (write the FULL file again), and call check_app again. Repeat until it reports ok.
55
+ 4. To CHANGE an existing app, write the FULL updated index.html (never a partial file — keep everything that already worked), then check_app again.
56
+
57
+ Only finish once check_app reports the app works. Then reply with one short sentence describing what the app does. Do not paste the code in your reply.
58
+ """
59
+
60
+ # Minimum entrypoint size (chars) to count as "a real app" and not a stub.
61
+ _MIN_APP_CHARS = 60
62
+
63
+
64
@dataclass
class BuildResult:
    """Outcome of one build or edit turn, as handed to the UI."""

    # Final reply text for the chat.
    final: str
    # Agent steps taken during the turn.
    steps: list[Step]
    # Workspace files after the turn, keyed by path.
    files: dict[str, str]
    # Preview iframe markup for the current files.
    preview_html: str
    # Entrypoint path found in `files`, or None.
    entry: str | None
    # Name and model tag of the tier that ran the turn.
    tier_name: str
    tier_model: str
    # Name of the tier the turn started on.
    start_tier: str
    # Number of tier escalations before this result.
    escalations: int
    # Whether the result passed evaluation.
    verified: bool
    # Turn counter of the session when this result was produced.
    turn: int = 0
    trace_events: list[TraceEvent] = field(default_factory=list)
    agent: SmallCodeAgent | None = None

    @property
    def app_html(self) -> str:
        """The self-contained document — for the 'download app' button."""
        document = inline_app(self.files)
        return document
84
+
85
+
86
def _evaluate(agent: SmallCodeAgent) -> tuple[bool, str | None, dict[str, str]]:
    """Decide whether the agent left a usable app in its workspace.

    The check is structural first: there must be an entrypoint whose stripped
    content is at least ``_MIN_APP_CHARS`` long. An HTML entrypoint is then
    run through ``browsercheck.check_html``. Only an explicit ``False``
    verdict fails the build; an unverifiable ``None`` verdict passes.

    Returns:
        ``(ok, entry, files)``, where ``entry`` may be None.
    """
    files = agent.files()
    entry = find_entry(files)
    has_real_entry = entry is not None and len(files[entry].strip()) >= _MIN_APP_CHARS
    if not has_real_entry:
        return False, entry, files

    is_html_entry = entry.lower().endswith((".html", ".htm"))
    if is_html_entry:
        verdict, _ = browsercheck.check_html(inline_app(files))
        if verdict is False:
            return False, entry, files
    return True, entry, files
101
+
102
+
103
class WebBuilder:
    """A persistent build session. One instance per browser session (gr.State).

    The first message builds an app, escalating through the preset's tiers;
    later messages edit the same workspace on the tier that built it.
    """

    def __init__(self, preset: Preset | None = None, max_steps: int = 16,
                 preview_height: int = 540) -> None:
        """Set up an empty session.

        Args:
            preset: Backend preset; ``load_preset()`` is used when falsy.
            max_steps: Step budget given to every agent.
            preview_height: Height passed to ``preview_iframe``.
        """
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.preview_height = preview_height
        # The workspace (the built app on disk) persists across turns; the tier
        # that built it is remembered so edits stay on the same model. A spent
        # LiteForge agent can't be re-run, so each turn gets a fresh agent over
        # this same workspace.
        self.workspace: Workspace | None = None
        self.tier_idx = 0
        self.turn = 0
        self.think = "off"
        self.yolo = False

    @property
    def has_app(self) -> bool:
        """True once a first build has produced a workspace to iterate on."""
        return self.workspace is not None

    # --- public API ------------------------------------------------------
    async def send(self, message: str) -> BuildResult:
        """Run one turn to completion and return its ``BuildResult``.

        Drains ``send_live`` and keeps the result of the last done frame that
        carries a ``BuildResult``.
        """
        outcome: BuildResult | None = None
        async for frame in self.send_live(message):
            if frame.done and isinstance(frame.result, BuildResult):
                outcome = frame.result
        assert outcome is not None
        return outcome

    async def send_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Yield live frames while building (no workspace yet) or editing."""
        self.turn += 1
        if self.workspace is None:
            stream = self._first_build_live(message)
        else:
            stream = self._iterate_live(message)
        async for frame in stream:
            yield frame

    def reset(self) -> None:
        """Drop the current app and start a fresh session."""
        self.cleanup()
        self.workspace = None
        self.tier_idx = 0
        self.turn = 0

    def cleanup(self) -> None:
        """Clean up the workspace, if there is one."""
        if self.workspace is None:
            return
        self.workspace.cleanup()

    def empty_preview(self) -> str:
        """Preview markup for a session with no files."""
        return preview_iframe({}, height=self.preview_height)

    # --- internals -------------------------------------------------------
    def _new_agent(self, tier: Tier, workspace: Workspace | None = None) -> SmallCodeAgent:
        """Create a web-profile build agent on ``tier``'s model, optionally over
        an existing workspace."""
        return SmallCodeAgent(
            preset=self.preset,
            model=tier.model,
            max_steps=self.max_steps,
            system_prompt=BUILD_SYSTEM_PROMPT,
            registry_builder=build_web_registry,
            workspace=workspace,
            name="smolbuilder",
            agent="build",
            profile="web",
        )

    async def _first_build_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Escalate the model ladder until one produces a previewable app.

        Starts at the tier chosen by ``classify_tier``. A tier's result is
        accepted when it verifies or when it is the last tier; the session
        then locks onto that agent's workspace and tier index.
        """
        start = classify_tier(message, len(self.tiers))
        task = f"Build this web app as a self-contained index.html:\n\n{message}"
        escalations = 0
        last: BuildResult | None = None
        prev_tier_name: str | None = None
        top_idx = len(self.tiers) - 1

        for idx in range(start, len(self.tiers)):
            tier = self.tiers[idx]
            if prev_tier_name is not None:
                # Announce the escalation before the bigger model starts.
                notice = TraceEvent(kind="tier_escalation", name=tier.name,
                                    detail=f"escalated from {prev_tier_name}")
                yield LiveFrame(events=[notice])

            agent = self._new_agent(tier)
            async for frame in agent.run_live_turn(
                task, think=self.think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok, entry, files = _evaluate(agent)
                # A run that hit the step budget or errored is never verified.
                ok = ok and not (agent.hit_max_steps or agent.errored)
                last = self._result(agent, final, steps, files, entry, tier,
                                    self.tiers[start].name, escalations, ok)
                if ok or idx == top_idx:
                    self.workspace = agent.workspace
                    self.tier_idx = idx
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return

            # This tier was not accepted: record the hop, discard its agent, move up.
            if idx < top_idx:
                agent.trace_collector.record_escalation(tier.name, self.tiers[idx + 1].name)
            agent.cleanup()
            escalations += 1
            prev_tier_name = tier.name

        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )

    async def _iterate_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Edit the existing app on the locked tier with a fresh agent.

        The current ``index.html`` (empty if it cannot be read) is embedded in
        the task so the model rewrites the whole file.
        """
        tier = self.tiers[self.tier_idx]
        agent = self._new_agent(tier, self.workspace)
        current = self.workspace.read_file("index.html")
        body = current["content"] if current.get("ok") else ""
        task = (
            "You are editing an existing web app. Here is the current "
            "index.html:\n\n```html\n" + body + "\n```\n\n"
            "Apply the change below, then save the COMPLETE updated file with a "
            "single write_file(\"index.html\", <full new contents>). Keep "
            "everything that already works and output the whole file, never a "
            "fragment.\n\nChange to make: " + message
        )
        async for frame in agent.run_live_turn(
            task, think=self.think, yolo=self.yolo,
        ):
            if not frame.done:
                yield frame
                continue
            final, steps = frame.result
            ok, entry, files = _evaluate(agent)
            ok = ok and not (agent.hit_max_steps or agent.errored)
            result = self._result(agent, final, steps, files, entry, tier, tier.name, 0, ok)
            yield LiveFrame(
                steps=steps,
                events=result.trace_events,
                files=result.files,
                done=True,
                result=result,
            )

    def _result(self, agent: SmallCodeAgent, final, steps, files, entry, tier, start_name,
                escalations, verified) -> BuildResult:
        """Package one turn's outcome as a ``BuildResult``."""
        # Small models sometimes write the file but return an empty answer; give
        # the chat something sensible rather than a blank bubble.
        if not (final or "").strip():
            if verified:
                final = "✅ Done: check the live preview."
            else:
                final = "I made an attempt; have a look and tell me what to fix."
        events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
        preview = preview_iframe(files, height=self.preview_height)
        return BuildResult(
            final=final,
            steps=steps,
            files=files,
            preview_html=preview,
            entry=entry,
            tier_name=tier.name,
            tier_model=tier.model,
            start_tier=start_name,
            escalations=escalations,
            verified=bool(verified),
            turn=self.turn,
            trace_events=events,
            agent=agent,
        )
engine/config.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backend presets for smolcode.
2
+
3
+ smolcode always talks to ONE OpenAI-compatible endpoint. A "preset" just
4
+ selects the base_url and the model *tiers* the router may escalate through.
5
+ Everything is overridable by environment variables so the same code runs on a
6
+ laptop, inside an HF Space, or against the hal-9000 "home supercomputer".
7
+
8
+ Env overrides (highest priority):
9
+ SMALLCODE_PRESET space | laptop | hal | hal-smol | hal-matrix (default: hal)
10
+ SMALLCODE_BASE_URL OpenAI-compatible /v1 URL
11
+ SMALLCODE_API_KEY bearer token (most local servers ignore it)
12
+ SMALLCODE_MODEL force a single model (disables tiering)
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ import re
18
+ from dataclasses import dataclass, field
19
+
20
+
21
@dataclass(frozen=True)
class Tier:
    """A single rung of the model ladder."""

    # Label the router shows in the UI (e.g. "3B").
    name: str
    # Model tag requested from the endpoint.
    model: str
26
+
27
+
28
@dataclass(frozen=True)
class Preset:
    """One backend: an OpenAI-compatible endpoint plus its model ladder."""

    key: str
    base_url: str
    api_key: str
    # Ordered cheap -> expensive. The router starts at tiers[0] and escalates.
    tiers: list[Tier] = field(default_factory=list)

    @property
    def default_model(self) -> str:
        """Model tag of the first (cheapest) tier."""
        first_tier = self.tiers[0]
        return first_tier.model
39
+
40
+
41
@dataclass(frozen=True)
class SpecialistLadder:
    """The size ladder of one specialist family, built from ``Tier`` rungs."""

    # Specialty key this ladder belongs to.
    specialty: str
    # Rungs ordered cheap -> expensive.
    tiers: list[Tier] = field(default_factory=list)
46
+
47
+
48
@dataclass(frozen=True)
class SpecialistPreset(Preset):
    """A Preset with a two-dimensional escalation space: specialty -> size ladder.

    It subclasses Preset so readers of .base_url/.api_key/.tiers/.default_model
    keep working. The inherited `tiers` is the GENERIC fallback ladder, and
    `ladders` holds the per-specialty rungs.
    """

    ladders: dict[str, SpecialistLadder] = field(default_factory=dict)

    def ladder_for(self, specialty: str) -> SpecialistLadder:
        """Return the ladder registered for ``specialty``.

        If the specialty is unknown or its ladder has no tiers, a ladder named
        "general" over the inherited generic ``tiers`` is returned instead.
        """
        found = self.ladders.get(specialty)
        if not found or not found.tiers:
            return SpecialistLadder(specialty="general", tiers=self.tiers)
        return found
64
+
65
+
66
+ # Local Ollama on the workstation exposes an OpenAI-compatible API at :11435/v1.
67
+ # NOTE: the default model is a tool-TUNED 3B (granite4.1:3b), not a coder model.
68
+ # Tiny coder models (qwen2.5-coder:3b) text-emit ```json instead of native
69
+ # `tool_calls`, which LiteForge's agent loop can't execute. Granite-3B (also
70
+ # <=4B, Tiny-Titan-eligible) emits native tool_calls. The dual-mode parser
71
+ # (P1) will let qwen-coder back in for code quality.
72
+ _LAPTOP = Preset(
73
+ key="laptop",
74
+ base_url="http://localhost:11435/v1",
75
+ api_key="ollama",
76
+ tiers=[Tier("3B", "granite4.1:3b")],
77
+ )
78
+
79
+ # The submission Space: a single tiny model served by llama.cpp's llama-server.
80
+ # Kept to one <=4B model so the Tiny Titan claim is unambiguous.
81
+ # Port is configurable: 8080 inside the Space, but on the workstation 8080 is
82
+ # taken by Guacamole/Tomcat so local dev uses SMALLCODE_LLAMA_PORT=8088.
83
+ # llama-server ignores the model name and serves whatever GGUF was loaded.
84
+ _LLAMA_PORT = os.environ.get("SMALLCODE_LLAMA_PORT", "8080")
85
+ _SPACE = Preset(
86
+ key="space",
87
+ base_url=f"http://127.0.0.1:{_LLAMA_PORT}/v1",
88
+ api_key="local",
89
+ tiers=[Tier("3B", "qwen2.5-coder-3b-instruct-q4_k_m.gguf")],
90
+ )
91
+
92
+ # hal-9000 (DGX Spark): full tiered router. Points straight at hal's Ollama
93
+ # (:11434/v1), which serves every pulled model over one OpenAI-compatible
94
+ # endpoint with native tool_calls — simpler than LiteLLM (whose :4000 exposed no
95
+ # models). Tiny tier is a TOOL-TUNED model (granite4.1:3b) that reliably drives
96
+ # the loop; escalate to bigger Qwen *coder* models for hard codegen. (Tiny coder
97
+ # models can't native-tool-call — see engine/config laptop note.)
98
+ _HAL = Preset(
99
+ key="hal",
100
+ base_url="http://10.8.0.6:11434/v1",
101
+ api_key=os.environ.get("SMALLCODE_API_KEY", "ollama"),
102
+ # All-Granite ladder: every tier emits native tool_calls on Ollama (verified
103
+ # on hal), all <=32B. NOTE: qwen2.5-coder does NOT native-tool-call on Ollama
104
+ # at ANY size (3b/14b text-emit the call) — bringing the Qwen *coder* models
105
+ # in (for the benchmark story) requires the dual-mode parser (see task 6).
106
+ tiers=[
107
+ Tier("3B", "granite4.1:3b"),
108
+ Tier("8B", "granite4.1:8b"),
109
+ Tier("30B", "granite4.1:30b"),
110
+ ],
111
+ )
112
+
113
+ # hal-9000 with the fine-tuned coder as the entry tier. The finetune/ pipeline
114
+ # trains Qwen2.5-Coder-1.5B to emit native <tool_call> (see finetune/README.md),
115
+ # so once it's served on hal's Ollama it can be the cheap first rung and we only
116
+ # escalate to Granite on verification failure. The served tag is configurable via
117
+ # SMALLCODE_SMOL_MODEL (default matches the published model name); import the GGUF
118
+ # into Ollama under that tag, or point SMALLCODE_BASE_URL at a llama-server.
119
+ _SMOL_MODEL = os.environ.get("SMALLCODE_SMOL_MODEL", "smolcode-coder-1.5b:tools")
120
+ _HAL_SMOL = Preset(
121
+ key="hal-smol",
122
+ base_url="http://10.8.0.6:11434/v1",
123
+ api_key=os.environ.get("SMALLCODE_API_KEY", "ollama"),
124
+ tiers=[
125
+ Tier("1.5B-tuned", _SMOL_MODEL),
126
+ Tier("8B", "granite4.1:8b"),
127
+ Tier("30B", "granite4.1:30b"),
128
+ ],
129
+ )
130
+
131
+ # --- the 2D specialist matrix (hal-matrix preset) ----------------------------
132
+ # A model per language/function (smolcode-coder-{specialty}-{size}:tools), served
133
+ # on hal's Ollama. The router classifies the task's specialty, picks that family's
134
+ # size ladder, and escalates within it — then into the generic Granite ladder at
135
+ # the top. Tags are derived by CONVENTION + served-tag discovery, so adding a
136
+ # specialist is a serving action, not a code edit.
137
+
138
+ _SPECIALIST_SIZES = ("1.5b", "3b", "7b") # 7b deferred but recognized if served.
139
+ _SPECIALTIES = ("py", "js", "bash", "git", "dotnet", "csharp", "java",
140
+ "powershell", "rust", "docker", "bsd", "go", "sql", "cpp", "terraform",
141
+ "orchestrate") # task_batch / parallel fan-out specialist
142
+
143
+ # Pattern is overridable so one env var can repoint the whole matrix. Back-compat:
144
+ # a value WITHOUT a "{specialty}" placeholder is treated as a legacy single tag.
145
+ _SMOL_PATTERN = os.environ.get("SMALLCODE_SMOL_MODEL",
146
+ "smolcode-coder-{specialty}-{size}:tools")
147
+
148
+ # Size parsing + specialty detection — shared by the model picker (Tiny-Titan <=32B
149
+ # display filter, collapsing the 16-per-size specialty fine-tunes to one "Auto" entry
150
+ # per size). Mirrors smolcode-cli/src/router.rs parse_size_b and the size_b() regex in
151
+ # tests/test_matrix_routing.py.
152
+ _SIZE_RE = re.compile(r"(\d+(?:\.\d+)?)b\b", re.I)
153
+
154
+
155
def parse_size_b(model: str) -> float:
    """Read the parameter count, in billions, out of a model tag.

    The last '<n>b' group in the tag wins: 'granite4.1:30b' -> 30.0 and
    'smolcode-coder-py-1.5b:tools' -> 1.5. A tag with no such group, or an
    empty/None tag, gives 0.0, so size-unknown models pass a '<=32B' filter
    rather than being hidden.
    """
    sizes = _SIZE_RE.findall(model or "")
    if not sizes:
        return 0.0
    return float(sizes[-1])
162
+
163
+
164
def is_specialty_model(model: str) -> bool:
    """Tell whether a tag is a per-specialty fine-tune.

    True when the lower-cased tag starts with ``smolcode-coder-<specialty>-``
    for any entry of ``_SPECIALTIES``. An empty or None tag is False.
    """
    tag = (model or "").lower()
    prefixes = tuple(f"smolcode-coder-{s}-" for s in _SPECIALTIES)
    return tag.startswith(prefixes)
168
+
169
+
170
def specialist_sizes(preset: "Preset") -> list[str]:
    """List the distinct specialist sizes (<=32B) in a preset's ladders.

    Sizes come back smallest first (e.g. ['1.5b', '3b']). A preset without a
    ``ladders`` attribute, or with empty ladders, gives an empty list.
    """
    labels: dict[float, str] = {}
    ladders = getattr(preset, "ladders", {}) or {}
    for ladder in ladders.values():
        for tier in ladder.tiers:
            if not is_specialty_model(tier.model):
                continue
            billions = parse_size_b(tier.model)
            if not (0 < billions <= 32):
                continue
            # The first tag seen for a size supplies its label.
            if billions not in labels:
                labels[billions] = f"{_SIZE_RE.findall(tier.model)[-1]}b"
    return [labels[size] for size in sorted(labels)]
181
+
182
+ # Generic Granite ladder every specialist escalates INTO at its top rung (all <=32B).
183
+ _GENERIC_TIERS = [Tier("8B", "granite4.1:8b"), Tier("30B", "granite4.1:30b")]
184
+
185
+ # Static fallback set of served tags when /v1/models discovery is unavailable.
186
+ # Keep in sync with what's pulled on hal; discovery (below) supersedes it.
187
+ _HAL_SERVED: set[str] = {f"smolcode-coder-{s}-1.5b:tools" for s in _SPECIALTIES} | \
188
+ {f"smolcode-coder-{s}-3b:tools" for s in _SPECIALTIES}
189
+
190
+ _DISCOVERY_CACHE: dict[str, set[str]] = {}
191
+
192
+
193
def _discover_served(base_url: str, api_key: str) -> set[str]:
    """Fetch the set of served model tags from ``<base_url>/models``.

    The lookup runs at most once per ``base_url``: the outcome, including an
    empty set produced by a failure, is stored in ``_DISCOVERY_CACHE`` and
    returned on later calls. Any exception (network, HTTP, JSON, unexpected
    payload shape) results in an empty set, which callers treat as "fall back
    to the static list".
    """
    try:
        return _DISCOVERY_CACHE[base_url]
    except KeyError:
        pass  # not looked up yet

    found: set[str] = set()
    try:
        import json
        import urllib.request

        endpoint = base_url.rstrip("/") + "/models"
        request = urllib.request.Request(
            endpoint, headers={"Authorization": f"Bearer {api_key}"}
        )
        # Short timeout: discovery must not stall start-up.
        with urllib.request.urlopen(request, timeout=2) as response:
            payload = json.loads(response.read())
        found = {entry["id"] for entry in payload.get("data", []) if "id" in entry}
    except Exception:
        found = set()
    _DISCOVERY_CACHE[base_url] = found
    return found
211
+
212
+
213
def _build_ladder(specialty: str, served: set[str]) -> SpecialistLadder:
    """Assemble the escalation ladder for one specialty.

    For each size in ``_SPECIALIST_SIZES`` (in that order) the tag is derived
    from ``_SMOL_PATTERN``; sizes whose tag is not in ``served`` are skipped.
    The generic tiers are always appended last, so a specialty with nothing
    served gets only the generic tiers. When the pattern has no
    ``{specialty}`` placeholder no specialist tiers are generated at all.
    """
    rungs: list[Tier] = []
    if "{specialty}" in _SMOL_PATTERN:
        candidates = (
            (size, _SMOL_PATTERN.format(specialty=specialty, size=size))
            for size in _SPECIALIST_SIZES
        )
        rungs = [
            Tier(f"{size}-{specialty}", tag)
            for size, tag in candidates
            if tag in served
        ]
    rungs += _GENERIC_TIERS
    return SpecialistLadder(specialty=specialty, tiers=rungs)
225
+
226
+
227
+ _HAL_MATRIX = SpecialistPreset(
228
+ key="hal-matrix",
229
+ base_url="http://10.8.0.6:11434/v1",
230
+ api_key=os.environ.get("SMALLCODE_API_KEY", "ollama"),
231
+ tiers=_GENERIC_TIERS, # generic fallback ladder (inherited Preset.tiers)
232
+ ladders={}, # built lazily in load_preset (needs the resolved base_url)
233
+ )
234
+
235
+ _PRESETS = {p.key: p for p in (_LAPTOP, _SPACE, _HAL, _HAL_SMOL, _HAL_MATRIX)}
236
+
237
+
238
def default_ui_model(preset: Preset, cfg: dict) -> str:
    """Pick the model the web UI should preselect.

    A truthy ``cfg["model"]`` wins (returned as ``str``). Otherwise the
    preset's ``default_model`` is used when the preset has tiers, and the
    empty string when it has none.
    """
    configured = cfg.get("model")
    if configured:
        return str(configured)
    return preset.default_model if preset.tiers else ""
245
+
246
+
247
def load_preset() -> Preset:
    """Resolve the active preset from environment variables and config.toml.

    Precedence, highest first:
      1. ``SMALLCODE_MODEL`` (env): hard single-model override.
      2. A specialist (matrix) preset: ladders are rebuilt from the discovered
         served tags, or from ``_HAL_SERVED`` when discovery returns nothing.
      3. ``model`` from the Rust config, but only when it differs from the
         preset's own default model; then it becomes a single "custom" tier.
      4. The preset's own tiers.

    ``SMALLCODE_PRESET`` selects the base preset (default "hal-matrix"; an
    unknown key falls back to ``_LAPTOP``). ``SMALLCODE_BASE_URL`` and
    ``SMALLCODE_API_KEY`` override the endpoint and key.
    """
    env = os.environ
    preset_key = env.get("SMALLCODE_PRESET", "hal-matrix").lower()
    base = _PRESETS.get(preset_key, _LAPTOP)

    # The Rust-side config is optional; any failure just means "no config".
    rust_cfg: dict = {}
    try:
        from .rust_session import load_rust_config
        rust_cfg = load_rust_config()
    except Exception:
        pass

    base_url = env.get("SMALLCODE_BASE_URL", rust_cfg.get("base_url", base.base_url))
    api_key = env.get("SMALLCODE_API_KEY", base.api_key)

    # 1. Explicit env model beats everything, including the matrix.
    env_model = env.get("SMALLCODE_MODEL")
    if env_model:
        return Preset(key=base.key, base_url=base_url, api_key=api_key,
                      tiers=[Tier("custom", env_model)])

    # 2. Matrix preset: a config.toml `model` must not disable it.
    if isinstance(base, SpecialistPreset):
        served = _discover_served(base_url, api_key) or _HAL_SERVED
        return SpecialistPreset(
            key=base.key,
            base_url=base_url,
            api_key=api_key,
            tiers=_GENERIC_TIERS,
            ladders={name: _build_ladder(name, served) for name in _SPECIALTIES},
        )

    # 3. A config.toml model that merely names the preset's entry tier keeps the
    #    full ladder; anything else is taken as a deliberate single-model choice.
    configured = rust_cfg.get("model")
    if configured and base.tiers and configured != base.default_model:
        return Preset(key=base.key, base_url=base_url, api_key=api_key,
                      tiers=[Tier("custom", configured)])

    # 4. Plain preset with endpoint/key overrides applied.
    return Preset(key=base.key, base_url=base_url, api_key=api_key, tiers=base.tiers)
engine/fanout.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Parallel sub-agent fan-out for the Python engine (mirror of the Rust CLI's
2
+ `task_batch`).
3
+
4
+ Where the Router runs ONE task through a tier ladder, fan-out runs MANY independent
5
+ tasks at once: each gets its own SmallCodeAgent + fresh Workspace and they run
6
+ concurrently via asyncio.gather, bounded so local inference isn't oversubscribed.
7
+ Use it for independent work — exploring/solving several things in parallel — when
8
+ each subtask doesn't depend on the others' output.
9
+
10
+ Cheap when each agent is a small local (e.g. the fine-tuned 1.5B) model: wall-clock
11
+ is ~the slowest job, not the sum.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ from collections.abc import AsyncIterator
17
+ from dataclasses import dataclass, field
18
+
19
+ from .agent import SmallCodeAgent, Step
20
+ from .config import Preset, load_preset
21
+ from .live_run import LiveFrame
22
+ from .router import _verify
23
+ from .trace_collector import TraceEvent
24
+ from .ui_trace import merge_step_metadata
25
+
26
+ MAX_CONCURRENCY = 4
27
+
28
+
29
@dataclass
class FanoutResult:
    """Outcome of one fan-out job (one task run by one agent)."""

    # Position of the task in the caller's input list (0-based).
    index: int
    # The task text that was run.
    task: str
    # Final answer text returned by the agent ("" when the job raised).
    final: str
    # Steps returned by the agent run ([] when the job raised).
    steps: list[Step]
    # Model tag the job ran with.
    model: str
    # Whether the run passed verification.
    verified: bool
    # Workspace files captured after the run, keyed by path.
    files: dict[str, str] = field(default_factory=dict)
    # Error text when the job raised; None otherwise.
    error: str | None = None
    # Trace events collected for the run.
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that ran the job, when available.
    agent: SmallCodeAgent | None = None
41
+
42
+
43
async def fan_out(tasks: list[str], preset: Preset | None = None,
                  model: str | None = None, max_steps: int = 12,
                  concurrency: int = MAX_CONCURRENCY) -> list[FanoutResult]:
    """Run ``tasks`` concurrently and return their results.

    Thin non-streaming wrapper over ``fan_out_live``: it drains the live
    frames and keeps the result list carried by a ``done`` frame. Arguments
    are forwarded unchanged. Returns an empty list if no such frame arrives.
    """
    collected: list[FanoutResult] = []
    frames = fan_out_live(tasks, preset=preset, model=model,
                          max_steps=max_steps, concurrency=concurrency)
    async for frame in frames:
        if not frame.done:
            continue  # intermediate progress frame
        if isinstance(frame.result, list):
            collected = frame.result
    return collected
57
+
58
+
59
async def fan_out_live(
    tasks: list[str],
    preset: Preset | None = None,
    model: str | None = None,
    max_steps: int = 12,
    concurrency: int = MAX_CONCURRENCY,
    poll_interval: float = 0.35,
) -> AsyncIterator[LiveFrame]:
    """Yield aggregate live frames while fan-out jobs run.

    One agent is created per task; at most ``max(1, concurrency)`` jobs run at
    once. While any job is unfinished, a frame with all agents' trace events
    and workspace files is yielded every ``poll_interval`` seconds. The last
    frame has ``done=True`` and ``result`` set to the list of ``FanoutResult``
    in input order. An empty ``tasks`` list yields a single done frame.

    ``preset`` defaults to ``load_preset()`` and ``model`` to the preset's
    default model. A job that raises is reported as a ``FanoutResult`` with
    ``error`` set rather than propagating.
    """
    if not tasks:
        yield LiveFrame(done=True, result=[])
        return

    preset = preset or load_preset()
    model = model or preset.default_model
    sem = asyncio.Semaphore(max(1, concurrency))
    agents: list[SmallCodeAgent] = [
        SmallCodeAgent(preset=preset, model=model, max_steps=max_steps)
        for _ in tasks
    ]

    async def _job(index: int, task: str, agent: SmallCodeAgent) -> FanoutResult:
        # The cleanup guard wraps the semaphore wait as well, so a job that is
        # cancelled while still queued also releases its agent.
        # NOTE(review): assumes agent.cleanup() is safe on an agent that never
        # ran — confirm against SmallCodeAgent.
        try:
            async with sem:
                try:
                    final, steps = await agent.run(task)
                    ok = False if (agent.hit_max_steps or agent.errored) else _verify(agent)
                    events = merge_step_metadata(agent.trace_collector.snapshot(),
                                                 agent.raw_history())
                    return FanoutResult(
                        index=index, task=task, final=final, steps=steps, model=model,
                        verified=bool(ok), files=agent.files(), trace_events=events,
                        agent=agent,
                    )
                except Exception as e:
                    # str(e) is empty for message-less exceptions (e.g.
                    # TimeoutError()); fall back to the type name so the error
                    # is never a falsy string.
                    return FanoutResult(index=index, task=task, final="", steps=[],
                                        model=model, verified=False,
                                        error=str(e) or type(e).__name__)
        finally:
            agent.cleanup()

    job_tasks = [
        asyncio.create_task(_job(i, t, agents[i]))
        for i, t in enumerate(tasks)
    ]
    try:
        while not all(j.done() for j in job_tasks):
            # Mid-run we must NOT call current_steps()/history() on a live agent
            # (the Rust agent isn't reentrant and would deadlock). Read only the
            # trace collectors (plain lists) and workspace files (disk).
            events: list[TraceEvent] = []
            all_files: dict[str, str] = {}
            for i, agent in enumerate(agents):
                events.extend(agent.trace_collector.snapshot())
                for path, content in agent.files().items():
                    # Prefix with the 1-based job number so paths from
                    # different workspaces cannot collide.
                    all_files[f"[{i + 1}] {path}"] = content
            yield LiveFrame(steps=[], events=events, files=all_files)
            await asyncio.sleep(poll_interval)
        results = [await j for j in job_tasks]
        results.sort(key=lambda r: r.index)
        yield LiveFrame(done=True, result=results)
    finally:
        # Reached on normal completion, consumer cancellation, or early close
        # of the generator: stop whatever is still running.
        for j in job_tasks:
            if not j.done():
                j.cancel()
119
+
120
+
121
def summarize(results: list[FanoutResult]) -> str:
    """Aggregate fan-out results into one labeled summary (mirrors the Rust output).

    Each result contributes a header ``=== [n] <model> OK|unverified ===``
    followed by either ``error: <message>`` (when ``error`` is not None) or the
    stripped final answer. Trailing whitespace of the whole summary is removed.
    """
    out = [f"Ran {len(results)} subagents in parallel. Results:\n"]
    for r in results:
        status = "OK" if r.verified else "unverified"
        head = f"=== [{r.index + 1}] {r.model} {status} ==="
        if r.error is not None:
            # Test against None, not truthiness: an exception without a message
            # is stored as "" and must still be reported as a failure.
            body = f"error: {r.error or 'unknown error'}"
        else:
            body = (r.final or "").strip()
        out.append(f"{head}\n{body}\n")
    return "\n".join(out).rstrip()
engine/file_tree.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Workspace file tree with git status (Rust-backed)."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from dataclasses import dataclass
6
+
7
+ from .rust_session import (
8
+ git_status,
9
+ rust_available,
10
+ workspace_files,
11
+ workspace_tree,
12
+ )
13
+
14
+ _GIT_LINE = re.compile(r"^([ MADRCU?!]{1,2})\s+(.+)$")
15
+
16
+
17
@dataclass
class WorkspacePanel:
    """Rendered pieces of the workspace sidebar."""

    # Directory tree, already wrapped as Markdown.
    tree_md: str
    # Git status header text ("" when unavailable).
    git_md: str
    # Labels offered in the file picker.
    file_choices: list[str]
    # Markdown preview of the selected file ("" when nothing is previewed).
    preview_md: str
23
+
24
+
25
def parse_git_dirty(git_status_text: str) -> dict[str, str]:
    """Build a ``path -> one-character status marker`` map from git status text.

    Lines that do not match ``_GIT_LINE`` are ignored. For rename-style
    entries (``old -> new``) the marker is attached to the new path. When the
    status field holds two characters the last one is used; "?" is the
    fallback for an empty status.
    """
    result: dict[str, str] = {}
    for raw in git_status_text.splitlines():
        hit = _GIT_LINE.match(raw.strip())
        if hit is None:
            continue
        code = hit.group(1).strip()
        target = hit.group(2).strip()
        if " -> " in target:
            target = target.split(" -> ")[-1].strip()
        compact = code.replace(" ", "")
        result[target] = compact[-1] if compact else "?"
    return result
38
+
39
+
40
def _preview_md(path: str, content: str) -> str:
    """Render a file as a Markdown preview: bold path, then a fenced code block.

    ``.py`` files are tagged ``python``; other files get no language tag. The
    fence is at least three backticks and always one longer than the longest
    backtick run in ``content``, so content that itself contains fenced blocks
    (e.g. a Markdown file) cannot close the preview fence early.
    """
    lang = "python" if path.endswith(".py") else ""
    longest_run = max((len(run) for run in re.findall(r"`+", content)), default=0)
    fence = "`" * max(3, longest_run + 1)
    return f"**`{path}`**\n{fence}{lang}\n{content}\n{fence}"
43
+
44
+
45
def build_workspace_panel(
    workspace: str,
    selected: str | None = None,
    *,
    depth: int = 3,
    files: dict[str, str] | None = None,
) -> WorkspacePanel:
    """Assemble the sidebar panel: git header, tree, picker labels, preview.

    Args:
        workspace: workspace directory passed to the Rust-backed helpers.
        selected: picker label of the file to preview; a leading
            ``"<marker> "`` status prefix is stripped before lookup.
        depth: tree depth forwarded to ``workspace_tree``.
        files: path -> content map; loaded via ``workspace_files`` when None.

    Returns a placeholder panel when the Rust core is not available.
    """
    if not rust_available():
        return WorkspacePanel(
            tree_md="_smolcode_core not installed_",
            git_md="",
            file_choices=[],
            preview_md="",
        )

    git_text = git_status(workspace)
    header = git_text.splitlines()[:6]  # keep the header short
    git_md = "\n".join(header) if header else "_not a git repository_"

    tree_md = f"```\n{workspace_tree(workspace, depth=depth)}\n```"

    if files is None:
        files = workspace_files(workspace)

    dirty = parse_git_dirty(git_text)

    def _label(path: str) -> str:
        # Dirty files are shown as "<marker> <path>".
        marker = dirty.get(path, "")
        return f"{marker} {path}" if marker else path

    file_choices = [_label(path) for path in sorted(files)]

    preview_md = ""
    if selected:
        has_marker = (
            len(selected) > 2
            and selected[1] == " "
            and selected[0] in "MADRCU?!"
        )
        clean = selected[2:] if has_marker else selected
        content = files.get(clean, "")
        if content:
            preview_md = _preview_md(clean, content)

    return WorkspacePanel(
        tree_md=tree_md,
        git_md=git_md,
        file_choices=file_choices,
        preview_md=preview_md,
    )
engine/gradio_shell.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared Gradio UI helpers for app.py and smolbuilder.py (CLI parity)."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import os
6
+ from dataclasses import dataclass, field
7
+
8
+ import gradio as gr
9
+
10
+ from .rust_session import (
11
+ RustSession,
12
+ expand_command,
13
+ expand_skill,
14
+ export_transcript,
15
+ list_background_jobs,
16
+ list_commands,
17
+ list_mcp,
18
+ list_rules,
19
+ list_skills,
20
+ render_config,
21
+ session_timeline,
22
+ write_agents_md,
23
+ )
24
+
25
+
26
@dataclass
class UiSettings:
    """Per-session settings chosen in the web UI."""

    # Workspace directory the session operates in.
    workspace: str = "."
    # Model tag; empty string means no explicit choice.
    model: str = ""
    # Agent name.
    agent: str = "build"
    # One of: normal | auto | plan
    mode: str = "normal"
    # Think level.
    think: str = "off"
    # Auto-approve tool calls.
    yolo: bool = False
    # Fan-out toggle.
    fan_out: bool = False
35
+
36
+
37
@dataclass
class ApprovalState:
    """Hand-off between an agent awaiting approval and the UI that answers.

    ``ask`` publishes a description and polls until ``approve``/``deny``
    stores an answer; both fields are reset once the answer is consumed.
    """

    # Description of the action awaiting approval; None when idle.
    pending_desc: str | None = None
    # Answer supplied by the UI; None while undecided.
    result: bool | None = None

    async def ask(self, desc: str) -> bool:
        """Wait for a decision on ``desc``; return True if approved."""
        self.pending_desc, self.result = desc, None
        while True:
            if self.result is not None:
                break
            await asyncio.sleep(0.15)
        verdict = bool(self.result)
        self.pending_desc = None
        self.result = None
        return verdict

    def approve(self, yes: bool = True) -> None:
        """Record the decision (approve by default)."""
        self.result = yes

    def deny(self) -> None:
        """Record a refusal."""
        self.approve(yes=False)
57
+
58
+
59
@dataclass
class AppSessionState:
    """Gradio gr.State payload for session + settings."""

    # Active Rust-backed session; None until one exists.
    rust: RustSession | None = None
    # Current UI settings (fresh defaults per instance).
    settings: UiSettings = field(default_factory=UiSettings)
    # Pending tool-approval hand-off (fresh per instance).
    approval: ApprovalState = field(default_factory=ApprovalState)
    # Status line text.
    status_msg: str = ""
    # Last background-jobs listing.
    bg_jobs: str = ""
67
+
68
+
69
@dataclass
class SlashResult:
    """What a slash command asks the UI to do; every field defaults to a no-op."""

    # Markdown reply to show in the transcript.
    reply: str = ""
    # Task text to hand to the agent, if the command expands into one.
    queued_task: str | None = None
    # Clear the chat transcript.
    clear_chat: bool = False
    # File path to offer as a download.
    download_path: str | None = None
    # Sidebar toggles.
    toggle_sidebar: bool = False
    toggle_sidebar_view: bool = False
    # Name of a picker to open (e.g. "agents", "models", "themes").
    open_picker: str | None = None
    # Advance mode / think level to the next value.
    cycle_mode: bool = False
    cycle_think: bool = False
    # Explicit think level to set.
    set_think: str | None = None
    # Overlay toggles.
    show_help: bool = False
    show_whichkey: bool = False
83
+
84
+
85
+ _BUILTIN_SLASH = {
86
+ "/help", "/new", "/sessions", "/fork", "/rename", "/export", "/stats",
87
+ "/mcp", "/rules", "/skills", "/skill", "/commit", "/init", "/bg", "/clear",
88
+ "/delete", "/timeline", "/mode", "/think", "/config", "/search",
89
+ "/agents", "/models", "/themes", "/files", "/quit",
90
+ }
91
+
92
+
93
+ _ATTACH_MAX = 8192
94
+
95
+
96
def parse_input(
    text: str,
    *,
    workspace_files: list[str] | None = None,
    workspace: str | None = None,
    rust: RustSession | None = None,
) -> tuple[str, str | None, str | None]:
    """Parse user input. Returns (task, slash_command_result, shell_output).

    - `!cmd` runs shell directly: returns ("", None, "cmd")
    - `/cmd args` returns command to dispatch: ("", "/cmd args", None)
    - `@file` inlines file content into task: (expanded_task, None, None)

    Empty / whitespace-only / None input returns ("", None, None).

    Attachment rules: expansion only happens when ``workspace_files`` or
    ``workspace`` is given. With a known file list, a mention must either be an
    exact entry or match exactly one entry by suffix; otherwise it is left
    untouched. A mention whose file cannot be read is also left untouched.
    """
    stripped = (text or "").strip()
    if not stripped:
        return "", None, None

    if stripped.startswith("!"):
        return "", None, stripped[1:].strip()

    if stripped.startswith("/"):
        return "", stripped, None

    if "@" not in stripped or not (workspace_files or workspace):
        return stripped, None, None

    import re

    from .rust_session import read_workspace_file

    paths = list(workspace_files or [])

    def _attach(match: "re.Match[str]") -> str:
        mention = match.group(1)
        path = mention
        if paths and path not in paths:
            candidates = [p for p in paths if p.endswith(mention)]
            if len(candidates) != 1:
                return match.group(0)  # unknown or ambiguous: keep as typed
            path = candidates[0]
        ws = workspace or (rust.workspace_path if rust else ".")
        content = read_workspace_file(ws, path, max_bytes=_ATTACH_MAX, rust=rust)
        if content is None:
            return match.group(0)
        return f"[attached: {path}]\n```\n{content}\n```"

    # Substitute positionally in ONE pass over the original text. Replacing by
    # string search on the already-expanded task could hit an "@name" inside a
    # previously attached file, or the prefix of a longer mention.
    task = re.sub(r"@(\S+)", _attach, stripped)
    return task, None, None
140
+
141
+
142
def _workspace(session: AppSessionState) -> str:
    """Return the session's workspace directory, or "." when it is unset/empty."""
    configured = session.settings.workspace
    return configured if configured else "."
144
+
145
+
146
def dispatch_slash(cmd_line: str, session: AppSessionState) -> SlashResult:
    """Handle a slash command; mirrors CLI TUI handle_slash.

    ``cmd_line`` is split into a command word (matched case-insensitively) and
    an optional argument string. Built-in commands are handled inline; any
    other name is tried as a custom command via ``expand_command``. Some
    commands mutate ``session`` (``/new`` and ``/delete`` drop the Rust
    session, ``/bg`` stores the job listing).

    Returns a ``SlashResult`` describing the reply and any UI action. Empty or
    whitespace-only input yields a hint instead of raising.
    """
    parts = (cmd_line or "").strip().split(maxsplit=1)
    if not parts:
        # Guard: nothing to dispatch (would otherwise IndexError on parts[0]).
        return SlashResult(reply="Empty command. Try `/help`.")
    cmd = parts[0].lower()
    args = parts[1] if len(parts) > 1 else ""
    ws = _workspace(session)

    if cmd == "/help":
        custom = list_commands(ws)
        extra = ""
        if custom:
            extra = "\n\n**Custom commands:** " + ", ".join(f"`/{n}`" for n in custom)
        return SlashResult(
            reply=(
                "**Slash commands:** `/new`, `/sessions`, `/fork`, `/rename <title>`, "
                "`/stats`, `/export [file]`, `/timeline`, `/delete`, `/mcp`, `/rules`, "
                "`/skills`, `/skill <name>`, `/commit [msg]`, `/init`, `/bg`, `/clear`, "
                "`/mode`, `/think`, `/config`, `/search`, `/files`"
                f"{extra}\n\n"
                "**Input:** `!cmd` runs shell without LLM; `@file` attaches workspace files."
            )
        )

    if cmd == "/new":
        session.rust = None
        return SlashResult(reply="Started a new session.", clear_chat=True)

    if cmd == "/sessions":
        rows = RustSession.list_sessions()
        if not rows:
            return SlashResult(reply="_No saved sessions._")
        lines = [f"- **{r['title']}** (`{r['id']}`)" for r in rows[:20]]
        return SlashResult(reply="**Sessions:**\n" + "\n".join(lines))

    if cmd == "/fork":
        if session.rust and (nid := session.rust.fork()):
            return SlashResult(reply=f"Forked session → `{nid}`")
        return SlashResult(reply="Nothing to fork yet.")

    if cmd == "/rename":
        if session.rust and args and session.rust.rename(args):
            return SlashResult(reply=f"Renamed session to **{args}**")
        return SlashResult(reply="Usage: `/rename <title>`")

    if cmd == "/stats":
        nfiles = len(session.rust.files()) if session.rust else 0
        sid = session.rust.session_id if session.rust else "(none)"
        return SlashResult(
            reply=(
                f"session `{sid}` · workspace: `{ws}` · files: {nfiles} · "
                f"agent: {session.settings.agent}"
            )
        )

    if cmd == "/export":
        sid = session.rust.session_id if session.rust else ""
        if not sid:
            return SlashResult(reply="No session to export yet.")
        try:
            # NOTE(review): `args` is a user-supplied destination path passed
            # through unchanged — confirm export_transcript confines it to a
            # safe directory when the UI is publicly reachable.
            path = export_transcript(sid, args or None)
            return SlashResult(
                reply=f"Exported transcript to `{path}`",
                download_path=path,
            )
        except Exception as e:
            return SlashResult(reply=f"/export failed: {e}")

    if cmd == "/mcp":
        if session.rust is None:
            return SlashResult(
                reply="_Start a task first so MCP servers are connected._"
            )
        servers = list_mcp(session.rust)
        if not servers:
            return SlashResult(
                reply=(
                    "no MCP servers connected — add `[[mcp]]` entries to "
                    "`~/.config/smolcode/config.toml` or `.smolcode/config.toml`"
                )
            )
        lines = [f"**MCP servers ({len(servers)}):**"]
        for row in servers:
            tools = row.get("tools", [])
            # Show at most eight tool names per server.
            tlist = ", ".join(tools[:8]) if tools else "(no tools)"
            if len(tools) > 8:
                tlist += "…"
            lines.append(f"- **{row.get('server', '?')}** ({len(tools)}): {tlist}")
        return SlashResult(reply="\n".join(lines))

    if cmd == "/rules":
        rules = list_rules(ws)
        if not rules:
            return SlashResult(
                reply="no rules — add `*.md` to `.smolcode/rules/` or `~/.config/smolcode/rules/`"
            )
        lines = [f"**active rules ({len(rules)}):**"]
        for r in rules:
            desc = r.get("description", "")
            tail = f" — {desc}" if desc else ""
            lines.append(f"- `{r.get('name', '?')}` [{r.get('scope', '?')}]{tail}")
        return SlashResult(reply="\n".join(lines))

    if cmd == "/skills":
        skills = list_skills(ws)
        if not skills:
            return SlashResult(
                reply="no skills — add `<name>/SKILL.md` to `.smolcode/skills/`"
            )
        lines = [f"**skills ({len(skills)})** — run with `/skill <name>`:"]
        for s in skills:
            desc = s.get("description", "")
            tail = f" — {desc}" if desc else ""
            lines.append(f"- `{s.get('name', '?')}`{tail}")
        return SlashResult(reply="\n".join(lines))

    if cmd == "/skill":
        if not args:
            return SlashResult(reply="Usage: `/skill <name> [args]` (see `/skills`)")
        sname, _, sargs = args.partition(" ")
        sname = sname.strip()
        sargs = sargs.strip()
        expanded = expand_skill(ws, sname, sargs)
        if expanded is None:
            return SlashResult(reply=f"no skill named `{sname}` (see `/skills`)")
        return SlashResult(reply=f"Running skill **{sname}**…", queued_task=expanded)

    if cmd == "/commit":
        if args:
            task = f"Commit all current changes with git_commit using this message: {args}"
        else:
            task = (
                "Review the staged/unstaged changes with git_diff, then commit them "
                "with git_commit using a concise, descriptive message."
            )
        return SlashResult(reply="Queued git commit task…", queued_task=task)

    if cmd == "/init":
        try:
            path = write_agents_md(ws)
            return SlashResult(reply=f"wrote `{path}` (project guide for agents)")
        except Exception as e:
            return SlashResult(reply=f"/init: {e}")

    if cmd == "/bg":
        session.bg_jobs = list_background_jobs()
        return SlashResult(reply=session.bg_jobs or "_No background jobs._")

    if cmd == "/timeline":
        sid = session.rust.session_id if session.rust else ""
        if not sid:
            return SlashResult(reply="no saved session yet")
        lines = session_timeline(sid)
        return SlashResult(reply="**Timeline:**\n" + "\n".join(f"- {ln}" for ln in lines))

    if cmd == "/delete":
        removed = session.rust.delete() if session.rust else False
        session.rust = None
        msg = "deleted session; started a new one" if removed else "started a new session"
        return SlashResult(reply=msg, clear_chat=True)

    if cmd == "/clear":
        return SlashResult(reply="_Transcript cleared._", clear_chat=True)

    if cmd == "/mode":
        return SlashResult(reply="Cycling mode…", cycle_mode=True)

    if cmd == "/think":
        if args:
            return SlashResult(reply=f"think → {args}", set_think=args.split()[0].lower())
        return SlashResult(reply="Cycling think level…", cycle_think=True)

    if cmd == "/config":
        if session.rust is None:
            return SlashResult(reply="_Start a task first to view config._")
        return SlashResult(reply=f"```\n{render_config(session.rust)}\n```")

    if cmd == "/search":
        if not args:
            return SlashResult(reply="Usage: `/search <text>`")
        return SlashResult(reply=f"_Search for `{args}` runs in transcript handler._")

    if cmd == "/agents":
        return SlashResult(reply="Opening agent picker…", open_picker="agents")

    if cmd == "/models":
        return SlashResult(reply="Opening model picker…", open_picker="models")

    if cmd == "/themes":
        return SlashResult(reply="Opening theme picker…", open_picker="themes")

    if cmd == "/files":
        return SlashResult(reply="Toggling sidebar…", toggle_sidebar=True)

    if cmd == "/quit":
        return SlashResult(reply="_Use browser close to exit the web UI._")

    if cmd not in _BUILTIN_SLASH:
        # Not a built-in: try a user-defined command of the same name.
        name = cmd.lstrip("/")
        expanded = expand_command(ws, name, args)
        if expanded is not None:
            return SlashResult(
                reply=f"Running custom command `/{name}`…",
                queued_task=expanded,
            )

    return SlashResult(reply=f"Unknown command `{cmd}`. Try `/help`.")
352
+
353
+
354
def settings_from_ui(
    workspace: str,
    model: str,
    agent: str,
    mode: str,
    think: str,
    yolo: bool,
) -> UiSettings:
    """Convert raw UI control values into a ``UiSettings``.

    Mode implies two derived values: "auto" turns on auto-approval even when
    the yolo box is unchecked, and "plan" forces the "plan" agent. Empty
    workspace/model fall back to "." and "" respectively.
    """
    plan_mode = mode == "plan"
    auto_approve = yolo or mode == "auto"
    return UiSettings(
        workspace=workspace or ".",
        model=model or "",
        agent="plan" if plan_mode else agent,
        mode=mode,
        think=think,
        yolo=auto_approve,
    )
372
+
373
+
374
def build_settings_panel(preset_models: list[str]) -> dict:
    """Create the settings accordion and return its controls keyed by name.

    Keys: "workspace", "model", "agent", "mode", "think", "yolo". The model
    dropdown preselects the first entry of ``preset_models`` (or "" when the
    list is empty) and accepts custom values. The workspace box defaults to
    ``SMALLCODE_WORKSPACE`` or ".".
    """
    default_model = preset_models[0] if preset_models else ""
    with gr.Accordion("⚙️ settings", open=False):
        # Dict entries are evaluated top to bottom, which fixes the order in
        # which the components are created.
        controls = {
            "workspace": gr.Textbox(
                value=os.environ.get("SMALLCODE_WORKSPACE", "."),
                label="workspace directory",
            ),
            "model": gr.Dropdown(
                choices=preset_models,
                value=default_model,
                label="model",
                allow_custom_value=True,
            ),
            "agent": gr.Dropdown(
                choices=["build", "plan"],
                value="build",
                label="agent",
            ),
            "mode": gr.Radio(
                choices=["normal", "auto", "plan"],
                value="normal",
                label="mode",
            ),
            "think": gr.Dropdown(
                choices=["off", "low", "high", "xtra"],
                value="off",
                label="think level",
            ),
            "yolo": gr.Checkbox(value=False, label="yolo (auto-approve tools)"),
        }
    return controls
411
+
412
+
413
def file_tree_md(files: dict[str, str], selected: str | None = None) -> str:
    """Legacy flat file list (prefer engine.file_tree.build_workspace_panel).

    Renders one bullet per path (sorted), marking ``selected`` with an arrow.
    When ``selected`` names a file with non-empty content, a fenced preview is
    appended; ``.py`` files are tagged ``python``. The fence is always longer
    than any backtick run in the content so embedded fenced blocks cannot end
    the preview early. An empty ``files`` map yields a placeholder.
    """
    if not files:
        return "_workspace is empty_"
    lines = []
    for path in sorted(files):
        mark = " →" if path == selected else ""
        lines.append(f"- `{path}`{mark}")
    listing = "\n".join(lines)

    body = files.get(selected, "") if selected and selected in files else ""
    if not body:
        return listing

    # Find the longest run of backticks in the content.
    longest = run = 0
    for ch in body:
        run = run + 1 if ch == "`" else 0
        longest = max(longest, run)
    fence = "`" * max(3, longest + 1)

    lang = "python" if selected.endswith(".py") else ""
    return listing + f"\n\n**`{selected}`**\n{fence}{lang}\n{body}\n{fence}"
engine/judge.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM-judge correctness gate for the router.
2
+
3
+ `router._verify()` only proves the produced code RUNS (clean exit / tests it wrote
4
+ itself), not that it's actually CORRECT — so a small model can ship a clean-but-wrong
5
+ solution and the router accepts it instead of escalating (exactly how the bench's
6
+ roman_to_int slipped through: ran fine, wrong output).
7
+
8
+ This judge asks a more capable model whether the solution truly satisfies the task; a
9
+ concrete "no" is turned into an escalation by the router. Mirrors
10
+ smolcode-cli/src/judge.rs (JSON-only reply, temperature 0, lenient parse), but the
11
+ verdict drives ESCALATION rather than stop/continue.
12
+
13
+ Conservative by design: only a clear defect escalates. On judge error / timeout /
14
+ unparseable reply we ACCEPT — the judge is a net to catch obvious wrongness, not a
15
+ hard gate, and we don't want to over-escalate (and lose the small-model win).
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import os
21
+ import re
22
+
23
+ import liteforge as lf
24
+
25
+ _SYSTEM = (
26
+ "You are a strict senior code reviewer. You are given a coding TASK and the FILES "
27
+ "an agent produced. The code already runs without crashing — your job is to judge "
28
+ "whether it is actually CORRECT and COMPLETE for the task: check the exact "
29
+ "requirements, edge cases, and obvious logic bugs.\n"
30
+ "Reply with ONLY a JSON object: {\"correct\": true|false, \"reason\": \"<one short sentence>\"}.\n"
31
+ "Set \"correct\": false if you find ANY bug, wrong/missing edge case, or unmet "
32
+ "requirement. Ignore style. Do not write code."
33
+ )
34
+
35
+
36
def judge_enabled() -> bool:
    """Judge is on by default; SMALLCODE_JUDGE=0 disables it.

    Disabling values (case-insensitive, surrounding whitespace ignored):
    "0", "false", "no", "off", and the empty string. Anything else, or the
    variable being unset, leaves the judge enabled.
    """
    raw = os.environ.get("SMALLCODE_JUDGE", "1")
    # Strip first: values read from env files often carry stray whitespace.
    return raw.strip().lower() not in ("0", "false", "no", "off", "")
39
+
40
+
41
def _files_block(files: dict[str, str], cap: int = 6000) -> str:
    """Concatenate files into one prompt block, truncated to ``cap`` characters.

    Each file becomes a ``### <path>`` heading followed by its content;
    sections are separated by a blank line. Truncation is a plain character
    cut and may fall in the middle of a file.
    """
    sections = []
    for path, content in files.items():
        sections.append(f"### {path}\n{content}")
    return "\n\n".join(sections)[:cap]
44
+
45
+
46
def _parse(text: str) -> bool | None:
    """True (correct), False (defect found), or None (couldn't tell).

    Lenient parse of the judge reply, tried in order:
      1. Any JSON object embedded in the text that has a boolean "correct"
         key. Objects are decoded starting at each "{", so surrounding prose
         or unrelated braces do not break parsing.
      2. An explicit ``correct: true|false`` pair in malformed or truncated
         JSON, tolerating quotes and spacing variants.
      3. The bare word "incorrect" anywhere in the text -> False.
    An explicit verdict always outranks the loose "incorrect" match, so a
    reply like ``{"correct": true, "reason": "nothing incorrect"`` is not
    misread as a defect.
    """
    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", text):
        try:
            obj, _ = decoder.raw_decode(text, brace.start())
        except ValueError:
            continue  # not valid JSON from this brace; try the next one
        if isinstance(obj, dict) and isinstance(obj.get("correct"), bool):
            return obj["correct"]

    # The lookbehind keeps "incorrect: ..." from being read as the "correct" key.
    explicit = re.search(
        r"""(?<![A-Za-z_])correct["']?\s*:\s*["']?(true|false)\b""", text, re.I
    )
    if explicit:
        return explicit.group(1).lower() == "true"

    if "incorrect" in text.lower():
        return False
    return None
62
+
63
+
64
async def judge_correct(preset, judge_model: str, task: str,
                        files: dict[str, str], final: str) -> bool:
    """Ask the judge model whether the produced files satisfy the task.

    Returns False only when the judge's reply parses to a clear "not correct".
    Returns True when there are no files to judge, when the judge call raises
    for any reason, or when the reply cannot be interpreted.

    Args:
        preset: supplies ``base_url`` and ``api_key`` for the judge client.
        judge_model: model tag used for the judge request.
        task: the original task text.
        files: path -> content map of what the agent produced.
        final: the agent's final message; only its first 500 characters are sent.
    """
    if not files:
        return True

    prompt = (
        f"TASK:\n{task}\n\nFILES:\n{_files_block(files)}\n\n"
        f"AGENT'S FINAL CLAIM:\n{(final or '')[:500]}\n\n"
        "Is the solution correct and complete for the task? Reply with JSON only."
    )
    conversation = [
        {"role": "system", "content": _SYSTEM},
        {"role": "user", "content": prompt},
    ]
    try:
        client = lf.AsyncForgeClient(
            base_url=preset.base_url, api_key=preset.api_key, default_model=judge_model,
        )
        reply = await client.complete(
            messages=conversation, model=judge_model, temperature=0.0,
        )
        content = reply["choices"][0]["message"].get("content", "") or ""
    except Exception:
        # Judge unavailable -> accept rather than block the result.
        return True

    verdict = _parse(content)
    if verdict is None:
        return True
    return verdict
engine/live_run.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Live polling helper for Gradio streaming updates."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ from collections.abc import AsyncIterator, Awaitable, Callable
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, TypeVar
8
+
9
+ from .agent import SmallCodeAgent, Step
10
+ from .trace_collector import TraceEvent
11
+
12
+ T = TypeVar("T")
13
+
14
+
15
@dataclass
class LiveFrame:
    """One snapshot handed to the UI while (or after) an agent run executes."""

    # Agent steps; the mid-run snapshot helper leaves this empty and only the
    # final snapshot fills it from the agent.
    steps: list[Step] = field(default_factory=list)
    # Trace events copied from the agent's trace collector.
    events: list[TraceEvent] = field(default_factory=list)
    # Workspace contents as a path -> text mapping.
    files: dict[str, str] = field(default_factory=dict)
    # True only on the last frame of a run.
    done: bool = False
    # Value returned by the awaited run (set on the final frame).
    result: Any = None
    # Not populated by the helpers in this module.
    raw_event: dict | None = None
23
+
24
+
25
async def run_with_live_updates(
    coro: Awaitable[T],
    agent: SmallCodeAgent,
    *,
    poll_interval: float = 0.35,
) -> AsyncIterator[LiveFrame]:
    """Yield snapshots while `coro` runs, then a final frame with the result.

    `coro` is scheduled as a background task. A mid-run snapshot of `agent` is
    yielded at most every `poll_interval` seconds; once the task finishes, its
    result (or exception) is awaited and one final snapshot is yielded.

    The background task is cancelled whenever this generator exits before the
    task is done: on cancellation, when the consumer closes the generator early,
    or when an error propagates. It therefore never keeps running unobserved.
    """
    task = asyncio.ensure_future(coro)
    try:
        while not task.done():
            yield _live_snapshot(agent)
            # Wait for completion OR the poll interval, whichever comes first,
            # so the final frame is not delayed by a full sleep.
            await asyncio.wait({task}, timeout=poll_interval)
        result = await task
        yield _final_snapshot(agent, result=result)
    finally:
        if not task.done():
            task.cancel()
42
+
43
+
44
async def stream_live(
    make_coro: Callable[[], Awaitable[T]],
    get_agent: Callable[[], SmallCodeAgent | None],
    *,
    poll_interval: float = 0.35,
) -> AsyncIterator[LiveFrame]:
    """Like run_with_live_updates, but the agent may appear only after the run starts.

    `make_coro()` is called once and scheduled as a background task. On every
    poll `get_agent()` is consulted: while it returns None an empty `LiveFrame`
    is yielded, otherwise a mid-run snapshot of that agent. After the task
    finishes, a final frame carrying its result is yielded (a bare
    `LiveFrame(done=True, ...)` if no agent ever appeared).

    The background task is cancelled whenever this generator exits before the
    task is done (cancellation, early close by the consumer, or an error).
    """
    task = asyncio.ensure_future(make_coro())
    try:
        while not task.done():
            agent = get_agent()
            yield _live_snapshot(agent) if agent is not None else LiveFrame()
            # Return as soon as the run completes instead of always sleeping
            # the whole interval.
            await asyncio.wait({task}, timeout=poll_interval)
        result = await task
        agent = get_agent()
        if agent is not None:
            yield _final_snapshot(agent, result=result)
        else:
            yield LiveFrame(done=True, result=result)
    finally:
        if not task.done():
            task.cancel()
66
+
67
+
68
def _live_snapshot(agent: SmallCodeAgent) -> LiveFrame:
    """Build a mid-run frame from the trace collector and the workspace only.

    IMPORTANT: the LiteForge agent object itself (history/state) must not be
    touched while a run is in flight. The Rust ToolCallingAgent is not reentrant
    and `run()` holds an internal lock for its whole duration, so calling
    `current_steps()` here would deadlock. The trace collector (a plain Python
    list the wrapped tools append to) and the workspace files (plain disk reads)
    are the only things read; `steps` is therefore always empty mid-run.
    """
    trace = agent.trace_collector.snapshot()
    workspace = agent.files()
    return LiveFrame(steps=[], events=trace, files=workspace, done=False)
83
+
84
+
85
def _final_snapshot(agent: SmallCodeAgent, *, result: Any = None) -> LiveFrame:
    """Build the closing frame of a run.

    Reading the agent's steps is safe here because `run()` has already returned.
    The frame is marked `done` and carries `result` through unchanged.
    """
    steps = agent.current_steps()
    trace = agent.trace_collector.snapshot()
    workspace = agent.files()
    return LiveFrame(
        steps=steps, events=trace, files=workspace, done=True, result=result,
    )
engine/playwright_runner.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Subprocess runner: check a model-built web app in headless Chromium.
2
+
3
+ A Playwright/Chromium sibling of engine/browser_runner.py (Firefox/Selenium),
4
+ with the IDENTICAL JSON contract so engine/browsercheck.py can try whichever
5
+ real browser is installed. Invoked as `python engine/playwright_runner.py
6
+ <app.html>` — never imported (keeps Playwright out of the Gradio process and
7
+ isolates a browser crash).
8
+
9
+ It loads the app in the EXACT same `srcdoc` + `sandbox` wrapper as the live
10
+ preview (engine/preview.py), injects an error collector before the app's own
11
+ scripts, clicks every button, exercises the keyboard, and reports uncaught JS
12
+ errors — the hard failure signal that lets the router escalate a broken build.
13
+
14
+ Output: one JSON line {ok, errors, buttons, clicked}. Exit 3 only when Chromium
15
+ itself can't run (Playwright missing or the browser binary not downloaded), so
16
+ the caller falls back to Firefox, then jsdom.
17
+ """
18
+ import json
19
+ import os
20
+ import re
21
+ import sys
22
+ import tempfile
23
+
24
+ PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"
25
+
26
+ # Same collector browser_runner.py injects: catches errors thrown during load
27
+ # (the "script ran before its element / undefined function" class).
28
+ _CAPTURE = ("<script>(function(){window.__errs=[];"
29
+ "window.addEventListener('error',function(e){try{__errs.push('uncaught: '+"
30
+ "((e.error&&e.error.message)||e.message||String(e)))}catch(_){}} ,true);"
31
+ "window.addEventListener('unhandledrejection',function(e){try{__errs.push("
32
+ "'rejection: '+((e.reason&&e.reason.message)||e.reason))}catch(_){}});})();</script>")
33
+
34
+ _CLICK_SELECTOR = "button, [onclick], input[type=button], input[type=submit]"
35
+ _KEYBOARD_JS = (
36
+ "['ArrowUp','ArrowDown','ArrowLeft','ArrowRight',' '].forEach(function(k){"
37
+ "var c={key:k,keyCode:k===' '?32:({ArrowUp:38,ArrowDown:40,ArrowLeft:37,ArrowRight:39}[k]),bubbles:true};"
38
+ "document.dispatchEvent(new KeyboardEvent('keydown',c));"
39
+ "window.dispatchEvent(new KeyboardEvent('keydown',c));});")
40
+
41
+
42
+ def _escape_srcdoc(doc: str) -> str:
43
+ return doc.replace("&", "&amp;").replace('"', "&quot;")
44
+
45
+
46
+ def _inject_capture(app_html: str) -> str:
47
+ m = re.search(r"<head[^>]*>", app_html, re.I)
48
+ if m:
49
+ return app_html[:m.end()] + _CAPTURE + app_html[m.end():]
50
+ m = re.search(r"<html[^>]*>", app_html, re.I)
51
+ if m:
52
+ return app_html[:m.end()] + _CAPTURE + app_html[m.end():]
53
+ return _CAPTURE + app_html
54
+
55
+
56
+ def _emit(obj: dict) -> None:
57
+ sys.stdout.write(json.dumps(obj) + "\n")
58
+
59
+
60
def main(path: str) -> int:
    """Load the app at `path` in headless Chromium and report uncaught JS errors.

    The app HTML is wrapped in a sandboxed `srcdoc` iframe on a temporary host
    page. Up to 25 clickable elements are clicked, a few key events are
    dispatched, and whatever the injected collector gathered in `window.__errs`
    is read back.

    Emits exactly one JSON line on stdout:
      * `{ok, errors, buttons, clicked}` and returns 0 when the check ran;
      * `{ok: None, infra: ...}` and returns 3 when Playwright/Chromium itself
        could not run (import failure, launch failure, iframe not reachable, or
        any other Playwright-level error).

    Raises OSError if `path` cannot be read.
    """
    try:
        from playwright.sync_api import sync_playwright
    except Exception as e:  # noqa: BLE001
        _emit({"ok": None, "infra": f"playwright import failed: {e}"})
        return 3

    with open(path, encoding="utf-8") as f:
        app_html = f.read()

    host = ('<!doctype html><meta charset="utf-8"><body style="margin:0">'
            f'<iframe id="app" style="width:100%;height:600px;border:0" '
            f'sandbox="{PREVIEW_SANDBOX}" '
            f'srcdoc="{_escape_srcdoc(_inject_capture(app_html))}"></iframe>')

    errors: list[str] = []
    buttons = clicked = 0
    # TemporaryDirectory removes the host page on every exit path, so repeated
    # checks don't leave one "pwhost-*" directory behind per run.
    with tempfile.TemporaryDirectory(prefix="pwhost-") as host_dir:
        host_path = os.path.join(host_dir, "host.html")
        with open(host_path, "w", encoding="utf-8") as f:
            f.write(host)

        try:
            with sync_playwright() as p:
                try:
                    browser = p.chromium.launch(
                        headless=True,
                        args=["--allow-file-access-from-files", "--no-sandbox"])
                except Exception as e:  # noqa: BLE001
                    _emit({"ok": None, "infra": f"chromium launch failed: {str(e)[:200]}"})
                    return 3
                try:
                    page = browser.new_page()
                    page.set_default_timeout(4000)
                    page.goto("file://" + host_path, timeout=20000)
                    handle = page.wait_for_selector("#app", timeout=5000)
                    frame = handle.content_frame()
                    if frame is None:
                        _emit({"ok": None, "infra": "could not enter app iframe"})
                        return 3
                    page.wait_for_timeout(300)  # let scripts settle
                    els = frame.query_selector_all(_CLICK_SELECTOR)
                    buttons = len(els)
                    for el in els[:25]:
                        try:
                            # Re-enable disabled controls so their handlers run too.
                            el.evaluate("e => { e.disabled = false; }")
                            el.click(force=True, timeout=1000)
                            clicked += 1
                        except Exception:
                            pass  # handler errors land in __errs
                    try:
                        frame.evaluate(_KEYBOARD_JS)
                    except Exception:
                        pass  # best effort: keyboard exercise is optional
                    page.wait_for_timeout(300)  # surface late/timer errors
                    try:
                        errors = frame.evaluate("() => window.__errs || []") or []
                    except Exception:
                        errors = []
                finally:
                    try:
                        browser.close()
                    except Exception:
                        pass  # a failing close must not mask the real outcome
        except Exception as e:  # noqa: BLE001
            _emit({"ok": None, "infra": f"playwright run failed: {str(e)[:200]}"})
            return 3

    # Bound the report: at most 20 messages of at most 400 characters each.
    errors = [str(e)[:400] for e in errors][:20]
    _emit({"ok": len(errors) == 0, "errors": errors, "buttons": buttons, "clicked": clicked})
    return 0
129
+
130
+
131
if __name__ == "__main__":
    # A missing argument used to surface as a bare IndexError traceback;
    # sys.exit(str) prints the message to stderr and keeps the exit status at 1.
    if len(sys.argv) < 2:
        sys.exit("usage: python engine/playwright_runner.py <app.html>")
    sys.exit(main(sys.argv[1]))
engine/preflight.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Startup reachability check for the active backend.
2
+
3
+ The whole point of smolcode is that one OpenAI-compatible endpoint (chosen by
4
+ the preset) serves the model ladder. If that endpoint is unreachable — hal is
5
+ off the VPN, the laptop Ollama isn't running — the agent loop will hang or fail
6
+ deep inside a request with no obvious cause. Worse, a silent default to the
7
+ wrong preset (the historical "it's using my laptop, not hal" bug) looks fine
8
+ until you notice the weak single-tier model.
9
+
10
+ `preflight()` makes that visible: it prints which preset/endpoint is active and
11
+ probes `{base_url}/models` once at startup. On success it prints a one-line
12
+ banner with the model count; on failure it prints a loud warning naming the dead
13
+ URL and which *other* presets are reachable right now, so the fix is obvious.
14
+
15
+ It never raises and never blocks the app from starting — it only informs.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import sys
21
+ import urllib.error
22
+ import urllib.request
23
+
24
+ from .config import Preset, _PRESETS, load_preset
25
+
26
+ _TIMEOUT = 4.0
27
+
28
+ # ANSI: bold, green ok, red warn — degrade to plain text when not a TTY.
29
+ _BOLD, _GREEN, _RED, _DIM, _RESET = "\033[1m", "\033[32m", "\033[31m", "\033[2m", "\033[0m"
30
+
31
+
32
+ def _color(s: str, code: str) -> str:
33
+ return f"{code}{s}{_RESET}" if sys.stderr.isatty() else s
34
+
35
+
36
def list_models(base_url: str, timeout: float = _TIMEOUT,
                api_key: str | None = None) -> list[str]:
    """Fetch the sorted model IDs from `{base_url}/models`. Returns [] on failure.

    Args:
        base_url: OpenAI-compatible API root; a trailing slash is tolerated.
        timeout: Socket timeout in seconds for the request.
        api_key: Optional bearer token. Sent the same way `probe` sends it, so
            an endpoint that requires auth lists its models instead of
            answering 401 (which this function reports as an empty list).

    Any transport error, non-200 status, invalid JSON or unexpected payload
    shape yields an empty list; entries without a truthy "id" are skipped.
    """
    import http.client  # local import: only needed to name HTTPException below

    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return []
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except (urllib.error.URLError, http.client.HTTPException,
            TimeoutError, OSError, ValueError):
        # ValueError also covers json.JSONDecodeError and malformed URLs;
        # HTTPException covers a connection that drops mid-body.
        return []
    models = data.get("data") if isinstance(data, dict) else None
    if not isinstance(models, list):
        return []
    return sorted(
        str(m["id"]) for m in models if isinstance(m, dict) and m.get("id")
    )
54
+
55
+
56
def probe(base_url: str, timeout: float = _TIMEOUT,
          api_key: str | None = None) -> tuple[bool, int | None, str | None]:
    """Return (reachable, model_count, error). Never raises.

    Sends the bearer token so endpoints that require auth (e.g. a vLLM server
    started with --api-key) report reachable instead of a spurious 401.

    Returns:
        (True, count, None) when `{base_url}/models` answers 200 with JSON;
        `count` is the length of its "data" list, or None if the payload has no
        such list.
        (False, None, message) otherwise. For an HTTP error status the message
        includes the status code (e.g. "HTTP 401 Unauthorized").
    """
    import http.client  # local import: only needed to name HTTPException below

    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return False, None, f"HTTP {resp.status}"
            data = json.loads(resp.read().decode("utf-8", "replace"))
        models = data.get("data") if isinstance(data, dict) else None
        count = len(models) if isinstance(models, list) else None
        return True, count, None
    except urllib.error.HTTPError as e:
        # Must precede URLError (its base class); keep the status code visible.
        return False, None, f"HTTP {e.code} {e.reason}".strip()
    except urllib.error.URLError as e:
        return False, None, str(getattr(e, "reason", e))
    except (http.client.HTTPException, TimeoutError, OSError, ValueError) as e:
        # ValueError also covers json.JSONDecodeError and malformed URLs.
        # Fall back to the exception class name when the message is empty.
        return False, None, str(e) or type(e).__name__
77
+
78
+
79
def _reachable_alternatives(active_key: str) -> list[str]:
    """List the *other* known presets whose endpoint answers right now.

    Each preset except `active_key` is probed in turn with a 2-second timeout;
    reachable ones are returned as "key (base_url)" strings in preset order.
    """
    reachable: list[str] = []
    others = [(key, preset) for key, preset in _PRESETS.items() if key != active_key]
    for key, preset in others:
        answered = probe(preset.base_url, timeout=2.0, api_key=preset.api_key)[0]
        if answered:
            reachable.append(f"{key} ({preset.base_url})")
    return reachable
89
+
90
+
91
def preflight(preset: Preset | None = None) -> bool:
    """Print a startup banner for the active backend to stderr.

    Uses `preset` when given, otherwise the preset from `load_preset()`. Probes
    the endpoint once. On success prints a green banner with the model count
    (or "reachable") and the tier ladder, and returns True. On failure prints a
    warning naming the endpoint and the probe error, lists any other presets
    that do answer, and returns False.
    """
    active = preset or load_preset()
    tiers = " · ".join(f"{t.name}:{t.model}" for t in active.tiers)
    ok, count, err = probe(active.base_url, api_key=active.api_key)

    if not ok:
        report = [
            _color("⚠ smolcode backend UNREACHABLE", _BOLD + _RED),
            f" preset={active.key} · {active.base_url} · {err}",
            f" tiers: {tiers}",
        ]
        alts = _reachable_alternatives(active.key)
        if alts:
            report.append(" reachable instead: " + ", ".join(alts))
            report.append(_color(" → set SMALLCODE_PRESET to one of the above, "
                                 "or fix the endpoint.", _DIM))
        else:
            report.append(
                _color(" → no known preset endpoint is answering right now.", _DIM))
        print("\n".join(report), file=sys.stderr)
        return False

    models = "reachable" if count is None else f"{count} models"
    banner = (f"smolcode backend: preset={active.key} · {active.base_url} "
              f"· {models}\n tiers: {tiers}")
    print(_color(banner, _BOLD + _GREEN), file=sys.stderr)
    return True
engine/preview.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Live-preview rendering for smolbuilder.
2
+
3
+ Turns the agent's workspace (a `path -> content` dict of a small static web app)
4
+ into a single self-contained HTML document, then into a sandboxed iframe that
5
+ Gradio can drop straight into a `gr.HTML`. This is the "Replit/Lovable" preview:
6
+ what the tiny model just built, running live in the browser.
7
+
8
+ Deliberately dependency-free (stdlib only) so it can be unit-tested without
9
+ Gradio or the Rust engine, and so the rendering logic stays trivially auditable.
10
+
11
+ Design choices:
12
+ - We inline locally-referenced `<link rel=stylesheet>` and `<script src=...>`
13
+ from sibling files, so a model that splits style.css / script.js out of
14
+ index.html still previews correctly — but we never touch absolute/CDN URLs.
15
+ - The iframe is loaded via `srcdoc=` (not a `data:` URI). A `data:` URL has an
16
+ *opaque origin*, where `localStorage`/`sessionStorage` throw `SecurityError` —
17
+ so any app that persists state (a notepad, a to-do list) dies on load before it
18
+ can wire up its buttons. A `srcdoc` frame inherits the embedder's (Gradio's)
19
+ origin, so storage and scripts work the way the model expects.
20
+ - SECURITY TRADE-OFF: `sandbox="allow-scripts allow-same-origin ..."` is required
21
+ for storage to work, but that combination also lets the framed (model-written)
22
+ code reach the parent page. This is acceptable for a *local, single-user*
23
+ builder — the framed code is the same user's own request, on a page holding no
24
+ one else's secrets. Do NOT reuse this wrapper to embed untrusted third-party
25
+ apps on an origin that holds other users' data; the isolation-preserving fix is
26
+ to serve the preview from a separate origin (out of scope here).
27
+ - The same wrapper (`PREVIEW_SANDBOX`/`_escape_srcdoc`) is reused by the headless
28
+ verification check (engine/browsercheck.py) so the agent tests *exactly* what
29
+ the user sees.
30
+ """
31
+ from __future__ import annotations
32
+
33
+ import html
34
+ import re
35
+
36
+ # Sandbox flags shared by the live preview and the verification check.
37
+ # allow-same-origin is required so srcdoc inherits the parent origin and web
38
+ # storage works; combined with allow-scripts it weakens isolation (see docstring).
39
+ PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"
40
+
41
+ # Files we know how to treat as the app entrypoint, best first.
42
+ _ENTRY_CANDIDATES = ("index.html", "main.html", "app.html")
43
+
44
+ _LINK_RE = re.compile(
45
+ r"""<link\b[^>]*?\brel\s*=\s*['"]?stylesheet['"]?[^>]*?>""", re.I | re.S)
46
+ _SCRIPT_SRC_RE = re.compile(
47
+ r"""<script\b[^>]*?\bsrc\s*=\s*['"]([^'"]+)['"][^>]*?>\s*</script>""", re.I | re.S)
48
+ _HREF_RE = re.compile(r"""\bhref\s*=\s*['"]([^'"]+)['"]""", re.I)
49
+
50
+
51
def find_entry(files: dict[str, str]) -> str | None:
    """Choose the HTML file to preview, or None when the workspace has none.

    A path that equals one of the known entry names (compared
    case-insensitively, in preference order) wins. Otherwise any `.html` file
    is used, preferring the shallowest path and then alphabetical order so the
    choice is deterministic.
    """
    by_lower = {p.lower(): p for p in files}
    preferred = next(
        (by_lower[name] for name in _ENTRY_CANDIDATES if name in by_lower), None)
    if preferred is not None:
        return preferred
    html_paths = [p for p in files if p.lower().endswith(".html")]
    if not html_paths:
        return None
    return min(html_paths, key=lambda p: (p.count("/"), p))
61
+
62
+
63
+ def _is_local(url: str) -> bool:
64
+ """True for a same-app relative reference we can inline (not a CDN/data URI)."""
65
+ u = url.strip()
66
+ if not u:
67
+ return False
68
+ return not re.match(r"^(?:[a-z]+:)?//|^https?:|^data:|^mailto:|^#", u, re.I)
69
+
70
+
71
+ def _lookup(files: dict[str, str], ref: str) -> str | None:
72
+ """Resolve a relative href/src against the workspace file map."""
73
+ ref = ref.split("?", 1)[0].split("#", 1)[0].lstrip("./").lstrip("/")
74
+ if ref in files:
75
+ return files[ref]
76
+ # Case-insensitive / basename fallback so '/style.css' finds 'style.css'.
77
+ base = ref.rsplit("/", 1)[-1].lower()
78
+ for path, content in files.items():
79
+ if path.lower() == ref.lower() or path.rsplit("/", 1)[-1].lower() == base:
80
+ return content
81
+ return None
82
+
83
+
84
def inline_app(files: dict[str, str]) -> str:
    """Return one self-contained HTML document for the app in `files`.

    The entry document is taken from `find_entry`. Every stylesheet `<link>`
    and `<script src=...>` that points at a local workspace file is replaced by
    an inline `<style>` / `<script>` carrying that file's content. Absolute and
    CDN references, and local references that cannot be resolved, are left
    untouched.

    If there's no HTML entrypoint, a friendly placeholder is rendered instead
    (e.g. the model has only written notes or a not-yet-web file).
    """
    entry = find_entry(files)
    if entry is None:
        return _placeholder(files)

    doc = files[entry]

    def _inline_css(match: re.Match) -> str:
        tag = match.group(0)
        href_m = _HREF_RE.search(tag)
        if not href_m or not _is_local(href_m.group(1)):
            return tag
        css = _lookup(files, href_m.group(1))
        if css is None:
            return tag
        return f"<style>\n{css}\n</style>"

    def _inline_js(match: re.Match) -> str:
        tag = match.group(0)
        src = match.group(1)
        if not _is_local(src):
            return tag
        js = _lookup(files, src)
        if js is None:
            return tag
        # Guard against the inlined body prematurely closing the script
        # element. End tags are matched case-insensitively by the HTML parser,
        # so `</SCRIPT>` and `</script >` have to be neutralised as well.
        safe = re.sub(r"</(script)", r"<\\/\1", js, flags=re.I)
        # Carry the `type` attribute over: dropping type="module" would turn an
        # ES module into a classic script and its import/export would fail.
        type_m = re.search(r"""(?<![\w-])type\s*=\s*['"]([^'"]+)['"]""", tag, re.I)
        type_attr = f' type="{type_m.group(1)}"' if type_m else ""
        return f"<script{type_attr}>\n{safe}\n</script>"

    doc = _LINK_RE.sub(_inline_css, doc)
    doc = _SCRIPT_SRC_RE.sub(_inline_js, doc)
    return doc
120
+
121
+
122
+ def _escape_srcdoc(doc: str) -> str:
123
+ """Escape an HTML document for a double-quoted `srcdoc="..."` attribute.
124
+
125
+ Only `&` and `"` are significant inside a double-quoted attribute value, and
126
+ `&` must go first (so the `&` we introduce for `"` isn't re-escaped). `<`,
127
+ `>` and even a literal `</script>` are FINE here — the parser is in
128
+ attribute-value state, not script-data state — so we must NOT touch them
129
+ (html.escape would corrupt the rendered document).
130
+ """
131
+ return doc.replace("&", "&amp;").replace('"', "&quot;")
132
+
133
+
134
def preview_iframe(files: dict[str, str], *, height: int = 540) -> str:
    """Render the app as a sandboxed `srcdoc` iframe ready for `gr.HTML`.

    The workspace is flattened with `inline_app`, escaped for the attribute,
    and embedded in an iframe that is `height` pixels tall.
    """
    srcdoc = _escape_srcdoc(inline_app(files))
    style = (
        f"width:100%;height:{height}px;border:0;border-radius:12px;"
        "background:#fff;box-shadow:0 1px 0 rgba(0,0,0,.06)"
    )
    attributes = (
        'title="smolbuilder preview"',
        f'style="{style}"',
        f'sandbox="{PREVIEW_SANDBOX}"',
        f'srcdoc="{srcdoc}"',
    )
    return "<iframe " + " ".join(attributes) + "></iframe>"
144
+
145
+
146
+ def _placeholder(files: dict[str, str]) -> str:
147
+ listing = "".join(
148
+ f"<li><code>{html.escape(p)}</code></li>" for p in sorted(files)
149
+ ) or "<li><em>workspace is empty</em></li>"
150
+ return (
151
+ "<!doctype html><html><head><meta charset='utf-8'>"
152
+ "<style>body{font:15px/1.5 system-ui,sans-serif;color:#475569;"
153
+ "background:#f8fafc;padding:2rem}h2{color:#7c3aed;margin:.2rem 0 1rem}"
154
+ "code{background:#ede9fe;color:#5b21b6;padding:1px 6px;border-radius:6px}"
155
+ "</style></head><body>"
156
+ "<h2>No preview yet</h2>"
157
+ "<p>smolbuilder previews the app's <code>index.html</code>. "
158
+ "Describe a web app on the left and it'll appear here, live.</p>"
159
+ f"<p>Files in the workspace:</p><ul>{listing}</ul>"
160
+ "</body></html>"
161
+ )
engine/route_clf.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Learned routing classifier — the confidence-gated upgrade to the regex router.
2
+
3
+ smolcode's router historically guesses two things from cheap regex
4
+ ([router.classify_specialty][engine.router.classify_specialty] and
5
+ [router.classify_tier][engine.router.classify_tier]). This module adds tiny
6
+ learned classifiers (SetFit backbone + light head, exported to int8 ONNX) that
7
+ predict, per task:
8
+
9
+ - **specialty** — which fine-tune family (16-way)
10
+ - **tier** — a difficulty bucket -> the *starting* rung in the ladder
11
+ - **escalate** — whether the task will likely need a bigger model
12
+
13
+ Thinking level (off/low/high/xtra) is *derived* from (tier, escalate), not a
14
+ separate model.
15
+
16
+ The design is deliberately "pure upside": every prediction is gated by a
17
+ calibrated confidence threshold. Below threshold — or if onnxruntime / the model
18
+ artifacts aren't present at all — the field **falls back to the existing regex**,
19
+ so we can never route worse than the status quo and rules-confident cases stay
20
+ 100% deterministic.
21
+
22
+ Heavy deps (onnxruntime, tokenizers, numpy) are imported lazily; if any is
23
+ missing the classifier simply abstains everywhere and the regex drives routing.
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import functools
28
+ import json
29
+ import os
30
+ from pathlib import Path
31
+
32
+ from pydantic import BaseModel, Field
33
+
34
+ from .router import classify_specialty, classify_tier
35
+
36
+ # Difficulty buckets the tier head predicts; mapped onto the ladder by
37
+ # start = min(bucket, n_tiers - 1) — exactly classify_tier's clamping contract,
38
+ # so the head stays ladder-length-agnostic.
39
+ TIER_BUCKETS = 3
40
+
41
+ # Ordered thinking levels (matches smolcode-cli/src/router.rs Think enum).
42
+ THINK_LEVELS = ("off", "low", "high", "xtra")
43
+
44
+ # Default per-head confidence thresholds; overridden by router_clf.json's
45
+ # "thresholds" map written at export/calibration time.
46
+ _DEFAULT_TAU = {"specialty": 0.60, "tier": 0.55, "escalate": 0.65}
47
+
48
+ _DEFAULT_DIR = Path(__file__).resolve().parent.parent / "finetune" / "router_clf" / "onnx"
49
+
50
+
51
class RouteDecision(BaseModel):
    """Typed outcome of routing one task.

    `tier` is a start index into the active ladder; `think` is one of the
    thinking-level names.
    """

    specialty: str
    tier: int
    escalate: bool
    think: str
    # Model confidence per field; 0.0 when the field came from regex/default.
    confidences: dict[str, float] = Field(default_factory=dict)
    # Where each field came from: "model" | "regex" | "default"
    # (kept for telemetry/debugging).
    sources: dict[str, str] = Field(default_factory=dict)
62
+
63
+
64
+ def _softmax(row): # row: 1-D numpy array
65
+ import numpy as np
66
+
67
+ # If the ONNX head already emits a probability distribution, don't re-normalize
68
+ # (argmax is unaffected either way, but confidence should stay honest).
69
+ if row.min() >= 0.0 and abs(float(row.sum()) - 1.0) < 1e-3:
70
+ return row
71
+ e = np.exp(row - row.max())
72
+ return e / e.sum()
73
+
74
+
75
+ class _OnnxHead:
76
+ """A single ONNX sequence-classification head + its tokenizer and label map."""
77
+
78
+ def __init__(self, session, tokenizer, labels: list[str], input_names: set[str],
79
+ max_len: int = 128) -> None:
80
+ self.session = session
81
+ self.tokenizer = tokenizer
82
+ self.labels = labels
83
+ self.input_names = input_names
84
+ self.max_len = max_len
85
+
86
+ @classmethod
87
+ def try_load(cls, dpath: Path) -> "_OnnxHead | None":
88
+ """Load model.onnx + tokenizer.json + labels.json from a dir, or None."""
89
+ model_file, tok_file, labels_file = (
90
+ dpath / "model.onnx", dpath / "tokenizer.json", dpath / "labels.json",
91
+ )
92
+ if not (model_file.exists() and tok_file.exists() and labels_file.exists()):
93
+ return None
94
+ import onnxruntime as ort
95
+ from tokenizers import Tokenizer
96
+
97
+ sess = ort.InferenceSession(
98
+ str(model_file), providers=["CPUExecutionProvider"],
99
+ )
100
+ tok = Tokenizer.from_file(str(tok_file))
101
+ meta = json.loads(labels_file.read_text())
102
+ labels = meta["labels"] if isinstance(meta, dict) else list(meta)
103
+ max_len = int(meta.get("max_len", 128)) if isinstance(meta, dict) else 128
104
+ input_names = {i.name for i in sess.get_inputs()}
105
+ return cls(sess, tok, labels, input_names, max_len=max_len)
106
+
107
+ def predict(self, text: str) -> tuple[str, float]:
108
+ """(label, confidence) for the argmax class."""
109
+ import numpy as np
110
+
111
+ enc = self.tokenizer.encode(text)
112
+ ids = enc.ids[: self.max_len]
113
+ mask = [1] * len(ids)
114
+ feed = {
115
+ "input_ids": np.asarray([ids], dtype=np.int64),
116
+ "attention_mask": np.asarray([mask], dtype=np.int64),
117
+ }
118
+ if "token_type_ids" in self.input_names:
119
+ feed["token_type_ids"] = np.zeros((1, len(ids)), dtype=np.int64)
120
+ out = self.session.run(None, feed)[0]
121
+ probs = _softmax(np.asarray(out)[0])
122
+ idx = int(probs.argmax())
123
+ return self.labels[idx], float(probs[idx])
124
+
125
+
126
class RouteClassifier:
    """Loads the (optional) ONNX heads and turns a task string into a RouteDecision.

    Always safe to construct: missing deps or artifacts -> empty `heads`, and every
    prediction abstains to the regex baseline. A head that fails at inference
    time is treated the same way as a head that is not confident.
    """

    def __init__(self, model_dir: str | os.PathLike | None = None) -> None:
        self.model_dir = Path(
            model_dir or os.environ.get("SMALLCODE_ROUTER_CLF_DIR", _DEFAULT_DIR)
        )
        self.heads: dict[str, _OnnxHead] = {}
        self.thresholds = dict(_DEFAULT_TAU)
        self.think_map: dict | None = None
        self._load()

    def _load(self) -> None:
        """Read router_clf.json (if any) and load whichever heads are present."""
        try:  # the heavy trio — absent in a bare runtime, which is fine.
            import numpy  # noqa: F401
            import onnxruntime  # noqa: F401
            import tokenizers  # noqa: F401
        except Exception:
            return
        cfg_path = self.model_dir / "router_clf.json"
        if cfg_path.exists():
            try:
                cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
                # Coerce to float so a stringly-typed threshold can't blow up
                # the `conf >= tau` comparison later.
                for name, tau in (cfg.get("thresholds") or {}).items():
                    self.thresholds[name] = float(tau)
                think_map = cfg.get("think_map")
                self.think_map = think_map if isinstance(think_map, dict) else None
            except Exception:
                pass  # a broken config file just leaves the defaults in place
        for name in ("specialty", "tier", "escalate"):
            try:
                head = _OnnxHead.try_load(self.model_dir / name)
            except Exception:
                head = None
            if head is not None:
                self.heads[name] = head

    @property
    def available(self) -> bool:
        """True when at least one head was loaded."""
        return bool(self.heads)

    # --- per-decision helpers (model if confident, else regex/default) --------

    def _confident(self, name: str, task: str) -> tuple[str, float] | None:
        """(label, confidence) from head `name` if it clears its threshold, else None.

        Also returns None when the head is absent or inference raises, so a
        runtime failure degrades to the fallback instead of crashing routing.
        """
        head = self.heads.get(name)
        if head is None:
            return None
        try:
            label, conf = head.predict(task)
        except Exception:
            return None
        if conf >= self.thresholds[name]:
            return label, conf
        return None

    def pick_specialty(self, task: str, specialties=None) -> tuple[str, float, str]:
        """(specialty, confidence, source); the model label must be in `specialties` if given."""
        hit = self._confident("specialty", task)
        if hit is not None and (specialties is None or hit[0] in specialties):
            return hit[0], hit[1], "model"
        return classify_specialty(task), 0.0, "regex"

    def pick_tier(self, task: str, n_tiers: int) -> tuple[int, float, str]:
        """(start tier, confidence, source); the model bucket is clamped to the ladder."""
        hit = self._confident("tier", task)
        if hit is not None:
            try:
                bucket = int(hit[0])
            except (TypeError, ValueError):
                bucket = None
            # A label that isn't a non-negative integer says nothing about the
            # tier; defer to the regex rather than report tier 0 as "model".
            if bucket is not None and bucket >= 0:
                return min(bucket, max(n_tiers - 1, 0)), hit[1], "model"
        return classify_tier(task, n_tiers), 0.0, "regex"

    def pick_escalate(self, task: str) -> tuple[bool, float, str]:
        """(escalate?, confidence, source)."""
        hit = self._confident("escalate", task)
        if hit is not None:
            label = str(hit[0]).strip().lower()
            return label in ("1", "true", "yes", "escalate"), hit[1], "model"
        # No regex equivalent — default to "no escalation predicted".
        return False, 0.0, "default"

    def think_for(self, tier: int, n_tiers: int, escalate: bool) -> str:
        """Thinking level for (tier, escalate): the configured map first, else the default."""
        if self.think_map:
            rung = max(0, min(tier, n_tiers - 1))
            key = f"{rung}:{int(escalate)}"
            lvl = self.think_map.get(key) or self.think_map.get(str(rung))
            if lvl in THINK_LEVELS:
                return lvl
        return default_think(tier, n_tiers, escalate)

    def decide(self, task: str, *, specialties=None, n_tiers: int = 1) -> RouteDecision:
        """Assemble the full routing decision for `task`."""
        sp, sp_c, sp_s = self.pick_specialty(task, specialties)
        tier, t_c, t_s = self.pick_tier(task, n_tiers)
        esc, e_c, e_s = self.pick_escalate(task)
        return RouteDecision(
            specialty=sp,
            tier=tier,
            escalate=esc,
            think=self.think_for(tier, n_tiers, esc),
            confidences={"specialty": sp_c, "tier": t_c, "escalate": e_c},
            sources={"specialty": sp_s, "tier": t_s, "escalate": e_s},
        )
221
+
222
+
223
def default_think(tier: int, n_tiers: int, escalate: bool) -> str:
    """Monotone map: a higher start rung / predicted escalation -> more thinking.

    The rung's relative position in the ladder selects a (calm, pushed) pair of
    levels; `escalate` then picks the pushed one. A ladder of at most one tier
    has no position to speak of and maps to off/high.
    """
    if n_tiers <= 1:
        calm, pushed = "off", "high"
    else:
        frac = tier / (n_tiers - 1)
        if frac >= 0.999:
            calm, pushed = "high", "xtra"
        elif frac >= 0.5:
            calm, pushed = "low", "high"
        else:
            calm, pushed = "off", "low"
    return pushed if escalate else calm
233
+
234
+
235
@functools.lru_cache(maxsize=1)
def get_classifier() -> RouteClassifier:
    """Return the process-wide classifier, building it on the first call.

    The cache keeps a single instance, so the ONNX sessions load only once.
    """
    classifier = RouteClassifier()
    return classifier
239
+
240
+
241
def classify_route(task: str, *, specialties=None, n_tiers: int = 1) -> RouteDecision:
    """Public entry: a typed, confidence-gated routing decision for `task`.

    Delegates to the shared classifier's `decide` with the same arguments.
    """
    classifier = get_classifier()
    return classifier.decide(task, specialties=specialties, n_tiers=n_tiers)
engine/router.py ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tiered model router — the "forge-router" pattern.
2
+
3
+ The point of smolcode: don't burn a 32B model on a one-line helper, and don't
4
+ fail a hard task on a 3B. The router picks a *starting* tier from a cheap
5
+ complexity heuristic, runs the agent, then **escalates on failure**: if the
6
+ produced code doesn't actually pass when re-run, it retries the whole task on the
7
+ next-bigger model. The tier that ultimately solved it is surfaced for the UI badge.
8
+
9
+ Each tier is an independent SmallCodeAgent (its own model + fresh workspace), so
10
+ every model in the ladder uses LiteForge's native tool-calling loop — no parsing
11
+ hacks. All tiers are <=32B to stay hackathon-eligible.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import re
17
+ from collections.abc import AsyncIterator
18
+ from dataclasses import dataclass, field
19
+
20
+ from . import browsercheck
21
+ from .agent import SmallCodeAgent, Step
22
+ from .config import Preset, SpecialistLadder, SpecialistPreset, Tier, load_preset
23
+ from .judge import judge_correct, judge_enabled
24
+ from .live_run import LiveFrame
25
+ from .preview import find_entry, inline_app
26
+ from .trace_collector import TraceEvent
27
+ from .ui_trace import merge_step_metadata
28
+
29
+ # Signals that a task is non-trivial and worth starting higher up the ladder.
30
+ # Leading \b + trailing \w* so stems match their word family
31
+ # (recursi -> recursive, optimi -> optimize, concurren -> concurrency).
32
+ _HARD_HINTS = re.compile(
33
+ r"\b(class|async|thread|concurren|regex|pars|algorithm|optimi|recursi|"
34
+ r"benchmark|refactor|multiple files|api|server|database|sql|decorator|"
35
+ r"generator|data ?structure|graph|tree|dynamic programming)\w*",
36
+ re.I,
37
+ )
38
+
39
+
40
+ def _route_classifier():
41
+ """The learned routing classifier singleton, or None if unavailable.
42
+
43
+ Importing route_clf pulls in pydantic (and lazily onnxruntime); any failure
44
+ here just means we route with the regex baseline below.
45
+ """
46
+ try:
47
+ from .route_clf import get_classifier
48
+ return get_classifier()
49
+ except Exception:
50
+ return None
51
+
52
+
53
def classify_tier(task: str, n_tiers: int) -> int:
    """Pick a starting tier index (0 = smallest) from cheap, transparent signals.

    One point each for: a task longer than 280 characters, at least one
    hard-hint match, and at least three hard-hint matches. The total is
    clamped to the last tier index; a ladder of one tier or fewer is always 0.
    """
    if n_tiers <= 1:
        return 0
    hint_hits = len(_HARD_HINTS.findall(task))
    signals = (len(task) > 280, hint_hits >= 1, hint_hits >= 3)
    return min(sum(signals), n_tiers - 1)
65
+
66
+
67
+ # --- specialty (language/function) classifier --------------------------------
68
+ # Picks the specialist *family* for a task; classify_tier then picks the size
69
+ # within it. Same cheap, transparent, ordered-regex style as classify_tier.
70
+ # Priority on ties (earlier wins); 'py' is last because it's the safe default.
71
+ # `orchestrate` is first: explicit fan-out language is a strong, specific signal
72
+ # that should win over an incidental language mention.
73
+ _SPECIALTY_ORDER = ("orchestrate", "git", "terraform", "docker", "sql", "powershell",
74
+ "bsd", "rust", "go", "cpp", "java", "dotnet", "csharp", "bash",
75
+ "js", "py")
76
+
77
+ _FENCE_LANG = re.compile(r"```([a-z0-9+#.]+)", re.I)
78
+ _FENCE_TO_SPECIALTY = {
79
+ "python": "py", "py": "py", "pytest": "py",
80
+ "bash": "bash", "sh": "bash", "shell": "bash", "zsh": "bash", "console": "bash",
81
+ "powershell": "powershell", "ps1": "powershell", "pwsh": "powershell",
82
+ "sql": "sql", "psql": "sql", "sqlite": "sql",
83
+ "javascript": "js", "js": "js", "ts": "js", "typescript": "js",
84
+ "jsx": "js", "tsx": "js", "node": "js",
85
+ "go": "go", "golang": "go",
86
+ "rust": "rust", "rs": "rust",
87
+ "cpp": "cpp", "c++": "cpp", "cc": "cpp", "c": "cpp",
88
+ "java": "java",
89
+ "csharp": "csharp", "cs": "csharp",
90
+ "dockerfile": "docker", "docker": "docker",
91
+ "hcl": "terraform", "terraform": "terraform", "tf": "terraform",
92
+ }
93
+
94
+ _EXT_RE = re.compile(r"\.(py|sh|bash|ps1|sql|js|mjs|cjs|ts|tsx|jsx|go|rs|cpp|cc|cxx|"
95
+ r"hpp|java|cs|csproj|tf|dockerfile)\b", re.I)
96
+ _EXT_TO_SPECIALTY = {
97
+ "py": "py", "sh": "bash", "bash": "bash", "ps1": "powershell", "sql": "sql",
98
+ "js": "js", "mjs": "js", "cjs": "js", "ts": "js", "tsx": "js", "jsx": "js",
99
+ "go": "go", "rs": "rust", "cpp": "cpp", "cc": "cpp", "cxx": "cpp", "hpp": "cpp",
100
+ "java": "java", "cs": "csharp", "csproj": "dotnet", "tf": "terraform",
101
+ "dockerfile": "docker",
102
+ }
103
+
104
+ _SPECIALTY_HINTS = {
105
+ # Fan-out / parallel delegation work -> the task_batch specialist.
106
+ "orchestrate": re.compile(r"\b(in parallel|fan ?out|concurrently|task_batch|"
107
+ r"orchestrat|several independent|multiple independent|"
108
+ r"simultaneously|batch of (tasks|jobs))\w*", re.I),
109
+ # NOTE: `staged` requires the trailing 'd' so it does NOT match "stage" inside
110
+ # "multi-stage" (a docker term) — that false-positive misrouted Docker tasks.
111
+ "git": re.compile(r"\b(git|commit|rebase|cherry-?pick|merge conflict|stash|"
112
+ r"\bbranch\b|pull request|\bPR\b|revert|bisect|staged)\w*", re.I),
113
+ "terraform": re.compile(r"\b(terraform|\bhcl\b|\.tf\b|provider|resource block|"
114
+ r"infrastructure as code|\biac\b|tfstate)\w*", re.I),
115
+ "docker": re.compile(r"\b(docker|dockerfile|docker-?compose|container image|"
116
+ r"\bimage\b|\bbuild -t\b|entrypoint)\w*", re.I),
117
+ "sql": re.compile(r"\b(sql|select |insert |update |delete |join|schema|"
118
+ r"\btable\b|\bindex\b|migration|postgres|sqlite|mysql|query)\w*", re.I),
119
+ "powershell": re.compile(r"\b(powershell|pwsh|\.ps1|cmdlet|get-|set-|write-output)\w*", re.I),
120
+ "bsd": re.compile(r"\b(freebsd|openbsd|netbsd|\bbsd\b|pf\.conf|rc\.d|pkg_add)\w*", re.I),
121
+ "rust": re.compile(r"\b(rust|cargo|crate|rustc|\.rs\b|borrow checker|tokio)\w*", re.I),
122
+ "go": re.compile(r"\b(golang|\bgo\b|goroutine|go mod|go test|\.go\b)\w*", re.I),
123
+ "cpp": re.compile(r"\b(c\+\+|cpp|g\+\+|clang|std::|cmake|\.cpp\b|template)\w*", re.I),
124
+ "java": re.compile(r"\b(java|maven|gradle|\bjvm\b|junit|\.java\b)\w*", re.I),
125
+ "dotnet": re.compile(r"\b(\.net|dotnet|nuget|asp\.net|\.csproj|msbuild)\w*", re.I),
126
+ "csharp": re.compile(r"\b(c#|csharp|\blinq\b|\.cs\b|\bxunit\b)\w*", re.I),
127
+ "bash": re.compile(r"\b(shell script|\bbash\b|\bzsh\b|chmod|grep|sed|awk|"
128
+ r"\bpipe\b|cron|stdout|stderr|\$PATH)\w*", re.I),
129
+ "js": re.compile(r"\b(javascript|typescript|node|npm|react|vue|jsx|tsx|"
130
+ r"webpack|vite|eslint|package\.json)\w*", re.I),
131
+ "py": re.compile(r"\b(python|pytest|pandas|numpy|django|flask|pip|venv|"
132
+ r"def |async def|decorator)\w*", re.I),
133
+ }
134
+
135
+
136
def classify_specialty(task: str, *, default: str = "py") -> str:
    """Pick the specialist family key for a task. Cheap, transparent, deterministic.

    Precedence (most explicit signal first):

    1. ``SMALLCODE_SPECIALTY`` env override (ignored when empty or only whitespace).
    2. The first code-fence language tag that maps to a specialty.
    3. Keyword cues and file-extension mentions, scored together; ties go to
       the specialty listed earlier in ``_SPECIALTY_ORDER``.
    4. ``default`` when nothing scored.

    Mirrors classify_tier's style; pairs with it for 2D routing.
    """
    # Normalise before testing so a blank override ("  ") does not come back as "".
    forced = (os.environ.get("SMALLCODE_SPECIALTY") or "").strip().lower()
    if forced:
        return forced

    # A fenced code block (```lang) is the single most explicit signal -> hard win.
    for lang in _FENCE_LANG.findall(task):
        fenced = _FENCE_TO_SPECIALTY.get(lang.lower())
        if fenced:
            return fenced

    # Otherwise SCORE keyword cues AND file-extension mentions together, so a strong
    # action signal (e.g. "rebase ... merge conflict") beats an incidental ".py"
    # filename.
    scores = {key: len(rx.findall(task)) for key, rx in _SPECIALTY_HINTS.items()}
    for found_ext in _EXT_RE.findall(task):
        key = _EXT_TO_SPECIALTY.get(found_ext.lower())
        if key:
            scores[key] = scores.get(key, 0) + 1
    if not scores:
        return default

    # Rank once; a key missing from _SPECIALTY_ORDER sorts last instead of raising.
    rank = {key: pos for pos, key in enumerate(_SPECIALTY_ORDER)}
    lowest = len(_SPECIALTY_ORDER)
    best = max(scores, key=lambda key: (scores[key], -rank.get(key, lowest)))
    if scores[best] > 0:
        return best

    return default
166
+
167
+
168
@dataclass
class RouteResult:
    """Outcome of one routed task, as assembled by ``_build_result``."""

    # Final answer text and the step list from the agent turn.
    final: str
    steps: list[Step]
    # Name and model tag of the tier that produced this result.
    tier_name: str
    tier_model: str
    # Name of the tier the router started on.
    start_tier: str
    # Escalation count at the time the result was built.
    escalations: int
    # Whether the output passed the router's checks.
    verified: bool
    # Specialty of the ladder that was used.
    specialty: str = "general"
    # Workspace files (path -> content) and the merged trace events.
    files: dict[str, str] = field(default_factory=dict)
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that produced the result, when one is attached.
    agent: SmallCodeAgent | None = None
181
+
182
+
183
+ def _smoke_command(files: list[str]) -> str | None:
184
+ """A best-effort 'does it build/run (and pass any tests)?' shell command for a
185
+ NON-Python solution, or None if the language isn't recognized. Mirrors the
186
+ per-specialty run commands (finetune/specialties.py) so the router can escalate
187
+ on go/rust/js/sql/… exactly like it does on Python via run_python."""
188
+ def ext(e: str) -> list[str]:
189
+ return [f for f in files if f.endswith(e)]
190
+
191
+ if ext(".go"):
192
+ if any(f.endswith("_test.go") for f in files):
193
+ return "go test ./... 2>&1"
194
+ return "go run . 2>&1 || go run *.go 2>&1"
195
+ if "Cargo.toml" in files:
196
+ return "cargo test -q 2>&1 || cargo build -q 2>&1"
197
+ if ext(".rs"):
198
+ return f"rustc {ext('.rs')[0]} -o /tmp/_smv 2>&1 && /tmp/_smv"
199
+ js = ext(".js") + ext(".mjs") + ext(".cjs") + ext(".ts")
200
+ if "package.json" in files:
201
+ return "npm test --silent 2>&1 || node --test 2>&1"
202
+ if js:
203
+ if any(".test." in f or ".spec." in f for f in js):
204
+ return "node --test 2>&1"
205
+ entry = next((f for f in js if f in ("index.js", "main.js")), js[0])
206
+ return f"node {entry} 2>&1"
207
+ if ext(".sql"):
208
+ return f"sqlite3 :memory: < {ext('.sql')[0]} 2>&1"
209
+ if ext(".cpp") or ext(".cc"):
210
+ srcs = " ".join(ext(".cpp") + ext(".cc"))
211
+ return f"g++ -std=c++17 {srcs} -o /tmp/_smv 2>&1 && /tmp/_smv"
212
+ if ext(".java"):
213
+ main = "Main" if "Main.java" in files else ext(".java")[0][:-5]
214
+ return f"javac *.java 2>&1 && java {main} 2>&1"
215
+ if ext(".sh"):
216
+ return f"bash {ext('.sh')[0]} 2>&1"
217
+ if ext(".tf"):
218
+ return "terraform init -backend=false 2>&1 && terraform validate 2>&1"
219
+ if "Program.cs" in files or ext(".cs"):
220
+ return "dotnet run 2>&1"
221
+ return None
222
+
223
+
224
+ def _verify(agent: SmallCodeAgent) -> bool | None:
225
+ """Independently check the agent's output actually works.
226
+
227
+ Returns True/False if there's something runnable to check, else None
228
+ (unverifiable — don't escalate purely on a missing signal). Python uses the
229
+ pytest/run_python fast paths; other languages smoke-run via run_shell so the
230
+ specialist router escalates on a broken go/rust/sql/… solution instead of
231
+ silently accepting the smallest tier.
232
+ """
233
+ ws = agent.workspace
234
+ files = ws.list_files()
235
+ pys = [f for f in files if f.endswith(".py")]
236
+ if pys:
237
+ if any("test" in f.lower() for f in pys):
238
+ return ws.run_tests().ok
239
+ entry = next((f for f in pys if f in ("main.py", "solution.py")), None) or pys[0]
240
+ return ws.run_python(path=entry).ok
241
+ # Web app (index.html + browser JS): render it in a real browser — must come
242
+ # BEFORE the shell smoke-run so we don't `node` browser-side JS. Same signal
243
+ # smolbuilder's WebBuilder uses (engine/builder._evaluate).
244
+ web_files = agent.files()
245
+ if find_entry(web_files) is not None:
246
+ ok, _errors = browsercheck.check_html(inline_app(web_files))
247
+ return ok
248
+ cmd = _smoke_command(files)
249
+ if cmd is not None:
250
+ return ws.run_shell(cmd, timeout=90).ok
251
+ return None
252
+
253
+
254
def _build_result(agent: SmallCodeAgent, final: str, steps: list[Step], tier: Tier,
                  start_name: str, escalations: int, verified: bool,
                  specialty: str = "general") -> RouteResult:
    """Assemble a RouteResult from a finished agent turn.

    Trace events are the agent's collector snapshot merged with its raw
    history; the file map is read from the agent at call time.
    """
    snapshot = agent.trace_collector.snapshot()
    merged_events = merge_step_metadata(snapshot, agent.raw_history())
    workspace_files = agent.files()
    return RouteResult(
        final=final,
        steps=steps,
        tier_name=tier.name,
        tier_model=tier.model,
        start_tier=start_name,
        escalations=escalations,
        verified=verified,
        specialty=specialty,
        files=workspace_files,
        trace_events=merged_events,
        agent=agent,
    )
263
+
264
+
265
+ # Difficulty buckets the tier head predicts (matches route_clf.TIER_BUCKETS). Kept as
266
+ # a local constant so router.py imports even when route_clf's deps (pydantic) are
267
+ # absent. The bucket drives BOTH the thinking level and the start-tier clamp, so it's
268
+ # decoupled from the ladder length — think stays meaningful even for a pinned 1-tier
269
+ # preset.
270
+ _THINK_BUCKETS = 3
271
+
272
+
273
class Router:
    """Routes a task to a starting tier and escalates up the ladder on failure.

    For each tier from the chosen start rung upwards, a fresh SmallCodeAgent runs
    the task. The output is checked with ``_verify`` (and optionally a judge); a
    concrete failure moves on to the next tier, anything else ends the run.
    """

    def __init__(
        self,
        preset: Preset | None = None,
        max_steps: int = 12,
        approval_handler=None,
        workspace_dir: str | None = None,
        think: str = "off",
        yolo: bool = False,
        agent: str = "build",
        size_floor: str | None = None,
    ) -> None:
        """Store routing options; ``preset`` falls back to ``load_preset()``."""
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.approval_handler = approval_handler
        self.workspace_dir = workspace_dir
        self.think = think
        self.yolo = yolo
        self.agent_name = agent
        # "Auto · <size>" pins the START rung to this specialist size (e.g. "3b") while
        # the router still picks the specialty and escalation still climbs the ladder.
        self.size_floor = size_floor

    async def run(self, task: str) -> RouteResult:
        """Run ``task`` to completion and return the final RouteResult.

        Drains ``run_live`` and keeps the result carried by its done frame.

        Raises:
            RuntimeError: if ``run_live`` ended without a done frame carrying a
                RouteResult (for example an empty ladder).
        """
        result: RouteResult | None = None
        async for frame in self.run_live(task):
            if frame.done and isinstance(frame.result, RouteResult):
                result = frame.result
        # Explicit check rather than `assert`: asserts are stripped under `python -O`,
        # which would let None escape despite the declared return type.
        if result is None:
            raise RuntimeError("Router.run_live finished without producing a RouteResult")
        return result

    def _ladder_for(self, task: str, specialty: str | None = None) -> SpecialistLadder:
        """The size ladder for this task's specialty (generic if not a matrix preset).

        `specialty` may be supplied by the learned classifier; falls back to the
        regex classify_specialty when not given.
        """
        if isinstance(self.preset, SpecialistPreset):
            if specialty is None:
                specialty = classify_specialty(task)
            return self.preset.ladder_for(specialty)
        return SpecialistLadder(specialty="general", tiers=self.preset.tiers)

    def _size_floor_index(self, tiers: list[Tier], size_floor: str) -> int:
        """Start-rung index for an 'Auto · <size>' pin: the first ladder tier whose
        size is >= the floor (closest available, then escalates). Falls back to 0
        when the floor does not parse to a positive size, and to the last rung
        when every tier is smaller than the floor."""
        from .config import parse_size_b
        # Accept both "3b" and a bare "3".
        target = parse_size_b(size_floor if str(size_floor).lower().endswith("b")
                              else f"{size_floor}b")
        if target <= 0:
            return 0
        for i, t in enumerate(tiers):
            if parse_size_b(t.model) >= target:
                return i
        return max(len(tiers) - 1, 0)

    def _route(self, task: str) -> tuple[SpecialistLadder, int, str]:
        """Pick (ladder, start-tier index, thinking level) for a task.

        Uses the learned RouteClassifier when it is available; otherwise the regex
        baseline. A difficulty bucket (decoupled from ladder length) drives both the
        start rung and the thinking level. `size_floor` (Auto · <size>) overrides the
        start rung; an explicit user `/think` (anything but the default "off") wins.
        """
        clf = _route_classifier()
        has_clf = clf is not None and clf.available

        # 1. specialty -> size ladder
        if has_clf and isinstance(self.preset, SpecialistPreset):
            specialty = clf.pick_specialty(task, list(self.preset.ladders))[0]
            ladder = self._ladder_for(task, specialty=specialty)
        else:
            ladder = self._ladder_for(task)
        tiers = ladder.tiers

        # 2. difficulty bucket (0.._THINK_BUCKETS-1) + escalation hint
        if has_clf:
            bucket = clf.pick_tier(task, _THINK_BUCKETS)[0]
            esc = clf.pick_escalate(task)[0]
        else:
            bucket = classify_tier(task, _THINK_BUCKETS)
            esc = False

        # 3. start rung: an explicit size floor wins; else the difficulty bucket
        if self.size_floor:
            start = self._size_floor_index(tiers, self.size_floor)
        else:
            start = min(bucket, max(len(tiers) - 1, 0))

        # 4. thinking level: explicit /think wins; else router-derived (clf only)
        if self.think != "off":
            think = self.think
        elif has_clf:
            think = clf.think_for(bucket, _THINK_BUCKETS, esc)
        else:
            think = "off"
        return ladder, start, think

    async def run_live(
        self,
        task: str,
        *,
        rust_session=None,
    ) -> AsyncIterator[LiveFrame]:
        """Yield live frames while routing; final frame carries RouteResult.

        Intermediate agent frames are forwarded as they arrive. Each tier's done
        frame is intercepted, checked, and either re-emitted with the RouteResult
        or dropped so the next tier can try. If every tier fails, the last tier's
        result is still emitted as the final frame.
        """
        ladder, start, think = self._route(task)
        specialty = ladder.specialty
        tiers = ladder.tiers
        escalations = 0
        last: RouteResult | None = None
        prev_tier_name: str | None = None

        for idx in range(start, len(tiers)):
            tier = tiers[idx]
            if prev_tier_name is not None:
                yield LiveFrame(events=[
                    TraceEvent(kind="tier_escalation", name=tier.name,
                               detail=f"escalated from {prev_tier_name}"),
                ])
            # The start tier reuses the caller's session; make it run the ROUTED model
            # (not whatever the UI last pinned), so "Auto" honors the router's pick and
            # a concrete pin (single-tier ladder) runs exactly that model.
            if idx == start and rust_session is not None:
                try:
                    rust_session.set_model(tier.model)
                except Exception:
                    # Deliberate best-effort: a failed model switch must not abort
                    # the run.
                    pass
            agent = SmallCodeAgent(
                preset=self.preset,
                model=tier.model,
                max_steps=self.max_steps,
                approval_handler=self.approval_handler,
                workspace_dir=self.workspace_dir,
                agent=self.agent_name,
                yolo=self.yolo,
                rust_session=rust_session if idx == start else None,
            )
            async for frame in agent.run_live_turn(
                task, think=think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok = False if (agent.hit_max_steps or agent.errored) else _verify(agent)
                # _verify only proves the code RAN, not that it's correct. If it ran
                # clean (ok is True) but a bigger tier exists, ask a judge whether the
                # solution actually satisfies the task; a concrete "no" -> escalate.
                if ok is True and idx < len(tiers) - 1 and judge_enabled():
                    correct = await judge_correct(
                        self.preset, tiers[idx + 1].model, task, agent.files(), final,
                    )
                    if not correct:
                        ok = False
                last = _build_result(
                    agent, final, steps, tier, tiers[start].name,
                    escalations, bool(ok), specialty=specialty,
                )
                # True (verified) and None (unverifiable) both end the run here.
                if ok is not False:
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return
                if idx < len(tiers) - 1:
                    agent.trace_collector.record_escalation(tier.name, tiers[idx + 1].name)
                    agent.cleanup()
                escalations += 1
                prev_tier_name = tier.name
                break

        # Every tier failed: surface the last attempt so the caller still gets a result.
        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )
engine/rust_session.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Python facade over the Rust smolcode agent engine (smolcode_core)."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import json
6
+ import os
7
+ import tempfile
8
+ from collections.abc import Awaitable, Callable
9
+ from dataclasses import dataclass, field
10
+ from typing import Any
11
+
12
+ from .trace_collector import TraceCollector, TraceEvent
13
+
14
+ try:
15
+ import smolcode_core as _rust
16
+ except ImportError:
17
+ _rust = None # type: ignore
18
+
19
+
20
def rust_available() -> bool:
    """Report whether the ``smolcode_core`` extension module was imported."""
    missing = _rust is None
    return not missing
22
+
23
+
24
+ ApprovalHandler = Callable[[str], Awaitable[bool]]
25
+
26
+
27
@dataclass
class RustRunResult:
    """Summary of one completed agent turn, as returned by ``RustSession.run``."""

    # Text of the turn's "final" event ("" if none was seen).
    final: str
    # Set when the final text reports running out of steps.
    hit_max_steps: bool = False
    # Set when an "error" event was seen during the turn.
    errored: bool = False
32
+
33
+
34
class RustSession:
    """Thin wrapper around smolcode_core.Session.

    Most methods forward directly to the underlying session. ``run`` and
    ``poll_events_once`` additionally drain the session's event queue, answer
    approval requests, and mirror events into ``trace_collector``.
    """

    def __init__(
        self,
        *,
        workspace: str | None = None,
        agent: str = "build",
        yolo: bool = False,
        model: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        profile: str = "full",
        approval_handler: ApprovalHandler | None = None,
    ) -> None:
        """Create the underlying session.

        When ``workspace`` is None, the ``SMALLCODE_WORKSPACE`` environment
        variable is used; if that is unset or empty, a fresh temporary
        directory is created.

        Raises:
            RuntimeError: if ``smolcode_core`` is not installed.
        """
        if _rust is None:
            raise RuntimeError(
                "smolcode_core is not installed; build with "
                "`maturin develop --release` in smolcode-cli/crates/smolcode-py"
            )
        if workspace is None:
            # Use `or`, not a .get() default: a default argument is evaluated eagerly,
            # which would create (and leak) a temp directory even when the env var is set.
            workspace = (
                os.environ.get("SMALLCODE_WORKSPACE")
                or tempfile.mkdtemp(prefix="smolcode-")
            )
        self._session = _rust.Session(
            workspace=workspace,
            agent=agent,
            yolo=yolo,
            model=model,
            base_url=base_url,
            api_key=api_key,
            profile=profile,
        )
        self.trace_collector = TraceCollector()
        self.approval_handler = approval_handler
        self.hit_max_steps = False
        self.errored = False
        self._steps: list[dict[str, Any]] = []
        self._final: str = ""
        self._cancelled = False

    def request_cancel(self) -> None:
        """Flag the current turn as cancelled and cancel it in the engine."""
        self._cancelled = True
        self.cancel_turn()

    @property
    def cancelled(self) -> bool:
        """Whether a cancel has been requested and not yet cleared."""
        return self._cancelled

    def clear_cancel(self) -> None:
        """Reset the cancel flag."""
        self._cancelled = False

    @property
    def session_id(self) -> str:
        return self._session.session_id

    @property
    def workspace_path(self) -> str:
        return self._session.workspace()

    def set_model(self, model: str) -> None:
        self._session.set_model(model)

    def set_agent(self, agent: str) -> None:
        self._session.set_agent(agent)

    def set_think(self, level: str) -> None:
        self._session.set_think(level)

    def register_tool(self, name: str, fn: Callable[[dict], dict]) -> None:
        self._session.register_tool(name, fn)

    def files(self) -> dict[str, str]:
        """Return ``{path: content}`` for every workspace file that can be read."""
        out: dict[str, str] = {}
        for path in self._session.workspace_files():
            content = self._session.read_file(path)
            if content is not None:
                out[path] = content
        return out

    def run_shell(self, command: str) -> str:
        return self._session.run_shell(command)

    async def run(
        self,
        task: str,
        *,
        think: str | None = None,
        yolo: bool | None = None,
    ) -> RustRunResult:
        """Run one agent turn to completion.

        Polls the session for events until a "done" event arrives or a cancel
        is requested. Approval requests go to ``approval_handler`` when one is
        set; without a handler they are approved only if ``yolo`` is truthy.
        """
        self.hit_max_steps = False
        self.errored = False
        self._final = ""
        self.clear_cancel()
        self._session.start_turn(task, think=think, yolo=yolo)
        final_text = ""
        while True:
            if self._cancelled:
                break
            # poll_event runs in a worker thread so it never blocks the event loop.
            ev = await asyncio.to_thread(self._session.poll_event)
            if ev is None:
                await asyncio.sleep(0.05)
                continue
            kind = ev.get("kind")
            if kind == "approval":
                approved = True
                if self.approval_handler is not None:
                    approved = await self.approval_handler(ev.get("desc", ""))
                elif not yolo:
                    # No handler and yolo is unset or false: refuse.
                    approved = False
                self._session.approve(approved)
                continue
            self._ingest_event(ev)
            if kind == "final":
                final_text = ev.get("text", "")
            if kind == "done":
                break
            if kind == "error":
                self.errored = True
        self._final = final_text
        # The step limit is detected from the wording of the final message.
        lowered = final_text.lower()
        if "step" in lowered and "without finishing" in lowered:
            self.hit_max_steps = True
        self._session.record_turn(task, final_text)
        return RustRunResult(
            final=final_text,
            hit_max_steps=self.hit_max_steps,
            errored=self.errored,
        )

    async def poll_events_once(self) -> list[dict[str, Any]]:
        """Drain currently queued events for live UI updates during a turn.

        Stops at the first empty poll or after a "done" event. Approval events
        are answered here and are not included in the returned list.
        """
        events: list[dict[str, Any]] = []
        while True:
            ev = await asyncio.to_thread(self._session.poll_event)
            if ev is None:
                break
            kind = ev.get("kind")
            if kind == "approval":
                # NOTE(review): unlike run(), this approves when no handler is set,
                # regardless of yolo. Confirm that is intended.
                approved = True
                if self.approval_handler is not None:
                    approved = await self.approval_handler(ev.get("desc", ""))
                self._session.approve(approved)
                continue
            self._ingest_event(ev)
            events.append(ev)
            if kind in ("done",):
                break
        return events

    def _ingest_event(self, ev: dict[str, Any]) -> None:
        """Mirror one engine event into the trace collector."""
        kind = ev.get("kind")
        if kind == "tool_call":
            args_raw = ev.get("args", "{}")
            try:
                args = json.loads(args_raw) if isinstance(args_raw, str) else args_raw
            except json.JSONDecodeError:
                # Keep unparseable arguments rather than dropping the call.
                args = {"raw": args_raw}
            self.trace_collector.record_tool_call(ev.get("name", ""), args)
        elif kind == "tool_result":
            text = ev.get("text", "")
            try:
                result = json.loads(text)
            except json.JSONDecodeError:
                result = {"output": text}
            self.trace_collector.record_tool_result(ev.get("name", ""), result)
        elif kind == "final":
            self.trace_collector.record_final(ev.get("text", ""))
        elif kind == "error":
            self.trace_collector.record_error(ev.get("text", ""))
            self.errored = True

    def save(self) -> None:
        self._session.save()

    @staticmethod
    def list_sessions() -> list[dict[str, Any]]:
        """Stored sessions, or an empty list when smolcode_core is missing."""
        if _rust is None:
            return []
        return _rust.Session.list_sessions()

    def load_session(self, session_id: str) -> bool:
        return self._session.load_session(session_id)

    def fork(self) -> str | None:
        return self._session.fork()

    def rename(self, title: str) -> bool:
        return self._session.rename(title)

    def delete(self) -> bool:
        return self._session.delete()

    def cancel_turn(self) -> None:
        self._session.cancel_turn()

    def render_config(self) -> str:
        return self._session.render_config()
233
+
234
+
235
def render_config(session: RustSession) -> str:
    """Module-level shortcut for ``session.render_config()``."""
    rendered = session.render_config()
    return rendered
237
+
238
+
239
def apply_settings(session: RustSession, settings: Any) -> None:
    """Push the UI settings onto a live Rust session before an agent turn.

    The thinking level and agent are always applied. The model is applied only
    when it is a concrete pin: "auto" and "auto:<size>" are pseudo-selections
    rather than real model tags (the Router picks the model and sets it on the
    session, see router.run_live), so they are never passed to set_model.
    """
    session.set_think(settings.think)
    chosen = settings.model or ""
    is_pseudo = chosen == "auto" or chosen.startswith("auto:")
    if chosen and not is_pseudo:
        session.set_model(chosen)
    session.set_agent(settings.agent)
251
+
252
+
253
def list_commands(workspace: str) -> list[str]:
    """Commands reported by the engine for ``workspace``; [] without smolcode_core."""
    return [] if _rust is None else _rust.list_commands(workspace)
257
+
258
+
259
def expand_command(workspace: str, name: str, args: str = "") -> str | None:
    """Expand command ``name`` with ``args`` via the engine; None without smolcode_core."""
    return None if _rust is None else _rust.expand_command(workspace, name, args)
263
+
264
+
265
def list_rules(workspace: str) -> list[dict[str, Any]]:
    """Rules reported by the engine for ``workspace``; [] without smolcode_core."""
    return [] if _rust is None else _rust.list_rules(workspace)
269
+
270
+
271
def list_skills(workspace: str) -> list[dict[str, Any]]:
    """Skills reported by the engine for ``workspace``; [] without smolcode_core."""
    return [] if _rust is None else _rust.list_skills(workspace)
275
+
276
+
277
def expand_skill(workspace: str, name: str, args: str = "") -> str | None:
    """Expand skill ``name`` with ``args`` via the engine; None without smolcode_core."""
    return None if _rust is None else _rust.expand_skill(workspace, name, args)
281
+
282
+
283
def list_mcp(session: RustSession) -> list[dict[str, Any]]:
    """Return what the wrapped engine session's ``list_mcp()`` reports."""
    engine_session = session._session
    return engine_session.list_mcp()
285
+
286
+
287
def list_background_jobs() -> str:
    """Background-job listing from the engine; "" without smolcode_core."""
    return "" if _rust is None else _rust.list_background_jobs()
291
+
292
+
293
def write_agents_md(workspace: str) -> str:
    """Ask the engine to write AGENTS.md for ``workspace`` and return its result.

    Raises:
        RuntimeError: if smolcode_core is not installed.
    """
    if _rust is not None:
        return _rust.write_agents_md(workspace)
    raise RuntimeError("smolcode_core not installed")
297
+
298
+
299
def git_status(workspace: str) -> str:
    """Git status text from the engine for ``workspace``; "" without smolcode_core."""
    return "" if _rust is None else _rust.git_status(workspace)
303
+
304
+
305
def workspace_tree(workspace: str, depth: int = 3) -> str:
    """Workspace tree text from the engine down to ``depth``; "" without smolcode_core."""
    return "" if _rust is None else _rust.workspace_tree(workspace, depth=depth)
309
+
310
+
311
# Default cap on the number of paths workspace_paths() returns.
UI_FILE_LIMIT = 1500
AUTOCOMPLETE_FILE_LIMIT = 200

# Default truncation limit for read_workspace_file().
ATTACH_FILE_MAX_BYTES = 8192
316
+
317
+
318
def read_workspace_file(
    workspace: str,
    path: str,
    *,
    max_bytes: int = ATTACH_FILE_MAX_BYTES,
    rust: RustSession | None = None,
) -> str | None:
    """Read a workspace file for @-attachment inlining.

    Reads through ``rust`` when given, otherwise through a throwaway session
    opened on ``workspace``. Content longer than ``max_bytes`` (measured with
    ``len()`` on the returned text) is cut and marked as truncated.

    Returns None when smolcode_core is missing, the file is missing, or any
    error occurs while reading.
    """
    if _rust is None:
        return None
    try:
        reader = RustSession(workspace=workspace, yolo=True) if rust is None else rust
        text = reader._session.read_file(path)
        if text is None or len(text) <= max_bytes:
            return text
        return text[:max_bytes] + "\n… (truncated)"
    except Exception:
        return None
338
+
339
+
340
def workspace_paths(workspace: str, *, limit: int = UI_FILE_LIMIT) -> tuple[list[str], int]:
    """Workspace paths for UI sidebars (no file reads).

    Returns ``(paths, total_count)``: the sorted paths cut to at most ``limit``
    entries, and the count before cutting. Without smolcode_core the result
    is ``([], 0)``.
    """
    if _rust is None:
        return [], 0
    listing = sorted(RustSession(workspace=workspace, yolo=True)._session.workspace_files())
    count = len(listing)
    shown = listing[:limit] if count > limit else listing
    return shown, count
350
+
351
+
352
def workspace_files(workspace: str) -> dict[str, str]:
    """Return ``{path: content}`` for the readable files in ``workspace``.

    Without smolcode_core the result is an empty dict, matching how
    workspace_paths degrades, instead of raising from the RustSession constructor.
    """
    if _rust is None:
        return {}
    session = RustSession(workspace=workspace, yolo=True)
    return session.files()
355
+
356
+
357
def export_transcript(session_id: str, path: str | None = None) -> str:
    """Export the transcript of ``session_id`` via the engine and return its result.

    Raises:
        RuntimeError: if smolcode_core is not installed.
    """
    if _rust is not None:
        return _rust.export_transcript(session_id, path)
    raise RuntimeError("smolcode_core not installed")
361
+
362
+
363
def session_timeline(session_id: str) -> list[str]:
    """Timeline entries for ``session_id`` from the engine; [] without smolcode_core."""
    return [] if _rust is None else _rust.session_timeline(session_id)
367
+
368
+
369
def get_session_chat(session_id: str) -> list[dict[str, str]]:
    """Stored chat lines for ``session_id`` from the engine; [] without smolcode_core."""
    return [] if _rust is None else _rust.get_session_chat(session_id)
373
+
374
+
375
def chat_from_stored(lines: list[dict[str, str]]) -> list[dict[str, str]]:
    """Convert stored session lines to Gradio chat messages.

    Each input entry becomes ``{"role": ..., "content": ...}``. Only the role
    "user" is kept as-is; every other role (or a missing one) becomes
    "assistant". A missing "text" becomes an empty string.
    """
    return [
        {
            "role": "user" if entry.get("role", "assistant") == "user" else "assistant",
            "content": entry.get("text", ""),
        }
        for entry in lines
    ]
386
+
387
+
388
def session_choices() -> list[str]:
    """Dropdown labels, one per stored session, formatted as `title (id)`."""
    labels: list[str] = []
    for record in RustSession.list_sessions():
        labels.append(f"{record['title']} ({record['id']})")
    return labels
394
+
395
+
396
def parse_session_label(label: str) -> str | None:
    """Extract the session id from a `title (id)` dropdown label.

    Returns the text after the last "(" with trailing ")" characters removed,
    or None when the label is empty or contains no "(".
    """
    if label and "(" in label:
        _title, _paren, tail = label.rpartition("(")
        return tail.rstrip(")")
    return None
400
+
401
+
402
def load_rust_config(
    *,
    model: str | None = None,
    base_url: str | None = None,
    api_key: str | None = None,
    agent: str | None = None,
    yolo: bool = False,
) -> dict[str, Any]:
    """Load layered config.toml via Rust Config.

    The keyword arguments are passed through to ``Config.load``. The returned
    dict exposes the loaded ``model``, ``base_url``, ``agent`` and ``yolo``
    (the API key is not included). Without smolcode_core the result is ``{}``.
    """
    if _rust is None:
        return {}
    overrides = {
        "model": model,
        "base_url": base_url,
        "api_key": api_key,
        "agent": agent,
        "yolo": yolo,
    }
    cfg = _rust.Config.load(**overrides)
    return {name: getattr(cfg, name) for name in ("model", "base_url", "agent", "yolo")}
engine/sandbox.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Execution sandbox for model-generated code.
2
+
3
+ This is the agentic core's "hands": it runs code the model writes and reports
4
+ back stdout/stderr/exit so the agent can iterate to green.
5
+
6
+ SECURITY: model-generated code is untrusted. The default here is a *soft*
7
+ sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
8
+ and output caps. It is adequate for local/laptop use. Before exposing a public
9
+ HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an
10
+ e2b/Docker microVM); the interface below does not change.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import shutil
16
+ import subprocess
17
+ import tempfile
18
+ from dataclasses import dataclass
19
+ from pathlib import Path
20
+
21
+ DEFAULT_TIMEOUT = 20 # seconds
22
+ MAX_OUTPUT = 20_000 # chars per stream, to keep the LLM context bounded
23
+
24
+
25
@dataclass
class RunResult:
    """Outcome of one process run inside the workspace."""

    ok: bool            # True when the process exited with status 0
    stdout: str
    stderr: str
    exit_code: int
    timed_out: bool = False

    def as_tool_payload(self) -> dict:
        """Return the compact dict handed back to the LLM as the tool result.

        Both output streams are passed through ``_clip`` so the payload stays
        bounded.
        """
        return dict(
            ok=self.ok,
            exit_code=self.exit_code,
            timed_out=self.timed_out,
            stdout=_clip(self.stdout),
            stderr=_clip(self.stderr),
        )
42
+
43
+
44
def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Return ``s`` unchanged if it fits in ``limit`` chars, else a truncated copy.

    The truncated copy keeps the first ``limit`` characters and appends a
    marker stating how many characters were dropped.
    """
    dropped = len(s) - limit
    if dropped <= 0:
        return s
    return f"{s[:limit]}\n...[truncated {dropped} chars]"
48
+
49
+
50
+ class Workspace:
51
+ """A scratch directory the agent reads/writes/executes within.
52
+
53
+ All file tools are confined to this directory; paths are resolved and
54
+ checked so the model cannot escape via `..` or absolute paths.
55
+ """
56
+
57
+ def __init__(self, root: str | None = None) -> None:
58
+ self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
59
+ self.root.mkdir(parents=True, exist_ok=True)
60
+
61
+ # --- path safety -----------------------------------------------------
62
+ def _resolve(self, rel: str) -> Path:
63
+ p = (self.root / rel).resolve()
64
+ if not str(p).startswith(str(self.root.resolve())):
65
+ raise ValueError(f"path escapes workspace: {rel!r}")
66
+ return p
67
+
68
+ # --- file ops --------------------------------------------------------
69
+ def write_file(self, path: str, content: str) -> dict:
70
+ p = self._resolve(path)
71
+ p.parent.mkdir(parents=True, exist_ok=True)
72
+ p.write_text(content)
73
+ return {"ok": True, "path": path, "bytes": len(content.encode())}
74
+
75
+ def read_file(self, path: str) -> dict:
76
+ p = self._resolve(path)
77
+ if not p.exists():
78
+ return {"ok": False, "error": "not found", "path": path}
79
+ return {"ok": True, "path": path, "content": _clip(p.read_text())}
80
+
81
+ def list_files(self) -> list[str]:
82
+ return sorted(
83
+ str(p.relative_to(self.root))
84
+ for p in self.root.rglob("*")
85
+ if p.is_file()
86
+ )
87
+
88
+ # --- execution -------------------------------------------------------
89
+ def run_python(self, code: str | None = None, path: str | None = None,
90
+ timeout: int = DEFAULT_TIMEOUT) -> RunResult:
91
+ if path:
92
+ target = self._resolve(path)
93
+ argv = ["python3", str(target)]
94
+ else:
95
+ f = self._resolve("_snippet.py")
96
+ f.write_text(code or "")
97
+ argv = ["python3", str(f)]
98
+ return self._run(argv, timeout)
99
+
100
+ def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
101
+ # pytest if available, falling back to unittest discovery.
102
+ argv = ["python3", "-m", "pytest", "-q"]
103
+ return self._run(argv, timeout)
104
+
105
+ def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
106
+ """Run a shell command in the workspace (login shell for full PATH).
107
+
108
+ Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
109
+ same way run_python checks Python. Mirrors the Rust agent's run_shell and the
110
+ eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
111
+ """
112
+ return self._run(["bash", "-lc", command], timeout)
113
+
114
+ def _run(self, argv: list[str], timeout: int) -> RunResult:
115
+ env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
116
+ try:
117
+ proc = subprocess.run(
118
+ argv,
119
+ cwd=self.root,
120
+ env=env,
121
+ capture_output=True,
122
+ text=True,
123
+ timeout=timeout,
124
+ )
125
+ return RunResult(
126
+ ok=proc.returncode == 0,
127
+ stdout=proc.stdout,
128
+ stderr=proc.stderr,
129
+ exit_code=proc.returncode,
130
+ )
131
+ except subprocess.TimeoutExpired as e:
132
+ return RunResult(
133
+ ok=False,
134
+ stdout=e.stdout.decode() if isinstance(e.stdout, bytes) else (e.stdout or ""),
135
+ stderr=f"timed out after {timeout}s",
136
+ exit_code=124,
137
+ timed_out=True,
138
+ )
139
+
140
+ def cleanup(self) -> None:
141
+ shutil.rmtree(self.root, ignore_errors=True)
engine/themes.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web UI color themes aligned with the CLI TUI palettes."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass(frozen=True)
class WebTheme:
    """Immutable colour palette for the web UI.

    Field order matters: palettes are constructed positionally.
    """

    name: str       # value matched against the shell's data-theme attribute
    bg: str         # --sc-bg
    panel: str      # --sc-panel
    bg_alt: str     # --sc-bg-alt
    accent: str     # --sc-accent
    fg: str         # --sc-fg
    dim: str        # --sc-dim
    ok: str         # --sc-ok
    tool: str       # --sc-tool
    border: str     # --sc-border
    hf_yellow: str = "#FFD21E"  # --hf-yellow
20
+
21
+
22
+ WEB_THEMES: list[WebTheme] = [
23
+ WebTheme("smol-dark", "#0b1020", "#111827", "#1e293b", "#7c3aed", "#e2e8f0", "#64748b", "#34d399", "#a78bfa", "#334155"),
24
+ WebTheme("tokyo", "#1a1b26", "#24283b", "#1f2335", "#7dcfff", "#c0caf5", "#565f89", "#bb9af7", "#7dcfff", "#414868"),
25
+ WebTheme("gruvbox", "#282828", "#32302f", "#3c3836", "#fe8019", "#ebdbb2", "#928374", "#b8bb26", "#83a598", "#504945"),
26
+ WebTheme("mono", "#161616", "#1e1e1e", "#222222", "#e0e0e0", "#c0c0c0", "#707070", "#ffffff", "#a0a0a0", "#404040"),
27
+ WebTheme("catppuccin", "#1e1e2e", "#313244", "#313244", "#cba6f7", "#cdd6f4", "#6c7086", "#a6e3a1", "#89b4fa", "#45475a"),
28
+ WebTheme("nord", "#2e3440", "#3b4252", "#3b4252", "#88c0d0", "#eceff4", "#4c566a", "#a3be8c", "#81a1c1", "#3b4252"),
29
+ WebTheme("dracula", "#282a36", "#44475a", "#282a36", "#bd93f9", "#f8f8f2", "#6272a4", "#50fa7b", "#8be9fd", "#44475a"),
30
+ WebTheme("solarized", "#002b36", "#073642", "#073642", "#268bd2", "#839496", "#586e75", "#859900", "#2aa198", "#073642"),
31
+ ]
32
+
33
+
34
def theme_names() -> list[str]:
    """Return the names of all entries in ``WEB_THEMES``, in list order."""
    names: list[str] = []
    for theme in WEB_THEMES:
        names.append(theme.name)
    return names
36
+
37
+
38
def theme_by_name(name: str) -> WebTheme:
    """Return the theme called ``name``, falling back to the first theme."""
    return next(
        (theme for theme in WEB_THEMES if theme.name == name),
        WEB_THEMES[0],
    )
43
+
44
+
45
def theme_at(index: int) -> WebTheme:
    """Return the theme at ``index``, wrapping around the theme list."""
    wrapped = index % len(WEB_THEMES)
    return WEB_THEMES[wrapped]
47
+
48
+
49
def theme_css_vars() -> str:
    """Per-theme CSS variable overrides for .sc-tui-shell[data-theme=...].

    Emits one rule per theme, joined by newlines; each rule sets the custom
    properties listed below from the matching ``WebTheme`` field.
    """
    # (CSS custom property, WebTheme attribute), in emission order.
    mapping = (
        ("--sc-bg", "bg"),
        ("--sc-panel", "panel"),
        ("--sc-bg-alt", "bg_alt"),
        ("--sc-accent", "accent"),
        ("--sc-fg", "fg"),
        ("--sc-dim", "dim"),
        ("--sc-ok", "ok"),
        ("--sc-tool", "tool"),
        ("--sc-border", "border"),
        ("--hf-yellow", "hf_yellow"),
    )
    rules: list[str] = []
    for theme in WEB_THEMES:
        decls = " ".join(f"{prop}:{getattr(theme, attr)};" for prop, attr in mapping)
        rules.append(f'.sc-tui-shell[data-theme="{theme.name}"] {{ {decls} }}')
    return "\n".join(rules)
engine/tools.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Coding tools exposed to the LiteForge agent.
2
+
3
+ Each tool is a Python callable registered via `liteforge.create_tool`. The agent
4
+ (running in Rust) decides when to call them; LiteForge invokes the callable with
5
+ a single `dict` of arguments and feeds the returned JSON-able dict back to the
6
+ model. All file/exec tools are confined to one `Workspace`.
7
+
8
+ Tool surface (kept deliberately small so a 3B model can use it reliably):
9
+ write_file(path, content) -> create/overwrite a file
10
+ read_file(path) -> read a file back
11
+ list_files() -> list workspace files
12
+ run_python(path) -> execute a file, return stdout/stderr/exit
13
+ run_tests() -> run pytest in the workspace
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import liteforge as lf
18
+
19
+ from . import browsercheck
20
+ from .preview import inline_app
21
+ from .sandbox import Workspace
22
+ from .trace_collector import TraceCollector
23
+
24
+
25
+ def _wrap(name: str, fn, collector: TraceCollector | None):
26
+ if collector is None:
27
+ return fn
28
+
29
+ def wrapped(args: dict):
30
+ collector.record_tool_call(name, args)
31
+ result = fn(args)
32
+ collector.record_tool_result(name, result)
33
+ return result
34
+
35
+ return wrapped
36
+
37
+
38
+ # Tool names in the order _tools() returns them — lets a registry select a
39
+ # subset by name without relying on attributes of the opaque lf tool object.
40
+ _TOOL_ORDER = ("write_file", "read_file", "list_files", "run_python", "run_tests")
41
+
42
+ # Tools the web builder needs. Static apps are "verified" by rendering, not by
43
+ # running Python, so we drop run_python/run_tests — a smaller, less confusing
44
+ # surface for a 3B model that should be writing HTML, not spawning processes.
45
+ _WEB_TOOLS = ("write_file", "read_file", "list_files")
46
+
47
+
48
def _registry(workspace: Workspace, names, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Build a registry holding only the tools whose name is in ``names``.

    Tools are matched to names by position: ``_tools`` returns them in
    ``_TOOL_ORDER`` order.
    """
    registry = lf.ToolRegistry()
    named_tools = zip(_TOOL_ORDER, _tools(workspace, collector))
    for tool in (t for tool_name, t in named_tools if tool_name in names):
        registry.register(tool)
    return registry
54
+
55
+
56
def build_registry(workspace: Workspace, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Return a ToolRegistry with every tool in ``_TOOL_ORDER`` bound to `workspace`."""
    full_registry = _registry(workspace, _TOOL_ORDER, collector)
    return full_registry
59
+
60
+
61
def build_web_registry(workspace: Workspace, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Return the web agent's registry: the ``_WEB_TOOLS`` file ops plus ``check_app``."""
    web_registry = _registry(workspace, _WEB_TOOLS, collector)
    check_tool = _check_app_tool(workspace, collector)
    web_registry.register(check_tool)
    return web_registry
66
+
67
+
68
def check_app_impl(ws: Workspace, collector: TraceCollector | None, args: dict) -> dict:
    """Run check_app logic (shared by LiteForge tool and Rust python callback).

    Inlines every readable workspace file into one HTML document and passes it
    to ``browsercheck.check_html``.

    Args:
        ws: workspace holding the app; must contain a top-level ``index.html``.
        collector: unused here; kept for signature compatibility.
        args: unused; the tool takes no arguments.

    Returns:
        A dict with ``ok`` and ``errors``, plus a ``note``/``message``/``hint``
        depending on the outcome.
    """
    names = ws.list_files()
    if "index.html" not in names:
        return {"ok": False,
                "errors": ["index.html not found: create it first with write_file."]}
    files = {}
    for rel in names:
        # Read the full file straight from disk. ws.read_file() clips content
        # for LLM consumption and appends a truncation marker, which would
        # corrupt large apps and produce bogus JavaScript errors.
        try:
            files[rel] = (ws.root / rel).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Unreadable or binary asset: leave it out of the inlined app.
            continue
    ok, errors = browsercheck.check_html(inline_app(files))
    if ok is None:
        return {"ok": True, "errors": [],
                "note": "runtime check unavailable here; assuming ok"}
    if ok:
        return {"ok": True, "errors": [],
                "message": "The app loads and every button works."}
    return {"ok": False, "errors": errors,
            "hint": "Fix these JavaScript errors in index.html, then call check_app again."}
87
+
88
+
89
def _check_app_tool(ws: Workspace, collector: TraceCollector | None = None):
    """Create the `check_app` tool bound to ``ws``.

    The handler delegates to ``check_app_impl`` and is wrapped with ``_wrap``
    so calls are traced when a collector is supplied.
    """
    def check_app(args: dict) -> dict:
        return check_app_impl(ws, collector, args)

    description = (
        "Run the current web app in a headless browser: load index.html, execute "
        "its JavaScript, click every button, and report any errors. Use this to "
        "verify the app actually works before finishing."
    )
    schema = {"type": "object", "properties": {}}
    handler = _wrap("check_app", check_app, collector)
    return lf.create_tool("check_app", description, schema, handler)
104
+
105
+
106
def _tools(ws: Workspace, collector: TraceCollector | None = None) -> list:
    """Create the five core tools bound to ``ws``.

    The returned list is ordered write_file, read_file, list_files,
    run_python, run_tests. Each handler takes the single ``dict`` of arguments
    and is passed through ``_wrap`` for tracing.
    """
    def write_file(args: dict) -> dict:
        return ws.write_file(args["path"], args.get("content", ""))

    def read_file(args: dict) -> dict:
        return ws.read_file(args["path"])

    def list_files(args: dict) -> dict:
        return {"ok": True, "files": ws.list_files()}

    def run_python(args: dict) -> dict:
        return ws.run_python(path=args["path"]).as_tool_payload()

    def run_tests(args: dict) -> dict:
        return ws.run_tests().as_tool_payload()

    def make(tool_name: str, description: str, schema: dict, handler):
        # Wrap for tracing, then register under the same name.
        return lf.create_tool(tool_name, description, schema, _wrap(tool_name, handler, collector))

    write_schema = {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Relative path, e.g. main.py"},
            "content": {"type": "string", "description": "Full file contents"},
        },
        "required": ["path", "content"],
    }
    read_schema = {
        "type": "object",
        "properties": {"path": {"type": "string"}},
        "required": ["path"],
    }
    run_schema = {
        "type": "object",
        "properties": {"path": {"type": "string", "description": "File to run, e.g. main.py"}},
        "required": ["path"],
    }

    return [
        make(
            "write_file",
            "Create or overwrite a file in the workspace with the given text content.",
            write_schema,
            write_file,
        ),
        make(
            "read_file",
            "Read a file from the workspace and return its contents.",
            read_schema,
            read_file,
        ),
        make(
            "list_files",
            "List all files currently in the workspace.",
            {"type": "object", "properties": {}},
            list_files,
        ),
        make(
            "run_python",
            "Run a Python file in the workspace. Returns stdout, stderr and exit code.",
            run_schema,
            run_python,
        ),
        make(
            "run_tests",
            "Run the test suite (pytest) in the workspace. Returns pass/fail output.",
            {"type": "object", "properties": {}},
            run_tests,
        ),
    ]
engine/trace.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shareable agent traces (Build Small "Sharing is Caring" badge).
2
+
3
+ Turns a completed smolcode run into an OpenTelemetry-style JSON trace: a root
4
+ span minted by LiteForge's `Tracer` plus one child span per agent step, carrying
5
+ the step kind, duration, and token counts read from `AgentStep`. Publish a trace
6
+ file to the Hub so others can see exactly how the tiny model reasoned.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import time
12
+ from pathlib import Path
13
+
14
+ import liteforge as lf
15
+
16
+
17
def build_trace(agent, task: str, final: str, *, preset: str, model: str) -> dict:
    """Build an OTel-ish trace document from a finished agent run.

    Args:
        agent: finished agent; its steps come from ``agent.raw_history()`` when
            that method exists, otherwise from ``agent.history()`` (or no steps).
        task: the task text, stored on the root span and in the document.
        final: the agent's final answer.
        preset: preset name recorded on the root span.
        model: model name recorded on the root span.

    Returns:
        A dict holding the root span ids, run metadata, token total and one
        child span dict per history step.
    """
    tracer = lf.Tracer("smolcode")
    root = tracer.start_span("coding_task")
    root.set_attribute("preset", preset)
    root.set_attribute("model", model)
    root.set_attribute("task", task)
    trace_id = root.context.trace_id
    root_id = root.context.span_id

    if hasattr(agent, "raw_history"):
        history = agent.raw_history()
    else:
        history = getattr(agent, "history", lambda: [])()

    spans: list[dict] = []
    total_tokens = 0
    for i, s in enumerate(history):
        dur = getattr(s, "duration_ms", None) or 0
        tot = getattr(s, "total_tokens", None) or 0
        step_no = getattr(s, "step_number", getattr(s, "number", i))
        step_type = getattr(s, "step_type", getattr(s, "kind", "step"))
        result_text = getattr(s, "result", getattr(s, "detail", ""))
        total_tokens += tot

        # Child span id = root id with its tail replaced by the step number.
        # Trim exactly as many characters as the suffix adds so every span id
        # keeps the root id's length (the old slice dropped one extra character
        # for step numbers >= 10). A non-integer step number falls back to the
        # loop index rather than crashing the ":02d" format.
        ordinal = step_no if isinstance(step_no, int) else i
        suffix = f"{ordinal:02d}"
        span_id = f"{root_id[:-len(suffix)]}{suffix}"

        spans.append({
            "trace_id": trace_id,
            "span_id": span_id,
            "parent_span_id": root_id,
            "name": str(step_type),
            "duration_ms": dur,
            "attributes": {
                "step_number": step_no,
                "prompt_tokens": getattr(s, "prompt_tokens", None),
                "completion_tokens": getattr(s, "completion_tokens", None),
                "total_tokens": tot,
                # Keep the shared trace small: only the first 200 characters.
                "result": str(result_text)[:200],
            },
        })
    root.end()

    return {
        "trace_id": trace_id,
        "service": "smolcode",
        "preset": preset,
        "model": model,
        "task": task,
        "final": final,
        "n_steps": len(spans),
        "total_tokens": total_tokens,
        "root": {"span_id": root_id, "name": "coding_task"},
        "spans": spans,
    }
65
+
66
+
67
def save_trace(trace: dict, out_dir: str | Path = "traces") -> Path:
    """Write ``trace`` as indented JSON into ``out_dir`` and return the file path.

    The file is named ``trace-<YYYYmmdd-HHMMSS>.json``. If that name is already
    taken (two traces saved within the same second) a numeric suffix is added
    instead of overwriting the earlier trace. ``out_dir`` is created if needed.
    """
    d = Path(out_dir)
    d.mkdir(parents=True, exist_ok=True)
    stamp = time.strftime("%Y%m%d-%H%M%S")
    path = d / f"trace-{stamp}.json"
    attempt = 1
    while path.exists():
        path = d / f"trace-{stamp}-{attempt}.json"
        attempt += 1
    path.write_text(json.dumps(trace, indent=2), encoding="utf-8")
    return path
engine/trace_collector.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Append-only trace event log for live UI updates.
2
+
3
+ Tool call args/results are captured by wrapping LiteForge tool callables.
4
+ LiteForge's agent history only exposes step kinds, not tool I/O.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import re
10
+ from dataclasses import dataclass, field
11
+ from typing import Any, Literal
12
+
13
+ TraceKind = Literal["tool_call", "tool_result", "tier_escalation", "final", "error"]
14
+
15
+ _REDACTED = "[REDACTED]"
16
+ _PREFIXES = ("sk-", "ghp_", "gho_", "ghs_", "github_pat_", "xoxb-", "xoxp-", "AKIA", "AIza", "glpat-")
17
+ _SENSITIVE_KEYS = (
18
+ "api_key", "apikey", "token", "secret", "password", "passwd",
19
+ "access_key", "client_secret", "private_key",
20
+ )
21
+
22
+
23
@dataclass
class TraceEvent:
    """One entry in the trace log."""

    kind: TraceKind                 # event category
    name: str                       # tool name or event label
    detail: str                     # formatted payload or message text
    step: int | None = None         # tool step counter, when recorded
    duration_ms: int | None = None  # filled in later from step metadata
    tokens: int | None = None       # filled in later from step metadata
31
+
32
+
33
@dataclass
class TraceCollector:
    """Thread-safe enough for asyncio single-task agent runs."""

    events: list[TraceEvent] = field(default_factory=list)
    _tool_step: int = 0  # shared by a tool call and its result; bumped after the result

    def record(self, kind: TraceKind, name: str, detail: str, **meta) -> None:
        """Append one event; ``meta`` supplies optional ``TraceEvent`` fields."""
        event = TraceEvent(kind=kind, name=name, detail=detail, **meta)
        self.events.append(event)

    def record_tool_call(self, name: str, args: dict[str, Any]) -> None:
        """Log a tool invocation with its formatted arguments."""
        payload = _format_payload(args)
        self.record("tool_call", name, payload, step=self._tool_step)

    def record_tool_result(self, name: str, result: dict[str, Any]) -> None:
        """Log a tool result under the current step, then advance the step."""
        payload = _format_payload(result)
        self.record("tool_result", name, payload, step=self._tool_step)
        self._tool_step += 1

    def record_escalation(self, from_tier: str, to_tier: str) -> None:
        """Log a tier escalation; the event is named after the target tier."""
        self.record("tier_escalation", to_tier, f"escalated from {from_tier}")

    def record_final(self, text: str) -> None:
        """Log the final response text after redaction."""
        self.record("final", "response", redact(text))

    def record_error(self, text: str) -> None:
        """Log an error message after redaction."""
        self.record("error", "error", redact(text))

    def snapshot(self) -> list[TraceEvent]:
        """Return a shallow copy of the events recorded so far."""
        return self.events[:]
61
+
62
+
63
def redact(text: str) -> str:
    """Conservative secret redaction for UI display.

    The text is processed line by line with ``_redact_line``; a trailing
    ``"\\n"`` on each line is set aside and re-attached afterwards.
    """
    pieces: list[str] = []
    for raw in text.splitlines(keepends=True):
        if raw.endswith("\n"):
            pieces.append(_redact_line(raw[:-1]) + "\n")
        else:
            pieces.append(_redact_line(raw))
    return "".join(pieces)
70
+
71
+
72
+ def _redact_line(line: str) -> str:
73
+ out: list[str] = []
74
+ i = 0
75
+ while i < len(line):
76
+ ch = line[i]
77
+ if ch in "\"'`" or ch.isalnum() or ch in "_-":
78
+ j = i
79
+ while j < len(line) and not line[j].isspace() and line[j] not in ",;)]}":
80
+ j += 1
81
+ token = line[i:j]
82
+ if _looks_secret(token):
83
+ out.append(_REDACTED)
84
+ else:
85
+ out.append(token)
86
+ i = j
87
+ continue
88
+ if ch == "=" and i + 1 < len(line):
89
+ key_start = i
90
+ while key_start > 0 and (line[key_start - 1].isalnum() or line[key_start - 1] in "_-"):
91
+ key_start -= 1
92
+ key = line[key_start:i].lower()
93
+ if any(s in key for s in _SENSITIVE_KEYS):
94
+ out.append(line[i : i + 1])
95
+ i += 1
96
+ j = i
97
+ while j < len(line) and not line[j].isspace():
98
+ j += 1
99
+ out.append(_REDACTED)
100
+ i = j
101
+ continue
102
+ out.append(ch)
103
+ i += 1
104
+ return "".join(out)
105
+
106
+
107
def _looks_secret(token: str) -> bool:
    """Heuristically decide whether ``token`` is a credential.

    True when the token starts with a known prefix and has at least 8 more
    characters, or when it is 32+ characters drawn from ``[A-Za-z0-9_\\-+/=]``
    containing at least 4 uppercase letters, 4 lowercase letters and 2 digits.
    """
    if any(token.startswith(p) and len(token) >= len(p) + 8 for p in _PREFIXES):
        return True
    if len(token) < 32 or not re.fullmatch(r"[A-Za-z0-9_\-+/=]+", token):
        return False
    n_upper = n_lower = n_digit = 0
    for c in token:
        if c.isupper():
            n_upper += 1
        if c.islower():
            n_lower += 1
        if c.isdigit():
            n_digit += 1
    return n_upper >= 4 and n_lower >= 4 and n_digit >= 2
118
+
119
+
120
def _format_payload(data: dict[str, Any], *, max_content: int = 600) -> str:
    """JSON-format tool args/results, truncating large file content.

    Args:
        data: the tool arguments or result. Anything that cannot be copied
            into a dict is wrapped as ``{"value": data}`` so tracing never
            raises inside a tool call.
        max_content: maximum length kept for a string ``"content"`` entry.

    Returns:
        The indented JSON text, passed through ``redact``.
    """
    try:
        out = dict(data)
    except (TypeError, ValueError):
        out = {"value": data}
    content = out.get("content")
    if isinstance(content, str) and len(content) > max_content:
        out["content"] = content[:max_content] + f"\n… ({len(content)} chars total)"
    # default=str: values JSON cannot encode (paths, exceptions, …) are shown
    # by their string form instead of raising.
    raw = json.dumps(out, indent=2, ensure_ascii=False, default=str)
    return redact(raw)
engine/ui_trace.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Trace rendering for the Gradio web UI."""
2
+ from __future__ import annotations
3
+
4
+ from .agent import Step
5
+ from .trace_collector import TraceEvent
6
+
7
+ _TOOL_ICON = {
8
+ "write_file": "✏️", "read_file": "📖", "list_files": "📂",
9
+ "run_python": "▶️", "run_tests": "🧪", "check_app": "🌐",
10
+ }
11
+
12
+
13
def merge_step_metadata(events: list[TraceEvent], raw_history: list) -> list[TraceEvent]:
    """Attach LiteForge timing/token stats to tool_call events.

    The n-th ``tool_call`` event is paired with ``raw_history[n]`` and rebuilt
    with that step's ``duration_ms`` and ``total_tokens``. All other events,
    and any tool calls beyond the end of ``raw_history``, are passed through
    unchanged. When ``raw_history`` is empty the input list itself is returned.

    NOTE(review): pairing is purely positional; this presumably assumes
    ``raw_history`` holds one entry per tool call. Confirm against the agent.
    """
    if not raw_history:
        return events
    merged: list[TraceEvent] = []
    call_idx = 0
    for ev in events:
        if ev.kind != "tool_call" or call_idx >= len(raw_history):
            merged.append(ev)
            continue
        step = raw_history[call_idx]
        call_idx += 1
        merged.append(TraceEvent(
            kind=ev.kind, name=ev.name, detail=ev.detail, step=ev.step,
            duration_ms=getattr(step, "duration_ms", None),
            tokens=getattr(step, "total_tokens", None),
        ))
    return merged
32
+
33
+
34
def format_trace_md(
    events: list[TraceEvent],
    *,
    steps: list[Step] | None = None,
    max_detail: int = 500,
    idle: str = "_waiting for the model…_",
) -> str:
    """Render trace events as markdown with expandable tool I/O.

    With no events, falls back to the step-only rendering of ``steps`` or to
    ``idle``. Each ``tool_call`` becomes a numbered ``<details>`` block and
    absorbs the ``tool_result`` that immediately follows it; a ``tool_result``
    on its own produces no output.
    """
    if not events:
        return _steps_only_md(steps) if steps else idle

    rendered: list[str] = []
    call_count = 0
    last = len(events) - 1
    for pos, ev in enumerate(events):
        kind = ev.kind
        if kind == "tool_call":
            icon = _TOOL_ICON.get(ev.name, "🔧")
            meta = _meta_badge(ev)
            summary = f"`{call_count}` &nbsp; {icon} **{ev.name}**{meta}"
            detail = _truncate(ev.detail, max_detail)
            block = f"<details><summary>{summary}</summary>\n\n```json\n{detail}\n```\n</details>"
            if pos < last and events[pos + 1].kind == "tool_result":
                result = _truncate(events[pos + 1].detail, max_detail)
                block += f"\n\n↳ result:\n\n```json\n{result}\n```"
            rendered.append(block)
            call_count += 1
        elif kind == "tier_escalation":
            rendered.append(f"⬆️ **escalated** → `{ev.name}`: {ev.detail}")
        elif kind == "final":
            rendered.append("✅ **final answer**")
        elif kind == "error":
            rendered.append(f"⚠️ **error**: {_truncate(ev.detail, max_detail)}")
    if not rendered:
        return idle
    return "\n\n".join(rendered)
72
+
73
+
74
def format_fanout_trace_md(results) -> str:
    """Per-subagent expandable traces for fan-out mode.

    Each result contributes one ``<details>`` block whose summary shows its
    1-based index, model, step count and a verdict (verified, error or
    unverified), with the rendered trace inside.
    """
    if not results:
        return "_no subagents_"

    def verdict_of(r) -> str:
        if r.verified:
            return "✓ verified"
        if r.error:
            return "⚠️ error"
        return "· unverified"

    sections = []
    for r in results:
        events = getattr(r, "trace_events", None) or []
        inner = format_trace_md(events, steps=r.steps, idle="_no steps yet_")
        verdict = verdict_of(r)
        header = f"<details><summary>`{r.index + 1}` **subagent** ({r.model}): "
        body = f"{len(r.steps)} steps · {verdict}</summary>\n\n{inner}\n</details>"
        sections.append(header + body)
    return "\n\n".join(sections)
88
+
89
+
90
+ def _steps_only_md(steps: list[Step]) -> str:
91
+ lines = []
92
+ for s in steps:
93
+ kind = s.kind
94
+ if kind.startswith("tool_call:"):
95
+ tool = kind.split(":", 1)[1]
96
+ icon = _TOOL_ICON.get(tool, "🔧")
97
+ meta = ""
98
+ if s.total_tokens:
99
+ meta = f" · {s.total_tokens} tok"
100
+ lines.append(f"`{s.number}` &nbsp; {icon} **{tool}**{meta}")
101
+ elif kind == "response":
102
+ lines.append("✅ **final answer**")
103
+ else:
104
+ lines.append(f"• {kind}")
105
+ return "\n\n".join(lines) if lines else "_waiting for the model…_"
106
+
107
+
108
+ def _meta_badge(ev: TraceEvent) -> str:
109
+ parts = []
110
+ if ev.duration_ms is not None:
111
+ parts.append(f"{ev.duration_ms}ms")
112
+ if ev.tokens is not None:
113
+ parts.append(f"{ev.tokens} tok")
114
+ return f" <span class='trace-meta'>({', '.join(parts)})</span>" if parts else ""
115
+
116
+
117
+ def _truncate(text: str, limit: int) -> str:
118
+ text = text.strip()
119
+ if len(text) <= limit:
120
+ return text
121
+ return text[:limit] + f"\n… ({len(text)} chars total)"
engine/web_tui.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI-shaped web UI: transcript buffer, HTML rendering, layout helpers."""
2
+ from __future__ import annotations
3
+
4
+ import html
5
+ from dataclasses import dataclass, field
6
+
7
+ from .gradio_shell import UiSettings
8
+ from .rust_session import list_commands
9
+ from .themes import theme_at, theme_names
10
+
11
+ _BUILTIN_SLASH = [
12
+ "/help", "/mode", "/think", "/mcp", "/rules", "/skills", "/skill", "/bg",
13
+ "/init", "/new", "/sessions", "/rename", "/fork", "/delete", "/timeline",
14
+ "/stats", "/export", "/search", "/config", "/commit", "/agents", "/models",
15
+ "/themes", "/files", "/clear", "/quit",
16
+ ]
17
+
18
+ _KIND_STYLE = {
19
+ "user": ("›", "#e2e8f0", "#1e293b"),
20
+ "assistant": ("◆", "#c4b5fd", "#1e1b4b"),
21
+ "tool": ("⚙", "#a78bfa", "#0f172a"),
22
+ "result": ("·", "#94a3b8", "#0f172a"),
23
+ "info": ("·", "#94a3b8", "#0f172a"),
24
+ "error": ("✕", "#f87171", "#450a0a"),
25
+ "final": ("✓", "#34d399", "#052e16"),
26
+ }
27
+
28
+
29
@dataclass
class TranscriptLine:
    """One rendered row of the transcript."""

    kind: str  # row category; used for the CSS class and glyph lookup
    text: str  # row body
33
+
34
+
35
@dataclass
class Transcript:
    """In-memory transcript buffer plus its HTML rendering."""

    lines: list[TranscriptLine] = field(default_factory=list)
    partial: str = ""  # streamed assistant text not yet committed as a line

    def clear(self) -> None:
        """Drop all lines and any partial text."""
        self.partial = ""
        self.lines.clear()

    def append(self, kind: str, text: str) -> None:
        """Append a stripped line of ``kind``; blank or ``None`` text is ignored."""
        cleaned = (text or "").strip()
        if cleaned:
            self.lines.append(TranscriptLine(kind=kind, text=cleaned))

    def append_user(self, text: str) -> None:
        """Append a ``user`` line."""
        self.append("user", text)

    def append_assistant(self, text: str) -> None:
        """Append an ``assistant`` line."""
        self.append("assistant", text)

    def append_info(self, text: str) -> None:
        """Append an ``info`` line."""
        self.append("info", text)

    def append_error(self, text: str) -> None:
        """Append an ``error`` line."""
        self.append("error", text)

    def append_tool_call(self, name: str, args: str) -> None:
        """Append a ``tool`` line showing at most 200 characters of ``args``."""
        shown = args[:200]
        self.append("tool", f"{name} {shown}")

    def append_tool_result(self, name: str, text: str) -> None:
        """Append a ``result`` line capped at 400 characters plus an ellipsis."""
        ellipsis = "…" if len(text) > 400 else ""
        self.append("result", f"{name}: {text[:400]}{ellipsis}")

    def set_partial(self, text: str) -> None:
        """Replace the in-progress assistant text."""
        self.partial = text

    def from_stored_chat(self, stored: list[dict[str, str]]) -> None:
        """Reset the transcript and reload it from stored chat messages.

        Role ``"user"`` maps to a user line; any other role to an assistant line.
        """
        self.clear()
        for message in stored:
            is_user = message.get("role", "assistant") == "user"
            self.append("user" if is_user else "assistant", message.get("text", ""))

    def append_final(self, text: str) -> None:
        """Append a ``final`` line."""
        self.append("final", text)

    def plain_texts(self) -> list[str]:
        """Return the text of every line, in order."""
        texts = []
        for ln in self.lines:
            texts.append(ln.text)
        return texts

    def search(self, query: str, limit: int = 20) -> list[str]:
        """Case-insensitive substring search over line texts.

        Returns ``[kind] text`` previews (first 120 characters), stopping once
        ``limit`` hits have been collected. A blank query returns ``[]``.
        """
        if not query.strip():
            return []
        needle = query.lower()
        hits: list[str] = []
        for ln in self.lines:
            if needle not in ln.text.lower():
                continue
            hits.append(f"[{ln.kind}] {ln.text[:120]}")
            if len(hits) >= limit:
                break
        return hits

    def render_html(self, *, running: bool = False) -> str:
        """Render the transcript as HTML.

        Shows a placeholder when there is nothing to display, the partial text
        with a cursor when streaming, and a "thinking" row when ``running`` is
        set but no partial text exists yet.
        """
        nothing_to_show = not self.lines and not self.partial and not running
        if nothing_to_show:
            return (
                '<div class="sc-transcript-wrap">'
                '<div class="sc-transcript-empty">'
                "smolcode — describe a coding task, or type <code>/help</code>"
                "</div></div>"
            )
        rows = [_line_html(ln.kind, ln.text) for ln in self.lines]
        if self.partial:
            rows.append(_line_html("assistant", self.partial + "▏"))
        elif running:
            rows.append('<div class="sc-tline sc-tline-info">· thinking…</div>')
        inner = "\n".join(['<div class="sc-transcript-inner">', *rows, "</div>"])
        return '<div class="sc-transcript-wrap">\n' + inner + "\n</div>"
114
+
115
+
116
def _line_html(kind: str, text: str) -> str:
    """Render one transcript row as HTML.

    The glyph and colour come from ``_KIND_STYLE`` (falling back to the
    ``info`` style); ``text`` is HTML-escaped and newlines become ``<br>``.
    """
    style = _KIND_STYLE.get(kind, _KIND_STYLE["info"])
    glyph, color = style[0], style[1]
    escaped = html.escape(text)
    body = "<br>".join(escaped.split("\n"))
    pieces = [
        f'<div class="sc-tline sc-tline-{kind}">',
        f'<span class="sc-tglyph" style="color:{color}">{glyph}</span> ',
        f'<span class="sc-ttext">{body}</span></div>',
    ]
    return "".join(pieces)
124
+
125
+
126
def slash_commands(workspace: str) -> list[str]:
    """Return the built-in slash commands followed by the workspace's custom ones."""
    commands = list(_BUILTIN_SLASH)
    for command_name in list_commands(workspace):
        commands.append(f"/{command_name}")
    return commands
129
+
130
+
131
def filter_slash_commands(prefix: str, workspace: str) -> list[str]:
    """Return the slash commands starting with ``prefix`` (a leading ``/`` is optional)."""
    wanted = prefix
    if not wanted.startswith("/"):
        wanted = f"/{wanted}"
    matches = []
    for command in slash_commands(workspace):
        if command.startswith(wanted):
            matches.append(command)
    return matches
134
+
135
+
136
def header_bar_html(
    *,
    git_branch: str = "",
    git_dirty: bool = False,
    model: str = "",
    host: str = "",
    theme: str = "default",
) -> str:
    """Render the header bar as HTML.

    The git segment appears only when ``git_branch`` is non-empty, with a dot
    marker when ``git_dirty`` is set. An empty ``model`` is shown as an em
    dash. All caller-supplied text is HTML-escaped.
    """
    segments = [
        '<div class="sc-header-bar">',
        '<span class="sc-hbrand">◆ smol<span class="hf-accent">code</span></span>',
    ]
    if git_branch:
        marker = " ●" if git_dirty else ""
        segments.append(f'<span class="sc-hgit">⎇ {html.escape(git_branch)}{marker}</span>')
    shown_model = html.escape(model) if model else "—"
    shown_host = html.escape(host) if host else ""
    segments.append(f'<span class="sc-hmodel">{shown_model}</span>')
    segments.append(f'<span class="sc-hhost">@ {shown_host}</span>')
    segments.append(f'<span class="sc-htheme">{html.escape(theme)}</span>')
    segments.append("</div>")
    return "".join(segments)
159
+
160
+
161
def status_bar_html(
    settings: UiSettings,
    *,
    session_title: str = "new session",
    model: str = "",
    running: bool = False,
) -> str:
    """Build the bottom status bar as a row of chips.

    Args:
        settings: UI settings; this function reads ``mode``, ``think``,
            ``workspace``, ``agent`` and ``model`` from it.
        session_title: Session label, truncated to 32 characters.
        model: Model name to display; falls back to ``settings.model`` and then
            to ``—`` when empty.
        running: When true, a "running" chip is shown.

    Returns:
        The status bar HTML. The agent, mode, model and theme chips are buttons
        carrying ``data-picker`` / ``data-action`` attributes.
    """
    # Only "auto" and "plan" have their own label; every other mode shows EDIT.
    # (The original first computed settings.mode.upper() and then always
    # overwrote it; that dead assignment is gone.)
    mode = {"auto": "AUTO", "plan": "PLAN"}.get(settings.mode, "EDIT")

    think = ""
    if settings.think and settings.think != "off":
        # Escaped like every other interpolated setting in this bar.
        think = f'<span class="sc-chip sc-chip-think">think:{html.escape(settings.think)}</span>'
    run = '<span class="sc-chip sc-chip-run">running</span>' if running else ""

    ws = html.escape(settings.workspace[:48])
    sess = html.escape(session_title[:32])
    ag = html.escape(settings.agent)
    mdl = html.escape(model or settings.model or "—")
    return (
        '<div class="sc-status-bar">'
        '<span class="sc-chip sc-chip-brand">smolcode</span>'
        f'<span class="sc-chip">{sess}</span>'
        f'<span class="sc-chip sc-chip-dim">{ws}</span>'
        f'<button type="button" class="sc-chip sc-chip-clickable" data-picker="agents">{ag}</button>'
        f'<button type="button" class="sc-chip sc-chip-clickable sc-chip-mode" data-action="cycle-mode">{mode}</button>'
        f"{think}{run}"
        f'<button type="button" class="sc-chip sc-chip-clickable sc-chip-model" data-picker="models">{mdl}</button>'
        '<button type="button" class="sc-chip sc-chip-clickable sc-chip-dim" data-picker="themes">theme</button>'
        "</div>"
    )
195
+
196
+
197
def parse_git_header(git_text: str) -> tuple[str, bool]:
    """Extract ``(branch, dirty)`` from status text with a ``##`` branch header.

    NOTE(review): the ``##`` header and the ``...`` upstream separator look like
    ``git status --short --branch`` output; confirm against the caller.

    Args:
        git_text: Raw status text; may be empty.

    Returns:
        ``branch`` is the header text up to the ``...`` upstream separator (empty
        when there is no header). ``dirty`` is true when any non-blank line that
        does not start with ``#`` is present.
    """
    # On a repository without commits the header reads
    # "## No commits yet on <branch>" ("Initial commit on" in older git); strip
    # that lead-in so only the branch name is reported.
    unborn_prefixes = ("No commits yet on ", "Initial commit on ")

    branch = ""
    dirty = False
    for line in git_text.splitlines():
        if line.startswith("##"):
            head = line[2:].strip().split("...")[0]
            for prefix in unborn_prefixes:
                if head.startswith(prefix):
                    head = head[len(prefix):]
                    break
            branch = head
        elif line.strip() and not line.startswith("#"):
            dirty = True
    return branch, dirty
206
+
207
+
208
def host_from_url(base_url: str) -> str:
    """Return the host part of ``base_url`` (everything before the first ``/``).

    Surrounding whitespace and a leading ``https://`` / ``http://`` scheme are
    removed first; an empty input yields an empty string.
    """
    remainder = base_url.strip().removeprefix("https://").removeprefix("http://")
    if not remainder:
        return ""
    host, _sep, _path = remainder.partition("/")
    return host
214
+
215
+
216
def cycle_mode(current: str) -> str:
    """Advance the edit mode: normal → auto → plan → normal.

    An unrecognised ``current`` value resets to ``"normal"``.
    """
    order = ("normal", "auto", "plan")
    if current not in order:
        return "normal"
    following = order.index(current) + 1
    return order[following % len(order)]
223
+
224
+
225
def cycle_think(current: str) -> str:
    """Advance the thinking level: off → low → high → xtra → off.

    An unrecognised ``current`` value resets to ``"off"``.
    """
    levels = ["off", "low", "high", "xtra"]
    successors = levels[1:] + levels[:1]
    for level, successor in zip(levels, successors):
        if level == current:
            return successor
    return "off"
232
+
233
+
234
def cycle_agent(current: str) -> str:
    """Toggle between the ``build`` and ``plan`` agents.

    An unrecognised ``current`` value resets to ``"build"``.
    """
    agents = ("build", "plan")
    position = agents.index(current) if current in agents else None
    if position is None:
        return "build"
    return agents[(position + 1) % len(agents)]
241
+
242
+
243
def cycle_model(models: list[str], current: str) -> str:
    """Return the model that follows ``current`` in ``models``, wrapping around.

    With no models, ``current`` is returned unchanged; when ``current`` is not in
    the list, the first model is returned.
    """
    if not models:
        return current
    if current not in models:
        return models[0]
    following = models.index(current) + 1
    return models[following % len(models)]
251
+
252
+
253
+ def ingest_agent_event(transcript: Transcript, ev: dict) -> None:
254
+ kind = ev.get("kind")
255
+ if kind == "token":
256
+ transcript.set_partial(transcript.partial + ev.get("text", ""))
257
+ elif kind == "assistant":
258
+ transcript.set_partial(ev.get("text", ""))
259
+ elif kind == "tool_call":
260
+ transcript.set_partial("")
261
+ transcript.append_tool_call(ev.get("name", ""), ev.get("args", ""))
262
+ elif kind == "tool_result":
263
+ transcript.append_tool_result(ev.get("name", ""), ev.get("text", ""))
264
+ elif kind == "final":
265
+ transcript.set_partial("")
266
+ transcript.append_final(ev.get("text", ""))
267
+ elif kind == "error":
268
+ transcript.set_partial("")
269
+ transcript.append_error(ev.get("text", ""))
270
+
271
+
272
def help_overlay_html() -> str:
    """Return the keyboard-shortcut help overlay body, one shortcut per ``<br>`` row."""
    shortcuts = (
        "Enter — run task",
        "Shift+Enter — newline",
        "/ — slash commands (Tab complete)",
        "@ — attach file",
        "! cmd — shell (no LLM)",
        "Ctrl+L — clear transcript",
        "Ctrl+X — leader key menu",
        "Tab — cycle agent",
        "Shift+Tab — cycle mode",
        "F2 — cycle model",
        "Esc — interrupt / close overlay",
    )
    rows = [html.escape(entry) for entry in shortcuts]
    return '<div class="sc-overlay-body"><b>smolcode keys</b><br><br>' + "<br>".join(rows) + "</div>"
288
+
289
+
290
def whichkey_overlay_html() -> str:
    """Return the Ctrl+X leader-key overlay body, one binding per ``<br>`` row."""
    bindings = (
        "m models", "a agents", "t themes", "l sessions",
        "n new session", "b sidebar", "s stats/files", "f focus files",
        "h help", "o mode", "e think", "q quit",
    )
    rows = "<br>".join(map(html.escape, bindings))
    return '<div class="sc-overlay-body"><b>ctrl+x leader</b><br><br>' + rows + "</div>"
298
+
299
+
300
+ def render_picker_html(
301
+ kind: str,
302
+ items: list[str],
303
+ selected: int,
304
+ *,
305
+ title: str | None = None,
306
+ ) -> str:
307
+ """TUI-style bordered picker list with scroll window."""
308
+ label = title or kind
309
+ if not items:
310
+ return (
311
+ f'<div class="sc-picker" data-kind="{html.escape(kind)}">'
312
+ f'<div class="sc-picker-title">{html.escape(label)}</div>'
313
+ '<div class="sc-picker-empty">(empty)</div></div>'
314
+ )
315
+ win = 12
316
+ sel = min(max(0, selected), len(items) - 1)
317
+ start = max(0, sel - win // 2)
318
+ end = min(len(items), start + win)
319
+ start = max(0, end - win)
320
+ rows: list[str] = []
321
+ for i in range(start, end):
322
+ item = items[i]
323
+ marker = "❯" if i == sel else " "
324
+ cls = "sc-picker-item sc-picker-sel" if i == sel else "sc-picker-item"
325
+ rows.append(
326
+ f'<button type="button" class="{cls}" data-idx="{i}" '
327
+ f'onclick="window.__smolcodePick && window.__smolcodePick({i})">'
328
+ f'<span class="sc-picker-mark">{marker}</span>'
329
+ f"<span>{html.escape(item)}</span></button>"
330
+ )
331
+ body = "\n".join(rows)
332
+ return (
333
+ f'<div class="sc-picker" data-kind="{html.escape(kind)}">'
334
+ f'<div class="sc-picker-title">{html.escape(label)}</div>'
335
+ f'<div class="sc-picker-list">{body}</div>'
336
+ f'<div class="sc-picker-hint">↑↓ navigate · Enter select · Esc close</div>'
337
+ f"</div>"
338
+ )
339
+
340
+
341
def shell_theme_html(theme_idx: int) -> str:
    """Return a ``<script>`` snippet that sets ``data-theme`` on the TUI shell.

    The theme name is looked up with ``theme_at(theme_idx)`` and applied to the
    first ``.sc-tui-shell`` element, if one exists.
    """
    import json

    name = theme_at(theme_idx).name
    # The name lands inside a JavaScript string literal within <script>, where
    # HTML entities are NOT decoded, so html.escape() is the wrong escaping
    # (a quote would surface as the literal text "&quot;"). Encode it as a JSON
    # string instead, and escape "<" so the value can never close the script
    # element or open an HTML comment.
    literal = json.dumps(name).replace("<", "\\u003c")
    return (
        '<script>(function(){var el=document.querySelector(".sc-tui-shell");'
        f'if(el)el.setAttribute("data-theme",{literal});}})();</script>'
    )
349
+
350
+
351
def agent_choices() -> list[str]:
    """Return the selectable agent names as a fresh list on every call."""
    return list(("build", "plan"))
353
+
354
+
355
def theme_picker_items() -> list[str]:
    """Return the entries for the theme picker: whatever ``theme_names()`` reports."""
    names = theme_names()
    return names
357
+
358
+
359
+ def _sorted_file_paths(files: dict[str, str] | list[str]) -> list[str]:
360
+ if isinstance(files, dict):
361
+ return sorted(files.keys())
362
+ return sorted(files)
363
+
364
+
365
+ def _paths_for_ui(files: dict[str, str] | list[str] | None) -> list[str]:
366
+ return _sorted_file_paths(files or [])
367
+
368
+
369
+ def _files_sidebar_body(paths: list[str], *, selected: int = 0, max_rows: int = 48) -> str:
370
+ """Flat file list grouped by directory, matching the CLI TUI sidebar."""
371
+ if not paths:
372
+ return '<div class="sc-sb-empty">no files</div>'
373
+
374
+ rows: list[str] = []
375
+ sel_row: int | None = None
376
+ last_dir = ""
377
+ sel = min(selected, max(0, len(paths) - 1))
378
+
379
+ for i, path in enumerate(paths):
380
+ if "/" in path:
381
+ j = path.rfind("/")
382
+ dir_part, file_part = path[:j], path[j + 1 :]
383
+ else:
384
+ dir_part, file_part = "", path
385
+ if dir_part != last_dir:
386
+ last_dir = dir_part
387
+ label = "." if not dir_part else f"{dir_part}/"
388
+ rows.append(f'<div class="sc-sb-dir">▾ {html.escape(label)}</div>')
389
+ is_sel = i == sel
390
+ if is_sel:
391
+ sel_row = len(rows)
392
+ prefix = "❯" if is_sel else ""
393
+ cls = "sc-sb-file sc-sb-sel" if is_sel else "sc-sb-file"
394
+ rows.append(
395
+ f'<div class="{cls}">'
396
+ f'<span class="sc-sb-mark">{prefix}</span>'
397
+ f'<span class="sc-sb-glyph"> </span>'
398
+ f'<span class="sc-sb-name">{html.escape(file_part)}</span>'
399
+ f"</div>"
400
+ )
401
+
402
+ total = len(rows)
403
+ start = 0
404
+ if total > max_rows:
405
+ anchor = sel_row if sel_row is not None else 0
406
+ start = min(max(0, anchor - max_rows + 1), total - max_rows)
407
+
408
+ visible = rows[start : start + max_rows]
409
+ if total > max_rows and start + max_rows < total:
410
+ more = total - (start + max_rows) + 1
411
+ visible.append(f'<div class="sc-sb-more">… +{more} more</div>')
412
+
413
+ return "\n".join(visible)
414
+
415
+
416
+ def _stats_sidebar_body(
417
+ *,
418
+ session_id: str,
419
+ file_count: int,
420
+ agent: str,
421
+ extra_lines: list[str] | None = None,
422
+ ) -> str:
423
+ parts = [
424
+ f'<div class="sc-sb-stat sc-sb-dim">{html.escape(session_id[:26])}</div>',
425
+ '<div class="sc-sb-stat"></div>',
426
+ ]
427
+ for line in extra_lines or []:
428
+ parts.append(f'<div class="sc-sb-stat">{html.escape(line)}</div>')
429
+ parts.append(f'<div class="sc-sb-stat">files: {file_count}</div>')
430
+ parts.append(f'<div class="sc-sb-stat">agent: {html.escape(agent)}</div>')
431
+ return "\n".join(parts)
432
+
433
+
434
+ def render_sidebar_html(
435
+ *,
436
+ view: str = "files",
437
+ files: dict[str, str] | list[str] | None = None,
438
+ selected: int = 0,
439
+ focused: bool = False,
440
+ session_id: str = "(none)",
441
+ agent: str = "build",
442
+ stats_lines: list[str] | None = None,
443
+ file_total: int | None = None,
444
+ ) -> str:
445
+ """CLI TUI-shaped sidebar panel (flat file list or stats)."""
446
+ paths = _paths_for_ui(files)
447
+ total = file_total if file_total is not None else len(paths)
448
+ title = "stats" if view == "stats" else ("files ▸" if focused else "files")
449
+ panel_cls = "sc-sidebar-panel"
450
+ if focused:
451
+ panel_cls += " sc-sidebar-focused"
452
+
453
+ if view == "stats":
454
+ body = _stats_sidebar_body(
455
+ session_id=session_id,
456
+ file_count=total,
457
+ agent=agent,
458
+ extra_lines=stats_lines,
459
+ )
460
+ else:
461
+ body = _files_sidebar_body(paths, selected=selected)
462
+ if total > len(paths):
463
+ body += f'\n<div class="sc-sb-more">… {total - len(paths)} more files</div>'
464
+
465
+ return (
466
+ f'<div class="{panel_cls}">'
467
+ f'<div class="sc-sidebar-title">{html.escape(title)}</div>'
468
+ f'<div class="sc-sidebar-body">{body}</div>'
469
+ f"</div>"
470
+ )
471
+
engine/webcheck.js ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Headless smoke-check for a model-built web app, used by smolbuilder so the
2
+ // agent can actually *test* what it builds (the web equivalent of run_python).
3
+ //
4
+ // Loads index.html in jsdom, runs its scripts, then clicks every <button>, and
5
+ // reports any JavaScript errors. The goal is high precision: a correct app
6
+ // reports zero errors; a broken one (null element refs, undefined functions,
7
+ // syntax errors, exceptions on click) reports them so the agent can fix it.
8
+ //
9
+ // We stub the browser APIs jsdom doesn't implement (canvas 2d/webgl context,
10
+ // alert/confirm/prompt, matchMedia, media play) so apps that *use* them aren't
11
+ // falsely flagged — we're checking the app's own logic, not jsdom's coverage.
12
+ //
13
+ // Output: a single JSON line {ok, errors, buttons, clicked}. Exit 0 always
14
+ // (the verdict is in the JSON); exit 3 only if jsdom itself is missing.
15
+ 'use strict';
16
+
17
+ let JSDOM, VirtualConsole;
18
+ try {
19
+ ({ JSDOM, VirtualConsole } = require('jsdom'));
20
+ } catch (e) {
21
+ process.stdout.write(JSON.stringify({ ok: null, infra: 'jsdom not installed' }) + '\n');
22
+ process.exit(3);
23
+ }
24
+
25
+ const fs = require('fs');
26
+
27
/**
 * Build a permissive stand-in for a canvas 2d/webgl context.
 *
 * Every property read yields a callable no-op, except the handful whose return
 * value callers typically dereference (text metrics, image data, gradients or
 * patterns, and the `canvas` back-reference). Property writes are accepted and
 * discarded.
 */
function makeCtx() {
  const handler = {
    get(_target, prop) {
      switch (prop) {
        case 'measureText':
          return () => ({ width: 0 });
        case 'getImageData':
          return () => ({ data: new Uint8ClampedArray(4), width: 1, height: 1 });
        case 'createLinearGradient':
        case 'createRadialGradient':
        case 'createPattern':
          return () => ({ addColorStop() {} });
        case 'canvas':
          return { width: 300, height: 150 };
        default:
          return () => undefined;
      }
    },
    set() {
      return true;
    },
  };
  return new Proxy({}, handler);
}
42
+
43
/**
 * Install stand-ins on `window` for browser APIs the app may call: canvas
 * contexts, modal dialogs, scrolling, `matchMedia` (only when absent) and media
 * play/pause. Prototype patches are best-effort: a missing class is skipped.
 */
function stubBrowser(window) {
  const noop = () => {};
  // Prototype patches may target classes that do not exist on this window;
  // failing to patch one must not abort the remaining stubs.
  const bestEffort = (patch) => {
    try {
      patch();
    } catch (e) {}
  };

  bestEffort(() => {
    window.HTMLCanvasElement.prototype.getContext = () => makeCtx();
  });

  window.alert = noop;
  window.confirm = () => true;
  window.prompt = () => '';
  window.scrollTo = noop;
  window.scroll = noop;

  if (!window.matchMedia) {
    window.matchMedia = () => ({
      matches: false,
      media: '',
      addListener: noop,
      removeListener: noop,
      addEventListener: noop,
      removeEventListener: noop,
    });
  }

  bestEffort(() => {
    window.HTMLMediaElement.prototype.play = () => Promise.resolve();
  });
  bestEffort(() => {
    window.HTMLMediaElement.prototype.pause = noop;
  });
}
56
+
57
// Path of the HTML document to check, given as the first CLI argument.
const file = process.argv[2];

// A missing or unreadable input is an infrastructure problem, not a verdict on
// the app: report it as `ok: null` and still exit 0, so the caller always gets
// one JSON line instead of a stack trace and a non-zero exit.
let html;
try {
  html = fs.readFileSync(file, 'utf8');
} catch (e) {
  process.stdout.write(JSON.stringify({ ok: null, infra: 'cannot read input file: ' + e.message }) + '\n');
  process.exit(0);
}

// Collected error messages, de-duplicated and capped at 400 characters each.
const errors = [];
const push = (m) => {
  if (!m) return;
  // Normalise first so the duplicate check compares what is actually stored
  // (previously the raw value was compared against the truncated strings).
  const msg = String(m).slice(0, 400);
  if (!errors.includes(msg)) errors.push(msg);
};

// jsdom reports script failures through the virtual console as 'jsdomError'.
const vc = new VirtualConsole();
vc.on('jsdomError', (e) => push('script error: ' + (e && e.detail ? (e.detail.message || e.detail) : (e && e.message))));

let dom;
try {
  dom = new JSDOM(html, {
    runScripts: 'dangerously',
    pretendToBeVisual: true,
    virtualConsole: vc,
    beforeParse(window) {
      // Stubs and error listeners go in before any page script runs.
      stubBrowser(window);
      window.addEventListener('error', (ev) => push('uncaught: ' + (ev.error ? (ev.error.message || ev.error) : ev.message)));
      window.addEventListener('unhandledrejection', (ev) => push('promise rejection: ' + (ev.reason && ev.reason.message ? ev.reason.message : ev.reason)));
    },
  });
} catch (e) {
  push('load failed: ' + e.message);
  process.stdout.write(JSON.stringify({ ok: false, errors, buttons: 0, clicked: 0 }) + '\n');
  process.exit(0);
}

const { window } = dom;
const doc = window.document;
85
+
86
/**
 * Click every button-like element in the document.
 *
 * Disabled elements are enabled first so their handlers are exercised too. An
 * exception thrown out of `click()` is recorded via `push`, labelled with the
 * element's text, id or tag name (first 30 characters).
 *
 * @returns {{n: number, clicked: number}} elements found and clicks that
 *   completed without throwing.
 */
function clickAll() {
  const selector = 'button, [onclick], input[type=button], input[type=submit]';
  const targets = Array.from(doc.querySelectorAll(selector));
  let clicked = 0;
  targets.forEach((el) => {
    try {
      if (el.disabled) el.disabled = false; // exercise the handler regardless of initial state
      el.click();
      clicked += 1;
    } catch (e) {
      const label = (el.textContent || el.id || el.tagName).trim().slice(0, 30);
      push('click "' + label + '": ' + e.message);
    }
  });
  return { n: targets.length, clicked };
}
100
+
101
// Print the single-line JSON verdict and exit; `ok` is true only when no errors
// were collected.
function emitVerdict(n, clicked) {
  const verdict = { ok: errors.length === 0, errors, buttons: n, clicked };
  process.stdout.write(JSON.stringify(verdict) + '\n');
  process.exit(0);
}

// Give inline scripts 50 ms to settle, click everything, then wait another
// 250 ms so errors raised from timers started by those clicks are still
// captured before the verdict is printed.
setTimeout(() => {
  const { n, clicked } = clickAll();
  setTimeout(emitVerdict, 250, n, clicked);
}, 50);
engine/webcheck.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Headless verification of model-built web apps (the web `run_python`).
2
+
3
+ smolbuilder's agent writes HTML/CSS/JS but, unlike the Python path, had no way
4
+ to *run* it — so it shipped broken apps and couldn't tell. This bridges to a
5
+ small Node + jsdom checker (engine/webcheck.js) that loads the page, runs its
6
+ scripts, clicks every button, and reports JavaScript errors.
7
+
8
+ Graceful degradation is deliberate: if Node or jsdom isn't available (e.g. a
9
+ minimal Space image), we return `None` ("unverifiable") rather than failing the
10
+ build — the agent/router fall back to the structural check.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import shutil
16
+ import subprocess
17
+ import tempfile
18
+ from pathlib import Path
19
+
20
+ _CHECKER = Path(__file__).with_name("webcheck.js")
21
+
22
+
23
def available() -> bool:
    """Report whether the headless check can be attempted.

    True when a ``node`` executable is on PATH and the checker script exists.
    jsdom itself is not probed here.
    """
    if shutil.which("node") is None:
        return False
    return _CHECKER.exists()
26
+
27
+
28
def check_html(html: str, timeout: int = 20) -> tuple[bool | None, list[str]]:
    """Run the headless check on an HTML document.

    The document is written to a temporary ``.html`` file that is passed to the
    Node checker script; the checker's last stdout line is parsed as JSON.

    Args:
        html: Full HTML document text.
        timeout: Seconds to wait for the checker before giving up.

    Returns (ok, errors):
      - (True, [])      the app loaded and all buttons clicked without error
      - (False, [...])  real JavaScript errors were found
      - (None, [...])   unverifiable (Node/jsdom missing, or the checker broke)
    """
    node = shutil.which("node")
    if not node or not _CHECKER.exists():
        return None, ["node/jsdom unavailable (skipped runtime check)"]

    path: str | None = None
    try:
        # Written as UTF-8 explicitly because the checker reads the file as
        # UTF-8; the locale default could differ. delete=False lets the file be
        # closed before Node opens it; the finally block removes it, including
        # when the write itself fails.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".html", delete=False, encoding="utf-8", errors="replace"
        ) as f:
            path = f.name
            f.write(html)
        proc = subprocess.run(
            [node, str(_CHECKER), path],
            capture_output=True, text=True, timeout=timeout,
            # Node emits UTF-8 regardless of this process's locale.
            encoding="utf-8", errors="replace",
        )
    except subprocess.TimeoutExpired:
        return None, [f"runtime check timed out after {timeout}s"]
    except OSError as exc:
        # Could not create the temp file or launch Node: unverifiable, not a
        # failure of the app under test.
        return None, [f"runtime check could not run: {exc}"]
    finally:
        if path is not None:
            Path(path).unlink(missing_ok=True)

    if proc.returncode == 3:  # jsdom not installed
        return None, ["jsdom not installed (skipped runtime check)"]
    lines = (proc.stdout or "").strip().splitlines()
    if not lines:
        return None, [f"runtime check produced no output: {(proc.stderr or '').strip()[:200]}"]
    try:
        data = json.loads(lines[-1])
    except json.JSONDecodeError:
        return None, [f"runtime check output unparseable: {lines[-1][:200]}"]
    if not isinstance(data, dict):
        # Valid JSON but not the verdict object (e.g. the page logged a bare
        # value as the last line).
        return None, [f"runtime check output unparseable: {lines[-1][:200]}"]

    if data.get("ok") is None:
        return None, [str(data.get("infra") or "unverifiable")]
    return bool(data.get("ok")), [str(err) for err in (data.get("errors") or [])]
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio>=5.49,<6
2
+ liteforge==0.2.5
smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d179e40e7e38999081cfdd9461c0879b1843f81ea39d4dac3262a3eab5d7931
3
+ size 13694530
static/web_tui.js ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ (function () {
2
+ "use strict";
3
+
4
+ if (window.__smolcodeTuiInit) return;
5
+ window.__smolcodeTuiInit = true;
6
+
7
+ let leaderPending = false;
8
+ let leaderTimer = null;
9
+
10
/**
 * Click the element with the given id, or the first <button> inside it when the
 * element is a wrapper rather than a button itself. A missing id is ignored.
 */
function click(id) {
  const host = document.getElementById(id);
  if (!host) return;
  let target = host;
  if (host.tagName !== "BUTTON") {
    target = host.querySelector("button") || host;
  }
  target.click();
}
16
+
17
/**
 * Set the value of the text field identified by `id` (the element itself, or
 * the first textarea/input inside it) and fire a bubbling "input" event on it.
 * Does nothing when the id or the field cannot be found.
 */
function setHiddenValue(id, value) {
  const host = document.getElementById(id);
  if (!host) return;
  const isField = host.tagName === "TEXTAREA" || host.tagName === "INPUT";
  const field = isField ? host : host.querySelector("textarea, input");
  if (!field) return;
  field.value = value;
  field.dispatchEvent(new Event("input", { bubbles: true }));
}
27
+
28
// Global hook used by picker rows' inline onclick: store the chosen row index
// in the hidden pick field, then press the confirm button.
window.__smolcodePick = (idx) => {
  const chosen = String(idx);
  setHiddenValue("sc-picker-pick", chosen);
  click("sc-picker-confirm");
};
32
+
33
/**
 * Locate the prompt editor field.
 *
 * Prefers `#sc-editor` (the element itself if it is a textarea/input, else the
 * first textarea or text input inside it) and otherwise falls back to the last
 * element carrying `data-testid="textbox"`. Returns null when none is found.
 */
function editor() {
  const host = document.getElementById("sc-editor");
  const direct = host && (host.tagName === "TEXTAREA" || host.tagName === "INPUT") ? host : null;
  if (direct) return direct;

  const nested = host ? host.querySelector("textarea, input[type='text']") : null;
  if (nested) return nested;

  const fallbacks = document.querySelectorAll("[data-testid='textbox']");
  if (fallbacks.length === 0) return null;
  return fallbacks[fallbacks.length - 1];
}
43
+
44
// Maximum number of suggestion rows a popup renders and lets the user select.
const POPUP_MAX_ROWS = 12;

/**
 * Controller for one autocomplete popup attached to the editor.
 *
 * @param {HTMLElement} popupEl container element the rows are rendered into
 * @param {string} [kind] "slash" (command completion, the default) or "file"
 *   (`@path` completion); decides how `accept()` rewrites the editor text
 */
function PopupController(popupEl, kind) {
  this.popup = popupEl;
  this.kind = kind || "slash";
  this.matches = [];
  this.sel = 0;
}

/** Hide the popup and forget the current matches and selection. */
PopupController.prototype.hide = function () {
  if (this.popup) this.popup.style.display = "none";
  this.matches = [];
  this.sel = 0;
};

/**
 * Show `matches` in a popup positioned from the editor's bounding rectangle.
 *
 * @param {string[]} matches candidate completions
 * @param {HTMLTextAreaElement} ta editor the popup belongs to
 * @param {number} replaceFrom offset in the editor text used by slash completion
 */
PopupController.prototype.render = function (matches, ta, replaceFrom) {
  // Keep only the rows that are actually rendered. Previously every match was
  // kept, so arrow keys could move the selection onto an entry that was never
  // drawn and Enter would insert an item the user could not see.
  this.matches = matches.slice(0, POPUP_MAX_ROWS);
  this.sel = 0;
  this.replaceFrom = replaceFrom;
  this.ta = ta;
  if (!this.popup) return;
  this.popup.innerHTML = "";
  const self = this;
  this.matches.forEach(function (item, i) {
    const row = document.createElement("div");
    row.className = "sc-popup-item" + (i === 0 ? " sc-popup-sel" : "");
    // textContent (not innerHTML): match text is inserted as plain text.
    row.textContent = item;
    row.onclick = function () {
      self.sel = i;
      self.accept();
    };
    self.popup.appendChild(row);
  });
  const rect = ta.getBoundingClientRect();
  this.popup.style.display = this.matches.length ? "block" : "none";
  this.popup.style.left = rect.left + "px";
  this.popup.style.top = Math.max(0, rect.top - 160) + "px";
  this.popup.style.width = Math.max(220, rect.width) + "px";
  this._highlight();
};

/** Apply the selected-row class to the row at `this.sel` only. */
PopupController.prototype._highlight = function () {
  if (!this.popup) return;
  const items = this.popup.querySelectorAll(".sc-popup-item");
  items.forEach(function (el, i) {
    el.classList.toggle("sc-popup-sel", i === this.sel);
  }, this);
};

/** Move the selection by `delta` rows, wrapping around at either end. */
PopupController.prototype.move = function (delta) {
  if (!this.matches.length) return;
  this.sel = (this.sel + delta + this.matches.length) % this.matches.length;
  this._highlight();
};

/**
 * Insert the selected match into the editor, fire an "input" event, hide the
 * popup and refocus the editor.
 */
PopupController.prototype.accept = function () {
  if (!this.matches.length || !this.ta) return;
  const val = this.ta.value;
  const item = this.matches[this.sel];
  if (this.kind === "file") {
    const atMatch = val.match(/(?:^|\s)@(\S*)$/);
    if (!atMatch) return;
    // The match may begin with ANY whitespace character (space, newline, tab),
    // so locate the "@" itself. Only a leading space used to be skipped, which
    // deleted a newline or tab that preceded the "@".
    const atPos = atMatch.index + atMatch[0].indexOf("@");
    this.ta.value = val.slice(0, atPos) + "@" + item + " ";
  } else {
    const rest = val.slice(this.replaceFrom);
    this.ta.value = item + rest;
  }
  this.ta.dispatchEvent(new Event("input", { bubbles: true }));
  this.hide();
  this.ta.focus();
};

/** Accept the current selection; returns false when there is nothing to accept. */
PopupController.prototype.tabComplete = function () {
  if (!this.matches.length) return false;
  this.accept();
  return true;
};

/** Truthy when the popup is displayed and has at least one match. */
PopupController.prototype.visible = function () {
  return this.popup && this.popup.style.display === "block" && this.matches.length > 0;
};
125
+
126
/**
 * Return the element with class `cls`, creating a `<div class="cls sc-popup">`
 * at the end of <body> when none exists yet.
 */
function ensurePopup(cls) {
  const existing = document.querySelector("." + cls);
  if (existing) return existing;
  const created = document.createElement("div");
  created.className = cls + " sc-popup";
  document.body.appendChild(created);
  return created;
}
135
+
136
+ const slashPopup = new PopupController(ensurePopup("sc-slash-popup"), "slash");
137
+ const filePopup = new PopupController(ensurePopup("sc-file-popup"), "file");
138
+
139
/** Hide both autocomplete popups (slash commands first, then files). */
function hidePopups() {
  [slashPopup, filePopup].forEach((popup) => {
    popup.hide();
  });
}
143
+
144
/**
 * Refresh the autocomplete popups after the editor text changed.
 *
 * - Text that starts with "/" and contains no space: show slash-command matches.
 * - Otherwise, text ending in "@prefix" (at the start or after whitespace):
 *   show files whose path starts with that prefix.
 * - Anything else hides both popups.
 *
 * Candidates are read from `window.__smolcode_commands` and
 * `window.__smolcode_files`, which are populated elsewhere.
 */
function onEditorInput(ta) {
  const val = ta.value;
  const cmds = window.__smolcode_commands || [];

  if (val.startsWith("/") && !val.includes(" ")) {
    const m = cmds.filter(function (c) { return c.startsWith(val); });
    slashPopup.render(m, ta, val.length);
    filePopup.hide();
    return;
  }

  slashPopup.hide();
  const atMatch = val.match(/(?:^|\s)@(\S*)$/);
  if (atMatch) {
    const prefix = atMatch[1];
    const files = window.__smolcode_files || [];
    const m = files.filter(function (f) { return f.startsWith(prefix); });
    // The match can begin with any whitespace character (space, newline, tab),
    // so find the "@" itself; skipping only a leading space left the offset one
    // short whenever the "@" followed a newline or tab.
    const atPos = atMatch.index + atMatch[0].indexOf("@");
    filePopup.render(m, ta, atPos);
    return;
  }
  filePopup.hide();
}
167
+
168
/**
 * Return whichever autocomplete popup is currently visible (slash commands take
 * precedence over files), or null when neither is showing.
 */
function activePopup() {
  for (const candidate of [slashPopup, filePopup]) {
    if (candidate.visible()) return candidate;
  }
  return null;
}
173
+
174
/**
 * Keydown handler for the prompt editor.
 *
 * Checks run in this order, and the first one that applies wins:
 *  1. A visible autocomplete popup takes Arrow keys, Enter and Tab.
 *  2. An open picker takes Arrow keys and Enter.
 *  3. Enter submits (Shift+Enter and Alt+Enter are left to the browser).
 *  4. Esc closes the overlay if one is open, otherwise interrupts.
 *  5. Ctrl+L clears; Ctrl+X arms the leader key for two seconds.
 *  6. With the leader armed, the next plain character triggers its mapped action.
 *  7. Tab completes a slash command or else cycles the agent; Shift+Tab cycles
 *     the mode; F2 cycles the model.
 */
function onEditorKeyDown(e) {
  const ta = e.target;
  const popup = activePopup();

  if (popup && (e.key === "ArrowDown" || e.key === "ArrowUp")) {
    e.preventDefault();
    popup.move(e.key === "ArrowDown" ? 1 : -1);
    return;
  }

  if (popup && e.key === "Enter" && !e.shiftKey) {
    e.preventDefault();
    popup.accept();
    return;
  }

  if (e.key === "Tab" && popup && !e.shiftKey) {
    e.preventDefault();
    popup.tabComplete();
    return;
  }

  // Picker navigation while the editor keeps focus (e.g. a picker opened via the
  // leader key). This logic used to sit at the end of the handler behind
  // `!ta`, which is never true because `ta` is the event target, and after
  // Enter had already been consumed by submit, so it could never run. The
  // document-level handler deliberately skips events coming from the editor,
  // so this is the only place these keys can reach the picker.
  // NOTE(review): assumes `.sc-picker` is in the DOM only while a picker is
  // open, the same assumption the document-level handler makes.
  if (!popup && document.querySelector(".sc-picker")) {
    let pickerButton = null;
    if (e.key === "ArrowDown") pickerButton = "sc-picker-down";
    else if (e.key === "ArrowUp") pickerButton = "sc-picker-up";
    else if (e.key === "Enter") pickerButton = "sc-picker-confirm";
    if (pickerButton) {
      e.preventDefault();
      click(pickerButton);
      return;
    }
  }

  if (e.key === "Enter" && !e.shiftKey && !e.altKey) {
    e.preventDefault();
    hidePopups();
    click("sc-submit");
    return;
  }

  if (e.key === "Escape") {
    hidePopups();
    if (document.querySelector(".sc-overlay")) {
      click("sc-close-overlay");
    } else {
      click("sc-interrupt");
    }
    return;
  }

  if (e.ctrlKey && (e.key === "l" || e.key === "L")) {
    e.preventDefault();
    click("sc-clear");
    return;
  }

  if (e.ctrlKey && (e.key === "x" || e.key === "X")) {
    e.preventDefault();
    // Arm the leader key; it disarms itself if no follow-up key arrives.
    leaderPending = true;
    if (leaderTimer) clearTimeout(leaderTimer);
    leaderTimer = setTimeout(function () { leaderPending = false; }, 2000);
    click("sc-whichkey");
    return;
  }

  if (leaderPending && !e.ctrlKey && !e.metaKey && e.key.length === 1) {
    leaderPending = false;
    if (leaderTimer) clearTimeout(leaderTimer);
    const map = {
      m: "sc-open-picker-models",
      a: "sc-open-picker-agents",
      t: "sc-open-picker-themes",
      l: "sc-open-picker-sessions",
      n: "sc-new-session",
      b: "sc-toggle-sidebar",
      s: "sc-toggle-sidebar-view",
      h: "sc-help",
      o: "sc-cycle-mode",
      e: "sc-cycle-think",
    };
    const btn = map[e.key.toLowerCase()];
    if (btn) {
      e.preventDefault();
      click(btn);
    }
    // An unmapped key just disarms the leader and is typed normally.
    return;
  }

  if (e.key === "Tab" && !e.shiftKey) {
    if (trySlashTabComplete(ta, e)) return;
  }

  if (e.key === "Tab" && !e.shiftKey && !activePopup()) {
    e.preventDefault();
    click("sc-cycle-agent");
    return;
  }

  if (e.key === "Tab" && e.shiftKey) {
    e.preventDefault();
    click("sc-cycle-mode");
    return;
  }

  if (e.key === "F2") {
    e.preventDefault();
    click("sc-cycle-model");
    return;
  }
}
286
+
287
/**
 * Document-level keydown handler: while a picker is present and focus is NOT in
 * the editor, Arrow Up/Down and Enter press the picker's up/down/confirm buttons.
 */
function onGlobalKeyDown(e) {
  const pickerOpen = Boolean(document.querySelector(".sc-picker"));
  if (!pickerOpen || document.activeElement === editor()) return;

  let target = null;
  switch (e.key) {
    case "ArrowDown":
      target = "sc-picker-down";
      break;
    case "ArrowUp":
      target = "sc-picker-up";
      break;
    case "Enter":
      target = "sc-picker-confirm";
      break;
    default:
      return;
  }
  e.preventDefault();
  click(target);
}
301
+
302
/**
 * Attach the input and keydown handlers to the editor, once per element.
 * The element is tagged with `data-sc-bound` so repeated calls are no-ops.
 */
function bindEditor() {
  const ta = editor();
  if (!ta) return;
  if (ta.dataset.scBound) return;
  ta.dataset.scBound = "1";
  ta.addEventListener("input", () => {
    onEditorInput(ta);
  });
  ta.addEventListener("keydown", onEditorKeyDown);
}
309
+
310
/**
 * Intentionally a no-op: status-bar chips are re-rendered together with the
 * status HTML, so their clicks are handled by a delegated document-level
 * listener registered in init() rather than by per-chip bindings.
 */
function bindChips() {
  // Nothing to bind here; see the delegated click handling set up in init().
}
313
+
314
/**
 * Delegated click handler for status-bar chips.
 *
 * A click inside a `[data-picker]` element presses the button that opens the
 * matching picker (unknown picker kinds are ignored); a click inside a
 * `[data-action='cycle-mode']` element presses the cycle-mode button.
 */
function onDocumentClick(e) {
  const chip = e.target.closest("[data-picker]");
  if (chip) {
    const openers = {
      models: "sc-open-picker-models",
      agents: "sc-open-picker-agents",
      themes: "sc-open-picker-themes",
      sessions: "sc-open-picker-sessions",
    };
    const opener = openers[chip.getAttribute("data-picker")];
    if (opener) {
      e.preventDefault();
      click(opener);
    }
    return;
  }

  if (!e.target.closest("[data-action='cycle-mode']")) return;
  e.preventDefault();
  click("sc-cycle-mode");
}
336
+
337
/**
 * Return the known slash commands that start with `val`. The result is empty
 * unless `val` begins with "/" and contains no space.
 */
function slashMatches(val) {
  const isCommandPrefix = val.startsWith("/") && !val.includes(" ");
  if (!isCommandPrefix) return [];
  const known = window.__smolcode_commands || [];
  return known.filter((cmd) => cmd.startsWith(val));
}
342
+
343
/**
 * Try to complete a slash command on Tab.
 *
 * When the editor text has slash-command matches, the key event is consumed:
 * an open slash popup accepts its current selection, otherwise the first match
 * replaces the editor text directly and the popups are hidden.
 *
 * @returns {boolean} true when the Tab was consumed, false when there was
 *   nothing to complete (the event is left untouched).
 */
function trySlashTabComplete(ta, e) {
  const candidates = slashMatches(ta.value);
  if (candidates.length === 0) return false;

  e.preventDefault();
  const open = activePopup();
  const slashOpen = Boolean(open) && open.kind === "slash" && open.matches.length > 0;
  if (slashOpen) {
    open.tabComplete();
    return true;
  }

  ta.value = candidates[0];
  ta.dispatchEvent(new Event("input", { bubbles: true }));
  hidePopups();
  return true;
}
358
+
359
/**
 * Wire up the page: delegated chip clicks, click-on-backdrop to close the
 * overlay, picker keyboard navigation, and editor binding. The editor is bound
 * immediately, again after 300 ms and 1500 ms, and on every DOM mutation,
 * because the editor element may be (re)created after this script runs.
 */
function init() {
  document.addEventListener("click", onDocumentClick);
  document.addEventListener("click", (e) => {
    const overlay = document.querySelector(".sc-overlay");
    // Only a click on the backdrop itself closes it, not clicks inside it.
    if (overlay && e.target === overlay) click("sc-close-overlay");
  });
  document.addEventListener("keydown", onGlobalKeyDown);

  const observer = new MutationObserver(() => {
    bindEditor();
  });
  observer.observe(document.body, { childList: true, subtree: true });

  bindEditor();
  [300, 1500].forEach((delayMs) => {
    setTimeout(bindEditor, delayMs);
  });
}
374
+
375
+ if (document.readyState === "loading") {
376
+ document.addEventListener("DOMContentLoaded", init);
377
+ } else {
378
+ init();
379
+ }
380
+ })();