Spaces:
Paused
Paused
Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- Dockerfile +25 -0
- README.md +90 -4
- app.py +849 -0
- demo.mp4 +3 -0
- engine/__init__.py +22 -0
- engine/agent.py +197 -0
- engine/branding.py +172 -0
- engine/browser_runner.py +145 -0
- engine/browsercheck.py +111 -0
- engine/builder.py +270 -0
- engine/config.py +290 -0
- engine/fanout.py +128 -0
- engine/file_tree.py +92 -0
- engine/gradio_shell.py +425 -0
- engine/judge.py +90 -0
- engine/live_run.py +93 -0
- engine/playwright_runner.py +132 -0
- engine/preflight.py +116 -0
- engine/preview.py +161 -0
- engine/route_clf.py +243 -0
- engine/router.py +455 -0
- engine/rust_session.py +425 -0
- engine/sandbox.py +141 -0
- engine/themes.py +60 -0
- engine/tools.py +174 -0
- engine/trace.py +73 -0
- engine/trace_collector.py +128 -0
- engine/ui_trace.py +121 -0
- engine/web_tui.py +471 -0
- engine/webcheck.js +108 -0
- engine/webcheck.py +65 -0
- requirements.txt +2 -0
- smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl +3 -0
- static/web_tui.js +380 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
demo.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM ubuntu:24.04
|
| 2 |
+
ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1
|
| 3 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 4 |
+
python3 python3-pip python3-venv ca-certificates && \
|
| 5 |
+
rm -rf /var/lib/apt/lists/*
|
| 6 |
+
RUN python3 -m venv /opt/venv
|
| 7 |
+
ENV PATH="/opt/venv/bin:$PATH"
|
| 8 |
+
WORKDIR /app
|
| 9 |
+
COPY requirements.txt smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl ./
|
| 10 |
+
RUN pip install --no-cache-dir -r requirements.txt \
|
| 11 |
+
./smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl
|
| 12 |
+
COPY app.py demo.mp4 ./
|
| 13 |
+
COPY engine ./engine
|
| 14 |
+
COPY static ./static
|
| 15 |
+
# HF Docker Spaces run as uid 1000; let the agent write its workspace
|
| 16 |
+
RUN mkdir -p /app/.workspace && chmod -R 777 /app
|
| 17 |
+
ENV SMOLCODE_HOST=0.0.0.0 SMOLCODE_PORT=7860 HF_HOME=/tmp/hf
|
| 18 |
+
# Backend: full specialist matrix served from HAL via the public tunnel. Baked in
|
| 19 |
+
# (URL + "ollama" key are not secret) so it reaches the container reliably; swap
|
| 20 |
+
# this URL + rebuild to point at a durable endpoint for judging.
|
| 21 |
+
ENV SMALLCODE_PRESET=hal-matrix \
|
| 22 |
+
SMALLCODE_BASE_URL=https://collapse-snake-achieving-controversial.trycloudflare.com/v1 \
|
| 23 |
+
SMALLCODE_API_KEY=ollama
|
| 24 |
+
EXPOSE 7860
|
| 25 |
+
CMD ["python3", "app.py"]
|
README.md
CHANGED
|
@@ -1,10 +1,96 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: smolcode
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: purple
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
short_description: A tiny local model that writes code, runs it, and fixes it.
|
| 11 |
+
tags:
|
| 12 |
+
- build-small-hackathon
|
| 13 |
+
- agent
|
| 14 |
+
- code-generation
|
| 15 |
---
|
| 16 |
|
| 17 |
+
|
| 18 |
+
# smolcode 🤖
|
| 19 |
+
|
| 20 |
+
**A tiny local model that writes code, runs it, and fixes it — until it works.**
|
| 21 |
+
|
| 22 |
+
smolcode is an *agentic* coding assistant built for **small** language models. Instead of
|
| 23 |
+
autocompleting, it runs a **plan → write → execute → repair** loop: it writes a file, runs
|
| 24 |
+
it in a sandbox, reads the real error, and iterates until a test passes — on a model small
|
| 25 |
+
enough to run on your own machine (a ≤4B model on a laptop, scaling up to 32B on a
|
| 26 |
+
workstation). **No cloud APIs.**
|
| 27 |
+
|
| 28 |
+
Built for the [Hugging Face × Gradio **Build Small** Hackathon](https://huggingface.co/build-small-hackathon).
|
| 29 |
+
|
| 30 |
+
## Why it's a "Build Small" entry
|
| 31 |
+
- **Agentic on a 3B model.** The loop — not the model size — does the work. A ≤4B model
|
| 32 |
+
drives tool calls reliably enough to write, run, and self-correct code.
|
| 33 |
+
- **Local-first & private.** Talks to any OpenAI-compatible endpoint (Ollama, llama.cpp).
|
| 34 |
+
Nothing leaves your machine.
|
| 35 |
+
- **Specialty routing.** A 2D router classifies tasks into 16 language/function
|
| 36 |
+
families and escalates within each family's fine-tuned ladder before falling back
|
| 37 |
+
to bigger Granite models.
|
| 38 |
+
- **Fine-tuned tiny coder.** We fine-tuned **Qwen2.5-Coder-1.5B** to emit native tool calls
|
| 39 |
+
so a ≤2B model can be the cheap entry tier — published at
|
| 40 |
+
[`seanpoyner/smolcode-coder-1.5b-tools`](https://huggingface.co/seanpoyner/smolcode-coder-1.5b-tools).
|
| 41 |
+
- **Rust core.** Agent loop, tool execution, and tracing run through
|
| 42 |
+
[**LiteForge**](https://github.com/seanpoyner/liteforge) and **smolcode-core**
|
| 43 |
+
(Rust/PyO3). Gradio is the (required) shell; the brain is Rust.
|
| 44 |
+
|
| 45 |
+
## How to use this Space
|
| 46 |
+
1. Type a coding task, e.g. *"write a function that validates an email and test it."*
|
| 47 |
+
2. Watch the **agent trace** stream live: `write_file → run_python → (error) → fix → pass`.
|
| 48 |
+
3. The **router** badge shows which tier solved it and whether it's **✓ verified**.
|
| 49 |
+
4. Tick **⚡ fan out** and enter several lines to run independent tasks as **parallel subagents**.
|
| 50 |
+
|
| 51 |
+
## Benchmark — the loop is the product
|
| 52 |
+
The agentic loop is what makes a tiny model useful. On the same HumanEval-style suite
|
| 53 |
+
(`bench/tasks.py`, 10 tasks, pass@1):
|
| 54 |
+
|
| 55 |
+
<!-- BENCH_TABLE_START -->
|
| 56 |
+
| System | Model | pass@1 |
|
| 57 |
+
|--------|-------|--------|
|
| 58 |
+
| single-shot | fine-tuned **1.5B** | 50% |
|
| 59 |
+
| **agentic loop** | fine-tuned **1.5B** | **70%** |
|
| 60 |
+
| single-shot | granite4.1:3b | 90% |
|
| 61 |
+
|
| 62 |
+
*The write→run→fix loop lifts the fine-tuned 1.5B from **50% → 70%** (+20 pts) — the
|
| 63 |
+
loop, not raw model size, does the work. A larger model (granite 3B) scores higher
|
| 64 |
+
single-shot, which is exactly why the router escalates only when the small tier can't
|
| 65 |
+
verify. Measured with `bench/run.py` on the hal backend.*
|
| 66 |
+
<!-- BENCH_TABLE_END -->
|
| 67 |
+
|
| 68 |
+
## Under the hood
|
| 69 |
+
```
|
| 70 |
+
Gradio UI → smolcode-core / LiteForge (Rust/PyO3) → OpenAI-compatible endpoint
|
| 71 |
+
specialty router + agent loop
|
| 72 |
+
tools: write_file, read_file, run_python, run_tests
|
| 73 |
+
served by Ollama / llama.cpp
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
There's also a full terminal agent (`smolcode-cli`, a Rust ratatui TUI) and a
|
| 77 |
+
Replit/Lovable-style app builder (`smolbuilder.py`) on the same engine.
|
| 78 |
+
|
| 79 |
+
- **Code:** https://github.com/seanpoyner/smolcode
|
| 80 |
+
- **Model:** https://huggingface.co/seanpoyner/smolcode-coder-1.5b-tools
|
| 81 |
+
- **Engine:** https://github.com/seanpoyner/liteforge
|
| 82 |
+
- **App builder companion:** https://huggingface.co/spaces/seanpoyner/smolbuilder
|
| 83 |
+
|
| 84 |
+
## Demo video
|
| 85 |
+
<video controls src="https://huggingface.co/spaces/seanpoyner/smolcode/resolve/main/demo.mp4"></video>
|
| 86 |
+
|
| 87 |
+
[▶️ Watch the demo](https://huggingface.co/spaces/seanpoyner/smolcode/resolve/main/demo.mp4) — the agent writes code, runs it, fixes the failing test, and shows the router tier that solved it.
|
| 88 |
+
|
| 89 |
+
## Share
|
| 90 |
+
> Most coding tasks don't need a giant model. **smolcode** is an agentic coding assistant that runs entirely on a *small local model* — it writes the code, runs it, reads the real error, and fixes itself until tests pass. Fine-tuned **1.5B** coder; the router escalates a tier only when needed (all ≤32B). Less compute, same result.
|
| 91 |
+
>
|
| 92 |
+
> Built for the #BuildSmall hackathon with @huggingface + @Gradio. 🦀 Rust core.
|
| 93 |
+
> ▶️ https://huggingface.co/spaces/seanpoyner/smolcode
|
| 94 |
+
> #SmallModels #LocalAI #Gradio #BuildSmall
|
| 95 |
+
|
| 96 |
+
📣 **Posted on LinkedIn:** https://www.linkedin.com/posts/sean-poyner_buildsmall-smallmodels-localai-share-7472421438109650944-bQGy/
|
app.py
ADDED
|
@@ -0,0 +1,849 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""smolcode — CLI-parity web UI over the Rust engine."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
|
| 11 |
+
from engine import Router, load_preset
|
| 12 |
+
from engine.config import (
|
| 13 |
+
Preset,
|
| 14 |
+
Tier,
|
| 15 |
+
is_specialty_model,
|
| 16 |
+
parse_size_b,
|
| 17 |
+
specialist_sizes,
|
| 18 |
+
)
|
| 19 |
+
from engine.branding import SMOLCODE_CSS
|
| 20 |
+
from engine.gradio_shell import (
|
| 21 |
+
AppSessionState,
|
| 22 |
+
SlashResult,
|
| 23 |
+
UiSettings,
|
| 24 |
+
dispatch_slash,
|
| 25 |
+
parse_input,
|
| 26 |
+
)
|
| 27 |
+
from engine.preflight import list_models
|
| 28 |
+
from engine.router import RouteResult
|
| 29 |
+
from engine.rust_session import (
|
| 30 |
+
RustSession,
|
| 31 |
+
apply_settings,
|
| 32 |
+
get_session_chat,
|
| 33 |
+
git_status,
|
| 34 |
+
list_background_jobs,
|
| 35 |
+
load_rust_config,
|
| 36 |
+
parse_session_label,
|
| 37 |
+
session_choices,
|
| 38 |
+
workspace_paths,
|
| 39 |
+
AUTOCOMPLETE_FILE_LIMIT,
|
| 40 |
+
UI_FILE_LIMIT,
|
| 41 |
+
)
|
| 42 |
+
from engine.trace import build_trace, save_trace
|
| 43 |
+
from engine.themes import theme_at
|
| 44 |
+
from engine.web_tui import (
|
| 45 |
+
Transcript,
|
| 46 |
+
agent_choices,
|
| 47 |
+
cycle_agent,
|
| 48 |
+
cycle_mode,
|
| 49 |
+
cycle_model,
|
| 50 |
+
cycle_think,
|
| 51 |
+
header_bar_html,
|
| 52 |
+
help_overlay_html,
|
| 53 |
+
host_from_url,
|
| 54 |
+
ingest_agent_event,
|
| 55 |
+
parse_git_header,
|
| 56 |
+
render_picker_html,
|
| 57 |
+
render_sidebar_html,
|
| 58 |
+
shell_theme_html,
|
| 59 |
+
slash_commands,
|
| 60 |
+
status_bar_html,
|
| 61 |
+
theme_picker_items,
|
| 62 |
+
whichkey_overlay_html,
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
# Preset resolved once at import time; its key, base_url, api_key and tiers are
# read by the model-picker helpers below.
PRESET = load_preset()
# Contents of static/web_tui.js, located relative to this module. Decoded
# explicitly as UTF-8 so the result does not depend on the host locale's
# default text encoding.
_JS_HEAD = (Path(__file__).parent / "static" / "web_tui.js").read_text(encoding="utf-8")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@dataclass
class WebUiState:
    """Mutable per-session view state for the web shell (sidebar, theme, overlays)."""

    # Sidebar: shown/hidden flag, active view ("files" or "stats"), selected row.
    sidebar_visible: bool = True
    sidebar_view: str = "files"
    sidebar_sel: int = 0
    # Index handed to theme_at() / shell_theme_html().
    theme_idx: int = 0
    # Active overlay: "" (none), "help", "whichkey" or "picker".
    overlay: str = ""
    # Picker overlay: which list is shown, its rows, and the highlighted row.
    picker_kind: str = ""
    picker_items: list[str] = field(default_factory=list)
    picker_sel: int = 0
    # Total file count shown in the sidebar; 0 means "fall back to len(files)".
    file_total: int = 0
    # Blocking startup model pick: true until the user chooses from the modal.
    needs_model_pick: bool = True
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _normalize_paths(files: list[str] | dict[str, str] | None) -> list[str]:
|
| 85 |
+
if not files:
|
| 86 |
+
return []
|
| 87 |
+
if isinstance(files, dict):
|
| 88 |
+
paths = sorted(files.keys())
|
| 89 |
+
else:
|
| 90 |
+
paths = sorted(files)
|
| 91 |
+
return paths[:UI_FILE_LIMIT]
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _cfg() -> dict:
    """Return the configuration produced by ``load_rust_config`` (re-read on every call)."""
    config = load_rust_config()
    return config
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _ensure_rust(app_state: AppSessionState, settings: UiSettings) -> RustSession:
    """Return the session's ``RustSession``, creating it on first use.

    The current UI settings are (re)applied on every call, whether or not the
    session already existed.
    """
    session = app_state.rust
    if session is None:
        session = RustSession(
            workspace=settings.workspace,
            agent=settings.agent,
            yolo=settings.yolo,
            # None for Auto selections, so the router sets the model instead.
            model=_pinned_model(settings.model),
            base_url=_cfg().get("base_url"),
            approval_handler=app_state.approval.ask,
        )
        app_state.rust = session
    apply_settings(session, settings)
    return session
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# --- curated model picker (Auto-first, <=32B, specialty fine-tunes collapsed) -------
|
| 113 |
+
# Each row is (label, model, think). model "auto"/"auto:<size>" are router pseudo-tags
|
| 114 |
+
# interpreted by engine/router.py + rust_session.apply_settings; think "off" means the
|
| 115 |
+
# router derives the level.
|
| 116 |
+
_AUTO_ENTRIES: list[tuple[str, str, str]] = [
|
| 117 |
+
("Auto", "auto", "off"),
|
| 118 |
+
("Auto · think low", "auto", "low"),
|
| 119 |
+
("Auto · think high", "auto", "high"),
|
| 120 |
+
("Auto · think xtra", "auto", "xtra"),
|
| 121 |
+
]
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _model_entries() -> list[tuple[str, str, str]]:
    """Build every picker row as a ``(label, model, think)`` triple.

    Order: the fixed Auto rows, one ``Auto · <SIZE>`` row per size reported by
    ``specialist_sizes(PRESET)``, then concrete models (those listed by the
    endpoint first, preset tier models after), de-duplicated, skipping
    specialty fine-tunes and anything ``parse_size_b`` reports above 32.
    """
    rows = [*_AUTO_ENTRIES]
    rows.extend(
        (f"Auto · {size.upper()}", f"auto:{size}", "off")
        for size in specialist_sizes(PRESET)
    )

    tier_models = [tier.model for tier in PRESET.tiers if tier.model]
    served = list_models(_cfg().get("base_url", PRESET.base_url))

    listed: set[str] = set()
    for tag in served + tier_models:
        if not tag or tag in listed:
            continue
        if is_specialty_model(tag) or parse_size_b(tag) > 32:
            continue
        listed.add(tag)
        rows.append((tag, tag, "off"))
    return rows
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def _model_labels() -> list[str]:
    """Return only the display labels of the model picker rows, in row order."""
    return [row[0] for row in _model_entries()]
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _label_to_selection(label: str) -> tuple[str, str] | None:
    """Map a picker label to its ``(model, think)`` pair; ``None`` if no row has that label."""
    matches = (
        (model, think)
        for name, model, think in _model_entries()
        if name == label
    )
    # The first matching row wins, as in a top-to-bottom scan.
    return next(matches, None)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def _model_sel_index(settings: UiSettings) -> int:
    """Return the picker row index for the current selection.

    Preference order: the first row matching both model and think level, then
    the first row matching the model alone, then row 0 (Auto). An unset model
    counts as "auto" and an unset think level as "off".
    """
    rows = _model_entries()
    want_model = settings.model or "auto"
    want_think = settings.think or "off"

    exact = next(
        (pos for pos, (_lbl, m, t) in enumerate(rows) if m == want_model and t == want_think),
        None,
    )
    if exact is not None:
        return exact
    return next(
        (pos for pos, (_lbl, m, _t) in enumerate(rows) if m == want_model),
        0,
    )
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def _selection_label(settings: UiSettings) -> str:
    """Friendly label for the current selection (model chip in header/status).

    The row list is built exactly once and the match is resolved against that
    same list. Previously the list was built twice (here and again inside
    ``_model_sel_index``), which doubled the endpoint model listing per render
    and indexed one list with a position computed from another.

    Matching mirrors ``_model_sel_index``: first row matching (model, think),
    else first row matching the model, else the first row. Returns "Auto" if
    there are no rows at all.
    """
    entries = _model_entries()
    if not entries:
        return "Auto"

    cur_m = settings.model or "auto"
    cur_t = settings.think or "off"

    model_only = None
    for lbl, m, t in entries:
        if m != cur_m:
            continue
        if t == cur_t:
            return lbl  # exact (model, think) wins
        if model_only is None:
            model_only = lbl  # remember the first model-only match as fallback
    return model_only if model_only is not None else entries[0][0]
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def _pinned_model(model_sel: str | None) -> str | None:
|
| 174 |
+
"""The concrete model tag to pin, or None for Auto/Auto·size (router-driven)."""
|
| 175 |
+
m = model_sel or ""
|
| 176 |
+
return None if (not m or m == "auto" or m.startswith("auto:")) else m
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def _effective_preset(model_sel: str | None):
    """Resolve a picker selection to a ``(preset, size_floor)`` pair.

    * "auto" (or nothing selected): the module preset, no size floor.
    * "auto:<size>": the module preset with ``<size>`` as the floor (``None``
      when the part after the colon is empty).
    * any other tag: a copy of the module preset's key/base_url/api_key with a
      single "custom" tier for that tag, and no size floor.
    """
    choice = model_sel or "auto"
    if choice == "auto":
        return PRESET, None
    if choice.startswith("auto:"):
        _prefix, _sep, size = choice.partition(":")
        return PRESET, (size or None)

    pinned = Preset(
        key=PRESET.key,
        base_url=PRESET.base_url,
        api_key=PRESET.api_key,
        tiers=[Tier("custom", choice)],
    )
    return pinned, None
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def _picker_items(kind: str, settings: UiSettings) -> list[str]:
|
| 198 |
+
if kind == "models":
|
| 199 |
+
return _model_labels()
|
| 200 |
+
if kind == "themes":
|
| 201 |
+
return theme_picker_items()
|
| 202 |
+
if kind == "agents":
|
| 203 |
+
return agent_choices()
|
| 204 |
+
if kind == "sessions":
|
| 205 |
+
return session_choices()
|
| 206 |
+
return []
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def _picker_sel_for(kind: str, settings: UiSettings, ui: WebUiState, items: list[str]) -> int:
|
| 210 |
+
if not items:
|
| 211 |
+
return 0
|
| 212 |
+
if kind == "models":
|
| 213 |
+
return _model_sel_index(settings)
|
| 214 |
+
if kind == "themes":
|
| 215 |
+
name = theme_at(ui.theme_idx).name
|
| 216 |
+
return items.index(name) if name in items else 0
|
| 217 |
+
if kind == "agents":
|
| 218 |
+
cur = settings.agent if settings.mode != "plan" else "plan"
|
| 219 |
+
return items.index(cur) if cur in items else 0
|
| 220 |
+
return 0
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def _header(settings: UiSettings, ui: WebUiState) -> str:
    """Render the header bar: git branch/dirty flag, model label, endpoint host, theme name."""
    branch, dirty = parse_git_header(git_status(settings.workspace))
    model_label = _selection_label(settings)
    endpoint_host = host_from_url(_cfg().get("base_url", ""))
    theme_name = theme_at(ui.theme_idx).name
    return header_bar_html(
        git_branch=branch,
        git_dirty=dirty,
        model=model_label,
        host=endpoint_host,
        theme=theme_name,
    )
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def _status(settings: UiSettings, app_state: AppSessionState, *, running: bool = False) -> str:
    """Render the status bar; the title shows the first 8 characters of the session id, if any."""
    session = app_state.rust
    if session:
        title = f"session {session.session_id[:8]}"
    else:
        title = "new session"
    return status_bar_html(
        settings,
        session_title=title,
        model=_selection_label(settings),
        running=running,
    )
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _sidebar_html(ui: WebUiState, settings: UiSettings, files: list[str], app_state: AppSessionState) -> str:
    """Render the sidebar for the current view, file list, selection and session."""
    session = app_state.rust
    session_id = session.session_id if session else "(none)"
    # A zero file_total means "not set": fall back to the length of the shown list.
    total = ui.file_total or len(files)
    return render_sidebar_html(
        view=ui.sidebar_view,
        files=files,
        selected=ui.sidebar_sel,
        session_id=session_id,
        agent=settings.agent,
        file_total=total,
    )
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def _overlay_html(ui: WebUiState) -> str:
|
| 257 |
+
if ui.overlay == "help":
|
| 258 |
+
return f'<div class="sc-overlay"><div class="sc-overlay-panel">{help_overlay_html()}</div></div>'
|
| 259 |
+
if ui.overlay == "whichkey":
|
| 260 |
+
return f'<div class="sc-overlay"><div class="sc-overlay-panel">{whichkey_overlay_html()}</div></div>'
|
| 261 |
+
if ui.overlay == "picker" and ui.picker_kind:
|
| 262 |
+
panel = render_picker_html(
|
| 263 |
+
ui.picker_kind,
|
| 264 |
+
ui.picker_items,
|
| 265 |
+
ui.picker_sel,
|
| 266 |
+
title=ui.picker_kind,
|
| 267 |
+
)
|
| 268 |
+
return f'<div class="sc-overlay"><div class="sc-overlay-panel">{panel}</div></div>'
|
| 269 |
+
return ""
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def _js_boot_lines(settings: UiSettings, files: list[str]) -> str:
    """Return JS statements that publish workspace, slash commands and file paths on ``window``.

    The values are JSON-encoded. Because ``_embed_js`` places this text inside
    a ``<script>`` element, ``<``, ``>`` and ``&`` are additionally written as
    ``\\uXXXX`` escapes: plain ``json.dumps`` leaves them untouched, so a file
    name or command containing ``</script>`` would otherwise end the script
    element early. The escapes decode to the same characters in JavaScript.

    Only the first ``AUTOCOMPLETE_FILE_LIMIT`` paths (after sorting) are included.
    """

    def _script_safe(value) -> str:
        # With json.dumps' default ensure_ascii, these characters can only occur
        # inside string literals, where a \u escape is equivalent.
        return (
            json.dumps(value)
            .replace("<", "\\u003c")
            .replace(">", "\\u003e")
            .replace("&", "\\u0026")
        )

    cmds = slash_commands(settings.workspace)
    paths = sorted(files)[:AUTOCOMPLETE_FILE_LIMIT]
    return (
        f"window.__smolcode_workspace={_script_safe(settings.workspace)};"
        f"window.__smolcode_commands={_script_safe(cmds)};"
        f"window.__smolcode_files={_script_safe(paths)};"
    )
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def _embed_js(settings: UiSettings, files: list[str]) -> str:
    """Wrap the boot statements from ``_js_boot_lines`` in a ``<script>`` element."""
    boot = _js_boot_lines(settings, files)
    return "<script>" + boot + "</script>"
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def _outputs(
    transcript: Transcript,
    app_state: AppSessionState,
    settings: UiSettings,
    ui: WebUiState,
    files: list[str],
    *,
    running: bool = False,
    trace_path: str | None = None,
):
    """Assemble the 15-item output tuple that the handlers in this module yield.

    The position of each item is significant; it presumably mirrors the order
    of the Gradio output components wired up elsewhere (confirm against the
    event bindings before reordering).
    """
    overlay_markup = _overlay_html(ui)

    transcript_html = transcript.render_html(running=running)
    header_html = _header(settings, ui)
    status_html = _status(settings, app_state, running=running)
    sidebar = gr.update(
        value=_sidebar_html(ui, settings, files, app_state),
        visible=ui.sidebar_visible,
    )
    # The overlay component is hidden whenever there is no markup to show.
    overlay = gr.update(value=overlay_markup, visible=bool(overlay_markup))
    theme_html = shell_theme_html(ui.theme_idx)
    # The approval prompt is visible only while a request is pending.
    approval_box = gr.update(visible=bool(app_state.approval.pending_desc))
    approval_text = app_state.approval.pending_desc or ""

    return (
        transcript_html,
        header_html,
        status_html,
        sidebar,
        overlay,
        theme_html,
        approval_box,
        approval_text,
        files,
        trace_path,
        app_state,
        settings,
        ui,
        transcript,
        "",  # clear editor
    )
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def _apply_slash_ui(sr: SlashResult, settings: UiSettings, ui: WebUiState, transcript: Transcript):
|
| 317 |
+
if sr.cycle_mode:
|
| 318 |
+
settings.mode = cycle_mode(settings.mode)
|
| 319 |
+
transcript.append_info(f"mode → {settings.mode}")
|
| 320 |
+
if sr.cycle_think:
|
| 321 |
+
settings.think = cycle_think(settings.think)
|
| 322 |
+
transcript.append_info(f"think → {settings.think}")
|
| 323 |
+
if sr.set_think:
|
| 324 |
+
settings.think = sr.set_think
|
| 325 |
+
transcript.append_info(f"think → {settings.think}")
|
| 326 |
+
if sr.toggle_sidebar:
|
| 327 |
+
ui.sidebar_visible = not ui.sidebar_visible
|
| 328 |
+
if sr.toggle_sidebar_view:
|
| 329 |
+
ui.sidebar_view = "stats" if ui.sidebar_view == "files" else "files"
|
| 330 |
+
if sr.show_help:
|
| 331 |
+
ui.overlay = "help"
|
| 332 |
+
if sr.show_whichkey:
|
| 333 |
+
ui.overlay = "whichkey"
|
| 334 |
+
if sr.open_picker:
|
| 335 |
+
ui.overlay = "picker"
|
| 336 |
+
ui.picker_kind = sr.open_picker
|
| 337 |
+
ui.picker_items = _picker_items(sr.open_picker, settings)
|
| 338 |
+
ui.picker_sel = _picker_sel_for(sr.open_picker, settings, ui, ui.picker_items)
|
| 339 |
+
transcript.append_info(f"picker → {sr.open_picker}")
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
async def _run_agent_turn(
    task: str,
    transcript: Transcript,
    app_state: AppSessionState,
    settings: UiSettings,
    ui: WebUiState,
    files: list[str],
):
    """Run one agent turn for ``task``, yielding an ``_outputs`` tuple per UI update.

    Flow:
      1. If no model has been picked yet, open the model picker and stop.
      2. Ensure the Rust session exists, clear any stale cancel flag, build a
         ``Router`` for the current selection and announce the starting tier.
      3. Stream ``router.run_live`` frames into the transcript, refreshing the
         file list whenever a frame carries one.
      4. On an uncancelled result with an agent, refresh background jobs, save
         the session and try to write a trace file.

    The "interrupted" notice is appended at most once per turn, and a failed
    trace save is reported in the transcript instead of being dropped silently
    (it still does not abort the turn).
    """
    # Blocking model pick: refuse to run until the user has chosen from the modal.
    if ui.needs_model_pick:
        ui.overlay = "picker"
        ui.picker_kind = "models"
        ui.picker_items = _model_labels()
        ui.picker_sel = _model_sel_index(settings)
        transcript.append_info("pick a model to start — Auto is recommended")
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    rust = _ensure_rust(app_state, settings)
    rust.clear_cancel()
    preset, size_floor = _effective_preset(settings.model)
    router = Router(
        preset=preset,
        approval_handler=app_state.approval.ask,
        workspace_dir=settings.workspace,
        think=settings.think,
        yolo=settings.yolo,
        agent=settings.agent,
        size_floor=size_floor,
    )
    ladder, start, _think = router._route(task)  # real routing for the badge
    transcript.append_user(task)
    transcript.append_info(f"routed to {ladder.tiers[start].name}")
    ui.overlay = ""
    yield _outputs(transcript, app_state, settings, ui, files, running=True)

    result: RouteResult | None = None
    interrupt_noted = False  # ensures "interrupted" is appended only once
    async for frame in router.run_live(task, rust_session=rust):
        if frame.raw_event:
            ingest_agent_event(transcript, frame.raw_event)
        if frame.files:
            files = _normalize_paths(frame.files)
        if frame.done and isinstance(frame.result, RouteResult):
            result = frame.result
        if rust.cancelled and not interrupt_noted:
            transcript.append_error("interrupted")
            interrupt_noted = True
        yield _outputs(transcript, app_state, settings, ui, files, running=not frame.done)

    trace_path = None
    if result and result.agent and not rust.cancelled:
        app_state.bg_jobs = list_background_jobs()
        rust.save()
        try:
            trace_path = str(save_trace(build_trace(
                result.agent, task, result.final,
                preset=PRESET.key, model=result.tier_model,
            )))
        except Exception as exc:
            # Trace export is best-effort: keep the turn's result, but tell the
            # user why no trace file is offered.
            transcript.append_info(f"trace not saved: {exc}")
    yield _outputs(transcript, app_state, settings, ui, files, trace_path=trace_path)
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
async def respond(
    message: str,
    transcript: Transcript,
    app_state: AppSessionState,
    settings: UiSettings,
    ui: WebUiState,
    files: list[str],
):
    """Handle one submitted editor message, yielding ``_outputs`` tuples.

    The stripped message is classified by ``parse_input`` into one of:
      * a shell command: run through the Rust session, output echoed;
      * a slash command: "/search <q>" is answered from the transcript, every
        other slash command goes through ``dispatch_slash`` and may queue an
        agent task;
      * a plain task: handed to ``_run_agent_turn``.
    An empty message only refreshes the outputs.
    """
    text = (message or "").strip()
    app_state.settings = settings

    if not text:
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    task, slash, shell_cmd = parse_input(
        text,
        workspace_files=files,
        workspace=settings.workspace,
        rust=app_state.rust,
    )

    if shell_cmd:
        session = _ensure_rust(app_state, settings)
        shell_out = session.run_shell(shell_cmd)
        transcript.append_user(f"!{shell_cmd}")
        transcript.append_info(shell_out)
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    if not slash:
        async for update in _run_agent_turn(task, transcript, app_state, settings, ui, files):
            yield update
        return

    if slash.startswith("/search "):
        query = slash.split(maxsplit=1)[1]
        hits = transcript.search(query)
        transcript.append_user(slash)
        if hits:
            transcript.append_info("\n".join(hits))
        else:
            transcript.append_info(f"no matches for '{query}'")
        yield _outputs(transcript, app_state, settings, ui, files)
        return

    sr = dispatch_slash(slash, app_state)
    _apply_slash_ui(sr, settings, ui, transcript)
    # Clearing happens after the UI flags are applied, so info lines added by
    # _apply_slash_ui are wiped too when the command asks for a clear.
    if sr.clear_chat:
        transcript.clear()
    if sr.reply:
        transcript.append_user(slash)
        # Strip the markdown bold/code markers before showing the reply.
        transcript.append_info(sr.reply.replace("**", "").replace("`", ""))
    if sr.queued_task:
        async for update in _run_agent_turn(sr.queued_task, transcript, app_state, settings, ui, files):
            yield update
        return
    yield _outputs(transcript, app_state, settings, ui, files, trace_path=sr.download_path)
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def on_interrupt(app_state: AppSessionState):
    """Request cancellation on the attached Rust session, if there is one.

    Returns:
        The same ``app_state`` object that was passed in.
    """
    session = app_state.rust
    if session:
        session.request_cancel()
    return app_state
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
def on_clear(transcript: Transcript, ui: WebUiState):
    """Empty the transcript and reset overlay/picker fields on ``ui``.

    Returns:
        ``(transcript, ui, "")`` -- the trailing empty string is the value
        returned for the third output of the event.
    """
    transcript.clear()
    # Reset every overlay/picker field to its blank value.
    ui.overlay = ui.picker_kind = ""
    ui.picker_items = []
    ui.picker_sel = 0
    return transcript, ui, ""
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def on_close_overlay(ui: WebUiState):
    """Reset overlay/picker fields on ``ui`` and hide the overlay component.

    Returns:
        ``(ui, update)`` where ``update`` blanks the overlay value and sets
        ``visible=False``.
    """
    ui.overlay = ui.picker_kind = ""
    ui.picker_items = []
    ui.picker_sel = 0
    hidden = gr.update(value="", visible=False)
    return ui, hidden
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
def on_open_picker(kind: str, ui: WebUiState, settings: UiSettings):
    """Open the picker overlay for ``kind`` and render it.

    ``ui`` is switched to the "picker" overlay, its item list is filled from
    ``_picker_items`` and the initial selection comes from
    ``_picker_sel_for``.

    Returns:
        ``(ui, update)`` where ``update`` shows the freshly rendered overlay.
    """
    ui.overlay, ui.picker_kind = "picker", kind
    ui.picker_items = _picker_items(kind, settings)
    ui.picker_sel = _picker_sel_for(kind, settings, ui, ui.picker_items)
    rendered = _overlay_html(ui)
    return ui, gr.update(value=rendered, visible=True)
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
def on_picker_nav(delta: int, ui: WebUiState):
    """Move the picker selection by ``delta`` and re-render the overlay.

    The selection is clamped to the valid index range of ``ui.picker_items``;
    with no items the selection is left untouched.

    Returns:
        ``(ui, update)``; the overlay is visible only when the rendered HTML
        is non-empty.
    """
    if ui.picker_items:
        last = len(ui.picker_items) - 1
        ui.picker_sel = min(max(ui.picker_sel + delta, 0), last)
    rendered = _overlay_html(ui)
    return ui, gr.update(value=rendered, visible=bool(rendered))
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def on_picker_select(
    pick_idx: str,
    ui: WebUiState,
    settings: UiSettings,
    app_state: AppSessionState,
    transcript: Transcript,
    files: list[str],
):
    """Apply the chosen picker entry, then close the picker.

    ``pick_idx`` is the index as text; when it is empty or not an integer the
    current ``ui.picker_sel`` is used instead. The index is clamped to the
    item list. What "apply" means depends on ``ui.picker_kind``:
    "models", "themes", "agents" or "sessions".

    ``files`` is accepted but not used by this handler.

    Returns:
        A 9-tuple: transcript HTML, header, status, overlay update (hidden),
        theme HTML, ``settings``, ``ui``, ``transcript``, ``app_state``.
    """
    # NOTE(review): nesting was reconstructed from a flattened diff view. The
    # "model →" / "agent →" log lines are assumed to sit inside their guards
    # (as the theme branch must); confirm against the real file.
    idx = ui.picker_sel
    if pick_idx:
        try:
            idx = int(pick_idx)
        except ValueError:
            pass  # keep the keyboard selection

    kind, items = ui.picker_kind, ui.picker_items
    if items:
        item = items[min(max(idx, 0), len(items) - 1)]
        if kind == "models":
            selection = _label_to_selection(item)
            if selection:
                settings.model, settings.think = selection
                ui.needs_model_pick = False
                transcript.append_info(f"model → {item}")
        elif kind == "themes":
            names = theme_names()
            if item in names:
                ui.theme_idx = names.index(item)
                transcript.append_info(f"theme → {item}")
        elif kind == "agents":
            # The agent cannot be switched while in plan mode.
            if settings.mode != "plan":
                settings.agent = item
                transcript.append_info(f"agent → {item}")
        elif kind == "sessions":
            sid = parse_session_label(item)
            if sid:
                rust = RustSession(workspace=settings.workspace, agent=settings.agent, yolo=settings.yolo)
                if rust.load_session(sid):
                    app_state.rust = rust
                    transcript.clear()
                    transcript.from_stored_chat(get_session_chat(sid))
                    transcript.append_info(f"loaded session {sid[:8]}")

    # Close the picker regardless of whether anything was applied.
    ui.overlay = ui.picker_kind = ""
    ui.picker_items = []
    ui.picker_sel = 0
    overlay_val = _overlay_html(ui)

    return (
        transcript.render_html(),
        _header(settings, ui),
        _status(settings, app_state),
        gr.update(value=overlay_val, visible=False),
        shell_theme_html(ui.theme_idx),
        settings,
        ui,
        transcript,
        app_state,
    )
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
def _cycle_outputs(
    settings: UiSettings,
    ui: WebUiState,
    app_state: AppSessionState,
    transcript: Transcript,
):
    """Build the return tuple shared by the ``on_cycle_*`` handlers.

    Returns:
        ``(settings, transcript, transcript HTML, header, status, theme HTML)``.
    """
    transcript_html = transcript.render_html()
    header_html = _header(settings, ui)
    status_html = _status(settings, app_state)
    theme_html = shell_theme_html(ui.theme_idx)
    return settings, transcript, transcript_html, header_html, status_html, theme_html
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
def on_toggle_sidebar(ui: WebUiState, settings: UiSettings, files: list[str], app_state: AppSessionState):
    """Flip ``ui.sidebar_visible`` and re-render the sidebar.

    Returns:
        ``(ui, update)`` where ``update`` carries the new sidebar HTML and
        the new visibility.
    """
    ui.sidebar_visible = not ui.sidebar_visible
    sidebar_update = gr.update(
        value=_sidebar_html(ui, settings, files, app_state),
        visible=ui.sidebar_visible,
    )
    return ui, sidebar_update
|
| 581 |
+
|
| 582 |
+
|
| 583 |
+
def on_toggle_sidebar_view(
    ui: WebUiState, settings: UiSettings, files: list[str], app_state: AppSessionState,
):
    """Switch the sidebar between its "files" and "stats" views.

    Any value other than "files" switches back to "files".

    Returns:
        ``(ui, update)`` where ``update`` carries the re-rendered sidebar HTML.
    """
    if ui.sidebar_view == "files":
        ui.sidebar_view = "stats"
    else:
        ui.sidebar_view = "files"
    return ui, gr.update(value=_sidebar_html(ui, settings, files, app_state))
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
def on_load(settings: UiSettings, app_state: AppSessionState, ui: WebUiState):
    """Populate the page when it loads.

    Lists the workspace paths, records their total on ``ui`` and, when
    ``ui.needs_model_pick`` is set, opens the blocking startup model picker.

    Returns:
        A 6-tuple: sidebar HTML, workspace paths, embedded JS, an update with
        the session choices, the overlay update, and ``ui``.
    """
    paths, total = workspace_paths(settings.workspace)
    ui.file_total = total

    overlay_val = ""
    if ui.needs_model_pick:  # blocking startup model picker
        ui.overlay, ui.picker_kind = "picker", "models"
        ui.picker_items = _model_labels()
        ui.picker_sel = _model_sel_index(settings)
        overlay_val = _overlay_html(ui)

    sidebar_html = _sidebar_html(ui, settings, paths, app_state)
    boot_js = _embed_js(settings, paths)
    sessions_update = gr.update(choices=session_choices())
    overlay_update = gr.update(value=overlay_val, visible=bool(overlay_val))
    return sidebar_html, paths, boot_js, sessions_update, overlay_update, ui
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
def on_cycle_mode(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance ``settings.mode`` and keep agent/yolo consistent with it.

    Entering "plan" mode forces the "plan" agent; leaving it while the agent
    is still "plan" falls back to "build". ``settings.yolo`` is true exactly
    when the new mode is "auto".

    Returns:
        The tuple produced by ``_cycle_outputs``.
    """
    mode = cycle_mode(settings.mode)
    settings.mode = mode
    if mode == "plan":
        settings.agent = "plan"
    elif settings.agent == "plan":
        settings.agent = "build"
    settings.yolo = mode == "auto"
    transcript.append_info(f"mode → {mode}")
    return _cycle_outputs(settings, ui, app_state, transcript)
|
| 619 |
+
|
| 620 |
+
|
| 621 |
+
def on_cycle_agent(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance ``settings.agent`` unless the UI is in plan mode.

    Returns:
        The tuple produced by ``_cycle_outputs``.
    """
    # NOTE(review): indentation was reconstructed from a flattened diff view;
    # the log line is assumed to run only when the agent actually changes.
    in_plan_mode = settings.mode == "plan"
    if not in_plan_mode:
        settings.agent = cycle_agent(settings.agent)
        transcript.append_info(f"agent → {settings.agent}")
    return _cycle_outputs(settings, ui, app_state, transcript)
|
| 626 |
+
|
| 627 |
+
|
| 628 |
+
def on_cycle_model(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Step to the next model label and apply it to ``settings``.

    When the next label maps to a selection, ``settings.model`` and
    ``settings.think`` are updated and ``ui.needs_model_pick`` is cleared.

    Returns:
        The tuple produced by ``_cycle_outputs``.
    """
    # NOTE(review): indentation was reconstructed from a flattened diff view;
    # the flag reset and log line are assumed to sit inside the guard.
    next_label = cycle_model(_model_labels(), _selection_label(settings))
    selection = _label_to_selection(next_label)
    if selection:
        settings.model, settings.think = selection
        ui.needs_model_pick = False
        transcript.append_info(f"model → {next_label}")
    return _cycle_outputs(settings, ui, app_state, transcript)
|
| 637 |
+
|
| 638 |
+
|
| 639 |
+
def on_cycle_think(settings: UiSettings, ui: WebUiState, app_state: AppSessionState, transcript: Transcript):
    """Advance ``settings.think`` and log the new value to the transcript.

    Returns:
        The tuple produced by ``_cycle_outputs``.
    """
    think = cycle_think(settings.think)
    settings.think = think
    transcript.append_info(f"think → {think}")
    return _cycle_outputs(settings, ui, app_state, transcript)
|
| 643 |
+
|
| 644 |
+
|
| 645 |
+
def on_help(ui: WebUiState):
    """Switch the overlay to "help" and show it.

    Returns:
        ``(ui, update)`` where ``update`` makes the rendered overlay visible.
    """
    ui.overlay = "help"
    return ui, gr.update(value=_overlay_html(ui), visible=True)
|
| 649 |
+
|
| 650 |
+
|
| 651 |
+
def on_whichkey(ui: WebUiState):
    """Switch the overlay to "whichkey" and show it.

    Returns:
        ``(ui, update)`` where ``update`` makes the rendered overlay visible.
    """
    ui.overlay = "whichkey"
    return ui, gr.update(value=_overlay_html(ui), visible=True)
|
| 655 |
+
|
| 656 |
+
|
| 657 |
+
def on_new_session():
    """Start over with fresh state objects and reopen the model picker.

    Settings are rebuilt from ``SMALLCODE_WORKSPACE`` (default ".") with the
    "auto" model.

    Returns:
        A 7-tuple: new ``Transcript``, new ``AppSessionState``, settings, ui,
        an empty file list, ``None`` for the trace, and the overlay update.
    """
    workspace = os.environ.get("SMALLCODE_WORKSPACE", ".")
    settings = UiSettings(workspace=workspace, model="auto")

    ui = WebUiState()  # needs_model_pick defaults True -> reopen the blocking picker
    ui.overlay, ui.picker_kind = "picker", "models"
    ui.picker_items = _model_labels()
    ui.picker_sel = _model_sel_index(settings)

    fresh_transcript = Transcript()
    fresh_state = AppSessionState()
    overlay_update = gr.update(value=_overlay_html(ui), visible=True)
    return fresh_transcript, fresh_state, settings, ui, [], None, overlay_update
|
| 668 |
+
|
| 669 |
+
|
| 670 |
+
def on_approval(yes: bool, app_state: AppSessionState):
    """Forward an approve/deny decision and hide the approval box.

    The decision is passed to ``app_state.approval.approve`` when
    ``app_state`` is truthy.

    Returns:
        ``(update, "")`` -- an update with ``visible=False`` and an empty
        string for the description.
    """
    if app_state:
        app_state.approval.approve(yes)
    hide_box = gr.update(visible=False)
    return hide_box, ""
|
| 674 |
+
|
| 675 |
+
|
| 676 |
+
def on_session_pick(label: str, app_state: AppSessionState, settings: UiSettings):
    """Load the stored session named by ``label``.

    Returns:
        ``(transcript, app_state)``. On success the transcript is rebuilt
        from the stored chat and ``app_state.rust`` points at the loaded
        session. When the label yields no session id, or loading fails, an
        empty ``Transcript`` is returned and ``app_state`` is left as it was.
    """
    sid = parse_session_label(label or "")
    if sid:
        rust = RustSession(workspace=settings.workspace, agent=settings.agent, yolo=settings.yolo)
        if rust.load_session(sid):
            app_state.rust = rust
            restored = Transcript()
            restored.from_stored_chat(get_session_chat(sid))
            return restored, app_state
    return Transcript(), app_state
|
| 687 |
+
|
| 688 |
+
|
| 689 |
+
def build() -> gr.Blocks:
    """Construct the smolcode Gradio app and wire every event handler.

    Returns:
        The assembled ``gr.Blocks``; queueing and launching are left to the
        caller.
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a flattened diff view (indentation was lost).
    # Confirm the placement of the approval box, status bar and overlay
    # against the real file before relying on this layout.

    # Default selection is Auto (router-driven); the blocking startup modal lets the
    # user confirm or change it before the first task.
    settings = UiSettings(workspace=os.environ.get("SMALLCODE_WORKSPACE", "."), model="auto")

    with gr.Blocks(
        css=SMOLCODE_CSS,
        title="smolcode",
        theme=gr.themes.Soft(primary_hue="purple", neutral_hue="slate"),
        head=f"<script>{_JS_HEAD}\n{_js_boot_lines(settings, [])}</script>",
        fill_height=True,
        fill_width=True,
    ) as demo:
        # Per-session state holders.
        transcript = gr.State(Transcript())
        app_state = gr.State(AppSessionState(settings=settings))
        settings_state = gr.State(settings)
        ui_state = gr.State(WebUiState())
        files_state = gr.State([])
        trace_state = gr.State(None)

        with gr.Column(elem_classes="sc-tui-shell"):
            header = gr.HTML(_header(settings, WebUiState()))
            shell_theme = gr.HTML(shell_theme_html(0), visible=False)
            with gr.Row(elem_classes="sc-main-row"):
                sidebar = gr.HTML(
                    _sidebar_html(WebUiState(), settings, [], AppSessionState()),
                    elem_classes="sc-sidebar",
                    visible=True,
                )
                with gr.Column(elem_classes="sc-main-col"):
                    transcript_view = gr.HTML(Transcript().render_html())
                    with gr.Group(elem_classes="sc-editor-wrap"):
                        gr.HTML(
                            '<div class="sc-editor-hint">'
                            "Enter run · Shift+Enter newline · / commands · ctrl+x leader"
                            "</div>"
                        )
                        editor = gr.Textbox(
                            placeholder="type a task…",
                            lines=5,
                            max_lines=8,
                            show_label=False,
                            elem_id="sc-editor",
                            interactive=True,
                            autofocus=True,
                        )
                    with gr.Group(visible=False) as approval_box:
                        approval_desc = gr.Markdown("", elem_classes="sc-approval")
                        approval_out = [approval_box, approval_desc]
                        with gr.Row():
                            gr.Button("Approve", variant="primary").click(
                                lambda s: on_approval(True, s), app_state, approval_out)
                            gr.Button("Deny").click(
                                lambda s: on_approval(False, s), app_state, approval_out)
            status = gr.HTML(_status(settings, AppSessionState()), elem_classes="sc-status-wrap")

        overlay = gr.HTML("", visible=False)
        js_boot = gr.HTML(_embed_js(settings, []), elem_classes=["sc-hidden-controls"])

        # Off-screen controls (visible=True so Gradio mounts them for JS shortcuts).
        _hid = ["sc-hidden-btn"]

        def hidden_button(label, elem_id):
            # Every shortcut target is a plain button carrying the hidden class.
            return gr.Button(label, elem_id=elem_id, elem_classes=_hid)

        with gr.Row(elem_classes="sc-hidden-controls"):
            btn_submit = hidden_button("submit", "sc-submit")
            btn_clear = hidden_button("clear", "sc-clear")
            btn_interrupt = hidden_button("interrupt", "sc-interrupt")
            btn_toggle_sidebar = hidden_button("sidebar", "sc-toggle-sidebar")
            btn_toggle_view = hidden_button("view", "sc-toggle-sidebar-view")
            btn_cycle_mode = hidden_button("mode", "sc-cycle-mode")
            btn_cycle_agent = hidden_button("agent", "sc-cycle-agent")
            btn_cycle_model = hidden_button("model", "sc-cycle-model")
            btn_cycle_think = hidden_button("think", "sc-cycle-think")
            btn_help = hidden_button("help", "sc-help")
            btn_whichkey = hidden_button("wk", "sc-whichkey")
            btn_close = hidden_button("close", "sc-close-overlay")
            btn_new = hidden_button("new", "sc-new-session")
            btn_open_models = hidden_button("models", "sc-open-picker-models")
            btn_open_themes = hidden_button("themes", "sc-open-picker-themes")
            btn_open_agents = hidden_button("agents", "sc-open-picker-agents")
            btn_open_sessions = hidden_button("sessions", "sc-open-picker-sessions")
            btn_picker_up = hidden_button("up", "sc-picker-up")
            btn_picker_down = hidden_button("down", "sc-picker-down")
            btn_picker_confirm = hidden_button("confirm", "sc-picker-confirm")
            picker_pick = gr.Textbox("", elem_id="sc-picker-pick", elem_classes=_hid, show_label=False)
            session_pick = gr.Dropdown(
                choices=session_choices(), label="session", elem_id="sc-pick-sessions", elem_classes=_hid)
            trace_dl = gr.DownloadButton("trace", elem_classes=_hid)

        # Output lists; each order must match the tuple its handler returns.
        out = [
            transcript_view, header, status, sidebar,
            overlay, shell_theme, approval_box, approval_desc,
            files_state, trace_state, app_state, settings_state, ui_state, transcript, editor,
        ]
        cycle_out = [
            settings_state, transcript, transcript_view, header, status, shell_theme,
        ]
        picker_out = [
            transcript_view, header, status, overlay, shell_theme,
            settings_state, ui_state, transcript, app_state,
        ]
        overlay_out = [ui_state, overlay]

        # Submitting (button or Enter) runs a turn, then hands the trace to the download button.
        respond_in = [editor, transcript, app_state, settings_state, ui_state, files_state]
        for trigger in (btn_submit.click, editor.submit):
            trigger(respond, respond_in, out).then(lambda p: p, trace_state, trace_dl)

        btn_clear.click(on_clear, [transcript, ui_state], [transcript, ui_state, editor])
        btn_interrupt.click(on_interrupt, app_state, app_state)

        sidebar_in = [ui_state, settings_state, files_state, app_state]
        btn_toggle_sidebar.click(on_toggle_sidebar, sidebar_in, [ui_state, sidebar])
        btn_toggle_view.click(on_toggle_sidebar_view, sidebar_in, [ui_state, sidebar])

        cycle_in = [settings_state, ui_state, app_state, transcript]
        for button, handler in (
            (btn_cycle_mode, on_cycle_mode),
            (btn_cycle_agent, on_cycle_agent),
            (btn_cycle_model, on_cycle_model),
            (btn_cycle_think, on_cycle_think),
        ):
            button.click(handler, cycle_in, cycle_out)

        btn_help.click(on_help, ui_state, overlay_out)
        btn_whichkey.click(on_whichkey, ui_state, overlay_out)
        btn_close.click(on_close_overlay, ui_state, overlay_out)
        btn_new.click(
            on_new_session,
            None,
            [transcript, app_state, settings_state, ui_state, files_state, trace_state, overlay],
        )

        def opener(kind):
            # Bind the picker kind now so each button opens its own picker.
            return lambda ui, s: on_open_picker(kind, ui, s)

        for button, kind in (
            (btn_open_models, "models"),
            (btn_open_themes, "themes"),
            (btn_open_agents, "agents"),
            (btn_open_sessions, "sessions"),
        ):
            button.click(opener(kind), [ui_state, settings_state], overlay_out)

        btn_picker_up.click(lambda ui: on_picker_nav(-1, ui), ui_state, overlay_out)
        btn_picker_down.click(lambda ui: on_picker_nav(1, ui), ui_state, overlay_out)
        btn_picker_confirm.click(
            on_picker_select,
            [picker_pick, ui_state, settings_state, app_state, transcript, files_state],
            picker_out,
        )
        session_pick.change(on_session_pick, [session_pick, app_state, settings_state], [transcript, app_state])

        demo.load(
            on_load,
            [settings_state, app_state, ui_state],
            [sidebar, files_state, js_boot, session_pick, overlay, ui_state],
        )

    return demo
|
| 834 |
+
|
| 835 |
+
|
| 836 |
+
if __name__ == "__main__":
    # Script entry point: run the preflight check, export the Gradio server
    # settings, then build and launch the app.
    from engine.preflight import preflight

    preflight(PRESET)
    host = os.environ.get("SMOLCODE_HOST", "127.0.0.1")
    port = os.environ.get("SMOLCODE_PORT", "7860")
    os.environ.update(GRADIO_SERVER_PORT=port, GRADIO_SERVER_NAME=host)
    # server_port=None lets Gradio scan GRADIO_SERVER_PORT..+99 (skips ghost 7860-7862).
    # ssr_mode=False: SSR (default on HF when Node is present) renders before the
    # custom web_tui.js applies the fixed-height layout, leaving the file sidebar
    # uncapped (grows forever, hides the bottom bar/model picker). Client-side render
    # applies the layout immediately.
    app = build().queue()
    app.launch(server_name=host, server_port=None, show_api=False, ssr_mode=False)
|
demo.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d786d4033bd453a36291aeb17f5999f5ca579c9553762d25bf72770b5d37c165
|
| 3 |
+
size 5896625
|
engine/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""smolcode engine package."""
|
| 2 |
+
from .agent import SmallCodeAgent, Step
|
| 3 |
+
from .builder import BuildResult, WebBuilder
|
| 4 |
+
from .config import (
|
| 5 |
+
Preset,
|
| 6 |
+
SpecialistLadder,
|
| 7 |
+
SpecialistPreset,
|
| 8 |
+
Tier,
|
| 9 |
+
default_ui_model,
|
| 10 |
+
load_preset,
|
| 11 |
+
)
|
| 12 |
+
from .fanout import FanoutResult, fan_out, fan_out_live, summarize
|
| 13 |
+
from .preview import inline_app, preview_iframe
|
| 14 |
+
from .router import Router, RouteResult, classify_specialty, classify_tier
|
| 15 |
+
from .rust_session import RustSession, rust_available
|
| 16 |
+
|
| 17 |
+
__all__ = ["SmallCodeAgent", "Step", "Preset", "Tier", "load_preset", "default_ui_model",
|
| 18 |
+
"SpecialistLadder", "SpecialistPreset",
|
| 19 |
+
"Router", "RouteResult", "classify_tier", "classify_specialty",
|
| 20 |
+
"FanoutResult", "fan_out", "fan_out_live", "summarize",
|
| 21 |
+
"WebBuilder", "BuildResult", "inline_app", "preview_iframe",
|
| 22 |
+
"RustSession", "rust_available"]
|
engine/agent.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""smolcode agent engine — backed by the Rust smolcode_core agent loop."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import asyncio
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
from collections.abc import Callable
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
+
|
| 10 |
+
from .config import Preset, load_preset
|
| 11 |
+
from .rust_session import RustRunResult, RustSession, rust_available
|
| 12 |
+
from .sandbox import Workspace
|
| 13 |
+
from .trace_collector import TraceCollector
|
| 14 |
+
|
| 15 |
+
# Legacy prompt kept for docs; Rust agent uses prompts.rs system prompts.
|
| 16 |
+
SYSTEM_PROMPT = """You are smolcode, a precise coding assistant running on a small local model."""
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class Step:
    """One entry in an agent run, as exposed to callers of the agent."""

    # Position of the step within the run.
    number: int
    # Event kind label for the step.
    kind: str
    # Free-form detail text for the step.
    detail: str
    # Optional token count; None when not provided.
    total_tokens: int | None = None
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class SmallCodeAgent:
    """Agent facade over the Rust ``smolcode_core`` engine.

    Construction fails with ``RuntimeError`` when the Rust extension is not
    available. All work is delegated to a ``RustSession``; this class adds
    workspace ownership, trace-to-``Step`` conversion and a polling loop for
    live UI updates.
    """

    def __init__(
        self,
        preset: Preset | None = None,
        model: str | None = None,
        max_steps: int = 12,
        *,
        system_prompt: str | None = None,
        registry_builder: Callable | None = None,
        workspace: Workspace | None = None,
        name: str = "smolcode",
        agent: str = "build",
        profile: str = "full",
        yolo: bool = False,
        workspace_dir: str | None = None,
        approval_handler=None,
        rust_session: RustSession | None = None,
    ) -> None:
        """Set up the workspace and the backing Rust session.

        Args:
            preset: Connection/model preset; ``load_preset()`` when omitted.
            model: Model name; the preset's default model when omitted.
            max_steps: Stored on the instance; not otherwise used by this class.
            system_prompt: Stored only; unused by Rust, kept for API compat.
            registry_builder: When not ``None`` the session profile becomes
                "web" and the ``check_app`` tool is registered. The callable
                itself is never invoked by this class.
            workspace: Existing workspace to use; takes precedence over
                ``workspace_dir`` and the environment.
            name: Accepted for API compatibility; not used by this class.
            agent: Agent name passed to a newly created ``RustSession``.
            profile: Session profile (overridden by ``registry_builder``).
            yolo: Passed to a newly created ``RustSession``.
            workspace_dir: Workspace path; falls back to the
                ``SMALLCODE_WORKSPACE`` environment variable, then to a fresh
                temporary directory owned by this agent.
            approval_handler: Passed to a newly created ``RustSession``.
            rust_session: Pre-built session to reuse instead of creating one.

        Raises:
            RuntimeError: If ``smolcode_core`` is not available.
        """
        # Check the engine first: raising after mkdtemp() would leave a
        # temporary directory behind that nothing could ever clean up.
        if not rust_available():
            raise RuntimeError(
                "smolcode_core required; install with maturin in smolcode-cli/crates/smolcode-py"
            )

        self.preset = preset or load_preset()
        self.model = model or self.preset.default_model
        self.max_steps = max_steps
        self._system_prompt = system_prompt  # unused by Rust; kept for API compat
        self._registry_builder = registry_builder
        self.hit_max_steps = False
        self.errored = False

        # Only a directory created here is owned (and later removed) by us.
        self._owns_workspace = False
        ws_path = workspace_dir or os.environ.get("SMALLCODE_WORKSPACE")
        if workspace is not None:
            ws_path = str(workspace.root)
        elif ws_path is None:
            ws_path = tempfile.mkdtemp(prefix="smallcode-")
            self._owns_workspace = True

        self.workspace = workspace or Workspace(root=ws_path)

        profile_name = "web" if registry_builder is not None else profile

        if rust_session is not None:
            self._rust = rust_session
        else:
            self._rust = RustSession(
                workspace=ws_path,
                agent=agent,
                yolo=yolo,
                model=self.model,
                base_url=self.preset.base_url,
                api_key=self.preset.api_key,
                profile=profile_name,
                approval_handler=approval_handler,
            )
        self.trace_collector = self._rust.trace_collector

        if registry_builder is not None:
            self._register_web_tools()

    def _register_web_tools(self) -> None:
        """Register the ``check_app`` tool on the Rust session."""
        from .tools import check_app_impl

        ws = self.workspace
        collector = self.trace_collector

        def check_app(args: dict) -> dict:
            return check_app_impl(ws, collector, args)

        self._rust.register_tool("check_app", check_app)

    async def run(self, task: str, *, think: str | None = None, yolo: bool | None = None) -> tuple[str, list[Step]]:
        """Run ``task`` to completion on the Rust session.

        Returns:
            ``(final_text, steps)`` where ``steps`` is built from the trace
            collector. ``hit_max_steps`` and ``errored`` are copied from the
            run result onto the instance.
        """
        self.hit_max_steps = False
        self.errored = False
        result: RustRunResult = await self._rust.run(task, think=think, yolo=yolo)
        self.hit_max_steps = result.hit_max_steps
        self.errored = result.errored
        steps = self._steps_from_trace()
        return result.final, steps

    async def run_live_turn(
        self,
        task: str,
        *,
        think: str | None = None,
        yolo: bool | None = None,
        poll_interval: float = 0.35,
    ):
        """Async generator yielding LiveFrame snapshots during a Rust agent turn.

        The turn is started, then events are polled in a worker thread. A
        frame is yielded for every ingested event and for every empty poll
        (followed by a ``poll_interval`` sleep). "approval" events are
        answered through the session's approval handler (approved by default
        when there is none) and produce no frame. The last frame has
        ``done=True`` and carries ``result=(final_text, steps)``.

        When cancellation is observed the loop stops, ``errored`` is set and
        the turn is not recorded.
        """
        from .live_run import LiveFrame

        self.hit_max_steps = False
        self.errored = False
        self.trace_collector.events.clear()
        self._rust.clear_cancel()
        self._rust._session.start_turn(task, think=think, yolo=yolo)
        final_text = ""
        done = False
        interrupted = False
        while not done:
            if self._rust.cancelled:
                interrupted = True
                done = True
                break
            # poll_event is a blocking call, so keep it off the event loop.
            ev = await asyncio.to_thread(self._rust._session.poll_event)
            if ev is None:
                yield LiveFrame(
                    events=self.trace_collector.snapshot(),
                    files=self.files(),
                )
                await asyncio.sleep(poll_interval)
                continue
            kind = ev.get("kind")
            if kind == "approval":
                approved = True
                if self._rust.approval_handler is not None:
                    approved = await self._rust.approval_handler(ev.get("desc", ""))
                self._rust._session.approve(approved)
                continue
            self._rust._ingest_event(ev)
            if kind == "final":
                final_text = ev.get("text", "")
            if kind == "done":
                done = True
            yield LiveFrame(
                events=self.trace_collector.snapshot(),
                files=self.files(),
                raw_event=ev,
            )
        if interrupted:
            final_text = final_text or "interrupted"
            self.errored = True
        if final_text and not interrupted:
            self._rust._session.record_turn(task, final_text)
        steps = self._steps_from_trace()
        yield LiveFrame(
            steps=steps,
            events=self.trace_collector.snapshot(),
            files=self.files(),
            done=True,
            result=(final_text, steps),
        )

    def _steps_from_trace(self) -> list[Step]:
        """Convert the collected trace events into ``Step`` objects, numbered from 0."""
        return [
            Step(number=i, kind=ev.kind, detail=ev.detail)
            for i, ev in enumerate(self.trace_collector.events)
        ]

    def current_steps(self) -> list[Step]:
        """Return the steps for the events collected so far."""
        return self._steps_from_trace()

    def raw_history(self) -> list:
        """Return the same list as ``current_steps``."""
        return self.current_steps()

    def files(self) -> dict[str, str]:
        """Return the file mapping reported by the Rust session."""
        return self._rust.files()

    @property
    def rust_session(self) -> RustSession:
        """The backing ``RustSession``."""
        return self._rust

    def cleanup(self) -> None:
        """Clean up the workspace, but only if this agent created it."""
        if getattr(self, "_owns_workspace", False):
            self.workspace.cleanup()
|
engine/branding.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared Hugging Face branding for smolcode Gradio UIs."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from .themes import theme_css_vars
|
| 5 |
+
|
| 6 |
+
# Official HF icon (huggingface.co/front/assets/huggingface_logo-noborder.svg)
|
| 7 |
+
HF_LOGO_SVG = (
|
| 8 |
+
'<svg class="hf-logo" xmlns="http://www.w3.org/2000/svg" width="32" height="30" '
|
| 9 |
+
'viewBox="0 0 95 88" fill="none" aria-label="Hugging Face">'
|
| 10 |
+
'<path fill="#FFD21E" d="M47.21 76.5a34.75 34.75 0 1 0 0-69.5 34.75 34.75 0 0 0 0 69.5Z" />'
|
| 11 |
+
'<path fill="#FF9D0B" d="M81.96 41.75a34.75 34.75 0 1 0-69.5 0 34.75 34.75 0 0 0 69.5 0Zm-73.5 0a38.75 38.75 0 1 1 77.5 0 38.75 38.75 0 0 1-77.5 0Z" />'
|
| 12 |
+
'<path fill="#3A3B45" d="M58.5 32.3c1.28.44 1.78 3.06 3.07 2.38a5 5 0 1 0-6.76-2.07c.61 1.15 2.55-.72 3.7-.32ZM34.95 32.3c-1.28.44-1.79 3.06-3.07 2.38a5 5 0 1 1 6.76-2.07c-.61 1.15-2.56-.72-3.7-.32Z" />'
|
| 13 |
+
'<path fill="#FF323D" d="M46.96 56.29c9.83 0 13-8.76 13-13.26 0-2.34-1.57-1.6-4.09-.36-2.33 1.15-5.46 2.74-8.9 2.74-7.19 0-13-6.88-13-2.38s3.16 13.26 13 13.26Z" />'
|
| 14 |
+
'<path fill="#3A3B45" fill-rule="evenodd" d="M39.43 54a8.7 8.7 0 0 1 5.3-4.49c.4-.12.81.57 1.24 1.28.4.68.82 1.37 1.24 1.37.45 0 .9-.68 1.33-1.35.45-.7.89-1.38 1.32-1.25a8.61 8.61 0 0 1 5 4.17c3.73-2.94 5.1-7.74 5.1-10.7 0-2.34-1.57-1.6-4.09-.36l-.14.07c-2.31 1.15-5.39 2.67-8.77 2.67s-6.45-1.52-8.77-2.67c-2.6-1.29-4.23-2.1-4.23.29 0 3.05 1.46 8.06 5.47 10.97Z" clip-rule="evenodd" />'
|
| 15 |
+
'<path fill="#FF9D0B" d="M70.71 37a3.25 3.25 0 1 0 0-6.5 3.25 3.25 0 0 0 0 6.5ZM24.21 37a3.25 3.25 0 1 0 0-6.5 3.25 3.25 0 0 0 0 6.5ZM17.52 48c-1.62 0-3.06.66-4.07 1.87a5.97 5.97 0 0 0-1.33 3.76 7.1 7.1 0 0 0-1.94-.3c-1.55 0-2.95.59-3.94 1.66a5.8 5.8 0 0 0-.8 7 5.3 5.3 0 0 0-1.79 2.82c-.24.9-.48 2.8.8 4.74a5.22 5.22 0 0 0-.37 5.02c1.02 2.32 3.57 4.14 8.52 6.1 3.07 1.22 5.89 2 5.91 2.01a44.33 44.33 0 0 0 10.93 1.6c5.86 0 10.05-1.8 12.46-5.34 3.88-5.69 3.33-10.9-1.7-15.92-2.77-2.78-4.62-6.87-5-7.77-.78-2.66-2.84-5.62-6.25-5.62a5.7 5.7 0 0 0-4.6 2.46c-1-1.26-1.98-2.25-2.86-2.82A7.4 7.4 0 0 0 17.52 48Zm0 4c.51 0 1.14.22 1.82.65 2.14 1.36 6.25 8.43 7.76 11.18.5.92 1.37 1.31 2.14 1.31 1.55 0 2.75-1.53.15-3.48-3.92-2.93-2.55-7.72-.68-8.01.08-.02.17-.02.24-.02 1.7 0 2.45 2.93 2.45 2.93s2.2 5.52 5.98 9.3c3.77 3.77 3.97 6.8 1.22 10.83-1.88 2.75-5.47 3.58-9.16 3.58-3.81 0-7.73-.9-9.92-1.46-.11-.03-13.45-3.8-11.76-7 .28-.54.75-.76 1.34-.76 2.38 0 6.7 3.54 8.57 3.54.41 0 .7-.17.83-.6.79-2.85-12.06-4.05-10.98-8.17.2-.73.71-1.02 1.44-1.02 3.14 0 10.2 5.53 11.68 5.53.11 0 .2-.03.24-.1.74-1.2.33-2.04-4.9-5.2-5.21-3.16-8.88-5.06-6.8-7.33.24-.26.58-.38 1-.38 3.17 0 10.66 6.82 10.66 6.82s2.02 2.1 3.25 2.1c.28 0 .52-.1.68-.38.86-1.46-8.06-8.22-8.56-11.01-.34-1.9.24-2.85 1.31-2.85Z" />'
|
| 16 |
+
'<path fill="#FFD21E" d="M38.6 76.69c2.75-4.04 2.55-7.07-1.22-10.84-3.78-3.77-5.98-9.3-5.98-9.3s-.82-3.2-2.69-2.9c-1.87.3-3.24 5.08.68 8.01 3.91 2.93-.78 4.92-2.29 2.17-1.5-2.75-5.62-9.82-7.76-11.18-2.13-1.35-3.63-.6-3.13 2.2.5 2.79 9.43 9.55 8.56 11-.87 1.47-3.93-1.71-3.93-1.71s-9.57-8.71-11.66-6.44c-2.08 2.27 1.59 4.17 6.8 7.33 5.23 3.16 5.64 4 4.9 5.2-.75 1.2-12.28-8.53-13.36-4.4-1.08 4.11 11.77 5.3 10.98 8.15-.8 2.85-9.06-5.38-10.74-2.18-1.7 3.21 11.65 6.98 11.76 7.01 4.3 1.12 15.25 3.49 19.08-2.12Z" />'
|
| 17 |
+
'<path fill="#FF9D0B" d="M77.4 48c1.62 0 3.07.66 4.07 1.87a5.97 5.97 0 0 1 1.33 3.76 7.1 7.1 0 0 1 1.95-.3c1.55 0 2.95.59 3.94 1.66a5.8 5.8 0 0 1 .8 7 5.3 5.3 0 0 1 1.78 2.82c.24.9.48 2.8-.8 4.74a5.22 5.22 0 0 1 .37 5.02c-1.02 2.32-3.57 4.14-8.51 6.1-3.08 1.22-5.9 2-5.92 2.01a44.33 44.33 0 0 1-10.93 1.6c-5.86 0-10.05-1.8-12.46-5.34-3.88-5.69-3.33-10.9 1.7-15.92 2.78-2.78 4.63-6.87 5.01-7.77.78-2.66 2.83-5.62 6.24-5.62a5.7 5.7 0 0 1 4.6 2.46c1-1.26 1.98-2.25 2.87-2.82A7.4 7.4 0 0 1 77.4 48Zm0 4c-.51 0-1.13.22-1.82.65-2.13 1.36-6.25 8.43-7.76 11.18a2.43 2.43 0 0 1-2.14 1.31c-1.54 0-2.75-1.53-.14-3.48 3.91-2.93 2.54-7.72.67-8.01a1.54 1.54 0 0 0-.24-.02c-1.7 0-2.45 2.93-2.45 2.93s-2.2 5.52-5.97 9.3c-3.78 3.77-3.98 6.8-1.22 10.83 1.87 2.75 5.47 3.58 9.15 3.58 3.82 0 7.73-.9 9.93-1.46.1-.03 13.45-3.8 11.76-7-.29-.54-.75-.76-1.34-.76-2.38 0-6.71 3.54-8.57 3.54-.42 0-.71-.17-.83-.6-.8-2.85 12.05-4.05 10.97-8.17-.19-.73-.7-1.02-1.44-1.02-3.14 0-10.2 5.53-11.68 5.53-.1 0-.19-.03-.23-.1-.74-1.2-.34-2.04 4.88-5.2 5.23-3.16 8.9-5.06 6.8-7.33-.23-.26-.57-.38-.98-.38-3.18 0-10.67 6.82-10.67 6.82s-2.02 2.1-3.24 2.1a.74.74 0 0 1-.68-.38c-.87-1.46 8.05-8.22 8.55-11.01.34-1.9-.24-2.85-1.31-2.85Z" />'
|
| 18 |
+
'<path fill="#FFD21E" d="M56.33 76.69c-2.75-4.04-2.56-7.07 1.22-10.84 3.77-3.77 5.97-9.3 5.97-9.3s.82-3.2 2.7-2.9c1.86.3 3.23 5.08-.68 8.01-3.92 2.93.78 4.92 2.28 2.17 1.51-2.75 5.63-9.82 7.76-11.18 2.13-1.35 3.64-.6 3.13 2.2-.5 2.79-9.42 9.55-8.55 11 .86 1.47 3.92-1.71 3.92-1.71s9.58-8.71 11.66-6.44c2.08 2.27-1.58 4.17-6.8 7.33-5.23 3.16-5.63 4-4.9 5.2.75 1.2 12.28-8.53 13.36-4.4 1.08 4.11-11.76 5.3-10.97 8.15.8 2.85 9.05-5.38 10.74-2.18 1.69 3.21-11.65 6.98-11.76 7.01-4.31 1.12-15.26 3.49-19.08-2.12Z" />'
|
| 19 |
+
'</svg>'
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
SMOLCODE_CSS = """
|
| 23 |
+
:root { --hf-yellow:#FFD21E; --sc-accent:#7c3aed; --sc-bg:#0b1020; --sc-panel:#111827;
|
| 24 |
+
--sc-border:#334155; --sc-fg:#e2e8f0; --sc-dim:#64748b; --sc-ok:#34d399; --sc-tool:#a78bfa; }
|
| 25 |
+
body, .gradio-container { background:var(--sc-bg) !important; color:var(--sc-fg) !important; }
|
| 26 |
+
/* Lock the whole page to the viewport so it can NEVER scroll; only inner panes scroll. */
|
| 27 |
+
html, body { height:100% !important; max-height:100vh !important; margin:0 !important;
|
| 28 |
+
overflow:hidden !important; }
|
| 29 |
+
gradio-app { display:block !important; height:100vh !important; max-height:100vh !important;
|
| 30 |
+
overflow:hidden !important; }
|
| 31 |
+
.gradio-container { max-width:100% !important; padding:0.5rem 1rem !important;
|
| 32 |
+
height:100vh !important; max-height:100vh !important; min-height:0 !important;
|
| 33 |
+
overflow:hidden !important; }
|
| 34 |
+
/* Every Gradio wrapper between the container and our shell must be height-locked, not auto. */
|
| 35 |
+
.gradio-container > .wrap, .gradio-container .contain,
|
| 36 |
+
main.fillable, main.app, .gradio-container > main {
|
| 37 |
+
height:100% !important; max-height:100% !important; min-height:0 !important;
|
| 38 |
+
overflow:hidden !important; }
|
| 39 |
+
/* The unnamed outer column Gradio injects around our shell column. */
|
| 40 |
+
main.fillable > .column, .contain > .column, .wrap > .column {
|
| 41 |
+
height:100% !important; max-height:100% !important; min-height:0 !important;
|
| 42 |
+
overflow:hidden !important; }
|
| 43 |
+
.sc-header { display:flex; align-items:center; gap:.75rem; margin-bottom:.25rem; }
|
| 44 |
+
.hf-logo { flex-shrink:0; }
|
| 45 |
+
.sc-title { font-weight:800; font-size:1.7rem; letter-spacing:-.02em; line-height:1.2; }
|
| 46 |
+
.sc-title .hf-accent, .hf-accent { color:var(--hf-yellow); }
|
| 47 |
+
.sc-badge { display:inline-block; padding:2px 10px; border-radius:999px;
|
| 48 |
+
background:#2a2410; color:var(--hf-yellow); border:1px solid rgba(255,210,30,.25);
|
| 49 |
+
font-size:.72rem; font-weight:600; margin-left:.4rem; vertical-align:middle; }
|
| 50 |
+
.sc-sub { color:#94a3b8; margin-top:.2rem; font-size:.9rem; }
|
| 51 |
+
.sc-tui-shell { display:flex !important; flex-direction:column; gap:.5rem;
|
| 52 |
+
height:100% !important; max-height:100% !important; min-height:0; overflow:hidden !important; }
|
| 53 |
+
.sc-header-bar { display:flex; align-items:center; gap:.85rem; padding:.5rem .75rem;
|
| 54 |
+
background:#1e293b; border-radius:6px; font-family:ui-monospace,monospace; font-size:.8rem;
|
| 55 |
+
flex-shrink:0; }
|
| 56 |
+
.sc-hbrand { font-weight:700; color:#0b1020; background:var(--sc-accent); padding:1px 8px;
|
| 57 |
+
border-radius:4px; }
|
| 58 |
+
.sc-hbrand .hf-accent { color:var(--hf-yellow); }
|
| 59 |
+
.sc-hgit { color:var(--sc-ok); }
|
| 60 |
+
.sc-hmodel { color:var(--sc-tool); font-weight:700; }
|
| 61 |
+
.sc-hhost { color:var(--sc-dim); }
|
| 62 |
+
.sc-htheme { color:var(--sc-dim); margin-left:auto; }
|
| 63 |
+
.sc-main-row { display:flex !important; flex-wrap:nowrap !important; align-items:stretch !important;
|
| 64 |
+
gap:.5rem !important; flex:1 !important; min-height:0 !important; overflow:hidden !important; }
|
| 65 |
+
.sc-main-row > .gr-html, .sc-main-row > .gr-column { min-height:0 !important; height:100% !important; }
|
| 66 |
+
.sc-sidebar { width:17rem !important; min-width:17rem !important; max-width:17rem !important;
|
| 67 |
+
flex-shrink:0 !important; height:100% !important; min-height:0 !important; overflow:hidden !important; }
|
| 68 |
+
.sc-sidebar > .html-container { padding:0 !important; height:100% !important; min-height:0 !important; }
|
| 69 |
+
.sc-sidebar-panel { height:100%; min-height:0; max-height:100%; display:flex; flex-direction:column;
|
| 70 |
+
background:var(--sc-panel); border:1px solid var(--sc-border); border-radius:8px;
|
| 71 |
+
font-family:ui-monospace,monospace; font-size:.78rem; overflow:hidden; }
|
| 72 |
+
.sc-sidebar-focused { border-color:var(--sc-accent); }
|
| 73 |
+
.sc-sidebar-title { padding:.35rem .55rem; color:var(--sc-accent); font-weight:700;
|
| 74 |
+
border-bottom:1px solid var(--sc-border); background:#0f172a; }
|
| 75 |
+
.sc-sidebar-body { flex:1 1 0%; min-height:0; height:100%;
|
| 76 |
+
max-height:calc(100vh - 5rem); overflow-y:auto; overflow-x:hidden;
|
| 77 |
+
padding:.25rem 0; line-height:1.35; }
|
| 78 |
+
.sc-sb-dir { color:var(--sc-accent); font-weight:700; padding:.1rem .45rem; white-space:nowrap; }
|
| 79 |
+
.sc-sb-file { display:flex; align-items:baseline; gap:.15rem; padding:.05rem .45rem;
|
| 80 |
+
color:var(--sc-fg); white-space:nowrap; }
|
| 81 |
+
.sc-sb-file:hover { background:#1e293b; }
|
| 82 |
+
.sc-sb-sel { background:var(--sc-ok); color:#0b1020; font-weight:700; }
|
| 83 |
+
.sc-sb-sel .sc-sb-glyph, .sc-sb-sel .sc-sb-name { color:#0b1020; }
|
| 84 |
+
.sc-sb-mark { display:inline-block; width:.85rem; text-align:center; }
|
| 85 |
+
.sc-sb-glyph { opacity:.6; }
|
| 86 |
+
.sc-sb-more { color:var(--sc-dim); font-style:italic; padding:.2rem .45rem; }
|
| 87 |
+
.sc-sb-empty, .sc-sb-stat { padding:.15rem .45rem; color:var(--sc-fg); }
|
| 88 |
+
.sc-sb-dim { color:var(--sc-dim); }
|
| 89 |
+
.sc-main-col { flex:1 !important; min-width:0 !important; min-height:0 !important;
|
| 90 |
+
height:100% !important; display:flex !important; flex-direction:column !important;
|
| 91 |
+
gap:.5rem !important; overflow:hidden !important; }
|
| 92 |
+
.sc-editor-wrap, .sc-editor-wrap .gr-group { overflow:visible !important; flex-shrink:0 !important; }
|
| 93 |
+
.sc-transcript-wrap { flex:1; min-height:0; overflow-y:auto; overflow-x:hidden;
|
| 94 |
+
background:#0f172a; border:1px solid var(--sc-border); border-radius:8px; padding:.5rem .65rem; }
|
| 95 |
+
.sc-transcript-inner { font-family:ui-monospace,monospace; font-size:.82rem; line-height:1.45; }
|
| 96 |
+
.sc-transcript-empty { color:var(--sc-dim); padding:1rem; font-family:ui-monospace,monospace; }
|
| 97 |
+
.sc-tline { margin:.15rem 0; }
|
| 98 |
+
.sc-tglyph { display:inline-block; width:1rem; }
|
| 99 |
+
.sc-editor-wrap { border:1px solid var(--sc-accent); border-radius:8px; padding:.25rem;
|
| 100 |
+
background:#0f172a; flex-shrink:0; min-height:9rem; overflow:visible !important; }
|
| 101 |
+
.sc-editor-wrap .block, #sc-editor { height:auto !important; min-height:7rem !important;
|
| 102 |
+
overflow:visible !important; }
|
| 103 |
+
.sc-editor-wrap label { display:flex !important; flex-direction:column; min-height:6.5rem; }
|
| 104 |
+
.sc-editor-wrap textarea, #sc-editor textarea, #sc-editor input,
|
| 105 |
+
[data-testid="textbox"] textarea, [data-testid="textbox"] input {
|
| 106 |
+
font-family:ui-monospace,monospace !important; font-size:.85rem !important;
|
| 107 |
+
background:#0f172a !important; color:var(--sc-fg) !important; border:none !important;
|
| 108 |
+
box-shadow:none !important; pointer-events:auto !important;
|
| 109 |
+
min-height:6.5rem !important; resize:vertical !important; }
|
| 110 |
+
#sc-editor { pointer-events:auto !important; }
|
| 111 |
+
.sc-editor-hint { font-size:.72rem; color:var(--sc-dim); padding:.2rem .4rem;
|
| 112 |
+
font-family:ui-monospace,monospace; }
|
| 113 |
+
.sc-status-wrap { flex-shrink:0; }
|
| 114 |
+
.sc-status-bar { display:flex; flex-wrap:wrap; gap:.35rem; padding:.4rem .5rem;
|
| 115 |
+
background:#1e293b; border-radius:6px; font-family:ui-monospace,monospace; font-size:.75rem; }
|
| 116 |
+
.sc-chip { padding:2px 8px; border-radius:4px; background:#334155; color:#e2e8f0; }
|
| 117 |
+
.sc-chip-brand { background:var(--sc-accent); color:#fff; font-weight:700; }
|
| 118 |
+
.sc-chip-mode { background:#2a2410; color:var(--hf-yellow); font-weight:600; }
|
| 119 |
+
.sc-chip-think { background:#422006; color:#fdba74; }
|
| 120 |
+
.sc-chip-run { background:#14532d; color:#86efac; }
|
| 121 |
+
.sc-chip-dim { color:#94a3b8; }
|
| 122 |
+
.sc-chip-model { color:#a78bfa; }
|
| 123 |
+
.sc-chip-clickable { cursor:pointer; border:none; font:inherit; font-family:inherit; font-size:inherit; }
|
| 124 |
+
.sc-chip-clickable:hover { filter:brightness(1.15); }
|
| 125 |
+
.sc-picker-title { color:var(--sc-accent); font-weight:700; margin-bottom:.5rem; }
|
| 126 |
+
.sc-picker-list { display:flex; flex-direction:column; gap:2px; max-height:280px; overflow-y:auto; }
|
| 127 |
+
.sc-picker-item { display:flex; gap:.35rem; align-items:baseline; width:100%; text-align:left;
|
| 128 |
+
padding:.25rem .4rem; background:transparent; border:none; color:var(--sc-fg);
|
| 129 |
+
font-family:ui-monospace,monospace; font-size:.85rem; cursor:pointer; border-radius:4px; }
|
| 130 |
+
.sc-picker-item:hover { background:#334155; }
|
| 131 |
+
.sc-picker-sel { background:var(--sc-accent); color:#fff; font-weight:700; }
|
| 132 |
+
.sc-picker-mark { display:inline-block; width:1rem; text-align:center; }
|
| 133 |
+
.sc-picker-hint { margin-top:.6rem; font-size:.72rem; color:var(--sc-dim); }
|
| 134 |
+
.sc-picker-empty { color:var(--sc-dim); font-style:italic; }
|
| 135 |
+
.sc-popup-item.sc-popup-sel { background:#334155; font-weight:700; }
|
| 136 |
+
.sc-overlay { position:fixed; inset:0; background:rgba(0,0,0,.55); z-index:9999;
|
| 137 |
+
display:flex; align-items:center; justify-content:center; pointer-events:auto; }
|
| 138 |
+
.sc-overlay-panel { background:#1e293b; border:1px solid #7c3aed; border-radius:10px;
|
| 139 |
+
padding:1rem 1.25rem; max-width:480px; font-family:ui-monospace,monospace; font-size:.85rem;
|
| 140 |
+
color:#e2e8f0; pointer-events:auto; }
|
| 141 |
+
.sc-popup { position:absolute; z-index:100; background:#1e293b; border:1px solid #7c3aed;
|
| 142 |
+
border-radius:6px; max-height:200px; overflow:auto; font-family:ui-monospace,monospace; font-size:.8rem; }
|
| 143 |
+
.sc-popup-item { padding:.25rem .5rem; cursor:pointer; color:#34d399; }
|
| 144 |
+
.sc-popup-item:hover { background:#334155; }
|
| 145 |
+
.sc-approval { padding:.75rem 1rem; border:1px solid rgba(124,58,237,.45);
|
| 146 |
+
border-radius:8px; background:#1e1b4b; margin:.5rem 0; font-size:.9rem; }
|
| 147 |
+
footer { display:none !important; }
|
| 148 |
+
.gradio-container .block, .gradio-container .form { background:transparent !important;
|
| 149 |
+
border:none !important; box-shadow:none !important; }
|
| 150 |
+
.gradio-container .gr-group { background:transparent !important; border:none !important; }
|
| 151 |
+
.gradio-container label { display:none !important; }
|
| 152 |
+
.sc-hidden-controls { position:fixed !important; left:-10000px !important; top:0 !important;
|
| 153 |
+
width:1px !important; height:1px !important; overflow:hidden !important; opacity:0 !important; }
|
| 154 |
+
.sc-hidden-btn, .sc-hidden-btn.block, #sc-submit, #sc-clear, #sc-interrupt, #sc-toggle-sidebar,
|
| 155 |
+
#sc-cycle-mode, #sc-cycle-agent, #sc-cycle-model, #sc-cycle-think, #sc-help, #sc-whichkey,
|
| 156 |
+
#sc-open-picker-models, #sc-open-picker-themes, #sc-open-picker-agents, #sc-open-picker-sessions,
|
| 157 |
+
#sc-picker-up, #sc-picker-down, #sc-picker-confirm, #sc-picker-pick {
|
| 158 |
+
position:fixed !important; left:-10000px !important; top:0 !important;
|
| 159 |
+
width:1px !important; height:1px !important; opacity:0 !important;
|
| 160 |
+
overflow:hidden !important; pointer-events:auto !important; }
|
| 161 |
+
""" + theme_css_vars()
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def smolcode_header_html(*, preset: str, tier_badge: str, subtitle: str) -> str:
    """Render the smolcode page header: HF logo, title, two badges and a subtitle.

    Args:
        preset: Text shown in the first badge, after the ``preset: `` prefix.
        tier_badge: Text shown in the second badge.
        subtitle: Text placed in the ``sc-sub`` line under the title.

    Returns:
        One HTML string. All three values are interpolated verbatim (no HTML
        escaping is applied here).
    """
    badges = (
        f"<span class='sc-badge'>preset: {preset}</span>"
        f"<span class='sc-badge'>{tier_badge}</span>"
    )
    title = f"<div class='sc-title'>smol<span class='hf-accent'>code</span>{badges}</div>"
    tagline = f"<div class='sc-sub'>{subtitle}</div>"
    return f"<div class='sc-header'>{HF_LOGO_SVG}<div>{title}{tagline}</div></div>"
|
engine/browser_runner.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Subprocess runner: check a model-built web app in a REAL headless browser.
|
| 2 |
+
|
| 3 |
+
Invoked as `python engine/browser_runner.py <app.html>` by
|
| 4 |
+
engine/browsercheck.py — never imported (keeps it free of the engine package /
|
| 5 |
+
liteforge, and isolates a browser crash from the Gradio process). It loads the
|
| 6 |
+
app wrapped in the EXACT same `srcdoc` + `sandbox` as the live preview
|
| 7 |
+
(engine/preview.py), so the verdict matches what the user sees, then clicks every
|
| 8 |
+
button and exercises the keyboard, and reports any uncaught JavaScript errors.
|
| 9 |
+
|
| 10 |
+
Browser: headless Firefox via Selenium + geckodriver. (Playwright's browser CDN
|
| 11 |
+
is firewalled in this environment; conda-forge Firefox is the reachable, rootless
|
| 12 |
+
real browser. The choice is invisible to callers — same JSON contract.)
|
| 13 |
+
|
| 14 |
+
We capture errors by injecting a tiny `window.onerror`/`unhandledrejection`
|
| 15 |
+
collector at the top of the framed document (so it catches errors during initial
|
| 16 |
+
script execution — the "script ran before its element / undefined function"
|
| 17 |
+
class), then read it back. That is the HARD failure signal.
|
| 18 |
+
|
| 19 |
+
Output: one JSON line {ok, errors, buttons, clicked}. Exit 3 only when the
|
| 20 |
+
browser itself can't run, so the caller can fall back to the jsdom checker.
|
| 21 |
+
"""
|
| 22 |
+
import json
|
| 23 |
+
import os
|
| 24 |
+
import re
|
| 25 |
+
import sys
|
| 26 |
+
import tempfile
|
| 27 |
+
|
| 28 |
+
PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"
|
| 29 |
+
|
| 30 |
+
# Installed by the rootless conda-forge setup (see DEVELOPING.md). Overridable.
|
| 31 |
+
_BROWSER_PREFIX = os.environ.get(
|
| 32 |
+
"SMOLBUILDER_BROWSER_PREFIX",
|
| 33 |
+
os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".browser"))
|
| 34 |
+
_FIREFOX_BIN = os.path.join(_BROWSER_PREFIX, "bin", "FirefoxApp", "firefox")
|
| 35 |
+
_GECKODRIVER = os.path.join(_BROWSER_PREFIX, "bin", "geckodriver")
|
| 36 |
+
|
| 37 |
+
# Injected first inside the frame so it catches errors thrown during load.
|
| 38 |
+
_CAPTURE = ("<script>(function(){window.__errs=[];"
|
| 39 |
+
"window.addEventListener('error',function(e){try{__errs.push('uncaught: '+"
|
| 40 |
+
"((e.error&&e.error.message)||e.message||String(e)))}catch(_){}} ,true);"
|
| 41 |
+
"window.addEventListener('unhandledrejection',function(e){try{__errs.push("
|
| 42 |
+
"'rejection: '+((e.reason&&e.reason.message)||e.reason))}catch(_){}});})();</script>")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _escape_srcdoc(doc: str) -> str:
|
| 46 |
+
return doc.replace("&", "&").replace('"', """)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _inject_capture(app_html: str) -> str:
|
| 50 |
+
"""Put the error collector before the app's own scripts."""
|
| 51 |
+
m = re.search(r"<head[^>]*>", app_html, re.I)
|
| 52 |
+
if m:
|
| 53 |
+
return app_html[:m.end()] + _CAPTURE + app_html[m.end():]
|
| 54 |
+
m = re.search(r"<html[^>]*>", app_html, re.I)
|
| 55 |
+
if m:
|
| 56 |
+
return app_html[:m.end()] + _CAPTURE + app_html[m.end():]
|
| 57 |
+
return _CAPTURE + app_html
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _emit(obj: dict) -> None:
|
| 61 |
+
sys.stdout.write(json.dumps(obj) + "\n")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def main(path: str) -> int:
    """Load the app at ``path`` in headless Firefox, poke it, and report JS errors.

    The app HTML is wrapped in a sandboxed ``srcdoc`` iframe on a temporary host
    page, up to 25 clickable elements are clicked, arrow/space ``keydown``
    events are dispatched, and the errors gathered in ``window.__errs`` are
    printed as one JSON line ``{ok, errors, buttons, clicked}``.

    Args:
        path: Path to the app's HTML file (read as UTF-8).

    Returns:
        0 after a completed check (whether or not errors were found), or 3 when
        the browser could not be used (Selenium import failed, Firefox or
        geckodriver missing, or Firefox failed to launch). In the 3 case a JSON
        line with ``ok: null`` and an ``infra`` message is emitted instead.
    """
    import shutil
    import time

    try:
        from selenium import webdriver
        from selenium.webdriver.firefox.options import Options
        from selenium.webdriver.firefox.service import Service
        from selenium.webdriver.common.by import By
    except Exception as e:
        _emit({"ok": None, "infra": f"selenium import failed: {e}"})
        return 3

    if not (os.path.exists(_FIREFOX_BIN) and os.path.exists(_GECKODRIVER)):
        _emit({"ok": None, "infra": "firefox/geckodriver not installed"})
        return 3

    with open(path, encoding="utf-8") as f:
        app_html = f.read()

    host = ('<!doctype html><meta charset="utf-8"><body style="margin:0">'
            f'<iframe id="app" style="width:100%;height:600px;border:0" '
            f'sandbox="{PREVIEW_SANDBOX}" '
            f'srcdoc="{_escape_srcdoc(_inject_capture(app_html))}"></iframe>')

    # mkdtemp leaves deletion to the caller; remove the directory on every exit
    # path so repeated checks do not pile up "brhost-*" directories.
    host_dir = tempfile.mkdtemp(prefix="brhost-")
    try:
        host_path = os.path.join(host_dir, "host.html")
        with open(host_path, "w", encoding="utf-8") as f:
            f.write(host)

        opts = Options()
        opts.add_argument("-headless")
        opts.binary_location = _FIREFOX_BIN
        opts.set_preference("security.sandbox.content.level", 0)  # no userns in container
        svc = Service(executable_path=_GECKODRIVER,
                      log_output=os.path.join(tempfile.gettempdir(), "gecko.log"))

        try:
            driver = webdriver.Firefox(options=opts, service=svc)
        except Exception as e:
            _emit({"ok": None, "infra": f"firefox launch failed: {str(e)[:200]}"})
            return 3

        errors: list[str] = []
        buttons = clicked = 0
        try:
            driver.set_page_load_timeout(20)
            driver.get("file://" + host_path)
            driver.switch_to.frame(driver.find_element(By.ID, "app"))
            time.sleep(0.3)  # let scripts settle
            els = driver.find_elements(
                By.CSS_SELECTOR, "button, [onclick], input[type=button], input[type=submit]")
            buttons = len(els)
            for el in els[:25]:
                try:
                    # Force-enable first so disabled controls get exercised too.
                    driver.execute_script("arguments[0].disabled=false;", el)
                    el.click()
                    clicked += 1
                except Exception:
                    pass  # handler errors show up in __errs
            # Exercise keyboard handlers (canvas games etc.).
            try:
                driver.execute_script(
                    "['ArrowUp','ArrowDown','ArrowLeft','ArrowRight',' '].forEach(function(k){"
                    "var c={key:k,keyCode:k===' '?32:({ArrowUp:38,ArrowDown:40,ArrowLeft:37,ArrowRight:39}[k]),bubbles:true};"
                    "document.dispatchEvent(new KeyboardEvent('keydown',c));"
                    "window.dispatchEvent(new KeyboardEvent('keydown',c));});")
            except Exception:
                pass
            time.sleep(0.3)  # surface late/timer errors
            try:
                errors = driver.execute_script("return window.__errs || [];") or []
            except Exception:
                errors = []
        finally:
            try:
                driver.quit()
            except Exception:
                pass
    finally:
        shutil.rmtree(host_dir, ignore_errors=True)

    # Cap both the per-message length and the number of messages reported.
    errors = [str(e)[:400] for e in errors][:20]
    _emit({"ok": len(errors) == 0, "errors": errors, "buttons": buttons, "clicked": clicked})
    return 0
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
if __name__ == "__main__":
|
| 145 |
+
sys.exit(main(sys.argv[1]))
|
engine/browsercheck.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Real-browser verification of model-built web apps, with a jsdom fallback.
|
| 2 |
+
|
| 3 |
+
The web equivalent of `run_python`, but faithful: it drives a REAL headless
|
| 4 |
+
browser (Firefox via Selenium, in engine/browser_runner.py as a subprocess) and
|
| 5 |
+
loads the app in the exact `srcdoc`/`sandbox` wrapper the live preview uses — so
|
| 6 |
+
the agent's verdict matches what the user actually sees. jsdom
|
| 7 |
+
(engine/webcheck.py) can't: it has a working localStorage and never applies the
|
| 8 |
+
sandbox, so it falsely passes apps that break in a browser (e.g. a notepad on a
|
| 9 |
+
`data:` opaque origin).
|
| 10 |
+
|
| 11 |
+
Same contract as webcheck.check_html — (True, []) / (False, [...]) / (None, [...]).
|
| 12 |
+
Fallback chain: real browser -> jsdom -> unverifiable. A browser that's missing,
|
| 13 |
+
slow, or crashes returns None internally and falls back rather than failing the
|
| 14 |
+
build (a flaky checker must never cause spurious model escalation).
|
| 15 |
+
|
| 16 |
+
The browser must be installed wherever this runs (rootless conda-forge Firefox —
|
| 17 |
+
see DEVELOPING.md); on a minimal image (e.g. the HF Space) it isn't, and we use
|
| 18 |
+
jsdom.
|
| 19 |
+
"""
|
| 20 |
+
from __future__ import annotations
|
| 21 |
+
|
| 22 |
+
import functools
|
| 23 |
+
import json
|
| 24 |
+
import os
|
| 25 |
+
import subprocess
|
| 26 |
+
import sys
|
| 27 |
+
import tempfile
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
|
| 30 |
+
from . import webcheck
|
| 31 |
+
|
| 32 |
+
# Real-browser runners, tried in order. Playwright/Chromium first (the reachable
|
| 33 |
+
# rootless browser in this devcontainer), then conda-forge Firefox/Selenium.
|
| 34 |
+
# Whichever launches first is cached for the life of the process. Both speak the
|
| 35 |
+
# same JSON contract, so the choice is invisible to callers.
|
| 36 |
+
_RUNNERS = [
|
| 37 |
+
Path(__file__).with_name("playwright_runner.py"),
|
| 38 |
+
Path(__file__).with_name("browser_runner.py"),
|
| 39 |
+
]
|
| 40 |
+
_BROWSER_PREFIX = Path(os.environ.get(
|
| 41 |
+
"SMOLBUILDER_BROWSER_PREFIX",
|
| 42 |
+
str(Path(__file__).resolve().parent.parent / ".browser")))
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _child_env() -> dict:
|
| 46 |
+
"""Env for the runner subprocess: Firefox's conda libs on LD_LIBRARY_PATH."""
|
| 47 |
+
env = dict(os.environ)
|
| 48 |
+
libdir = str(_BROWSER_PREFIX / "lib")
|
| 49 |
+
prev = env.get("LD_LIBRARY_PATH", "")
|
| 50 |
+
env["LD_LIBRARY_PATH"] = f"{libdir}:{prev}" if prev else libdir
|
| 51 |
+
env["SMOLBUILDER_BROWSER_PREFIX"] = str(_BROWSER_PREFIX)
|
| 52 |
+
return env
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@functools.lru_cache(maxsize=1)
|
| 56 |
+
def _active_runner() -> Path | None:
|
| 57 |
+
"""First runner whose browser actually launches (probed once; cached, since a
|
| 58 |
+
launch is slow and availability is fixed for the life of the process)."""
|
| 59 |
+
probe = "<!doctype html><html><body><button>probe</button></body></html>"
|
| 60 |
+
for runner in _RUNNERS:
|
| 61 |
+
if not runner.exists():
|
| 62 |
+
continue
|
| 63 |
+
ok, _ = _invoke(probe, 45, runner)
|
| 64 |
+
if ok is not None:
|
| 65 |
+
return runner
|
| 66 |
+
return None
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def available() -> bool:
    """Report whether some real-browser runner was found to launch."""
    runner = _active_runner()
    return runner is not None
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def check_html(html: str, timeout: int = 35) -> tuple[bool | None, list[str]]:
    """Verify ``html`` in a real browser, degrading to jsdom, then to "unverifiable".

    Args:
        html: The app's HTML source.
        timeout: Seconds allowed for the browser run; the jsdom fallback is
            given at most 20 seconds of it.

    Returns:
        ``(ok, errors)`` where ``ok`` is ``True``/``False`` for a real verdict
        and ``None`` when no checker could run.
    """
    runner = _active_runner()
    ok, errors = (None, []) if runner is None else _invoke(html, timeout, runner)
    if ok is not None:
        return ok, errors

    # The browser was unavailable or gave no verdict: try the jsdom checker.
    if not webcheck.available():
        return None, ["no runtime checker available (browser + jsdom both missing)"]
    return webcheck.check_html(html, timeout=min(timeout, 20))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _invoke(html: str, timeout: int, runner: Path) -> tuple[bool | None, list[str]]:
|
| 87 |
+
"""Run a browser runner once. Returns (ok|None, errors); None = couldn't run."""
|
| 88 |
+
with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
|
| 89 |
+
f.write(html)
|
| 90 |
+
path = f.name
|
| 91 |
+
try:
|
| 92 |
+
proc = subprocess.run(
|
| 93 |
+
[sys.executable, str(runner), path],
|
| 94 |
+
capture_output=True, text=True, timeout=timeout, env=_child_env())
|
| 95 |
+
except subprocess.TimeoutExpired:
|
| 96 |
+
return None, []
|
| 97 |
+
finally:
|
| 98 |
+
Path(path).unlink(missing_ok=True)
|
| 99 |
+
|
| 100 |
+
if proc.returncode == 3:
|
| 101 |
+
return None, []
|
| 102 |
+
lines = (proc.stdout or "").strip().splitlines()
|
| 103 |
+
if not lines:
|
| 104 |
+
return None, []
|
| 105 |
+
try:
|
| 106 |
+
data = json.loads(lines[-1])
|
| 107 |
+
except json.JSONDecodeError:
|
| 108 |
+
return None, []
|
| 109 |
+
if data.get("ok") is None:
|
| 110 |
+
return None, []
|
| 111 |
+
return bool(data.get("ok")), list(data.get("errors", []))
|
engine/builder.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""smolbuilder — a Lovable/Replit-style web-app builder on a tiny local model.
|
| 2 |
+
|
| 3 |
+
Where `Router` (engine/router.py) answers one coding *task* per call with a
|
| 4 |
+
fresh workspace, `WebBuilder` is a **stateful session**: you describe a web app,
|
| 5 |
+
the agent builds a self-contained `index.html`, and then you keep talking to it
|
| 6 |
+
("make it dark mode", "add a reset button") and it edits the *same* workspace.
|
| 7 |
+
|
| 8 |
+
First build uses the router's escalation idea — start small, and if the tiny
|
| 9 |
+
model can't produce a usable app, retry on the next-bigger model — but once a
|
| 10 |
+
tier succeeds we **lock onto that agent and its workspace** so every later turn
|
| 11 |
+
is a cheap incremental edit rather than a from-scratch rebuild.
|
| 12 |
+
|
| 13 |
+
The build is verified by rendering: did the agent leave a non-trivial HTML
|
| 14 |
+
entrypoint behind? Static apps have no `run_python` signal, so "it produced an
|
| 15 |
+
app you can preview" is the success criterion the UI also relies on.
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
from collections.abc import AsyncIterator
|
| 20 |
+
from dataclasses import dataclass, field
|
| 21 |
+
|
| 22 |
+
from .agent import SmallCodeAgent, Step
|
| 23 |
+
from .config import Preset, Tier, load_preset
|
| 24 |
+
from .live_run import LiveFrame
|
| 25 |
+
from .preview import find_entry, inline_app, preview_iframe
|
| 26 |
+
from .router import classify_tier
|
| 27 |
+
from .sandbox import Workspace
|
| 28 |
+
from .tools import build_web_registry
|
| 29 |
+
from .trace_collector import TraceEvent
|
| 30 |
+
from .ui_trace import merge_step_metadata
|
| 31 |
+
from . import browsercheck
|
| 32 |
+
|
| 33 |
+
BUILD_SYSTEM_PROMPT = """You are smolbuilder, a web app builder running on a small local model.
|
| 34 |
+
|
| 35 |
+
You build small, self-contained web apps that run directly in a browser — like a tiny Lovable or Replit.
|
| 36 |
+
|
| 37 |
+
Your workspace tools:
|
| 38 |
+
- write_file(path, content): create or overwrite a file.
|
| 39 |
+
- read_file(path): read a file back.
|
| 40 |
+
- list_files(): see what already exists.
|
| 41 |
+
- check_app(): run the current app in a headless browser — load index.html, execute its JavaScript, click every button — and report any errors.
|
| 42 |
+
|
| 43 |
+
Hard rules:
|
| 44 |
+
1. The app's entrypoint is ALWAYS a single file named index.html, and it must start with <!doctype html><html> and include <head> and <body>.
|
| 45 |
+
2. Put the CSS in a <style> tag and the JavaScript in a <script> tag INSIDE index.html. Prefer one self-contained file — it must run with no build step and no server.
|
| 46 |
+
3. Put the <script> tag at the very END of <body>, AFTER the elements it uses (or wrap your code in window.addEventListener('DOMContentLoaded', ...)). If a script runs before its elements exist, document.getElementById returns null and every button silently breaks.
|
| 47 |
+
4. Every button or interactive control must have a working handler that you actually wire up. Define functions before they are referenced.
|
| 48 |
+
5. Vanilla HTML/CSS/JS only. Do not require a framework, npm, or a backend. You may load a library from a CDN with a full https:// URL only if it is truly needed.
|
| 49 |
+
6. Make it look good by default: sensible layout, spacing, a coherent color palette, readable type. Mobile-friendly.
|
| 50 |
+
|
| 51 |
+
Method — follow it every time:
|
| 52 |
+
1. Write a complete index.html in one write_file call.
|
| 53 |
+
2. Call check_app() to test it.
|
| 54 |
+
3. If check_app reports errors, read them, fix index.html (write the FULL file again), and call check_app again. Repeat until it reports ok.
|
| 55 |
+
4. To CHANGE an existing app, write the FULL updated index.html (never a partial file — keep everything that already worked), then check_app again.
|
| 56 |
+
|
| 57 |
+
Only finish once check_app reports the app works. Then reply with one short sentence describing what the app does. Do not paste the code in your reply.
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
+
# Minimum entrypoint size (chars) to count as "a real app" and not a stub.
|
| 61 |
+
_MIN_APP_CHARS = 60
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
@dataclass
class BuildResult:
    """Outcome of one build or edit turn, as assembled by `WebBuilder._result`."""
    # Agent's closing reply; replaced by a fallback sentence when the agent
    # returned an empty answer.
    final: str
    # Steps reported by the agent's final live frame.
    steps: list[Step]
    # Workspace files (path -> content) as returned by `agent.files()`.
    files: dict[str, str]
    # Output of `preview_iframe(files, height=...)` for the live preview pane.
    preview_html: str
    # Entrypoint chosen by `find_entry(files)`, or None when none was found.
    entry: str | None
    # Name and model tag of the tier that ran this turn.
    tier_name: str
    tier_model: str
    # Name of the tier the ladder started on for this turn.
    start_tier: str
    # Number of escalations that happened before this result.
    escalations: int
    # True when `_evaluate` passed and the agent neither hit its step limit
    # nor errored.
    verified: bool
    # Session turn counter at the time the result was built.
    turn: int = 0
    # Trace events merged with step metadata (`merge_step_metadata`).
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that produced this result.
    agent: SmallCodeAgent | None = None

    @property
    def app_html(self) -> str:
        """The self-contained document — for the 'download app' button."""
        return inline_app(self.files)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _evaluate(agent: SmallCodeAgent) -> tuple[bool, str | None, dict[str, str]]:
    """Decide whether the agent's workspace holds a working app.

    Returns ``(ok, entry, files)``. The check is two-staged:

    1. Structural: `find_entry` must locate an entrypoint whose stripped
       content is at least `_MIN_APP_CHARS` characters long.
    2. Runtime (HTML entrypoints only): the inlined app is passed to
       `browsercheck.check_html`. Only an explicit ``False`` verdict fails the
       build; any other verdict is treated as a pass.

    The result drives the verified badge and tier escalation.
    """
    workspace_files = agent.files()
    entrypoint = find_entry(workspace_files)

    # Stage 1: is there a real (non-stub) entrypoint at all?
    has_real_entry = (
        entrypoint is not None
        and len(workspace_files[entrypoint].strip()) >= _MIN_APP_CHARS
    )
    if not has_real_entry:
        return False, entrypoint, workspace_files

    # Stage 2: for HTML entrypoints, ask the browser check for a verdict.
    if entrypoint.lower().endswith((".html", ".htm")):
        verdict, _ = browsercheck.check_html(inline_app(workspace_files))
        if verdict is False:
            return False, entrypoint, workspace_files

    return True, entrypoint, workspace_files
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class WebBuilder:
    """A persistent build session. One instance per browser session (gr.State).

    The first ``send``/``send_live`` call builds an app, walking up the
    preset's tier ladder until a tier produces a verified app or the ladder
    runs out. Later calls edit the same workspace on the tier that the first
    build locked in.
    """

    def __init__(self, preset: Preset | None = None, max_steps: int = 16,
                 preview_height: int = 540) -> None:
        """Create an empty session.

        Args:
            preset: Backend preset; resolved with ``load_preset()`` when omitted.
            max_steps: Step budget passed to every agent this session creates.
            preview_height: Height passed to ``preview_iframe`` for previews.
        """
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.preview_height = preview_height
        # The workspace (the built app on disk) persists across turns; the tier
        # that built it is remembered so edits stay on the same model. A spent
        # LiteForge agent can't be re-run, so each turn gets a fresh agent over
        # this same workspace.
        self.workspace: Workspace | None = None
        self.tier_idx = 0
        self.turn = 0
        self.think = "off"
        self.yolo = False

    @property
    def has_app(self) -> bool:
        """True once a first build has produced a workspace to iterate on."""
        return self.workspace is not None

    # --- public API ------------------------------------------------------
    async def send(self, message: str) -> BuildResult:
        """Build (first turn) or edit (later turns) and return a BuildResult.

        Drains ``send_live`` and keeps the result carried by the last
        ``done`` frame.

        Raises:
            RuntimeError: if the live run ended without a finished frame
                carrying a BuildResult (for example when no tier ran at all).
        """
        result: BuildResult | None = None
        async for frame in self.send_live(message):
            if frame.done and isinstance(frame.result, BuildResult):
                result = frame.result
        # An explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would let this method return None.
        if result is None:
            raise RuntimeError(
                "build finished without producing a result "
                "(no model tier ran to completion)"
            )
        return result

    async def send_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Yield live frames while building or editing.

        Bumps the turn counter, then delegates to the first-build path when no
        workspace exists yet and to the edit path otherwise.
        """
        self.turn += 1
        if self.workspace is None:
            async for frame in self._first_build_live(message):
                yield frame
        else:
            async for frame in self._iterate_live(message):
                yield frame

    def reset(self) -> None:
        """Drop the current app and start a fresh session."""
        self.cleanup()
        self.workspace = None
        self.tier_idx = 0
        self.turn = 0

    def cleanup(self) -> None:
        """Clean up the workspace if one exists; the reference is kept."""
        if self.workspace is not None:
            self.workspace.cleanup()

    def empty_preview(self) -> str:
        """Preview markup for a session that has no files yet."""
        return preview_iframe({}, height=self.preview_height)

    # --- internals -------------------------------------------------------
    def _new_agent(self, tier: Tier, workspace: Workspace | None = None) -> SmallCodeAgent:
        """Create a web-builder agent for `tier`, optionally over an existing workspace."""
        return SmallCodeAgent(
            preset=self.preset, model=tier.model, max_steps=self.max_steps,
            system_prompt=BUILD_SYSTEM_PROMPT, registry_builder=build_web_registry,
            workspace=workspace, name="smolbuilder",
            agent="build", profile="web",
        )

    async def _first_build_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Escalate the model ladder until one produces a previewable app.

        Starts at the tier chosen by ``classify_tier`` and tries each tier in
        order. The first tier whose result verifies — or the last tier,
        verified or not — is locked in as the session's workspace and tier.
        """
        start = classify_tier(message, len(self.tiers))
        task = (f"Build this web app as a self-contained index.html:\n\n{message}")
        escalations = 0
        last: BuildResult | None = None
        prev_tier_name: str | None = None

        for idx in range(start, len(self.tiers)):
            tier = self.tiers[idx]
            if prev_tier_name is not None:
                # Tell the UI that the previous tier failed and we moved up.
                yield LiveFrame(events=[
                    TraceEvent(kind="tier_escalation", name=tier.name,
                               detail=f"escalated from {prev_tier_name}"),
                ])
            agent = self._new_agent(tier)
            async for frame in agent.run_live_turn(
                task, think=self.think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok, entry, files = _evaluate(agent)
                # A run that hit its step limit or errored never counts as verified.
                ok = ok and not (agent.hit_max_steps or agent.errored)
                last = self._result(agent, final, steps, files, entry, tier,
                                    self.tiers[start].name, escalations, ok)
                is_last_tier = idx == len(self.tiers) - 1
                if ok or is_last_tier:
                    # Lock onto this agent's workspace and tier for later edits.
                    self.workspace = agent.workspace
                    self.tier_idx = idx
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return
                # Reaching here means this is not the last tier, so a next
                # tier always exists.
                agent.trace_collector.record_escalation(tier.name, self.tiers[idx + 1].name)
                agent.cleanup()
                escalations += 1
                prev_tier_name = tier.name

        # Fallback for a ladder that ended without a finishing frame on its
        # last tier: surface the most recent result, if any.
        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )

    async def _iterate_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Edit the existing app on the locked tier and workspace.

        The current ``index.html`` (empty when it cannot be read) is embedded
        in the task so the model rewrites the whole file.
        """
        tier = self.tiers[self.tier_idx]
        agent = self._new_agent(tier, self.workspace)
        cur = self.workspace.read_file("index.html")
        body = cur["content"] if cur.get("ok") else ""
        task = (
            "You are editing an existing web app. Here is the current "
            "index.html:\n\n```html\n" + body + "\n```\n\n"
            "Apply the change below, then save the COMPLETE updated file with a "
            "single write_file(\"index.html\", <full new contents>). Keep "
            "everything that already works and output the whole file, never a "
            "fragment.\n\nChange to make: " + message
        )
        async for frame in agent.run_live_turn(
            task, think=self.think, yolo=self.yolo,
        ):
            if not frame.done:
                yield frame
                continue
            final, steps = frame.result
            ok, entry, files = _evaluate(agent)
            ok = ok and not (agent.hit_max_steps or agent.errored)
            result = self._result(agent, final, steps, files, entry, tier, tier.name, 0, ok)
            yield LiveFrame(
                steps=steps,
                events=result.trace_events,
                files=result.files,
                done=True,
                result=result,
            )

    def _result(self, agent: SmallCodeAgent, final, steps, files, entry, tier, start_name,
                escalations, verified) -> BuildResult:
        """Assemble the BuildResult for one finished agent run."""
        # Small models sometimes write the file but return an empty answer; give
        # the chat something sensible rather than a blank bubble.
        if not (final or "").strip():
            final = "✅ Done: check the live preview." if verified else \
                "I made an attempt; have a look and tell me what to fix."
        events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
        return BuildResult(
            final=final, steps=steps, files=files,
            preview_html=preview_iframe(files, height=self.preview_height),
            entry=entry, tier_name=tier.name, tier_model=tier.model,
            start_tier=start_name, escalations=escalations,
            verified=bool(verified), turn=self.turn,
            trace_events=events, agent=agent,
        )
|
engine/config.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Backend presets for smolcode.
|
| 2 |
+
|
| 3 |
+
smolcode always talks to ONE OpenAI-compatible endpoint. A "preset" just
|
| 4 |
+
selects the base_url and the model *tiers* the router may escalate through.
|
| 5 |
+
Everything is overridable by environment variables so the same code runs on a
|
| 6 |
+
laptop, inside an HF Space, or against the hal-9000 "home supercomputer".
|
| 7 |
+
|
| 8 |
+
Env overrides (highest priority):
|
| 9 |
+
SMALLCODE_PRESET space | laptop | hal | hal-smol | hal-matrix (default: hal-matrix)
|
| 10 |
+
SMALLCODE_BASE_URL OpenAI-compatible /v1 URL
|
| 11 |
+
SMALLCODE_API_KEY bearer token (most local servers ignore it)
|
| 12 |
+
SMALLCODE_MODEL force a single model (disables tiering)
|
| 13 |
+
"""
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
import re
|
| 18 |
+
from dataclasses import dataclass, field
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass(frozen=True)
class Tier:
    """One rung of the model ladder. `name` is what the router shows in the UI."""
    # Short display label for the rung (e.g. "3B").
    name: str
    # Model tag for this rung (e.g. "granite4.1:3b").
    model: str
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@dataclass(frozen=True)
class Preset:
    """A backend selection: one endpoint plus the model tiers to escalate through."""
    # Preset identifier; also the lookup key in `_PRESETS`.
    key: str
    # OpenAI-compatible /v1 URL of the endpoint.
    base_url: str
    # Bearer token sent to the endpoint.
    api_key: str
    # Ordered cheap -> expensive. The router starts at tiers[0] and escalates.
    tiers: list[Tier] = field(default_factory=list)

    @property
    def default_model(self) -> str:
        """Model tag of the first (cheapest) tier.

        Raises IndexError when `tiers` is empty, so callers that may hold an
        empty preset should check `tiers` first.
        """
        return self.tiers[0].model
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass(frozen=True)
class SpecialistLadder:
    """One specialist family's size ladder (cheap -> expensive), reusing Tier."""
    # Specialty key this ladder belongs to (e.g. "py"); "general" is used for
    # the generic fallback ladder built by `SpecialistPreset.ladder_for`.
    specialty: str
    # Rungs of the ladder, ordered cheap -> expensive.
    tiers: list[Tier] = field(default_factory=list)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@dataclass(frozen=True)
class SpecialistPreset(Preset):
    """A Preset with a two-dimensional escalation space: specialty -> size ladder.

    It subclasses Preset so existing readers of .base_url/.api_key/.tiers/
    .default_model (bench, builder, agent) keep working unchanged. The
    inherited `tiers` is the GENERIC fallback ladder; `ladders` maps each
    specialty key to its own rungs.
    """
    ladders: dict[str, SpecialistLadder] = field(default_factory=dict)

    def ladder_for(self, specialty: str) -> SpecialistLadder:
        """Return the ladder registered for `specialty`.

        Falls back to a "general" ladder over the generic `tiers` when the
        specialty is unknown or its ladder has no rungs.
        """
        candidate = self.ladders.get(specialty)
        if not candidate or not candidate.tiers:
            return SpecialistLadder(specialty="general", tiers=self.tiers)
        return candidate
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# Local Ollama on the workstation exposes an OpenAI-compatible API at :11435/v1.
|
| 67 |
+
# NOTE: the default model is a tool-TUNED 3B (granite4.1:3b), not a coder model.
|
| 68 |
+
# Tiny coder models (qwen2.5-coder:3b) text-emit ```json instead of native
|
| 69 |
+
# `tool_calls`, which LiteForge's agent loop can't execute. Granite-3B (also
|
| 70 |
+
# <=4B, Tiny-Titan-eligible) emits native tool_calls. The dual-mode parser
|
| 71 |
+
# (P1) will let qwen-coder back in for code quality.
|
| 72 |
+
_LAPTOP = Preset(
|
| 73 |
+
key="laptop",
|
| 74 |
+
base_url="http://localhost:11435/v1",
|
| 75 |
+
api_key="ollama",
|
| 76 |
+
tiers=[Tier("3B", "granite4.1:3b")],
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
# The submission Space: a single tiny model served by llama.cpp's llama-server.
|
| 80 |
+
# Kept to one <=4B model so the Tiny Titan claim is unambiguous.
|
| 81 |
+
# Port is configurable: 8080 inside the Space, but on the workstation 8080 is
|
| 82 |
+
# taken by Guacamole/Tomcat so local dev uses SMALLCODE_LLAMA_PORT=8088.
|
| 83 |
+
# llama-server ignores the model name and serves whatever GGUF was loaded.
|
| 84 |
+
_LLAMA_PORT = os.environ.get("SMALLCODE_LLAMA_PORT", "8080")
|
| 85 |
+
_SPACE = Preset(
|
| 86 |
+
key="space",
|
| 87 |
+
base_url=f"http://127.0.0.1:{_LLAMA_PORT}/v1",
|
| 88 |
+
api_key="local",
|
| 89 |
+
tiers=[Tier("3B", "qwen2.5-coder-3b-instruct-q4_k_m.gguf")],
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
# hal-9000 (DGX Spark): full tiered router. Points straight at hal's Ollama
|
| 93 |
+
# (:11434/v1), which serves every pulled model over one OpenAI-compatible
|
| 94 |
+
# endpoint with native tool_calls — simpler than LiteLLM (whose :4000 exposed no
|
| 95 |
+
# models). Tiny tier is a TOOL-TUNED model (granite4.1:3b) that reliably drives
|
| 96 |
+
# the loop; escalate to bigger Qwen *coder* models for hard codegen. (Tiny coder
|
| 97 |
+
# models can't native-tool-call — see engine/config laptop note.)
|
| 98 |
+
_HAL = Preset(
|
| 99 |
+
key="hal",
|
| 100 |
+
base_url="http://10.8.0.6:11434/v1",
|
| 101 |
+
api_key=os.environ.get("SMALLCODE_API_KEY", "ollama"),
|
| 102 |
+
# All-Granite ladder: every tier emits native tool_calls on Ollama (verified
|
| 103 |
+
# on hal), all <=32B. NOTE: qwen2.5-coder does NOT native-tool-call on Ollama
|
| 104 |
+
# at ANY size (3b/14b text-emit the call) — bringing the Qwen *coder* models
|
| 105 |
+
# in (for the benchmark story) requires the dual-mode parser (see task 6).
|
| 106 |
+
tiers=[
|
| 107 |
+
Tier("3B", "granite4.1:3b"),
|
| 108 |
+
Tier("8B", "granite4.1:8b"),
|
| 109 |
+
Tier("30B", "granite4.1:30b"),
|
| 110 |
+
],
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
# hal-9000 with the fine-tuned coder as the entry tier. The finetune/ pipeline
# trains Qwen2.5-Coder-1.5B to emit native <tool_call> (see finetune/README.md),
# so once it's served on hal's Ollama it can be the cheap first rung and we only
# escalate to Granite on verification failure. The served tag is configurable via
# SMALLCODE_SMOL_MODEL (default matches the published model name); import the GGUF
# into Ollama under that tag, or point SMALLCODE_BASE_URL at a llama-server.
#
# SMALLCODE_SMOL_MODEL is also read as the specialist-matrix *pattern*
# ("...{specialty}-{size}..."). A templated value is not a servable model tag, so
# it is ignored here and this preset's entry tier keeps the published default.
_SMOL_DEFAULT_TAG = "smolcode-coder-1.5b:tools"
_SMOL_MODEL = os.environ.get("SMALLCODE_SMOL_MODEL", _SMOL_DEFAULT_TAG)
if "{specialty}" in _SMOL_MODEL or "{size}" in _SMOL_MODEL:
    _SMOL_MODEL = _SMOL_DEFAULT_TAG
_HAL_SMOL = Preset(
    key="hal-smol",
    base_url="http://10.8.0.6:11434/v1",
    api_key=os.environ.get("SMALLCODE_API_KEY", "ollama"),
    tiers=[
        Tier("1.5B-tuned", _SMOL_MODEL),
        Tier("8B", "granite4.1:8b"),
        Tier("30B", "granite4.1:30b"),
    ],
)
|
| 130 |
+
|
| 131 |
+
# --- the 2D specialist matrix (hal-matrix preset) ----------------------------
|
| 132 |
+
# A model per language/function (smolcode-coder-{specialty}-{size}:tools), served
|
| 133 |
+
# on hal's Ollama. The router classifies the task's specialty, picks that family's
|
| 134 |
+
# size ladder, and escalates within it — then into the generic Granite ladder at
|
| 135 |
+
# the top. Tags are derived by CONVENTION + served-tag discovery, so adding a
|
| 136 |
+
# specialist is a serving action, not a code edit.
|
| 137 |
+
|
| 138 |
+
_SPECIALIST_SIZES = ("1.5b", "3b", "7b") # 7b deferred but recognized if served.
|
| 139 |
+
_SPECIALTIES = ("py", "js", "bash", "git", "dotnet", "csharp", "java",
|
| 140 |
+
"powershell", "rust", "docker", "bsd", "go", "sql", "cpp", "terraform",
|
| 141 |
+
"orchestrate") # task_batch / parallel fan-out specialist
|
| 142 |
+
|
| 143 |
+
# Pattern is overridable so one env var can repoint the whole matrix. Back-compat:
|
| 144 |
+
# a value WITHOUT a "{specialty}" placeholder is treated as a legacy single tag.
|
| 145 |
+
_SMOL_PATTERN = os.environ.get("SMALLCODE_SMOL_MODEL",
|
| 146 |
+
"smolcode-coder-{specialty}-{size}:tools")
|
| 147 |
+
|
| 148 |
+
# Size parsing + specialty detection — shared by the model picker (Tiny-Titan <=32B
|
| 149 |
+
# display filter, collapsing the 16-per-size specialty fine-tunes to one "Auto" entry
|
| 150 |
+
# per size). Mirrors smolcode-cli/src/router.rs parse_size_b and the size_b() regex in
|
| 151 |
+
# tests/test_matrix_routing.py.
|
| 152 |
+
_SIZE_RE = re.compile(r"(\d+(?:\.\d+)?)b\b", re.I)


def parse_size_b(model: str) -> float:
    """Read the parameter count, in billions, out of a model tag.

    The LAST '<n>b' group in the tag wins: 'granite4.1:30b' -> 30.0 and
    'smolcode-coder-py-1.5b:tools' -> 1.5. A tag with no such group (or a
    falsy tag) yields 0.0, so size-unknown models pass a '<=32B' filter
    instead of being hidden.
    """
    billions = 0.0
    # Walk every match and keep overwriting, so the final match is the result.
    for hit in _SIZE_RE.finditer(model or ""):
        billions = float(hit.group(1))
    return billions
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def is_specialty_model(model: str) -> bool:
    """Report whether a tag names a per-specialty fine-tune.

    A tag qualifies when, compared case-insensitively, it begins with
    'smolcode-coder-<specialty>-' for any entry of `_SPECIALTIES`.
    """
    tag = (model or "").lower()
    prefixes = tuple(f"smolcode-coder-{name}-" for name in _SPECIALTIES)
    return tag.startswith(prefixes)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def specialist_sizes(preset: "Preset") -> list[str]:
    """List the distinct specialist sizes found in a matrix preset's ladders.

    Only specialty fine-tunes whose parsed size is in (0, 32] are counted.
    Labels look like '1.5b' or '3b' and are returned smallest first. A preset
    without `ladders` (a non-matrix preset) yields an empty list.
    """
    label_by_size: dict[float, str] = {}
    ladders = getattr(preset, "ladders", {}) or {}
    for ladder in ladders.values():
        for tier in ladder.tiers:
            if not is_specialty_model(tier.model):
                continue
            billions = parse_size_b(tier.model)
            if not (0 < billions <= 32):
                continue
            # First label seen for a given size is the one that sticks.
            if billions not in label_by_size:
                label_by_size[billions] = f"{_SIZE_RE.findall(tier.model)[-1]}b"
    return [label for _, label in sorted(label_by_size.items())]
|
| 181 |
+
|
| 182 |
+
# Generic Granite ladder every specialist escalates INTO at its top rung (all <=32B).
|
| 183 |
+
_GENERIC_TIERS = [Tier("8B", "granite4.1:8b"), Tier("30B", "granite4.1:30b")]
|
| 184 |
+
|
| 185 |
+
# Static fallback set of served tags when /v1/models discovery is unavailable.
|
| 186 |
+
# Keep in sync with what's pulled on hal; discovery (below) supersedes it.
|
| 187 |
+
_HAL_SERVED: set[str] = {f"smolcode-coder-{s}-1.5b:tools" for s in _SPECIALTIES} | \
|
| 188 |
+
{f"smolcode-coder-{s}-3b:tools" for s in _SPECIALTIES}
|
| 189 |
+
|
| 190 |
+
_DISCOVERY_CACHE: dict[str, set[str]] = {}
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def _discover_served(base_url: str, api_key: str) -> set[str]:
|
| 194 |
+
"""GET the OpenAI-compatible /v1/models once (cached per base_url); the set of
|
| 195 |
+
served model tags. Any failure -> empty set (caller falls back to _HAL_SERVED)."""
|
| 196 |
+
if base_url in _DISCOVERY_CACHE:
|
| 197 |
+
return _DISCOVERY_CACHE[base_url]
|
| 198 |
+
served: set[str] = set()
|
| 199 |
+
try:
|
| 200 |
+
import json
|
| 201 |
+
import urllib.request
|
| 202 |
+
req = urllib.request.Request(base_url.rstrip("/") + "/models",
|
| 203 |
+
headers={"Authorization": f"Bearer {api_key}"})
|
| 204 |
+
with urllib.request.urlopen(req, timeout=2) as r:
|
| 205 |
+
data = json.loads(r.read())
|
| 206 |
+
served = {m["id"] for m in data.get("data", []) if "id" in m}
|
| 207 |
+
except Exception:
|
| 208 |
+
served = set()
|
| 209 |
+
_DISCOVERY_CACHE[base_url] = served
|
| 210 |
+
return served
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def _build_ladder(specialty: str, served: set[str]) -> SpecialistLadder:
    """Build one specialist ladder.

    The ladder holds the served specialist sizes (smallest first, in
    `_SPECIALIST_SIZES` order) followed by the generic Granite tiers. Sizes
    whose tag is not in `served` are skipped, so a wholly-missing specialist
    yields just the generic tiers (`ladder_for` also guards this). When
    `_SMOL_PATTERN` has no "{specialty}" placeholder it is a legacy single
    tag and contributes no specialist rungs.

    Args:
        specialty: Specialty key substituted into `_SMOL_PATTERN`.
        served: Model tags known to be served by the endpoint.
    """
    tiers: list[Tier] = []
    if "{specialty}" in _SMOL_PATTERN:
        # A pattern without "{size}" formats to the same tag for every size;
        # track tags so the ladder never lists one model on several rungs.
        seen: set[str] = set()
        for size in _SPECIALIST_SIZES:
            tag = _SMOL_PATTERN.format(specialty=specialty, size=size)
            if tag in served and tag not in seen:
                seen.add(tag)
                tiers.append(Tier(f"{size}-{specialty}", tag))
    tiers.extend(_GENERIC_TIERS)
    return SpecialistLadder(specialty=specialty, tiers=tiers)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
_HAL_MATRIX = SpecialistPreset(
|
| 228 |
+
key="hal-matrix",
|
| 229 |
+
base_url="http://10.8.0.6:11434/v1",
|
| 230 |
+
api_key=os.environ.get("SMALLCODE_API_KEY", "ollama"),
|
| 231 |
+
tiers=_GENERIC_TIERS, # generic fallback ladder (inherited Preset.tiers)
|
| 232 |
+
ladders={}, # built lazily in load_preset (needs the resolved base_url)
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
_PRESETS = {p.key: p for p in (_LAPTOP, _SPACE, _HAL, _HAL_SMOL, _HAL_MATRIX)}
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def default_ui_model(preset: Preset, cfg: dict) -> str:
    """Pick the model the web UI should preselect.

    A truthy `cfg["model"]` wins and is returned as a string. Otherwise the
    preset's default model is used when it has tiers, and "" when it has none.
    """
    configured = cfg.get("model")
    if configured:
        return str(configured)
    return preset.default_model if preset.tiers else ""
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def load_preset() -> Preset:
    """Resolve the active preset, applying env overrides and Rust config.toml.

    Resolution order:
      * preset:   SMALLCODE_PRESET, else "hal-matrix"; an unknown key falls
                  back to the laptop preset.
      * base_url: SMALLCODE_BASE_URL, else config.toml `base_url`, else the
                  preset's own URL.
      * api_key:  SMALLCODE_API_KEY, else the preset's own key.
      * model:    SMALLCODE_MODEL forces a single "custom" tier. A config.toml
                  `model` does so only for non-matrix presets and only when it
                  differs from the preset's entry model.

    Empty values (an env var set to "", an empty config entry) are treated as
    unset everywhere, matching how SMALLCODE_MODEL was already handled.
    """
    # Default to the 2D specialist matrix so "Auto" routes by specialty out of the box;
    # it auto-detects served specialists and falls back to the generic Granite ladder
    # (per-specialty: ladder_for(); whole matrix: _discover_served -> _HAL_SERVED).
    key = (os.environ.get("SMALLCODE_PRESET") or "hal-matrix").lower()
    base = _PRESETS.get(key, _LAPTOP)

    rust_cfg: dict = {}
    try:
        from .rust_session import load_rust_config
        loaded = load_rust_config()
        # Guard against a loader that returns None or a non-mapping.
        if isinstance(loaded, dict):
            rust_cfg = loaded
    except Exception:
        # Best-effort: the Rust config is optional, so any failure to import
        # or read it just leaves the defaults in place.
        pass

    base_url = (os.environ.get("SMALLCODE_BASE_URL")
                or rust_cfg.get("base_url")
                or base.base_url)
    api_key = os.environ.get("SMALLCODE_API_KEY") or base.api_key

    # An explicit env SMALLCODE_MODEL is a hard single-model override and wins over
    # everything (including the matrix). A `model` in config.toml is only a *default*
    # — it must NOT silently disable the matrix when the user explicitly asked for it
    # via SMALLCODE_PRESET=hal-matrix.
    env_model = os.environ.get("SMALLCODE_MODEL")
    if env_model:
        return Preset(key=base.key, base_url=base_url, api_key=api_key,
                      tiers=[Tier("custom", env_model)])

    if isinstance(base, SpecialistPreset):
        served = _discover_served(base_url, api_key) or _HAL_SERVED
        ladders = {s: _build_ladder(s, served) for s in _SPECIALTIES}
        return SpecialistPreset(key=base.key, base_url=base_url, api_key=api_key,
                                tiers=_GENERIC_TIERS, ladders=ladders)

    # A config.toml `model` is a DEFAULT, not a hard override (that's SMALLCODE_MODEL,
    # handled above). If it just names this preset's entry tier — the common case, e.g.
    # the CLI default == hal-smol's 1.5B entry — keep the full escalation LADDER (so the
    # router + judge still work). Only a model that ISN'T the preset entry is treated as
    # a deliberate single-model choice.
    forced = rust_cfg.get("model")
    if forced and base.tiers and forced != base.default_model:
        return Preset(key=base.key, base_url=base_url, api_key=api_key,
                      tiers=[Tier("custom", forced)])

    return Preset(key=base.key, base_url=base_url, api_key=api_key, tiers=base.tiers)
|
engine/fanout.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Parallel sub-agent fan-out for the Python engine (mirror of the Rust CLI's
|
| 2 |
+
`task_batch`).
|
| 3 |
+
|
| 4 |
+
Where the Router runs ONE task through a tier ladder, fan-out runs MANY independent
|
| 5 |
+
tasks at once: each gets its own SmallCodeAgent + fresh Workspace and they run
|
| 6 |
+
concurrently via asyncio.gather, bounded so local inference isn't oversubscribed.
|
| 7 |
+
Use it for independent work — exploring/solving several things in parallel — when
|
| 8 |
+
each subtask doesn't depend on the others' output.
|
| 9 |
+
|
| 10 |
+
Cheap when each agent is a small local (e.g. the fine-tuned 1.5B) model: wall-clock
|
| 11 |
+
is ~the slowest job, not the sum.
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import asyncio
|
| 16 |
+
from collections.abc import AsyncIterator
|
| 17 |
+
from dataclasses import dataclass, field
|
| 18 |
+
|
| 19 |
+
from .agent import SmallCodeAgent, Step
|
| 20 |
+
from .config import Preset, load_preset
|
| 21 |
+
from .live_run import LiveFrame
|
| 22 |
+
from .router import _verify
|
| 23 |
+
from .trace_collector import TraceEvent
|
| 24 |
+
from .ui_trace import merge_step_metadata
|
| 25 |
+
|
| 26 |
+
MAX_CONCURRENCY = 4
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class FanoutResult:
    """Outcome of one fan-out job (one task run by its own agent)."""
    # Position of the task in the input list.
    index: int
    # The task text that was given to the agent.
    task: str
    # Final answer returned by `agent.run(task)`.
    final: str
    # Steps returned by `agent.run(task)`.
    steps: list[Step]
    # Model tag the job ran on.
    model: str
    # Verification verdict; False whenever the agent hit its step limit or errored.
    verified: bool
    # Files associated with the job — presumably the agent's workspace files;
    # TODO confirm against fan_out_live.
    files: dict[str, str] = field(default_factory=dict)
    # Error description for a failed job, None otherwise — NOTE(review): the
    # code that sets this is not visible here; confirm.
    error: str | None = None
    # Trace events for the job.
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that ran the job, when retained.
    agent: SmallCodeAgent | None = None
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
async def fan_out(tasks: list[str], preset: Preset | None = None,
                  model: str | None = None, max_steps: int = 12,
                  concurrency: int = MAX_CONCURRENCY) -> list[FanoutResult]:
    """Run `tasks` concurrently and return their results.

    Thin, non-streaming wrapper around `fan_out_live`: it drains the live
    frames, ignores the intermediate ones, and keeps the list carried by the
    terminal (`done`) frame. All arguments are forwarded unchanged.

    Returns an empty list when no terminal frame with a list result is seen.
    """
    collected: list[FanoutResult] = []
    stream = fan_out_live(tasks, preset=preset, model=model,
                          max_steps=max_steps, concurrency=concurrency)
    async for frame in stream:
        if not frame.done:
            continue
        if isinstance(frame.result, list):
            collected = frame.result
    return collected
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
async def fan_out_live(
    tasks: list[str],
    preset: Preset | None = None,
    model: str | None = None,
    max_steps: int = 12,
    concurrency: int = MAX_CONCURRENCY,
    poll_interval: float = 0.35,
) -> AsyncIterator[LiveFrame]:
    """Yield aggregate live frames while fan-out jobs run.

    One `SmallCodeAgent` is created per task and the jobs run as asyncio
    tasks, with at most `max(1, concurrency)` of them inside `agent.run` at a
    time. While any job is unfinished, a frame holding every agent's trace
    events and workspace files (paths prefixed with "[<1-based index>] ") is
    yielded roughly every `poll_interval` seconds. The last frame has
    `done=True` and `result` set to the `FanoutResult` list in input order.

    Args:
        tasks: Task prompts; an empty list yields a single done frame.
        preset: Preset to use; `load_preset()` when omitted.
        model: Model name; the preset's `default_model` when omitted.
        max_steps: Step budget passed to each agent.
        concurrency: Upper bound on simultaneously running jobs.
        poll_interval: Maximum seconds between two live frames.

    If the consumer stops iterating early, unfinished jobs are cancelled and
    awaited, and every agent's `cleanup()` is called exactly once.
    """
    if not tasks:
        yield LiveFrame(done=True, result=[])
        return

    preset = preset or load_preset()
    model = model or preset.default_model
    sem = asyncio.Semaphore(max(1, concurrency))
    agents: list[SmallCodeAgent] = [
        SmallCodeAgent(preset=preset, model=model, max_steps=max_steps)
        for _ in tasks
    ]
    released: set[int] = set()

    def _release(index: int) -> None:
        # Guarded so each agent is cleaned up once, whether by its job or by
        # the generator's own finaliser below.
        if index not in released:
            released.add(index)
            agents[index].cleanup()

    async def _job(index: int, task: str, agent: SmallCodeAgent) -> FanoutResult:
        # The cleanup wraps the semaphore wait as well, so a job cancelled
        # while still queued releases its agent too.
        try:
            async with sem:
                try:
                    final, steps = await agent.run(task)
                    ok = False if (agent.hit_max_steps or agent.errored) else _verify(agent)
                    events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
                    return FanoutResult(
                        index=index, task=task, final=final, steps=steps, model=model,
                        verified=bool(ok), files=agent.files(), trace_events=events, agent=agent,
                    )
                except Exception as e:
                    return FanoutResult(index=index, task=task, final="", steps=[],
                                        model=model, verified=False, error=str(e))
        finally:
            _release(index)

    job_tasks = [
        asyncio.create_task(_job(i, t, agents[i]))
        for i, t in enumerate(tasks)
    ]
    try:
        while not all(j.done() for j in job_tasks):
            # Mid-run we must NOT call current_steps()/history() on a live agent
            # (the Rust agent isn't reentrant and would deadlock). Read only the
            # trace collectors (plain lists) and workspace files (disk).
            # NOTE(review): agents of already-finished jobs have been cleaned up
            # by now; confirm files()/snapshot() remain safe after cleanup().
            events: list[TraceEvent] = []
            all_files: dict[str, str] = {}
            for i, agent in enumerate(agents):
                events.extend(agent.trace_collector.snapshot())
                for path, content in agent.files().items():
                    all_files[f"[{i + 1}] {path}"] = content
            yield LiveFrame(steps=[], events=events, files=all_files)
            # Wake as soon as every job is done instead of always sleeping the
            # full interval; the timeout still bounds the time between frames.
            await asyncio.wait(job_tasks, timeout=poll_interval)
        results = [await j for j in job_tasks]
        results.sort(key=lambda r: r.index)
        yield LiveFrame(done=True, result=results)
    finally:
        pending = [j for j in job_tasks if not j.done()]
        for j in pending:
            j.cancel()
        if pending:
            # Let cancelled jobs unwind (and run their cleanup) before returning.
            await asyncio.gather(*pending, return_exceptions=True)
        # A job cancelled before its first step never enters its body, so make
        # sure its agent is released here.
        for i in range(len(agents)):
            _release(i)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def summarize(results: list[FanoutResult]) -> str:
    """Aggregate fan-out results into one labeled summary (mirrors the Rust output).

    The summary starts with a header line giving the number of subagents,
    followed by one section per result: a `=== [n] model status ===` heading
    (n is 1-based, status is "OK" or "unverified") and then either
    `error: <message>` when the job failed or the stripped final text.
    Trailing whitespace is removed from the whole summary.
    """
    sections = [f"Ran {len(results)} subagents in parallel. Results:\n"]
    for r in results:
        status = "OK" if r.verified else "unverified"
        head = f"=== [{r.index + 1}] {r.model} {status} ==="
        # Explicit conditional instead of the `a and b or c` idiom; a missing
        # final text is rendered as an empty body rather than raising.
        body = f"error: {r.error}" if r.error else (r.final or "").strip()
        sections.append(f"{head}\n{body}\n")
    return "\n".join(sections).rstrip()
|
engine/file_tree.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Workspace file tree with git status (Rust-backed)."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import re
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
|
| 7 |
+
from .rust_session import (
|
| 8 |
+
git_status,
|
| 9 |
+
rust_available,
|
| 10 |
+
workspace_files,
|
| 11 |
+
workspace_tree,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
_GIT_LINE = re.compile(r"^([ MADRCU?!]{1,2})\s+(.+)$")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@dataclass
class WorkspacePanel:
    """Markdown fragments and picker choices produced by `build_workspace_panel`."""

    # Fenced ASCII tree of the workspace, or a placeholder message.
    tree_md: str
    # Leading lines of the git status text, or a placeholder message.
    git_md: str
    # Sorted file labels; a label is "<marker> <path>" when the path has a git marker.
    file_choices: list[str]
    # Rendered preview of the selected file, or "" when nothing is shown.
    preview_md: str
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def parse_git_dirty(git_status_text: str) -> dict[str, str]:
    """Map repo-relative path to a one-character git status marker.

    Each line is stripped and matched against `_GIT_LINE`; lines that do not
    match are ignored. For rename entries ("old -> new") only the destination
    path is recorded. The marker is the last non-space character of the status
    field, falling back to "?" if that field is empty.
    """
    by_path: dict[str, str] = {}
    for raw in git_status_text.splitlines():
        hit = _GIT_LINE.match(raw.strip())
        if hit is None:
            continue
        code = hit.group(1).strip()
        target = hit.group(2).strip()
        if " -> " in target:
            # Rename entry: keep the new name only.
            target = target.split(" -> ")[-1].strip()
        compact = code.replace(" ", "")
        by_path[target] = compact[-1] if compact else "?"
    return by_path
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _preview_md(path: str, content: str) -> str:
|
| 41 |
+
lang = "python" if path.endswith(".py") else ""
|
| 42 |
+
return f"**`{path}`**\n```{lang}\n{content}\n```"
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def build_workspace_panel(
    workspace: str,
    selected: str | None = None,
    *,
    depth: int = 3,
    files: dict[str, str] | None = None,
) -> WorkspacePanel:
    """Build git header, ASCII tree, file picker choices, and file preview.

    Args:
        workspace: Workspace directory passed to the Rust-backed helpers.
        selected: Picker label or plain path of the file to preview, if any.
        depth: Depth forwarded to `workspace_tree`.
        files: Optional path -> content mapping; loaded with
            `workspace_files(workspace)` when omitted.

    Returns:
        A `WorkspacePanel`. When `rust_available()` is false, a placeholder
        panel is returned and no other helper is called.
    """
    if not rust_available():
        return WorkspacePanel(
            tree_md="_smolcode_core not installed_",
            git_md="",
            file_choices=[],
            preview_md="",
        )

    git_text = git_status(workspace)
    git_lines = git_text.splitlines()
    # Only the first few status lines are shown as the header.
    git_md = "\n".join(git_lines[:6]) if git_lines else "_not a git repository_"

    tree_body = workspace_tree(workspace, depth=depth)
    tree_md = f"```\n{tree_body}\n```"

    if files is None:
        files = workspace_files(workspace)

    dirty = parse_git_dirty(git_text)
    file_choices: list[str] = []
    for path in sorted(files):
        mark = dirty.get(path, "")
        file_choices.append(f"{mark} {path}" if mark else path)

    preview_md = ""
    if selected:
        clean = selected
        # Picker labels look like "<marker> <path>". Strip the marker only when
        # the label is not itself a real path, so a file whose name happens to
        # start with a status letter and a space is still resolved correctly.
        looks_marked = (
            len(selected) > 2 and selected[1] == " " and selected[0] in "MADRCU?!"
        )
        if selected not in files and looks_marked:
            clean = selected[2:]
        content = files.get(clean, "")
        if content:
            preview_md = _preview_md(clean, content)

    return WorkspacePanel(
        tree_md=tree_md,
        git_md=git_md,
        file_choices=file_choices,
        preview_md=preview_md,
    )
|
engine/gradio_shell.py
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared Gradio UI helpers for app.py and smolbuilder.py (CLI parity)."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import asyncio
|
| 5 |
+
import os
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
|
| 10 |
+
from .rust_session import (
|
| 11 |
+
RustSession,
|
| 12 |
+
expand_command,
|
| 13 |
+
expand_skill,
|
| 14 |
+
export_transcript,
|
| 15 |
+
list_background_jobs,
|
| 16 |
+
list_commands,
|
| 17 |
+
list_mcp,
|
| 18 |
+
list_rules,
|
| 19 |
+
list_skills,
|
| 20 |
+
render_config,
|
| 21 |
+
session_timeline,
|
| 22 |
+
write_agents_md,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class UiSettings:
    """Per-session settings gathered from the settings sidebar."""

    # Workspace directory; "." means the current directory.
    workspace: str = "."
    # Model name; "" leaves the choice to the caller's default.
    model: str = ""
    # Agent name ("build" or "plan" in the settings panel).
    agent: str = "build"
    mode: str = "normal"  # normal | auto | plan
    # Think level ("off", "low", "high" or "xtra" in the settings panel).
    think: str = "off"
    # Auto-approve tools.
    yolo: bool = False
    # NOTE(review): not populated by settings_from_ui; presumably toggled elsewhere.
    fan_out: bool = False
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class ApprovalState:
    """Single-slot hand-off between a coroutine asking for approval and the UI."""

    # Description of the request currently awaiting an answer, if any.
    pending_desc: str | None = None
    # Answer to the pending request; None while no answer has been given.
    result: bool | None = None

    async def ask(self, desc: str) -> bool:
        """Publish `desc` as the pending request and wait until it is answered.

        Polls `result` every 0.15 seconds; once an answer is present both
        fields are reset to None and the answer is returned as a bool.
        """
        self.pending_desc = desc
        self.result = None
        while (answer := self.result) is None:
            await asyncio.sleep(0.15)
        self.pending_desc = self.result = None
        return bool(answer)

    def approve(self, yes: bool = True) -> None:
        """Record the answer to the pending request (True approves)."""
        self.result = yes

    def deny(self) -> None:
        """Record a negative answer to the pending request."""
        self.approve(False)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@dataclass
class AppSessionState:
    """Gradio gr.State payload for session + settings."""

    # Active Rust-backed session; None until one exists (reset by /new and /delete).
    rust: RustSession | None = None
    # Current sidebar settings; a fresh UiSettings per state object.
    settings: UiSettings = field(default_factory=UiSettings)
    # Pending tool-approval hand-off; a fresh ApprovalState per state object.
    approval: ApprovalState = field(default_factory=ApprovalState)
    status_msg: str = ""
    # Last background-job listing fetched by the /bg command.
    bg_jobs: str = ""
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@dataclass
class SlashResult:
    """What a slash command asks the UI to do, as returned by `dispatch_slash`."""

    # Markdown text to show as the command's reply.
    reply: str = ""
    # Task to run next (set by /skill, /commit and custom commands).
    queued_task: str | None = None
    # True when the transcript should be cleared (/new, /delete, /clear).
    clear_chat: bool = False
    # Path of an exported transcript offered for download (/export).
    download_path: str | None = None
    # True when the sidebar should be toggled (/files).
    toggle_sidebar: bool = False
    # NOTE(review): not set by dispatch_slash; presumably driven by UI shortcuts.
    toggle_sidebar_view: bool = False
    # Picker to open: "agents", "models" or "themes".
    open_picker: str | None = None
    # True when the mode should advance to the next value (/mode).
    cycle_mode: bool = False
    # True when the think level should advance (/think without arguments).
    cycle_think: bool = False
    # Explicit think level requested with /think <level>.
    set_think: str | None = None
    # NOTE(review): the next two are not set by dispatch_slash; confirm their producer.
    show_help: bool = False
    show_whichkey: bool = False
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
_BUILTIN_SLASH = {
|
| 86 |
+
"/help", "/new", "/sessions", "/fork", "/rename", "/export", "/stats",
|
| 87 |
+
"/mcp", "/rules", "/skills", "/skill", "/commit", "/init", "/bg", "/clear",
|
| 88 |
+
"/delete", "/timeline", "/mode", "/think", "/config", "/search",
|
| 89 |
+
"/agents", "/models", "/themes", "/files", "/quit",
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
_ATTACH_MAX = 8192
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def parse_input(
    text: str,
    *,
    workspace_files: list[str] | None = None,
    workspace: str | None = None,
    rust: RustSession | None = None,
) -> tuple[str, str | None, str | None]:
    """Parse user input. Returns (task, slash_command_result, shell_output).

    - `!cmd` runs shell directly
    - `/cmd args` returns command to dispatch
    - `@file` inlines file content into task

    Exactly one element of the returned triple is meaningful: the shell
    command for `!cmd`, the raw command line for `/cmd`, otherwise the task
    text. Blank input yields ("", None, None).

    `@file` mentions are expanded only when `workspace_files` or `workspace`
    is given. When `workspace_files` is non-empty, a mention must either be
    listed verbatim or be the suffix of exactly one listed path; other
    mentions are left untouched. Mentions whose file cannot be read are also
    left untouched. Each attachment is capped at `_ATTACH_MAX` bytes.
    """
    stripped = (text or "").strip()
    if not stripped:
        return "", None, None

    if stripped.startswith("!"):
        return "", None, stripped[1:].strip()

    if stripped.startswith("/"):
        return "", stripped, None

    if "@" not in stripped or not (workspace_files or workspace):
        return stripped, None, None

    import re

    from .rust_session import read_workspace_file

    paths = list(workspace_files or [])
    ws = workspace or (rust.workspace_path if rust else ".")

    def _attach(match: re.Match[str]) -> str:
        """Return the attachment block for one mention, or the mention unchanged."""
        path = match.group(1)
        if paths and path not in paths:
            candidates = [p for p in paths if p.endswith(path)]
            if len(candidates) != 1:
                return match.group(0)
            path = candidates[0]
        content = read_workspace_file(ws, path, max_bytes=_ATTACH_MAX, rust=rust)
        if content is None:
            return match.group(0)
        return f"[attached: {path}]\n```\n{content}\n```"

    # Substitute in one pass over the original text so a mention can never be
    # replaced inside the content of a file attached earlier in the same input.
    return re.sub(r"@(\S+)", _attach, stripped), None, None
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def _workspace(session: AppSessionState) -> str:
|
| 143 |
+
return session.settings.workspace or "."
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def dispatch_slash(cmd_line: str, session: AppSessionState) -> SlashResult:
    """Handle a slash command; mirrors CLI TUI handle_slash.

    The first whitespace-separated token (lowercased) selects the command and
    the remainder of the line is its argument string. Built-in commands are
    handled inline; any other name is tried as a custom command through
    `expand_command`, and an unknown name produces a hint to use `/help`.

    Args:
        cmd_line: Raw command line, e.g. "/rename My title".
        session: Session state. `/new` and `/delete` reset `session.rust`;
            `/bg` refreshes `session.bg_jobs`.

    Returns:
        A `SlashResult` describing the reply text and any follow-up the UI
        should perform (queue a task, clear the chat, open a picker, ...).
        Blank input returns a short hint instead of raising.
    """
    parts = (cmd_line or "").strip().split(maxsplit=1)
    if not parts:
        # No command token at all; indexing parts[0] would raise IndexError.
        return SlashResult(reply="Empty command. Try `/help`.")
    cmd = parts[0].lower()
    args = parts[1] if len(parts) > 1 else ""
    ws = _workspace(session)

    if cmd == "/help":
        custom = list_commands(ws)
        extra = ""
        if custom:
            extra = "\n\n**Custom commands:** " + ", ".join(f"`/{n}`" for n in custom)
        return SlashResult(
            reply=(
                "**Slash commands:** `/new`, `/sessions`, `/fork`, `/rename <title>`, "
                "`/stats`, `/export [file]`, `/timeline`, `/delete`, `/mcp`, `/rules`, "
                "`/skills`, `/skill <name>`, `/commit [msg]`, `/init`, `/bg`, `/clear`, "
                "`/mode`, `/think`, `/config`, `/search`, `/files`"
                f"{extra}\n\n"
                "**Input:** `!cmd` runs shell without LLM; `@file` attaches workspace files."
            )
        )

    if cmd == "/new":
        session.rust = None
        return SlashResult(reply="Started a new session.", clear_chat=True)

    if cmd == "/sessions":
        rows = RustSession.list_sessions()
        if not rows:
            return SlashResult(reply="_No saved sessions._")
        # Only the first 20 sessions are listed.
        lines = [f"- **{r['title']}** (`{r['id']}`)" for r in rows[:20]]
        return SlashResult(reply="**Sessions:**\n" + "\n".join(lines))

    if cmd == "/fork":
        if session.rust and (nid := session.rust.fork()):
            return SlashResult(reply=f"Forked session → `{nid}`")
        return SlashResult(reply="Nothing to fork yet.")

    if cmd == "/rename":
        if session.rust and args and session.rust.rename(args):
            return SlashResult(reply=f"Renamed session to **{args}**")
        return SlashResult(reply="Usage: `/rename <title>`")

    if cmd == "/stats":
        nfiles = len(session.rust.files()) if session.rust else 0
        sid = session.rust.session_id if session.rust else "(none)"
        return SlashResult(
            reply=(
                f"session `{sid}` · workspace: `{ws}` · files: {nfiles} · "
                f"agent: {session.settings.agent}"
            )
        )

    if cmd == "/export":
        sid = session.rust.session_id if session.rust else ""
        if not sid:
            return SlashResult(reply="No session to export yet.")
        try:
            path = export_transcript(sid, args or None)
            return SlashResult(
                reply=f"Exported transcript to `{path}`",
                download_path=path,
            )
        except Exception as e:
            return SlashResult(reply=f"/export failed: {e}")

    if cmd == "/mcp":
        if session.rust is None:
            return SlashResult(
                reply="_Start a task first so MCP servers are connected._"
            )
        servers = list_mcp(session.rust)
        if not servers:
            return SlashResult(
                reply=(
                    "no MCP servers connected — add `[[mcp]]` entries to "
                    "`~/.config/smolcode/config.toml` or `.smolcode/config.toml`"
                )
            )
        lines = [f"**MCP servers ({len(servers)}):**"]
        for row in servers:
            tools = row.get("tools", [])
            # Show at most eight tool names per server, with an ellipsis after.
            tlist = ", ".join(tools[:8]) if tools else "(no tools)"
            if len(tools) > 8:
                tlist += "…"
            lines.append(f"- **{row.get('server', '?')}** ({len(tools)}): {tlist}")
        return SlashResult(reply="\n".join(lines))

    if cmd == "/rules":
        rules = list_rules(ws)
        if not rules:
            return SlashResult(
                reply="no rules — add `*.md` to `.smolcode/rules/` or `~/.config/smolcode/rules/`"
            )
        lines = [f"**active rules ({len(rules)}):**"]
        for r in rules:
            desc = r.get("description", "")
            tail = f" — {desc}" if desc else ""
            lines.append(f"- `{r.get('name', '?')}` [{r.get('scope', '?')}]{tail}")
        return SlashResult(reply="\n".join(lines))

    if cmd == "/skills":
        skills = list_skills(ws)
        if not skills:
            return SlashResult(
                reply="no skills — add `<name>/SKILL.md` to `.smolcode/skills/`"
            )
        lines = [f"**skills ({len(skills)})** — run with `/skill <name>`:"]
        for s in skills:
            desc = s.get("description", "")
            tail = f" — {desc}" if desc else ""
            lines.append(f"- `{s.get('name', '?')}`{tail}")
        return SlashResult(reply="\n".join(lines))

    if cmd == "/skill":
        if not args:
            return SlashResult(reply="Usage: `/skill <name> [args]` (see `/skills`)")
        sname, _, sargs = args.partition(" ")
        sname = sname.strip()
        sargs = sargs.strip()
        expanded = expand_skill(ws, sname, sargs)
        if expanded is None:
            return SlashResult(reply=f"no skill named `{sname}` (see `/skills`)")
        return SlashResult(reply=f"Running skill **{sname}**…", queued_task=expanded)

    if cmd == "/commit":
        if args:
            task = f"Commit all current changes with git_commit using this message: {args}"
        else:
            task = (
                "Review the staged/unstaged changes with git_diff, then commit them "
                "with git_commit using a concise, descriptive message."
            )
        return SlashResult(reply="Queued git commit task…", queued_task=task)

    if cmd == "/init":
        try:
            path = write_agents_md(ws)
            return SlashResult(reply=f"wrote `{path}` (project guide for agents)")
        except Exception as e:
            return SlashResult(reply=f"/init: {e}")

    if cmd == "/bg":
        session.bg_jobs = list_background_jobs()
        return SlashResult(reply=session.bg_jobs or "_No background jobs._")

    if cmd == "/timeline":
        sid = session.rust.session_id if session.rust else ""
        if not sid:
            return SlashResult(reply="no saved session yet")
        lines = session_timeline(sid)
        return SlashResult(reply="**Timeline:**\n" + "\n".join(f"- {ln}" for ln in lines))

    if cmd == "/delete":
        removed = session.rust.delete() if session.rust else False
        session.rust = None
        msg = "deleted session; started a new one" if removed else "started a new session"
        return SlashResult(reply=msg, clear_chat=True)

    if cmd == "/clear":
        return SlashResult(reply="_Transcript cleared._", clear_chat=True)

    if cmd == "/mode":
        return SlashResult(reply="Cycling mode…", cycle_mode=True)

    if cmd == "/think":
        if args:
            # Report the level that is actually applied (first token,
            # lowercased) rather than echoing the raw argument string.
            level = args.split()[0].lower()
            return SlashResult(reply=f"think → {level}", set_think=level)
        return SlashResult(reply="Cycling think level…", cycle_think=True)

    if cmd == "/config":
        if session.rust is None:
            return SlashResult(reply="_Start a task first to view config._")
        return SlashResult(reply=f"```\n{render_config(session.rust)}\n```")

    if cmd == "/search":
        if not args:
            return SlashResult(reply="Usage: `/search <text>`")
        return SlashResult(reply=f"_Search for `{args}` runs in transcript handler._")

    if cmd == "/agents":
        return SlashResult(reply="Opening agent picker…", open_picker="agents")

    if cmd == "/models":
        return SlashResult(reply="Opening model picker…", open_picker="models")

    if cmd == "/themes":
        return SlashResult(reply="Opening theme picker…", open_picker="themes")

    if cmd == "/files":
        return SlashResult(reply="Toggling sidebar…", toggle_sidebar=True)

    if cmd == "/quit":
        return SlashResult(reply="_Use browser close to exit the web UI._")

    # Anything that is not a built-in name is tried as a custom command.
    if cmd not in _BUILTIN_SLASH:
        name = cmd.lstrip("/")
        expanded = expand_command(ws, name, args)
        if expanded is not None:
            return SlashResult(
                reply=f"Running custom command `/{name}`…",
                queued_task=expanded,
            )

    return SlashResult(reply=f"Unknown command `{cmd}`. Try `/help`.")
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def settings_from_ui(
    workspace: str,
    model: str,
    agent: str,
    mode: str,
    think: str,
    yolo: bool,
) -> UiSettings:
    """Fold the raw sidebar values into a `UiSettings`.

    The mode overrides two of the inputs: "auto" switches `yolo` on, and
    "plan" forces the agent to "plan". An empty workspace becomes "." and an
    empty model becomes "".
    """
    auto_approve = yolo or mode == "auto"
    effective_agent = agent if mode != "plan" else "plan"
    return UiSettings(
        workspace=workspace or ".",
        model=model or "",
        agent=effective_agent,
        mode=mode,
        think=think,
        yolo=auto_approve,
    )
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
def build_settings_panel(preset_models: list[str]) -> dict:
    """Return Gradio components for the settings sidebar.

    Creates a collapsed "settings" accordion holding the workspace textbox,
    model and agent dropdowns, mode radio, think-level dropdown and yolo
    checkbox, and returns them keyed by setting name. The workspace defaults
    to the SMALLCODE_WORKSPACE environment variable (or "."), and the model
    to the first entry of `preset_models` (or "" when the list is empty).
    """
    default_workspace = os.environ.get("SMALLCODE_WORKSPACE", ".")
    default_model = preset_models[0] if preset_models else ""
    widgets: dict = {}
    with gr.Accordion("⚙️ settings", open=False):
        widgets["workspace"] = gr.Textbox(
            value=default_workspace,
            label="workspace directory",
        )
        widgets["model"] = gr.Dropdown(
            choices=preset_models,
            value=default_model,
            label="model",
            allow_custom_value=True,
        )
        widgets["agent"] = gr.Dropdown(
            choices=["build", "plan"],
            value="build",
            label="agent",
        )
        widgets["mode"] = gr.Radio(
            choices=["normal", "auto", "plan"],
            value="normal",
            label="mode",
        )
        widgets["think"] = gr.Dropdown(
            choices=["off", "low", "high", "xtra"],
            value="off",
            label="think level",
        )
        widgets["yolo"] = gr.Checkbox(value=False, label="yolo (auto-approve tools)")
    return widgets
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
def file_tree_md(files: dict[str, str], selected: str | None = None) -> str:
    """Legacy flat file list (prefer engine.file_tree.build_workspace_panel).

    Renders one bullet per path in sorted order, marking `selected` with an
    arrow. When `selected` names a file with non-empty content, that content
    is appended as a fenced code block ("python" info string for ".py" files).
    The fence is lengthened until it no longer occurs in the content, so a
    file containing its own code fences cannot break out of the block.

    Returns "_workspace is empty_" when `files` is empty.
    """
    if not files:
        return "_workspace is empty_"

    lines = []
    for path in sorted(files):
        mark = " →" if path == selected else ""
        lines.append(f"- `{path}`{mark}")
    listing = "\n".join(lines)

    body = files.get(selected, "") if selected else ""
    if not body:
        return listing

    lang = "python" if selected.endswith(".py") else ""
    fence = "```"
    while fence in body:
        fence += "`"
    return f"{listing}\n\n**`{selected}`**\n{fence}{lang}\n{body}\n{fence}"
|
engine/judge.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM-judge correctness gate for the router.
|
| 2 |
+
|
| 3 |
+
`router._verify()` only proves the produced code RUNS (clean exit / tests it wrote
|
| 4 |
+
itself), not that it's actually CORRECT — so a small model can ship a clean-but-wrong
|
| 5 |
+
solution and the router accepts it instead of escalating (exactly how the bench's
|
| 6 |
+
roman_to_int slipped through: ran fine, wrong output).
|
| 7 |
+
|
| 8 |
+
This judge asks a more capable model whether the solution truly satisfies the task; a
|
| 9 |
+
concrete "no" is turned into an escalation by the router. Mirrors
|
| 10 |
+
smolcode-cli/src/judge.rs (JSON-only reply, temperature 0, lenient parse), but the
|
| 11 |
+
verdict drives ESCALATION rather than stop/continue.
|
| 12 |
+
|
| 13 |
+
Conservative by design: only a clear defect escalates. On judge error / timeout /
|
| 14 |
+
unparseable reply we ACCEPT — the judge is a net to catch obvious wrongness, not a
|
| 15 |
+
hard gate, and we don't want to over-escalate (and lose the small-model win).
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import json
|
| 20 |
+
import os
|
| 21 |
+
import re
|
| 22 |
+
|
| 23 |
+
import liteforge as lf
|
| 24 |
+
|
| 25 |
+
_SYSTEM = (
|
| 26 |
+
"You are a strict senior code reviewer. You are given a coding TASK and the FILES "
|
| 27 |
+
"an agent produced. The code already runs without crashing — your job is to judge "
|
| 28 |
+
"whether it is actually CORRECT and COMPLETE for the task: check the exact "
|
| 29 |
+
"requirements, edge cases, and obvious logic bugs.\n"
|
| 30 |
+
"Reply with ONLY a JSON object: {\"correct\": true|false, \"reason\": \"<one short sentence>\"}.\n"
|
| 31 |
+
"Set \"correct\": false if you find ANY bug, wrong/missing edge case, or unmet "
|
| 32 |
+
"requirement. Ignore style. Do not write code."
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def judge_enabled() -> bool:
    """Judge is on by default; SMALLCODE_JUDGE=0 disables it.

    The variable is compared case-insensitively; "0", "false", "no" and the
    empty string switch the judge off, any other value leaves it on.
    """
    setting = os.environ.get("SMALLCODE_JUDGE", "1")
    disabled_values = ("0", "false", "no", "")
    return setting.lower() not in disabled_values
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _files_block(files: dict[str, str], cap: int = 6000) -> str:
|
| 42 |
+
blob = "\n\n".join(f"### {path}\n{content}" for path, content in files.items())
|
| 43 |
+
return blob[:cap]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _parse(text: str) -> bool | None:
|
| 47 |
+
"""True (correct), False (defect found), or None (couldn't tell)."""
|
| 48 |
+
m = re.search(r"\{.*\}", text, re.DOTALL)
|
| 49 |
+
if m:
|
| 50 |
+
try:
|
| 51 |
+
obj = json.loads(m.group(0))
|
| 52 |
+
if isinstance(obj.get("correct"), bool):
|
| 53 |
+
return obj["correct"]
|
| 54 |
+
except Exception:
|
| 55 |
+
pass
|
| 56 |
+
low = text.lower()
|
| 57 |
+
if "correct\": false" in low or "correct: false" in low or "incorrect" in low:
|
| 58 |
+
return False
|
| 59 |
+
if "correct\": true" in low or "correct: true" in low:
|
| 60 |
+
return True
|
| 61 |
+
return None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
async def judge_correct(preset, judge_model: str, task: str,
                        files: dict[str, str], final: str) -> bool:
    """Return True if the solution likely satisfies the task, False on a clear defect.

    Accepts (True) on empty files, judge error, or unparseable reply.

    The judge model is sent the task, the produced files (via `_files_block`)
    and the first 500 characters of the agent's final message, at temperature
    0, using the preset's `base_url` and `api_key`.
    """
    if not files:
        return True

    claim = (final or "")[:500]
    user = (
        f"TASK:\n{task}\n\nFILES:\n{_files_block(files)}\n\n"
        f"AGENT'S FINAL CLAIM:\n{claim}\n\n"
        "Is the solution correct and complete for the task? Reply with JSON only."
    )
    try:
        client = lf.AsyncForgeClient(
            base_url=preset.base_url, api_key=preset.api_key, default_model=judge_model,
        )
        conversation = [
            {"role": "system", "content": _SYSTEM},
            {"role": "user", "content": user},
        ]
        resp = await client.complete(
            messages=conversation, model=judge_model, temperature=0.0,
        )
        content = resp["choices"][0]["message"].get("content", "") or ""
    except Exception:
        # Judge unavailable -> don't block the accept.
        return True

    verdict = _parse(content)
    if verdict is None:
        return True
    return verdict
|
engine/live_run.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Live polling helper for Gradio streaming updates."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import asyncio
|
| 5 |
+
from collections.abc import AsyncIterator, Awaitable, Callable
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
from typing import Any, TypeVar
|
| 8 |
+
|
| 9 |
+
from .agent import SmallCodeAgent, Step
|
| 10 |
+
from .trace_collector import TraceEvent
|
| 11 |
+
|
| 12 |
+
T = TypeVar("T")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class LiveFrame:
    """One snapshot of a run, yielded to the streaming consumer."""

    # Agent steps; the mid-run snapshot helper in this module leaves this empty.
    steps: list[Step] = field(default_factory=list)
    # Trace events copied from the agent's trace collector.
    events: list[TraceEvent] = field(default_factory=list)
    # Workspace contents as a path -> text mapping.
    files: dict[str, str] = field(default_factory=dict)
    # True only on the final frame of a run.
    done: bool = False
    # Whatever the awaited coroutine returned (final frame only).
    result: Any = None
    # Optional raw event payload; not populated by the helpers in this module.
    raw_event: dict | None = None
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
async def run_with_live_updates(
    coro: Awaitable[T],
    agent: SmallCodeAgent,
    *,
    poll_interval: float = 0.35,
) -> AsyncIterator[LiveFrame]:
    """Yield snapshots while `coro` runs, then a final frame with the result.

    `coro` is scheduled as a background task. A mid-run frame is yielded every
    `poll_interval` seconds until the task finishes, followed by one final
    frame carrying the task's result. If the task raised, that exception
    propagates out of the generator instead of a final frame.

    The background task is cancelled whenever the generator is abandoned
    before the task finished: on cancellation of the consumer, on the consumer
    closing the generator early (`aclose()` / GeneratorExit), or on any other
    exception thrown in at a yield point. Without this the task would keep
    running with nobody left to read its frames.
    """
    task = asyncio.create_task(coro)
    try:
        while not task.done():
            yield _live_snapshot(agent)
            await asyncio.sleep(poll_interval)
        result = await task
        yield _final_snapshot(agent, result=result)
    finally:
        # No-op on the normal path (task already done); otherwise stop the run.
        if not task.done():
            task.cancel()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
async def stream_live(
    make_coro: Callable[[], Awaitable[T]],
    get_agent: Callable[[], SmallCodeAgent | None],
    *,
    poll_interval: float = 0.35,
) -> AsyncIterator[LiveFrame]:
    """Like run_with_live_updates but agent may appear only after coro starts.

    `make_coro()` is called once to create the coroutine, which is scheduled as
    a background task. On every poll `get_agent()` is consulted: while it
    returns None an empty `LiveFrame` is yielded, otherwise a mid-run snapshot.
    The last frame has `done=True` and the task's result; it is built from the
    agent if one exists by then.

    The background task is cancelled whenever the generator is abandoned
    before the task finished (consumer cancelled, generator closed early, or
    an exception thrown in at a yield point), so an orphaned run cannot leak.
    """
    task = asyncio.create_task(make_coro())
    try:
        while not task.done():
            agent = get_agent()
            yield _live_snapshot(agent) if agent is not None else LiveFrame()
            await asyncio.sleep(poll_interval)
        result = await task
        agent = get_agent()
        if agent is not None:
            yield _final_snapshot(agent, result=result)
        else:
            yield LiveFrame(done=True, result=result)
    finally:
        # No-op on the normal path (task already done); otherwise stop the run.
        if not task.done():
            task.cancel()
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _live_snapshot(agent: SmallCodeAgent) -> LiveFrame:
    """Build a frame for a run that is still in flight.

    IMPORTANT: the LiteForge agent object (history/state) must not be touched
    during a run. The Rust ToolCallingAgent is not reentrant and `run()` holds
    an internal lock for its whole duration, so `current_steps()` would
    deadlock. Only two sources are read here: the trace collector (a plain
    Python list the wrapped tools append to) and the workspace files (plain
    disk reads). `steps` is therefore always empty mid-run.
    """
    trace_events = agent.trace_collector.snapshot()
    workspace = agent.files()
    return LiveFrame(steps=[], events=trace_events, files=workspace, done=False)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _final_snapshot(agent: SmallCodeAgent, *, result: Any = None) -> LiveFrame:
    """Build the closing frame once `run()` has returned.

    At this point it is safe to read the agent itself, so the frame includes
    `current_steps()` alongside the trace events and workspace files, and is
    marked `done=True` with `result` attached.
    """
    finished_steps = agent.current_steps()
    trace_events = agent.trace_collector.snapshot()
    workspace = agent.files()
    return LiveFrame(
        steps=finished_steps,
        events=trace_events,
        files=workspace,
        done=True,
        result=result,
    )
|
engine/playwright_runner.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Subprocess runner: check a model-built web app in headless Chromium.
|
| 2 |
+
|
| 3 |
+
A Playwright/Chromium sibling of engine/browser_runner.py (Firefox/Selenium),
|
| 4 |
+
with the IDENTICAL JSON contract so engine/browsercheck.py can try whichever
|
| 5 |
+
real browser is installed. Invoked as `python engine/playwright_runner.py
|
| 6 |
+
<app.html>` — never imported (keeps Playwright out of the Gradio process and
|
| 7 |
+
isolates a browser crash).
|
| 8 |
+
|
| 9 |
+
It loads the app in the EXACT same `srcdoc` + `sandbox` wrapper as the live
|
| 10 |
+
preview (engine/preview.py), injects an error collector before the app's own
|
| 11 |
+
scripts, clicks every button, exercises the keyboard, and reports uncaught JS
|
| 12 |
+
errors — the hard failure signal that lets the router escalate a broken build.
|
| 13 |
+
|
| 14 |
+
Output: one JSON line {ok, errors, buttons, clicked}. Exit 3 only when Chromium
|
| 15 |
+
itself can't run (Playwright missing or the browser binary not downloaded), so
|
| 16 |
+
the caller falls back to Firefox, then jsdom.
|
| 17 |
+
"""
|
| 18 |
+
import json
|
| 19 |
+
import os
|
| 20 |
+
import re
|
| 21 |
+
import sys
|
| 22 |
+
import tempfile
|
| 23 |
+
|
| 24 |
+
PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"
|
| 25 |
+
|
| 26 |
+
# Same collector browser_runner.py injects: catches errors thrown during load
|
| 27 |
+
# (the "script ran before its element / undefined function" class).
|
| 28 |
+
_CAPTURE = ("<script>(function(){window.__errs=[];"
|
| 29 |
+
"window.addEventListener('error',function(e){try{__errs.push('uncaught: '+"
|
| 30 |
+
"((e.error&&e.error.message)||e.message||String(e)))}catch(_){}} ,true);"
|
| 31 |
+
"window.addEventListener('unhandledrejection',function(e){try{__errs.push("
|
| 32 |
+
"'rejection: '+((e.reason&&e.reason.message)||e.reason))}catch(_){}});})();</script>")
|
| 33 |
+
|
| 34 |
+
_CLICK_SELECTOR = "button, [onclick], input[type=button], input[type=submit]"
|
| 35 |
+
_KEYBOARD_JS = (
|
| 36 |
+
"['ArrowUp','ArrowDown','ArrowLeft','ArrowRight',' '].forEach(function(k){"
|
| 37 |
+
"var c={key:k,keyCode:k===' '?32:({ArrowUp:38,ArrowDown:40,ArrowLeft:37,ArrowRight:39}[k]),bubbles:true};"
|
| 38 |
+
"document.dispatchEvent(new KeyboardEvent('keydown',c));"
|
| 39 |
+
"window.dispatchEvent(new KeyboardEvent('keydown',c));});")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _escape_srcdoc(doc: str) -> str:
|
| 43 |
+
return doc.replace("&", "&").replace('"', """)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _inject_capture(app_html: str) -> str:
    """Insert the error-collector script as early in the document as possible.

    The collector goes directly after the opening `<head>` tag, else directly
    after the opening `<html>` tag, else at the very start of the document.

    The `\\b` after the tag name keeps `<head` from matching `<header ...>`
    (and `<html` from matching a longer tag name). Otherwise a head-less
    document would get the collector injected mid-body, after scripts whose
    load-time errors it is supposed to catch.
    """
    for opening_tag in (r"<head\b[^>]*>", r"<html\b[^>]*>"):
        m = re.search(opening_tag, app_html, re.I)
        if m:
            return app_html[:m.end()] + _CAPTURE + app_html[m.end():]
    return _CAPTURE + app_html
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _emit(obj: dict) -> None:
|
| 57 |
+
sys.stdout.write(json.dumps(obj) + "\n")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def main(path: str) -> int:
    """Load the app at `path` in headless Chromium and report uncaught JS errors.

    The app HTML gets the error collector injected, is wrapped in a sandboxed
    `srcdoc` iframe on a temporary host page, and is then exercised: up to 25
    clickable elements are force-clicked and a set of keydown events is
    dispatched. Errors recorded in the frame's `window.__errs` are reported.

    Emits exactly one JSON line on stdout:
      - `{ok, errors, buttons, clicked}` with exit code 0 when the check ran;
      - `{ok: None, infra: ...}` with exit code 3 when Playwright cannot be
        imported, Chromium cannot be launched, the iframe cannot be entered,
        or the run itself fails.

    Raises:
        OSError: if `path` cannot be read or the host page cannot be written
            (not converted into an infra result).
    """
    try:
        from playwright.sync_api import sync_playwright
    except Exception as e:  # noqa: BLE001
        _emit({"ok": None, "infra": f"playwright import failed: {e}"})
        return 3

    with open(path, encoding="utf-8") as f:
        app_html = f.read()

    host = ('<!doctype html><meta charset="utf-8"><body style="margin:0">'
            f'<iframe id="app" style="width:100%;height:600px;border:0" '
            f'sandbox="{PREVIEW_SANDBOX}" '
            f'srcdoc="{_escape_srcdoc(_inject_capture(app_html))}"></iframe>')

    errors: list[str] = []
    buttons = clicked = 0
    # TemporaryDirectory removes the host page on every exit path, including
    # the early `return 3`s below, so repeated checks don't litter the temp dir.
    with tempfile.TemporaryDirectory(prefix="pwhost-") as host_dir:
        host_path = os.path.join(host_dir, "host.html")
        with open(host_path, "w", encoding="utf-8") as f:
            f.write(host)

        try:
            with sync_playwright() as p:
                try:
                    browser = p.chromium.launch(
                        headless=True,
                        args=["--allow-file-access-from-files", "--no-sandbox"])
                except Exception as e:  # noqa: BLE001
                    _emit({"ok": None, "infra": f"chromium launch failed: {str(e)[:200]}"})
                    return 3
                try:
                    page = browser.new_page()
                    page.set_default_timeout(4000)
                    page.goto("file://" + host_path, timeout=20000)
                    handle = page.wait_for_selector("#app", timeout=5000)
                    frame = handle.content_frame()
                    if frame is None:
                        _emit({"ok": None, "infra": "could not enter app iframe"})
                        return 3
                    page.wait_for_timeout(300)  # let scripts settle
                    els = frame.query_selector_all(_CLICK_SELECTOR)
                    buttons = len(els)
                    for el in els[:25]:
                        try:
                            # Enable first so disabled controls still run their handlers.
                            el.evaluate("e => { e.disabled = false; }")
                            el.click(force=True, timeout=1000)
                            clicked += 1
                        except Exception:
                            pass  # handler errors land in __errs
                    try:
                        frame.evaluate(_KEYBOARD_JS)
                    except Exception:
                        pass
                    page.wait_for_timeout(300)  # surface late/timer errors
                    try:
                        errors = frame.evaluate("() => window.__errs || []") or []
                    except Exception:
                        errors = []
                finally:
                    try:
                        browser.close()
                    except Exception:
                        pass
        except Exception as e:  # noqa: BLE001
            _emit({"ok": None, "infra": f"playwright run failed: {str(e)[:200]}"})
            return 3

    # Bound the report: at most 20 errors of at most 400 characters each.
    errors = [str(e)[:400] for e in errors][:20]
    _emit({"ok": len(errors) == 0, "errors": errors, "buttons": buttons, "clicked": clicked})
    return 0
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
if __name__ == "__main__":
    # A missing argument used to surface as an IndexError traceback; print a
    # usage line instead. Extra arguments are still ignored, as before.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: python engine/playwright_runner.py <app.html>\n")
        sys.exit(2)
    sys.exit(main(sys.argv[1]))
|
engine/preflight.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Startup reachability check for the active backend.
|
| 2 |
+
|
| 3 |
+
The whole point of smolcode is that one OpenAI-compatible endpoint (chosen by
|
| 4 |
+
the preset) serves the model ladder. If that endpoint is unreachable — hal is
|
| 5 |
+
off the VPN, the laptop Ollama isn't running — the agent loop will hang or fail
|
| 6 |
+
deep inside a request with no obvious cause. Worse, a silent default to the
|
| 7 |
+
wrong preset (the historical "it's using my laptop, not hal" bug) looks fine
|
| 8 |
+
until you notice the weak single-tier model.
|
| 9 |
+
|
| 10 |
+
`preflight()` makes that visible: it prints which preset/endpoint is active and
|
| 11 |
+
probes `{base_url}/models` once at startup. On success it prints a one-line
|
| 12 |
+
banner with the model count; on failure it prints a loud warning naming the dead
|
| 13 |
+
URL and which *other* presets are reachable right now, so the fix is obvious.
|
| 14 |
+
|
| 15 |
+
It never raises and never blocks the app from starting — it only informs.
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import json
|
| 20 |
+
import sys
|
| 21 |
+
import urllib.error
|
| 22 |
+
import urllib.request
|
| 23 |
+
|
| 24 |
+
from .config import Preset, _PRESETS, load_preset
|
| 25 |
+
|
| 26 |
+
_TIMEOUT = 4.0
|
| 27 |
+
|
| 28 |
+
# ANSI: bold, green ok, red warn — degrade to plain text when not a TTY.
|
| 29 |
+
_BOLD, _GREEN, _RED, _DIM, _RESET = "\033[1m", "\033[32m", "\033[31m", "\033[2m", "\033[0m"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _color(s: str, code: str) -> str:
    """Wrap `s` in the ANSI `code` plus a reset when stderr is a TTY; else return `s`."""
    if not sys.stderr.isatty():
        return s
    return f"{code}{s}{_RESET}"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def list_models(base_url: str, timeout: float = _TIMEOUT,
                api_key: str | None = None) -> list[str]:
    """Fetch model IDs from {base_url}/models. Returns [] on failure.

    Args:
        base_url: Endpoint root; a trailing slash is tolerated.
        timeout: Socket timeout in seconds.
        api_key: Optional bearer token. When given it is sent as an
            `Authorization` header, the same way `probe` does, so endpoints
            that require auth return their model list instead of an error.

    Returns:
        The sorted `id` values of the entries under the response's `data`
        list. Returns [] when the request fails, the status is not 200, the
        body is not JSON, or the payload does not have that shape.
    """
    import http.client  # local: only needed for the exception class below

    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return []
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError/TimeoutError, ValueError covers
        # JSONDecodeError and malformed URLs. HTTPException (e.g. a truncated
        # body) is not an OSError and would otherwise escape.
        return []

    models = data.get("data") if isinstance(data, dict) else None
    if not isinstance(models, list):
        return []
    return sorted(str(m["id"]) for m in models if isinstance(m, dict) and m.get("id"))
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def probe(base_url: str, timeout: float = _TIMEOUT,
          api_key: str | None = None) -> tuple[bool, int | None, str | None]:
    """Return (reachable, model_count, error). Never raises.

    Sends the bearer token so endpoints that require auth (e.g. a vLLM server
    started with --api-key) report reachable instead of a spurious 401.

    Returns:
        `(True, count, None)` on a 200 response with a JSON body, where
        `count` is the length of the payload's `data` list, or None if the
        payload has no such list. Otherwise `(False, None, message)`, where
        `message` is `"HTTP <code> <reason>"` for HTTP error statuses and the
        underlying reason or exception text for everything else.
    """
    import http.client  # local: only needed for the exception class below

    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return False, None, f"HTTP {resp.status}"
            data = json.loads(resp.read().decode("utf-8", "replace"))
            models = data.get("data") if isinstance(data, dict) else None
            count = len(models) if isinstance(models, list) else None
            return True, count, None
    except urllib.error.HTTPError as e:
        # urlopen raises for 4xx/5xx, so the status branch above never sees
        # them; keep the status code in the message.
        return False, None, f"HTTP {e.code} {e.reason}".rstrip()
    except urllib.error.URLError as e:
        return False, None, str(getattr(e, "reason", e))
    except (TimeoutError, OSError, ValueError, http.client.HTTPException) as e:
        # HTTPException (e.g. a truncated body) is not an OSError; catching it
        # is what makes "never raises" hold for malformed responses.
        return False, None, str(e) or type(e).__name__
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _reachable_alternatives(active_key: str) -> list[str]:
    """List every known preset other than `active_key` whose endpoint answers now.

    Each entry is formatted as `"<key> (<base_url>)"`. Presets are probed one
    after another with a 2-second timeout each.
    """
    return [
        f"{key} ({preset.base_url})"
        for key, preset in _PRESETS.items()
        if key != active_key
        and probe(preset.base_url, timeout=2.0, api_key=preset.api_key)[0]
    ]
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def preflight(preset: Preset | None = None) -> bool:
    """Report the active backend on stderr and say whether it is reachable.

    When `preset` is falsy it is loaded via `load_preset()`. A reachable
    endpoint gets a banner naming the preset, URL, model count and tiers. An
    unreachable one gets a warning with the probe error, followed by whichever
    other presets answer right now. Returns True if reachable, False otherwise.
    """
    preset = preset or load_preset()
    tiers = " · ".join(f"{t.name}:{t.model}" for t in preset.tiers)
    ok, count, err = probe(preset.base_url, api_key=preset.api_key)

    if not ok:
        report = [
            _color("⚠ smolcode backend UNREACHABLE", _BOLD + _RED),
            f" preset={preset.key} · {preset.base_url} · {err}",
            f" tiers: {tiers}",
        ]
        alts = _reachable_alternatives(preset.key)
        if alts:
            report.append(" reachable instead: " + ", ".join(alts))
            hint = " → set SMALLCODE_PRESET to one of the above, or fix the endpoint."
        else:
            hint = " → no known preset endpoint is answering right now."
        report.append(_color(hint, _DIM))
        print("\n".join(report), file=sys.stderr)
        return False

    # The payload may lack a model list, in which case only reachability is known.
    models = "reachable" if count is None else f"{count} models"
    banner = (
        f"smolcode backend: preset={preset.key} · {preset.base_url} · {models}"
        f"\n tiers: {tiers}"
    )
    print(_color(banner, _BOLD + _GREEN), file=sys.stderr)
    return True
|
engine/preview.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Live-preview rendering for smolbuilder.
|
| 2 |
+
|
| 3 |
+
Turns the agent's workspace (a `path -> content` dict of a small static web app)
|
| 4 |
+
into a single self-contained HTML document, then into a sandboxed iframe that
|
| 5 |
+
Gradio can drop straight into a `gr.HTML`. This is the "Replit/Lovable" preview:
|
| 6 |
+
what the tiny model just built, running live in the browser.
|
| 7 |
+
|
| 8 |
+
Deliberately dependency-free (stdlib only) so it can be unit-tested without
|
| 9 |
+
Gradio or the Rust engine, and so the rendering logic stays trivially auditable.
|
| 10 |
+
|
| 11 |
+
Design choices:
|
| 12 |
+
- We inline locally-referenced `<link rel=stylesheet>` and `<script src=...>`
|
| 13 |
+
from sibling files, so a model that splits style.css / script.js out of
|
| 14 |
+
index.html still previews correctly — but we never touch absolute/CDN URLs.
|
| 15 |
+
- The iframe is loaded via `srcdoc=` (not a `data:` URI). A `data:` URL has an
|
| 16 |
+
*opaque origin*, where `localStorage`/`sessionStorage` throw `SecurityError` —
|
| 17 |
+
so any app that persists state (a notepad, a to-do list) dies on load before it
|
| 18 |
+
can wire up its buttons. A `srcdoc` frame inherits the embedder's (Gradio's)
|
| 19 |
+
origin, so storage and scripts work the way the model expects.
|
| 20 |
+
- SECURITY TRADE-OFF: `sandbox="allow-scripts allow-same-origin ..."` is required
|
| 21 |
+
for storage to work, but that combination also lets the framed (model-written)
|
| 22 |
+
code reach the parent page. This is acceptable for a *local, single-user*
|
| 23 |
+
builder — the framed code is the same user's own request, on a page holding no
|
| 24 |
+
one else's secrets. Do NOT reuse this wrapper to embed untrusted third-party
|
| 25 |
+
apps on an origin that holds other users' data; the isolation-preserving fix is
|
| 26 |
+
to serve the preview from a separate origin (out of scope here).
|
| 27 |
+
- The same wrapper (`PREVIEW_SANDBOX`/`_escape_srcdoc`) is reused by the headless
|
| 28 |
+
verification check (engine/browsercheck.py) so the agent tests *exactly* what
|
| 29 |
+
the user sees.
|
| 30 |
+
"""
|
| 31 |
+
from __future__ import annotations
|
| 32 |
+
|
| 33 |
+
import html
|
| 34 |
+
import re
|
| 35 |
+
|
| 36 |
+
# Sandbox flags shared by the live preview and the verification check.
|
| 37 |
+
# allow-same-origin is required so srcdoc inherits the parent origin and web
|
| 38 |
+
# storage works; combined with allow-scripts it weakens isolation (see docstring).
|
| 39 |
+
PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"
|
| 40 |
+
|
| 41 |
+
# Files we know how to treat as the app entrypoint, best first.
|
| 42 |
+
_ENTRY_CANDIDATES = ("index.html", "main.html", "app.html")
|
| 43 |
+
|
| 44 |
+
_LINK_RE = re.compile(
|
| 45 |
+
r"""<link\b[^>]*?\brel\s*=\s*['"]?stylesheet['"]?[^>]*?>""", re.I | re.S)
|
| 46 |
+
_SCRIPT_SRC_RE = re.compile(
|
| 47 |
+
r"""<script\b[^>]*?\bsrc\s*=\s*['"]([^'"]+)['"][^>]*?>\s*</script>""", re.I | re.S)
|
| 48 |
+
_HREF_RE = re.compile(r"""\bhref\s*=\s*['"]([^'"]+)['"]""", re.I)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def find_entry(files: dict[str, str]) -> str | None:
    """Choose which HTML file to preview, or None when the workspace has none.

    A well-known entry name (matched case-insensitively against the full path)
    wins, in `_ENTRY_CANDIDATES` order. Otherwise the `.html` file with the
    fewest path separators is used, ties broken alphabetically.
    """
    by_lower: dict[str, str] = {}
    for path in files:
        by_lower[path.lower()] = path

    for name in _ENTRY_CANDIDATES:
        if name in by_lower:
            return by_lower[name]

    # Fall back to any .html file (shallowest path wins for determinism).
    html_paths = [path for path in files if path.lower().endswith(".html")]
    return min(html_paths, key=lambda path: (path.count("/"), path), default=None)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _is_local(url: str) -> bool:
|
| 64 |
+
"""True for a same-app relative reference we can inline (not a CDN/data URI)."""
|
| 65 |
+
u = url.strip()
|
| 66 |
+
if not u:
|
| 67 |
+
return False
|
| 68 |
+
return not re.match(r"^(?:[a-z]+:)?//|^https?:|^data:|^mailto:|^#", u, re.I)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _lookup(files: dict[str, str], ref: str) -> str | None:
|
| 72 |
+
"""Resolve a relative href/src against the workspace file map."""
|
| 73 |
+
ref = ref.split("?", 1)[0].split("#", 1)[0].lstrip("./").lstrip("/")
|
| 74 |
+
if ref in files:
|
| 75 |
+
return files[ref]
|
| 76 |
+
# Case-insensitive / basename fallback so '/style.css' finds 'style.css'.
|
| 77 |
+
base = ref.rsplit("/", 1)[-1].lower()
|
| 78 |
+
for path, content in files.items():
|
| 79 |
+
if path.lower() == ref.lower() or path.rsplit("/", 1)[-1].lower() == base:
|
| 80 |
+
return content
|
| 81 |
+
return None
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def inline_app(files: dict[str, str]) -> str:
    """Return one self-contained HTML document for the app in `files`.

    Locally referenced stylesheets (`<link rel=stylesheet href=...>`) and
    scripts (`<script src=...></script>`) are replaced by inline `<style>` /
    `<script>` elements holding the sibling file's content. Non-local
    references, and references that resolve to no workspace file, are left
    untouched.

    If there's no HTML entrypoint, a placeholder page listing the workspace
    files is returned instead.
    """
    entry = find_entry(files)
    if entry is None:
        return _placeholder(files)

    doc = files[entry]

    def _defuse(body: str, tag: str) -> str:
        # Stop the inlined body from closing its own element early. HTML tag
        # names are case-insensitive, so "</SCRIPT>" ends a script too; a
        # backslash before the slash is inert in JS strings and is a valid
        # CSS escape.
        return re.sub(rf"</({tag})", lambda m: "<\\/" + m.group(1), body, flags=re.I)

    def _inline_css(match: re.Match) -> str:
        tag = match.group(0)
        href_m = _HREF_RE.search(tag)
        if not href_m or not _is_local(href_m.group(1)):
            return tag
        css = _lookup(files, href_m.group(1))
        if css is None:
            return tag
        return f"<style>\n{_defuse(css, 'style')}\n</style>"

    def _inline_js(match: re.Match) -> str:
        src = match.group(1)
        if not _is_local(src):
            return match.group(0)
        js = _lookup(files, src)
        if js is None:
            return match.group(0)
        return f"<script>\n{_defuse(js, 'script')}\n</script>"

    doc = _LINK_RE.sub(_inline_css, doc)
    doc = _SCRIPT_SRC_RE.sub(_inline_js, doc)
    return doc
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _escape_srcdoc(doc: str) -> str:
|
| 123 |
+
"""Escape an HTML document for a double-quoted `srcdoc="..."` attribute.
|
| 124 |
+
|
| 125 |
+
Only `&` and `"` are significant inside a double-quoted attribute value, and
|
| 126 |
+
`&` must go first (so the `&` we introduce for `"` isn't re-escaped). `<`,
|
| 127 |
+
`>` and even a literal `</script>` are FINE here — the parser is in
|
| 128 |
+
attribute-value state, not script-data state — so we must NOT touch them
|
| 129 |
+
(html.escape would corrupt the rendered document).
|
| 130 |
+
"""
|
| 131 |
+
return doc.replace("&", "&").replace('"', """)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def preview_iframe(files: dict[str, str], *, height: int = 540) -> str:
    """Build the sandboxed `srcdoc` iframe markup for the app, ready for `gr.HTML`.

    The workspace is first flattened into one document, escaped for use as an
    attribute value, and embedded in an iframe `height` pixels tall.
    """
    document = inline_app(files)
    srcdoc = _escape_srcdoc(document)
    style = (
        f"width:100%;height:{height}px;border:0;border-radius:12px;"
        "background:#fff;box-shadow:0 1px 0 rgba(0,0,0,.06)"
    )
    attributes = [
        'title="smolbuilder preview"',
        f'style="{style}"',
        f'sandbox="{PREVIEW_SANDBOX}"',
        f'srcdoc="{srcdoc}"',
    ]
    return "<iframe " + " ".join(attributes) + "></iframe>"
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def _placeholder(files: dict[str, str]) -> str:
|
| 147 |
+
listing = "".join(
|
| 148 |
+
f"<li><code>{html.escape(p)}</code></li>" for p in sorted(files)
|
| 149 |
+
) or "<li><em>workspace is empty</em></li>"
|
| 150 |
+
return (
|
| 151 |
+
"<!doctype html><html><head><meta charset='utf-8'>"
|
| 152 |
+
"<style>body{font:15px/1.5 system-ui,sans-serif;color:#475569;"
|
| 153 |
+
"background:#f8fafc;padding:2rem}h2{color:#7c3aed;margin:.2rem 0 1rem}"
|
| 154 |
+
"code{background:#ede9fe;color:#5b21b6;padding:1px 6px;border-radius:6px}"
|
| 155 |
+
"</style></head><body>"
|
| 156 |
+
"<h2>No preview yet</h2>"
|
| 157 |
+
"<p>smolbuilder previews the app's <code>index.html</code>. "
|
| 158 |
+
"Describe a web app on the left and it'll appear here, live.</p>"
|
| 159 |
+
f"<p>Files in the workspace:</p><ul>{listing}</ul>"
|
| 160 |
+
"</body></html>"
|
| 161 |
+
)
|
engine/route_clf.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Learned routing classifier — the confidence-gated upgrade to the regex router.
|
| 2 |
+
|
| 3 |
+
smolcode's router historically guesses two things from cheap regex
|
| 4 |
+
([router.classify_specialty][engine.router.classify_specialty] and
|
| 5 |
+
[router.classify_tier][engine.router.classify_tier]). This module adds tiny
|
| 6 |
+
learned classifiers (SetFit backbone + light head, exported to int8 ONNX) that
|
| 7 |
+
predict, per task:
|
| 8 |
+
|
| 9 |
+
- **specialty** — which fine-tune family (16-way)
|
| 10 |
+
- **tier** — a difficulty bucket -> the *starting* rung in the ladder
|
| 11 |
+
- **escalate** — whether the task will likely need a bigger model
|
| 12 |
+
|
| 13 |
+
Thinking level (off/low/high/xtra) is *derived* from (tier, escalate), not a
|
| 14 |
+
separate model.
|
| 15 |
+
|
| 16 |
+
The design is deliberately "pure upside": every prediction is gated by a
|
| 17 |
+
calibrated confidence threshold. Below threshold — or if onnxruntime / the model
|
| 18 |
+
artifacts aren't present at all — the field **falls back to the existing regex**,
|
| 19 |
+
so we can never route worse than the status quo and rules-confident cases stay
|
| 20 |
+
100% deterministic.
|
| 21 |
+
|
| 22 |
+
Heavy deps (onnxruntime, tokenizers, numpy) are imported lazily; if any is
|
| 23 |
+
missing the classifier simply abstains everywhere and the regex drives routing.
|
| 24 |
+
"""
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import functools
|
| 28 |
+
import json
|
| 29 |
+
import os
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
|
| 32 |
+
from pydantic import BaseModel, Field
|
| 33 |
+
|
| 34 |
+
from .router import classify_specialty, classify_tier
|
| 35 |
+
|
| 36 |
+
# Difficulty buckets the tier head predicts; mapped onto the ladder by
|
| 37 |
+
# start = min(bucket, n_tiers - 1) — exactly classify_tier's clamping contract,
|
| 38 |
+
# so the head stays ladder-length-agnostic.
|
| 39 |
+
TIER_BUCKETS = 3
|
| 40 |
+
|
| 41 |
+
# Ordered thinking levels (matches smolcode-cli/src/router.rs Think enum).
|
| 42 |
+
THINK_LEVELS = ("off", "low", "high", "xtra")
|
| 43 |
+
|
| 44 |
+
# Default per-head confidence thresholds; overridden by router_clf.json's
|
| 45 |
+
# "thresholds" map written at export/calibration time.
|
| 46 |
+
_DEFAULT_TAU = {"specialty": 0.60, "tier": 0.55, "escalate": 0.65}
|
| 47 |
+
|
| 48 |
+
_DEFAULT_DIR = Path(__file__).resolve().parent.parent / "finetune" / "router_clf" / "onnx"
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class RouteDecision(BaseModel):
    """Typed result of routing one task. `tier` is a start index into the active ladder."""

    # Chosen specialty label.
    specialty: str
    # Start index into the active ladder.
    tier: int
    # Whether escalation is predicted for this task.
    escalate: bool
    # Thinking level.
    think: str
    # Model confidence per field; 0.0 when the field came from regex/default.
    confidences: dict[str, float] = Field(default_factory=dict)
    # Provenance per field, for telemetry/debugging: "model" | "regex" | "default".
    sources: dict[str, str] = Field(default_factory=dict)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _softmax(row): # row: 1-D numpy array
|
| 65 |
+
import numpy as np
|
| 66 |
+
|
| 67 |
+
# If the ONNX head already emits a probability distribution, don't re-normalize
|
| 68 |
+
# (argmax is unaffected either way, but confidence should stay honest).
|
| 69 |
+
if row.min() >= 0.0 and abs(float(row.sum()) - 1.0) < 1e-3:
|
| 70 |
+
return row
|
| 71 |
+
e = np.exp(row - row.max())
|
| 72 |
+
return e / e.sum()
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class _OnnxHead:
|
| 76 |
+
"""A single ONNX sequence-classification head + its tokenizer and label map."""
|
| 77 |
+
|
| 78 |
+
def __init__(self, session, tokenizer, labels: list[str], input_names: set[str],
|
| 79 |
+
max_len: int = 128) -> None:
|
| 80 |
+
self.session = session
|
| 81 |
+
self.tokenizer = tokenizer
|
| 82 |
+
self.labels = labels
|
| 83 |
+
self.input_names = input_names
|
| 84 |
+
self.max_len = max_len
|
| 85 |
+
|
| 86 |
+
@classmethod
|
| 87 |
+
def try_load(cls, dpath: Path) -> "_OnnxHead | None":
|
| 88 |
+
"""Load model.onnx + tokenizer.json + labels.json from a dir, or None."""
|
| 89 |
+
model_file, tok_file, labels_file = (
|
| 90 |
+
dpath / "model.onnx", dpath / "tokenizer.json", dpath / "labels.json",
|
| 91 |
+
)
|
| 92 |
+
if not (model_file.exists() and tok_file.exists() and labels_file.exists()):
|
| 93 |
+
return None
|
| 94 |
+
import onnxruntime as ort
|
| 95 |
+
from tokenizers import Tokenizer
|
| 96 |
+
|
| 97 |
+
sess = ort.InferenceSession(
|
| 98 |
+
str(model_file), providers=["CPUExecutionProvider"],
|
| 99 |
+
)
|
| 100 |
+
tok = Tokenizer.from_file(str(tok_file))
|
| 101 |
+
meta = json.loads(labels_file.read_text())
|
| 102 |
+
labels = meta["labels"] if isinstance(meta, dict) else list(meta)
|
| 103 |
+
max_len = int(meta.get("max_len", 128)) if isinstance(meta, dict) else 128
|
| 104 |
+
input_names = {i.name for i in sess.get_inputs()}
|
| 105 |
+
return cls(sess, tok, labels, input_names, max_len=max_len)
|
| 106 |
+
|
| 107 |
+
def predict(self, text: str) -> tuple[str, float]:
|
| 108 |
+
"""(label, confidence) for the argmax class."""
|
| 109 |
+
import numpy as np
|
| 110 |
+
|
| 111 |
+
enc = self.tokenizer.encode(text)
|
| 112 |
+
ids = enc.ids[: self.max_len]
|
| 113 |
+
mask = [1] * len(ids)
|
| 114 |
+
feed = {
|
| 115 |
+
"input_ids": np.asarray([ids], dtype=np.int64),
|
| 116 |
+
"attention_mask": np.asarray([mask], dtype=np.int64),
|
| 117 |
+
}
|
| 118 |
+
if "token_type_ids" in self.input_names:
|
| 119 |
+
feed["token_type_ids"] = np.zeros((1, len(ids)), dtype=np.int64)
|
| 120 |
+
out = self.session.run(None, feed)[0]
|
| 121 |
+
probs = _softmax(np.asarray(out)[0])
|
| 122 |
+
idx = int(probs.argmax())
|
| 123 |
+
return self.labels[idx], float(probs[idx])
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
class RouteClassifier:
|
| 127 |
+
"""Loads the (optional) ONNX heads and turns a task string into a RouteDecision.
|
| 128 |
+
|
| 129 |
+
Always safe to construct: missing deps or artifacts -> empty `heads`, and every
|
| 130 |
+
prediction abstains to the regex baseline.
|
| 131 |
+
"""
|
| 132 |
+
|
| 133 |
+
def __init__(self, model_dir: str | os.PathLike | None = None) -> None:
|
| 134 |
+
self.model_dir = Path(
|
| 135 |
+
model_dir or os.environ.get("SMALLCODE_ROUTER_CLF_DIR", _DEFAULT_DIR)
|
| 136 |
+
)
|
| 137 |
+
self.heads: dict[str, _OnnxHead] = {}
|
| 138 |
+
self.thresholds = dict(_DEFAULT_TAU)
|
| 139 |
+
self.think_map: dict | None = None
|
| 140 |
+
self._load()
|
| 141 |
+
|
| 142 |
+
def _load(self) -> None:
|
| 143 |
+
try: # the heavy trio — absent in a bare runtime, which is fine.
|
| 144 |
+
import numpy # noqa: F401
|
| 145 |
+
import onnxruntime # noqa: F401
|
| 146 |
+
import tokenizers # noqa: F401
|
| 147 |
+
except Exception:
|
| 148 |
+
return
|
| 149 |
+
cfg_path = self.model_dir / "router_clf.json"
|
| 150 |
+
if cfg_path.exists():
|
| 151 |
+
try:
|
| 152 |
+
cfg = json.loads(cfg_path.read_text())
|
| 153 |
+
self.thresholds.update(cfg.get("thresholds", {}))
|
| 154 |
+
self.think_map = cfg.get("think_map")
|
| 155 |
+
except Exception:
|
| 156 |
+
pass
|
| 157 |
+
for name in ("specialty", "tier", "escalate"):
|
| 158 |
+
try:
|
| 159 |
+
head = _OnnxHead.try_load(self.model_dir / name)
|
| 160 |
+
except Exception:
|
| 161 |
+
head = None
|
| 162 |
+
if head is not None:
|
| 163 |
+
self.heads[name] = head
|
| 164 |
+
|
| 165 |
+
@property
|
| 166 |
+
def available(self) -> bool:
|
| 167 |
+
return bool(self.heads)
|
| 168 |
+
|
| 169 |
+
# --- per-decision helpers (model if confident, else regex/default) --------
|
| 170 |
+
|
| 171 |
+
def pick_specialty(self, task: str, specialties=None) -> tuple[str, float, str]:
|
| 172 |
+
head = self.heads.get("specialty")
|
| 173 |
+
if head is not None:
|
| 174 |
+
label, conf = head.predict(task)
|
| 175 |
+
ok = conf >= self.thresholds["specialty"]
|
| 176 |
+
if ok and (specialties is None or label in specialties):
|
| 177 |
+
return label, conf, "model"
|
| 178 |
+
return classify_specialty(task), 0.0, "regex"
|
| 179 |
+
|
| 180 |
+
def pick_tier(self, task: str, n_tiers: int) -> tuple[int, float, str]:
|
| 181 |
+
head = self.heads.get("tier")
|
| 182 |
+
if head is not None:
|
| 183 |
+
label, conf = head.predict(task)
|
| 184 |
+
if conf >= self.thresholds["tier"]:
|
| 185 |
+
try:
|
| 186 |
+
bucket = int(label)
|
| 187 |
+
except ValueError:
|
| 188 |
+
bucket = 0
|
| 189 |
+
return min(bucket, max(n_tiers - 1, 0)), conf, "model"
|
| 190 |
+
return classify_tier(task, n_tiers), 0.0, "regex"
|
| 191 |
+
|
| 192 |
+
def pick_escalate(self, task: str) -> tuple[bool, float, str]:
|
| 193 |
+
head = self.heads.get("escalate")
|
| 194 |
+
if head is not None:
|
| 195 |
+
label, conf = head.predict(task)
|
| 196 |
+
if conf >= self.thresholds["escalate"]:
|
| 197 |
+
return label in ("1", "true", "yes", "escalate"), conf, "model"
|
| 198 |
+
# No regex equivalent — default to "no escalation predicted".
|
| 199 |
+
return False, 0.0, "default"
|
| 200 |
+
|
| 201 |
+
def think_for(self, tier: int, n_tiers: int, escalate: bool) -> str:
|
| 202 |
+
if self.think_map:
|
| 203 |
+
key = f"{min(tier, n_tiers - 1)}:{int(escalate)}"
|
| 204 |
+
lvl = self.think_map.get(key) or self.think_map.get(str(tier))
|
| 205 |
+
if lvl in THINK_LEVELS:
|
| 206 |
+
return lvl
|
| 207 |
+
return default_think(tier, n_tiers, escalate)
|
| 208 |
+
|
| 209 |
+
def decide(self, task: str, *, specialties=None, n_tiers: int = 1) -> RouteDecision:
|
| 210 |
+
sp, sp_c, sp_s = self.pick_specialty(task, specialties)
|
| 211 |
+
tier, t_c, t_s = self.pick_tier(task, n_tiers)
|
| 212 |
+
esc, e_c, e_s = self.pick_escalate(task)
|
| 213 |
+
return RouteDecision(
|
| 214 |
+
specialty=sp,
|
| 215 |
+
tier=tier,
|
| 216 |
+
escalate=esc,
|
| 217 |
+
think=self.think_for(tier, n_tiers, esc),
|
| 218 |
+
confidences={"specialty": sp_c, "tier": t_c, "escalate": e_c},
|
| 219 |
+
sources={"specialty": sp_s, "tier": t_s, "escalate": e_s},
|
| 220 |
+
)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def default_think(tier: int, n_tiers: int, escalate: bool) -> str:
    """Monotone default: the higher the start rung, the more thinking.

    Each ladder position maps to a (without escalation, with escalation) pair of
    levels, and a predicted escalation selects the second, stronger one. A ladder
    of one tier or fewer has no position to speak of and uses ("off", "high").
    """
    if n_tiers <= 1:
        relaxed, boosted = "off", "high"
    else:
        # Relative position on the ladder: 0.0 is the bottom rung, 1.0 the top.
        position = tier / (n_tiers - 1)
        if position >= 0.999:
            relaxed, boosted = "high", "xtra"
        elif position >= 0.5:
            relaxed, boosted = "low", "high"
        else:
            relaxed, boosted = "off", "low"
    return boosted if escalate else relaxed
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
@functools.lru_cache(maxsize=1)
def get_classifier() -> RouteClassifier:
    """Return the shared RouteClassifier for this process.

    The cache holds the single instance, so it is constructed on the first call
    and every later call gets that same object back.
    """
    classifier = RouteClassifier()
    return classifier
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def classify_route(task: str, *, specialties=None, n_tiers: int = 1) -> RouteDecision:
    """Public entry point: route `task` with the shared classifier.

    `specialties` and `n_tiers` are forwarded unchanged to
    RouteClassifier.decide, whose RouteDecision is returned.
    """
    classifier = get_classifier()
    return classifier.decide(task, specialties=specialties, n_tiers=n_tiers)
|
engine/router.py
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tiered model router — the "forge-router" pattern.
|
| 2 |
+
|
| 3 |
+
The point of smolcode: don't burn a 32B model on a one-line helper, and don't
|
| 4 |
+
fail a hard task on a 3B. The router picks a *starting* tier from a cheap
|
| 5 |
+
complexity heuristic, runs the agent, then **escalates on failure**: if the
|
| 6 |
+
produced code doesn't actually pass when re-run, it retries the whole task on the
|
| 7 |
+
next-bigger model. The tier that ultimately solved it is surfaced for the UI badge.
|
| 8 |
+
|
| 9 |
+
Each tier is an independent SmallCodeAgent (its own model + fresh workspace), so
|
| 10 |
+
every model in the ladder uses LiteForge's native tool-calling loop — no parsing
|
| 11 |
+
hacks. All tiers are <=32B to stay hackathon-eligible.
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
from collections.abc import AsyncIterator
|
| 18 |
+
from dataclasses import dataclass, field
|
| 19 |
+
|
| 20 |
+
from . import browsercheck
|
| 21 |
+
from .agent import SmallCodeAgent, Step
|
| 22 |
+
from .config import Preset, SpecialistLadder, SpecialistPreset, Tier, load_preset
|
| 23 |
+
from .judge import judge_correct, judge_enabled
|
| 24 |
+
from .live_run import LiveFrame
|
| 25 |
+
from .preview import find_entry, inline_app
|
| 26 |
+
from .trace_collector import TraceEvent
|
| 27 |
+
from .ui_trace import merge_step_metadata
|
| 28 |
+
|
| 29 |
+
# Signals that a task is non-trivial and worth starting higher up the ladder.
|
| 30 |
+
# Leading \b + trailing \w* so stems match their word family
|
| 31 |
+
# (recursi -> recursive, optimi -> optimize, concurren -> concurrency).
|
| 32 |
+
_HARD_HINTS = re.compile(
|
| 33 |
+
r"\b(class|async|thread|concurren|regex|pars|algorithm|optimi|recursi|"
|
| 34 |
+
r"benchmark|refactor|multiple files|api|server|database|sql|decorator|"
|
| 35 |
+
r"generator|data ?structure|graph|tree|dynamic programming)\w*",
|
| 36 |
+
re.I,
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _route_classifier():
|
| 41 |
+
"""The learned routing classifier singleton, or None if unavailable.
|
| 42 |
+
|
| 43 |
+
Importing route_clf pulls in pydantic (and lazily onnxruntime); any failure
|
| 44 |
+
here just means we route with the regex baseline below.
|
| 45 |
+
"""
|
| 46 |
+
try:
|
| 47 |
+
from .route_clf import get_classifier
|
| 48 |
+
return get_classifier()
|
| 49 |
+
except Exception:
|
| 50 |
+
return None
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def classify_tier(task: str, n_tiers: int) -> int:
    """Pick a starting tier index (0 = smallest). Cheap, transparent heuristic.

    One point each for: a task longer than 280 characters, at least one
    hard-task keyword, and at least three hard-task keywords. The score is
    capped at the top rung.

    Args:
        task: the task description.
        n_tiers: number of rungs in the ladder; 1 or fewer always yields 0.

    Returns:
        An index in [0, n_tiers - 1].
    """
    if n_tiers <= 1:
        return 0
    # Scan the task once; both keyword thresholds read the same count.
    hint_count = len(_HARD_HINTS.findall(task))
    score = int(len(task) > 280) + int(hint_count >= 1) + int(hint_count >= 3)
    return min(score, n_tiers - 1)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# --- specialty (language/function) classifier --------------------------------
|
| 68 |
+
# Picks the specialist *family* for a task; classify_tier then picks the size
|
| 69 |
+
# within it. Same cheap, transparent, ordered-regex style as classify_tier.
|
| 70 |
+
# Priority on ties (earlier wins); 'py' is last because it's the safe default.
|
| 71 |
+
# `orchestrate` is first: explicit fan-out language is a strong, specific signal
|
| 72 |
+
# that should win over an incidental language mention.
|
| 73 |
+
_SPECIALTY_ORDER = ("orchestrate", "git", "terraform", "docker", "sql", "powershell",
|
| 74 |
+
"bsd", "rust", "go", "cpp", "java", "dotnet", "csharp", "bash",
|
| 75 |
+
"js", "py")
|
| 76 |
+
|
| 77 |
+
_FENCE_LANG = re.compile(r"```([a-z0-9+#.]+)", re.I)
|
| 78 |
+
_FENCE_TO_SPECIALTY = {
|
| 79 |
+
"python": "py", "py": "py", "pytest": "py",
|
| 80 |
+
"bash": "bash", "sh": "bash", "shell": "bash", "zsh": "bash", "console": "bash",
|
| 81 |
+
"powershell": "powershell", "ps1": "powershell", "pwsh": "powershell",
|
| 82 |
+
"sql": "sql", "psql": "sql", "sqlite": "sql",
|
| 83 |
+
"javascript": "js", "js": "js", "ts": "js", "typescript": "js",
|
| 84 |
+
"jsx": "js", "tsx": "js", "node": "js",
|
| 85 |
+
"go": "go", "golang": "go",
|
| 86 |
+
"rust": "rust", "rs": "rust",
|
| 87 |
+
"cpp": "cpp", "c++": "cpp", "cc": "cpp", "c": "cpp",
|
| 88 |
+
"java": "java",
|
| 89 |
+
"csharp": "csharp", "cs": "csharp",
|
| 90 |
+
"dockerfile": "docker", "docker": "docker",
|
| 91 |
+
"hcl": "terraform", "terraform": "terraform", "tf": "terraform",
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
_EXT_RE = re.compile(r"\.(py|sh|bash|ps1|sql|js|mjs|cjs|ts|tsx|jsx|go|rs|cpp|cc|cxx|"
|
| 95 |
+
r"hpp|java|cs|csproj|tf|dockerfile)\b", re.I)
|
| 96 |
+
_EXT_TO_SPECIALTY = {
|
| 97 |
+
"py": "py", "sh": "bash", "bash": "bash", "ps1": "powershell", "sql": "sql",
|
| 98 |
+
"js": "js", "mjs": "js", "cjs": "js", "ts": "js", "tsx": "js", "jsx": "js",
|
| 99 |
+
"go": "go", "rs": "rust", "cpp": "cpp", "cc": "cpp", "cxx": "cpp", "hpp": "cpp",
|
| 100 |
+
"java": "java", "cs": "csharp", "csproj": "dotnet", "tf": "terraform",
|
| 101 |
+
"dockerfile": "docker",
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
_SPECIALTY_HINTS = {
|
| 105 |
+
# Fan-out / parallel delegation work -> the task_batch specialist.
|
| 106 |
+
"orchestrate": re.compile(r"\b(in parallel|fan ?out|concurrently|task_batch|"
|
| 107 |
+
r"orchestrat|several independent|multiple independent|"
|
| 108 |
+
r"simultaneously|batch of (tasks|jobs))\w*", re.I),
|
| 109 |
+
# NOTE: `staged` requires the trailing 'd' so it does NOT match "stage" inside
|
| 110 |
+
# "multi-stage" (a docker term) — that false-positive misrouted Docker tasks.
|
| 111 |
+
"git": re.compile(r"\b(git|commit|rebase|cherry-?pick|merge conflict|stash|"
|
| 112 |
+
r"\bbranch\b|pull request|\bPR\b|revert|bisect|staged)\w*", re.I),
|
| 113 |
+
"terraform": re.compile(r"\b(terraform|\bhcl\b|\.tf\b|provider|resource block|"
|
| 114 |
+
r"infrastructure as code|\biac\b|tfstate)\w*", re.I),
|
| 115 |
+
"docker": re.compile(r"\b(docker|dockerfile|docker-?compose|container image|"
|
| 116 |
+
r"\bimage\b|\bbuild -t\b|entrypoint)\w*", re.I),
|
| 117 |
+
"sql": re.compile(r"\b(sql|select |insert |update |delete |join|schema|"
|
| 118 |
+
r"\btable\b|\bindex\b|migration|postgres|sqlite|mysql|query)\w*", re.I),
|
| 119 |
+
"powershell": re.compile(r"\b(powershell|pwsh|\.ps1|cmdlet|get-|set-|write-output)\w*", re.I),
|
| 120 |
+
"bsd": re.compile(r"\b(freebsd|openbsd|netbsd|\bbsd\b|pf\.conf|rc\.d|pkg_add)\w*", re.I),
|
| 121 |
+
"rust": re.compile(r"\b(rust|cargo|crate|rustc|\.rs\b|borrow checker|tokio)\w*", re.I),
|
| 122 |
+
"go": re.compile(r"\b(golang|\bgo\b|goroutine|go mod|go test|\.go\b)\w*", re.I),
|
| 123 |
+
"cpp": re.compile(r"\b(c\+\+|cpp|g\+\+|clang|std::|cmake|\.cpp\b|template)\w*", re.I),
|
| 124 |
+
"java": re.compile(r"\b(java|maven|gradle|\bjvm\b|junit|\.java\b)\w*", re.I),
|
| 125 |
+
"dotnet": re.compile(r"\b(\.net|dotnet|nuget|asp\.net|\.csproj|msbuild)\w*", re.I),
|
| 126 |
+
"csharp": re.compile(r"\b(c#|csharp|\blinq\b|\.cs\b|\bxunit\b)\w*", re.I),
|
| 127 |
+
"bash": re.compile(r"\b(shell script|\bbash\b|\bzsh\b|chmod|grep|sed|awk|"
|
| 128 |
+
r"\bpipe\b|cron|stdout|stderr|\$PATH)\w*", re.I),
|
| 129 |
+
"js": re.compile(r"\b(javascript|typescript|node|npm|react|vue|jsx|tsx|"
|
| 130 |
+
r"webpack|vite|eslint|package\.json)\w*", re.I),
|
| 131 |
+
"py": re.compile(r"\b(python|pytest|pandas|numpy|django|flask|pip|venv|"
|
| 132 |
+
r"def |async def|decorator)\w*", re.I),
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def classify_specialty(task: str, *, default: str = "py") -> str:
    """Pick the specialist family key for a task. Cheap, transparent, deterministic.

    Precedence (most explicit signal first):
      1. the SMALLCODE_SPECIALTY environment override (ignored when blank);
      2. the language tag of the first recognised fenced code block;
      3. keyword cues and mentioned file extensions, scored TOGETHER, with ties
         broken by _SPECIALTY_ORDER;
      4. `default`.

    Mirrors classify_tier's style; pairs with it for 2D routing.

    Args:
        task: the task description.
        default: key returned when nothing in the task scores.

    Returns:
        A specialty key (lower-cased when it comes from the env override).
    """
    # Strip before the truthiness check so a whitespace-only override counts as
    # "not set" instead of being returned as an empty specialty.
    forced = (os.environ.get("SMALLCODE_SPECIALTY") or "").strip().lower()
    if forced:
        return forced

    # A fenced code block (```lang) is the single most explicit signal -> hard win.
    for lang in _FENCE_LANG.findall(task):
        s = _FENCE_TO_SPECIALTY.get(lang.lower())
        if s:
            return s

    # Otherwise SCORE keyword cues AND file-extension mentions together, so a strong
    # action signal (e.g. "rebase ... merge conflict") beats an incidental ".py"
    # filename. Ties broken by _SPECIALTY_ORDER (earlier = higher priority).
    scores = {s: len(rx.findall(task)) for s, rx in _SPECIALTY_HINTS.items()}
    for e in _EXT_RE.findall(task):
        s = _EXT_TO_SPECIALTY.get(e.lower())
        if s:
            scores[s] = scores.get(s, 0) + 1
    if not scores:
        return default
    best = max(scores, key=lambda s: (scores[s], -_SPECIALTY_ORDER.index(s)))
    if scores[best] > 0:
        return best

    return default
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@dataclass
class RouteResult:
    """What one routed run produced, plus how the router arrived at it."""

    # Final answer text and the steps taken by the tier that produced it.
    final: str
    steps: list[Step]

    # Tier that produced this result, and the tier the run started on.
    tier_name: str
    tier_model: str
    start_tier: str

    # Count of escalations recorded when this result was built, and whether the
    # output passed the router's verification.
    escalations: int
    verified: bool

    # Specialist family the task was routed to.
    specialty: str = "general"

    # Files produced by the agent, its trace events, and the agent itself.
    files: dict[str, str] = field(default_factory=dict)
    trace_events: list[TraceEvent] = field(default_factory=list)
    agent: SmallCodeAgent | None = None
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _smoke_command(files: list[str]) -> str | None:
|
| 184 |
+
"""A best-effort 'does it build/run (and pass any tests)?' shell command for a
|
| 185 |
+
NON-Python solution, or None if the language isn't recognized. Mirrors the
|
| 186 |
+
per-specialty run commands (finetune/specialties.py) so the router can escalate
|
| 187 |
+
on go/rust/js/sql/… exactly like it does on Python via run_python."""
|
| 188 |
+
def ext(e: str) -> list[str]:
|
| 189 |
+
return [f for f in files if f.endswith(e)]
|
| 190 |
+
|
| 191 |
+
if ext(".go"):
|
| 192 |
+
if any(f.endswith("_test.go") for f in files):
|
| 193 |
+
return "go test ./... 2>&1"
|
| 194 |
+
return "go run . 2>&1 || go run *.go 2>&1"
|
| 195 |
+
if "Cargo.toml" in files:
|
| 196 |
+
return "cargo test -q 2>&1 || cargo build -q 2>&1"
|
| 197 |
+
if ext(".rs"):
|
| 198 |
+
return f"rustc {ext('.rs')[0]} -o /tmp/_smv 2>&1 && /tmp/_smv"
|
| 199 |
+
js = ext(".js") + ext(".mjs") + ext(".cjs") + ext(".ts")
|
| 200 |
+
if "package.json" in files:
|
| 201 |
+
return "npm test --silent 2>&1 || node --test 2>&1"
|
| 202 |
+
if js:
|
| 203 |
+
if any(".test." in f or ".spec." in f for f in js):
|
| 204 |
+
return "node --test 2>&1"
|
| 205 |
+
entry = next((f for f in js if f in ("index.js", "main.js")), js[0])
|
| 206 |
+
return f"node {entry} 2>&1"
|
| 207 |
+
if ext(".sql"):
|
| 208 |
+
return f"sqlite3 :memory: < {ext('.sql')[0]} 2>&1"
|
| 209 |
+
if ext(".cpp") or ext(".cc"):
|
| 210 |
+
srcs = " ".join(ext(".cpp") + ext(".cc"))
|
| 211 |
+
return f"g++ -std=c++17 {srcs} -o /tmp/_smv 2>&1 && /tmp/_smv"
|
| 212 |
+
if ext(".java"):
|
| 213 |
+
main = "Main" if "Main.java" in files else ext(".java")[0][:-5]
|
| 214 |
+
return f"javac *.java 2>&1 && java {main} 2>&1"
|
| 215 |
+
if ext(".sh"):
|
| 216 |
+
return f"bash {ext('.sh')[0]} 2>&1"
|
| 217 |
+
if ext(".tf"):
|
| 218 |
+
return "terraform init -backend=false 2>&1 && terraform validate 2>&1"
|
| 219 |
+
if "Program.cs" in files or ext(".cs"):
|
| 220 |
+
return "dotnet run 2>&1"
|
| 221 |
+
return None
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def _verify(agent: SmallCodeAgent) -> bool | None:
    """Independently re-run the agent's output to see whether it works.

    Returns True or False when something runnable was found and checked, and
    None when nothing could be checked (callers should not escalate merely
    because a signal is missing). The checks are tried in this order:

    1. Python files: run the tests if any file name contains "test", else run
       an entry script (main.py / solution.py, or the first .py file).
    2. A web app with an HTML entry: check the inlined page with browsercheck.
    3. Any other recognised language: smoke-run it through the shell.
    """
    workspace = agent.workspace
    names = workspace.list_files()

    python_files = [name for name in names if name.endswith(".py")]
    if python_files:
        has_tests = any("test" in name.lower() for name in python_files)
        if has_tests:
            return workspace.run_tests().ok
        preferred = [name for name in python_files if name in ("main.py", "solution.py")]
        entry = preferred[0] if preferred else python_files[0]
        return workspace.run_python(path=entry).ok

    # Web app (index.html + browser JS): this check must come BEFORE the shell
    # smoke-run so browser-side JS is never executed with `node`.
    web_files = agent.files()
    if find_entry(web_files) is not None:
        page_ok, _errors = browsercheck.check_html(inline_app(web_files))
        return page_ok

    command = _smoke_command(names)
    if command is None:
        return None
    return workspace.run_shell(command, timeout=90).ok
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def _build_result(agent: SmallCodeAgent, final: str, steps: list[Step], tier: Tier,
                  start_name: str, escalations: int, verified: bool,
                  specialty: str = "general") -> RouteResult:
    """Package one tier's outcome into a RouteResult.

    The trace events are the agent's collected trace snapshot merged with its
    raw history; the files are read from the agent when the result is built.
    """
    trace_snapshot = agent.trace_collector.snapshot()
    merged_events = merge_step_metadata(trace_snapshot, agent.raw_history())
    produced_files = agent.files()
    return RouteResult(
        final=final,
        steps=steps,
        tier_name=tier.name,
        tier_model=tier.model,
        start_tier=start_name,
        escalations=escalations,
        verified=verified,
        specialty=specialty,
        files=produced_files,
        trace_events=merged_events,
        agent=agent,
    )
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
# Difficulty buckets the tier head predicts (matches route_clf.TIER_BUCKETS). Kept as
|
| 266 |
+
# a local constant so router.py imports even when route_clf's deps (pydantic) are
|
| 267 |
+
# absent. The bucket drives BOTH the thinking level and the start-tier clamp, so it's
|
| 268 |
+
# decoupled from the ladder length — think stays meaningful even for a pinned 1-tier
|
| 269 |
+
# preset.
|
| 270 |
+
_THINK_BUCKETS = 3
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
class Router:
    """Routes a task to a starting tier, runs it, and escalates on failure."""

    def __init__(
        self,
        preset: Preset | None = None,
        max_steps: int = 12,
        approval_handler=None,
        workspace_dir: str | None = None,
        think: str = "off",
        yolo: bool = False,
        agent: str = "build",
        size_floor: str | None = None,
    ) -> None:
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.approval_handler = approval_handler
        self.workspace_dir = workspace_dir
        self.think = think
        self.yolo = yolo
        self.agent_name = agent
        # "Auto · <size>": pin the START rung to this specialist size (e.g. "3b").
        # The router still chooses the specialty, and escalation still climbs the
        # ladder from there.
        self.size_floor = size_floor

    async def run(self, task: str) -> RouteResult:
        """Drain run_live and return the RouteResult carried by its final frame."""
        outcome: RouteResult | None = None
        async for frame in self.run_live(task):
            if frame.done and isinstance(frame.result, RouteResult):
                outcome = frame.result
        assert outcome is not None
        return outcome

    def _ladder_for(self, task: str, specialty: str | None = None) -> SpecialistLadder:
        """Size ladder for the task's specialty.

        A preset that is not a SpecialistPreset has a single generic ladder.
        Otherwise `specialty` (possibly chosen by the learned classifier) selects
        the ladder, with the regex classify_specialty used when it is None.
        """
        if not isinstance(self.preset, SpecialistPreset):
            return SpecialistLadder(specialty="general", tiers=self.preset.tiers)
        chosen = classify_specialty(task) if specialty is None else specialty
        return self.preset.ladder_for(chosen)

    def _size_floor_index(self, tiers: list[Tier], size_floor: str) -> int:
        """Start-rung index for an 'Auto · <size>' pin.

        Returns the index of the first tier whose parsed size is >= the floor.
        When the floor does not parse to a positive size the result is 0; when
        no tier is large enough the result is the last rung.
        """
        from .config import parse_size_b

        # Accept both "3b" and a bare "3" by appending the unit when it is missing.
        has_unit = str(size_floor).lower().endswith("b")
        target = parse_size_b(size_floor if has_unit else f"{size_floor}b")
        if target <= 0:
            return 0
        last_rung = max(len(tiers) - 1, 0)
        return next(
            (pos for pos, rung in enumerate(tiers) if parse_size_b(rung.model) >= target),
            last_rung,
        )

    def _route(self, task: str) -> tuple[SpecialistLadder, int, str]:
        """Choose (ladder, start-tier index, thinking level) for a task.

        The learned RouteClassifier is used when it is available; otherwise the
        regex baseline. A difficulty bucket, independent of the ladder length,
        feeds both the start rung and the thinking level. `size_floor`
        (Auto · <size>) overrides the start rung, and an explicit user `/think`
        (anything other than the default "off") overrides the thinking level.
        """
        clf = _route_classifier()
        use_model = clf is not None and clf.available

        # 1. specialty -> size ladder
        if use_model and isinstance(self.preset, SpecialistPreset):
            specialty, *_ = clf.pick_specialty(task, list(self.preset.ladders))
            ladder = self._ladder_for(task, specialty=specialty)
        else:
            ladder = self._ladder_for(task)
        rungs = ladder.tiers

        # 2. difficulty bucket (0.._THINK_BUCKETS-1) + escalation hint
        if use_model:
            bucket, *_ = clf.pick_tier(task, _THINK_BUCKETS)
            will_escalate, *_ = clf.pick_escalate(task)
        else:
            bucket = classify_tier(task, _THINK_BUCKETS)
            will_escalate = False

        # 3. start rung: an explicit size floor wins, else the difficulty bucket
        if self.size_floor:
            start = self._size_floor_index(rungs, self.size_floor)
        else:
            start = min(bucket, max(len(rungs) - 1, 0))

        # 4. thinking level: explicit /think wins, else router-derived (clf only)
        if self.think != "off":
            think_level = self.think
        elif use_model:
            think_level = clf.think_for(bucket, _THINK_BUCKETS, will_escalate)
        else:
            think_level = "off"
        return ladder, start, think_level

    async def run_live(
        self,
        task: str,
        *,
        rust_session=None,
    ) -> AsyncIterator[LiveFrame]:
        """Yield live frames while routing; the final frame carries the RouteResult."""
        ladder, first_pos, think_level = self._route(task)
        rungs = ladder.tiers
        family = ladder.specialty
        climbs = 0
        latest: RouteResult | None = None
        came_from: str | None = None

        for pos in range(first_pos, len(rungs)):
            rung = rungs[pos]
            on_first_rung = pos == first_pos
            has_bigger_rung = pos < len(rungs) - 1

            if came_from is not None:
                escalation_event = TraceEvent(
                    kind="tier_escalation",
                    name=rung.name,
                    detail=f"escalated from {came_from}",
                )
                yield LiveFrame(events=[escalation_event])

            # The start tier reuses the caller's session. Point it at the ROUTED
            # model (not whatever the UI last pinned), so "Auto" honors the
            # router's pick and a concrete pin (single-tier ladder) runs exactly
            # that model. A failure to switch is deliberately ignored.
            if on_first_rung and rust_session is not None:
                try:
                    rust_session.set_model(rung.model)
                except Exception:
                    pass

            worker = SmallCodeAgent(
                preset=self.preset,
                model=rung.model,
                max_steps=self.max_steps,
                approval_handler=self.approval_handler,
                workspace_dir=self.workspace_dir,
                agent=self.agent_name,
                yolo=self.yolo,
                rust_session=rust_session if on_first_rung else None,
            )

            async for frame in worker.run_live_turn(
                task, think=think_level, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue

                final, steps = frame.result
                if worker.hit_max_steps or worker.errored:
                    ok = False
                else:
                    ok = _verify(worker)

                # _verify only shows the code RAN, not that it is correct. When
                # it ran clean (ok is True) and a bigger tier exists, ask a judge
                # whether the solution really satisfies the task; a "no" from the
                # judge means escalate.
                if ok is True and has_bigger_rung and judge_enabled():
                    correct = await judge_correct(
                        self.preset, rungs[pos + 1].model, task, worker.files(), final,
                    )
                    if not correct:
                        ok = False

                latest = _build_result(
                    worker, final, steps, rung, rungs[first_pos].name,
                    climbs, bool(ok), specialty=family,
                )

                # True (verified) and None (unverifiable) both end the run here.
                if ok is not False:
                    yield LiveFrame(
                        steps=steps,
                        events=latest.trace_events,
                        files=latest.files,
                        done=True,
                        result=latest,
                    )
                    return

                if has_bigger_rung:
                    worker.trace_collector.record_escalation(rung.name, rungs[pos + 1].name)
                    worker.cleanup()
                climbs += 1
                came_from = rung.name
                break

        # Every rung failed: hand back the last attempt, if there was one.
        if latest is not None:
            yield LiveFrame(
                steps=latest.steps,
                events=latest.trace_events,
                files=latest.files,
                done=True,
                result=latest,
            )
|
engine/rust_session.py
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Python facade over the Rust smolcode agent engine (smolcode_core)."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import asyncio
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import tempfile
|
| 8 |
+
from collections.abc import Awaitable, Callable
|
| 9 |
+
from dataclasses import dataclass, field
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from .trace_collector import TraceCollector, TraceEvent
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import smolcode_core as _rust
|
| 16 |
+
except ImportError:
|
| 17 |
+
_rust = None # type: ignore
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def rust_available() -> bool:
    """Report whether the optional ``smolcode_core`` extension was imported."""
    if _rust is None:
        return False
    return True
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
ApprovalHandler = Callable[[str], Awaitable[bool]]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class RustRunResult:
    """Outcome of one agent turn, as returned by ``RustSession.run``."""

    # Text of the last "final" event seen during the turn ("" if none arrived).
    final: str
    # Set when the final text matched the step-limit wording checked in run().
    hit_max_steps: bool = False
    # Set when an "error" event was seen during the turn.
    errored: bool = False
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class RustSession:
    """Thin wrapper around smolcode_core.Session.

    Most methods forward straight to the underlying Rust session object. On top
    of that, the wrapper records tool calls, tool results, final answers and
    errors into a ``TraceCollector`` and tracks the per-turn ``hit_max_steps``,
    ``errored`` and cancel flags.
    """

    def __init__(
        self,
        *,
        workspace: str | None = None,
        agent: str = "build",
        yolo: bool = False,
        model: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        profile: str = "full",
        approval_handler: ApprovalHandler | None = None,
    ) -> None:
        """Create the underlying Rust session.

        Args:
            workspace: Directory handed to the Rust session. When ``None`` the
                ``SMALLCODE_WORKSPACE`` environment variable is used, and only
                if that is unset is a fresh temporary directory created.
            agent, yolo, model, base_url, api_key, profile: Forwarded unchanged
                to ``smolcode_core.Session``.
            approval_handler: Optional async callable that receives an approval
                description and returns whether the action is approved.

        Raises:
            RuntimeError: If ``smolcode_core`` could not be imported.
        """
        if _rust is None:
            raise RuntimeError(
                "smolcode_core is not installed; build with "
                "`maturin develop --release` in smolcode-cli/crates/smolcode-py"
            )
        if workspace is None:
            # Look the variable up first: passing mkdtemp() as the default of
            # environ.get() would create (and leak) a temp directory on every
            # construction, even when the variable is set.
            workspace = os.environ.get("SMALLCODE_WORKSPACE")
            if workspace is None:
                workspace = tempfile.mkdtemp(prefix="smolcode-")
        self._session = _rust.Session(
            workspace=workspace,
            agent=agent,
            yolo=yolo,
            model=model,
            base_url=base_url,
            api_key=api_key,
            profile=profile,
        )
        self.trace_collector = TraceCollector()
        self.approval_handler = approval_handler
        self.hit_max_steps = False
        self.errored = False
        self._steps: list[dict[str, Any]] = []
        self._final: str = ""
        self._cancelled = False

    def request_cancel(self) -> None:
        """Set the cancel flag and ask the Rust session to cancel the turn."""
        self._cancelled = True
        self.cancel_turn()

    @property
    def cancelled(self) -> bool:
        """Whether ``request_cancel`` was called since the flag was last cleared."""
        return self._cancelled

    def clear_cancel(self) -> None:
        """Reset the cancel flag (done automatically at the start of ``run``)."""
        self._cancelled = False

    @property
    def session_id(self) -> str:
        """Identifier of the underlying Rust session."""
        return self._session.session_id

    @property
    def workspace_path(self) -> str:
        """Workspace location as reported by the Rust session."""
        return self._session.workspace()

    def set_model(self, model: str) -> None:
        """Forward a model selection to the Rust session."""
        self._session.set_model(model)

    def set_agent(self, agent: str) -> None:
        """Forward an agent selection to the Rust session."""
        self._session.set_agent(agent)

    def set_think(self, level: str) -> None:
        """Forward a think level to the Rust session."""
        self._session.set_think(level)

    def register_tool(self, name: str, fn: Callable[[dict], dict]) -> None:
        """Register a Python callable as a tool on the Rust session."""
        self._session.register_tool(name, fn)

    def files(self) -> dict[str, str]:
        """Return ``{path: content}`` for every workspace file that can be read.

        Paths whose ``read_file`` result is ``None`` are left out.
        """
        out: dict[str, str] = {}
        for path in self._session.workspace_files():
            content = self._session.read_file(path)
            if content is not None:
                out[path] = content
        return out

    def run_shell(self, command: str) -> str:
        """Run a shell command through the Rust session and return its output."""
        return self._session.run_shell(command)

    async def run(
        self,
        task: str,
        *,
        think: str | None = None,
        yolo: bool | None = None,
    ) -> RustRunResult:
        """Run one agent turn to completion.

        Starts a turn, then polls events until a "done" event arrives or the
        session is cancelled. Approval events are answered inline; every other
        event is recorded via ``_ingest_event``.

        Args:
            task: Prompt passed to ``start_turn``.
            think: Optional think level passed to ``start_turn``.
            yolo: Optional override passed to ``start_turn``; also decides the
                answer to approval requests when no approval handler is set.

        Returns:
            A ``RustRunResult`` carrying the final text and the turn flags.
        """
        self.hit_max_steps = False
        self.errored = False
        self._final = ""
        self.clear_cancel()
        self._session.start_turn(task, think=think, yolo=yolo)
        final_text = ""
        while True:
            if self._cancelled:
                break
            # poll_event runs in a worker thread so the event loop stays free.
            ev = await asyncio.to_thread(self._session.poll_event)
            if ev is None:
                await asyncio.sleep(0.05)
                continue
            kind = ev.get("kind")
            if kind == "approval":
                approved = True
                if self.approval_handler is not None:
                    approved = await self.approval_handler(ev.get("desc", ""))
                elif not yolo:
                    # No handler and yolo is False/None: deny by default.
                    approved = False
                self._session.approve(approved)
                continue
            self._ingest_event(ev)
            if kind == "final":
                final_text = ev.get("text", "")
            if kind == "done":
                break
            if kind == "error":
                self.errored = True
        self._final = final_text
        # The step limit is detected from the wording of the final message.
        lowered = self._final.lower()
        if "step" in lowered and "without finishing" in lowered:
            self.hit_max_steps = True
        self._session.record_turn(task, final_text)
        return RustRunResult(
            final=final_text,
            hit_max_steps=self.hit_max_steps,
            errored=self.errored,
        )

    async def poll_events_once(self) -> list[dict[str, Any]]:
        """Non-blocking poll for live UI updates during a turn.

        Drains events until none is pending or a "done" event is seen, and
        returns the drained non-approval events in order.
        """
        events: list[dict[str, Any]] = []
        while True:
            ev = await asyncio.to_thread(self._session.poll_event)
            if ev is None:
                break
            kind = ev.get("kind")
            if kind == "approval":
                # NOTE(review): unlike run(), a missing approval handler means
                # the request is approved here regardless of yolo; confirm
                # that this is intended.
                approved = True
                if self.approval_handler is not None:
                    approved = await self.approval_handler(ev.get("desc", ""))
                self._session.approve(approved)
                continue
            self._ingest_event(ev)
            events.append(ev)
            if kind == "done":
                break
        return events

    def _ingest_event(self, ev: dict[str, Any]) -> None:
        """Record one session event in the trace collector.

        Handles "tool_call", "tool_result", "final" and "error" events; other
        kinds are ignored. An "error" event also sets ``self.errored``.
        """
        kind = ev.get("kind")
        if kind == "tool_call":
            args_raw = ev.get("args", "{}")
            try:
                args = json.loads(args_raw) if isinstance(args_raw, str) else args_raw
            except json.JSONDecodeError:
                # Keep unparseable arguments visible instead of dropping them.
                args = {"raw": args_raw}
            self.trace_collector.record_tool_call(ev.get("name", ""), args)
        elif kind == "tool_result":
            text = ev.get("text", "")
            try:
                result = json.loads(text)
            except json.JSONDecodeError:
                result = {"output": text}
            self.trace_collector.record_tool_result(ev.get("name", ""), result)
        elif kind == "final":
            self.trace_collector.record_final(ev.get("text", ""))
        elif kind == "error":
            self.trace_collector.record_error(ev.get("text", ""))
            self.errored = True

    def save(self) -> None:
        """Ask the Rust session to save itself."""
        self._session.save()

    @staticmethod
    def list_sessions() -> list[dict[str, Any]]:
        """Return the stored sessions, or ``[]`` when the extension is missing."""
        if _rust is None:
            return []
        return _rust.Session.list_sessions()

    def load_session(self, session_id: str) -> bool:
        """Forward to the Rust session's ``load_session``."""
        return self._session.load_session(session_id)

    def fork(self) -> str | None:
        """Forward to the Rust session's ``fork``."""
        return self._session.fork()

    def rename(self, title: str) -> bool:
        """Forward to the Rust session's ``rename``."""
        return self._session.rename(title)

    def delete(self) -> bool:
        """Forward to the Rust session's ``delete``."""
        return self._session.delete()

    def cancel_turn(self) -> None:
        """Forward to the Rust session's ``cancel_turn``."""
        self._session.cancel_turn()

    def render_config(self) -> str:
        """Forward to the Rust session's ``render_config``."""
        return self._session.render_config()
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def render_config(session: RustSession) -> str:
    """Module-level convenience that returns ``session.render_config()``."""
    rendered = session.render_config()
    return rendered
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def apply_settings(session: RustSession, settings: Any) -> None:
    """Push the UI's think level, model pin and agent onto a live session.

    "auto" and "auto:<size>" are routing pseudo-selections, not real model
    tags. Per the original note here, the Router chooses the model and sets it
    on the session itself (see router.run_live), so those values are never
    forwarded to ``set_model``. Only concrete pins are applied.
    """
    session.set_think(settings.think)

    pinned = settings.model or ""
    is_auto = pinned == "auto" or pinned.startswith("auto:")
    if pinned and not is_auto:
        session.set_model(pinned)

    session.set_agent(settings.agent)
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def list_commands(workspace: str) -> list[str]:
    """Return ``smolcode_core.list_commands(workspace)``, or ``[]`` without the extension."""
    return [] if _rust is None else _rust.list_commands(workspace)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def expand_command(workspace: str, name: str, args: str = "") -> str | None:
    """Return ``smolcode_core.expand_command(...)``, or ``None`` without the extension."""
    return None if _rust is None else _rust.expand_command(workspace, name, args)
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def list_rules(workspace: str) -> list[dict[str, Any]]:
    """Return ``smolcode_core.list_rules(workspace)``, or ``[]`` without the extension."""
    return [] if _rust is None else _rust.list_rules(workspace)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def list_skills(workspace: str) -> list[dict[str, Any]]:
    """Return ``smolcode_core.list_skills(workspace)``, or ``[]`` without the extension."""
    return [] if _rust is None else _rust.list_skills(workspace)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def expand_skill(workspace: str, name: str, args: str = "") -> str | None:
    """Return ``smolcode_core.expand_skill(...)``, or ``None`` without the extension."""
    return None if _rust is None else _rust.expand_skill(workspace, name, args)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def list_mcp(session: RustSession) -> list[dict[str, Any]]:
    """Return the MCP entries reported by the session's underlying Rust handle."""
    inner = session._session
    return inner.list_mcp()
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def list_background_jobs() -> str:
    """Return ``smolcode_core.list_background_jobs()``, or ``""`` without the extension."""
    return "" if _rust is None else _rust.list_background_jobs()
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def write_agents_md(workspace: str) -> str:
    """Return ``smolcode_core.write_agents_md(workspace)``.

    Raises:
        RuntimeError: If ``smolcode_core`` is not installed.
    """
    if _rust is not None:
        return _rust.write_agents_md(workspace)
    raise RuntimeError("smolcode_core not installed")
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def git_status(workspace: str) -> str:
    """Return ``smolcode_core.git_status(workspace)``, or ``""`` without the extension."""
    return "" if _rust is None else _rust.git_status(workspace)
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def workspace_tree(workspace: str, depth: int = 3) -> str:
    """Return ``smolcode_core.workspace_tree(workspace, depth=depth)``, or ``""`` without the extension."""
    return "" if _rust is None else _rust.workspace_tree(workspace, depth=depth)
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
UI_FILE_LIMIT = 1500
|
| 312 |
+
AUTOCOMPLETE_FILE_LIMIT = 200
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
ATTACH_FILE_MAX_BYTES = 8192
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def read_workspace_file(
    workspace: str,
    path: str,
    *,
    max_bytes: int = ATTACH_FILE_MAX_BYTES,
    rust: RustSession | None = None,
) -> str | None:
    """Read a workspace file for @-attachment inlining.

    Args:
        workspace: Workspace used to build a throwaway session when ``rust``
            is not supplied.
        path: File path passed to the session's ``read_file``.
        max_bytes: Upper bound on the UTF-8 encoded size of the returned
            content (the truncation marker is appended on top of that).
        rust: Existing session to read through instead of creating one.

    Returns:
        The file content, clipped to ``max_bytes`` bytes with a truncation
        marker when it was longer, or ``None`` if the extension is missing,
        the file cannot be read, or any error occurs.
    """
    if _rust is None:
        return None
    try:
        session = rust if rust is not None else RustSession(workspace=workspace, yolo=True)
        content = session._session.read_file(path)
        if content is None:
            return None
        # Measure and cut in bytes, as the parameter name promises; counting
        # characters lets multi-byte text exceed the budget several times over.
        encoded = content.encode("utf-8")
        if len(encoded) <= max_bytes:
            return content
        # errors="ignore" drops a multi-byte character split by the cut.
        clipped = encoded[:max_bytes].decode("utf-8", errors="ignore")
        return clipped + "\n… (truncated)"
    except Exception:
        # Deliberate best effort: an attachment that cannot be read is treated
        # the same as a missing one.
        return None
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def workspace_paths(workspace: str, *, limit: int = UI_FILE_LIMIT) -> tuple[list[str], int]:
    """List workspace paths for UI sidebars without reading any file.

    Returns:
        ``(paths, total_count)``: the sorted paths cut to at most ``limit``
        entries, and the number of paths before cutting. ``([], 0)`` when the
        extension is missing.
    """
    if _rust is None:
        return [], 0
    probe = RustSession(workspace=workspace, yolo=True)
    ordered = sorted(probe._session.workspace_files())
    shown = ordered[:limit] if len(ordered) > limit else ordered
    return shown, len(ordered)
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def workspace_files(workspace: str) -> dict[str, str]:
    """Return ``{path: content}`` for the workspace via a throwaway ``RustSession``."""
    return RustSession(workspace=workspace, yolo=True).files()
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def export_transcript(session_id: str, path: str | None = None) -> str:
    """Return ``smolcode_core.export_transcript(session_id, path)``.

    Raises:
        RuntimeError: If ``smolcode_core`` is not installed.
    """
    if _rust is not None:
        return _rust.export_transcript(session_id, path)
    raise RuntimeError("smolcode_core not installed")
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
def session_timeline(session_id: str) -> list[str]:
    """Return ``smolcode_core.session_timeline(session_id)``, or ``[]`` without the extension."""
    return [] if _rust is None else _rust.session_timeline(session_id)
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def get_session_chat(session_id: str) -> list[dict[str, str]]:
    """Return ``smolcode_core.get_session_chat(session_id)``, or ``[]`` without the extension."""
    return [] if _rust is None else _rust.get_session_chat(session_id)
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def chat_from_stored(lines: list[dict[str, str]]) -> list[dict[str, str]]:
    """Convert stored session lines to Gradio chat messages.

    Each entry becomes ``{"role": ..., "content": ...}``. Any role other than
    "user" (including a missing one) is mapped to "assistant"; a missing
    "text" becomes an empty string.
    """
    return [
        {
            "role": "user" if entry.get("role", "assistant") == "user" else "assistant",
            "content": entry.get("text", ""),
        }
        for entry in lines
    ]
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def session_choices() -> list[str]:
    """Build dropdown labels of the form `title (id)`, one per stored session."""
    labels: list[str] = []
    for record in RustSession.list_sessions():
        labels.append(f"{record['title']} ({record['id']})")
    return labels
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
def parse_session_label(label: str) -> str | None:
|
| 397 |
+
if not label or "(" not in label:
|
| 398 |
+
return None
|
| 399 |
+
return label.rsplit("(", 1)[-1].rstrip(")")
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def load_rust_config(
    *,
    model: str | None = None,
    base_url: str | None = None,
    api_key: str | None = None,
    agent: str | None = None,
    yolo: bool = False,
) -> dict[str, Any]:
    """Load layered config.toml via Rust Config.

    The keyword arguments are forwarded to ``smolcode_core.Config.load``.

    Returns:
        A dict with the loaded ``model``, ``base_url``, ``agent`` and ``yolo``
        values, or ``{}`` when the extension is missing.
    """
    if _rust is None:
        return {}
    overrides = dict(
        model=model,
        base_url=base_url,
        api_key=api_key,
        agent=agent,
        yolo=yolo,
    )
    cfg = _rust.Config.load(**overrides)
    return {key: getattr(cfg, key) for key in ("model", "base_url", "agent", "yolo")}
|
engine/sandbox.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Execution sandbox for model-generated code.
|
| 2 |
+
|
| 3 |
+
This is the agentic core's "hands": it runs code the model writes and reports
|
| 4 |
+
back stdout/stderr/exit so the agent can iterate to green.
|
| 5 |
+
|
| 6 |
+
SECURITY: model-generated code is untrusted. The default here is a *soft*
|
| 7 |
+
sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
|
| 8 |
+
and output caps. It is adequate for local/laptop use. Before exposing a public
|
| 9 |
+
HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an
|
| 10 |
+
e2b/Docker microVM); the interface below does not change.
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import shutil
|
| 16 |
+
import subprocess
|
| 17 |
+
import tempfile
|
| 18 |
+
from dataclasses import dataclass
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
DEFAULT_TIMEOUT = 20 # seconds
|
| 22 |
+
MAX_OUTPUT = 20_000 # chars per stream, to keep the LLM context bounded
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
class RunResult:
    """Captured outcome of one sandboxed process run."""

    ok: bool
    stdout: str
    stderr: str
    exit_code: int
    timed_out: bool = False

    def as_tool_payload(self) -> dict:
        """Build the compact dict handed back to the LLM as the tool result.

        Both output streams are passed through ``_clip`` first.
        """
        payload = {
            "ok": self.ok,
            "exit_code": self.exit_code,
            "timed_out": self.timed_out,
        }
        for stream in ("stdout", "stderr"):
            payload[stream] = _clip(getattr(self, stream))
        return payload
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Cut ``s`` to ``limit`` characters, appending a note with the dropped count."""
    overflow = len(s) - limit
    if overflow > 0:
        return s[:limit] + f"\n...[truncated {overflow} chars]"
    return s
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class Workspace:
    """A scratch directory the agent reads/writes/executes within.

    All file tools are confined to this directory; paths are resolved and
    checked so the model cannot escape via `..` or absolute paths.
    """

    def __init__(self, root: str | None = None) -> None:
        """Use ``root`` as the workspace, or create a fresh temp directory."""
        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
        self.root.mkdir(parents=True, exist_ok=True)

    # --- path safety -----------------------------------------------------
    def _resolve(self, rel: str) -> Path:
        """Resolve ``rel`` against the workspace root.

        Raises:
            ValueError: If the resolved path lies outside the workspace.
        """
        base = self.root.resolve()
        p = (self.root / rel).resolve()
        # Compare path components, not string prefixes: with startswith() a
        # sibling such as "<root>-evil/x" would pass as if it were inside.
        if not p.is_relative_to(base):
            raise ValueError(f"path escapes workspace: {rel!r}")
        return p

    # --- file ops --------------------------------------------------------
    def write_file(self, path: str, content: str) -> dict:
        """Create or overwrite ``path`` (parents included) with ``content``."""
        p = self._resolve(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 keeps the file consistent with the byte count below
        # and with how python3 reads source files.
        p.write_text(content, encoding="utf-8")
        return {"ok": True, "path": path, "bytes": len(content.encode())}

    def read_file(self, path: str) -> dict:
        """Read ``path`` back as clipped text.

        Returns an ``{"ok": False, "error": ...}`` payload instead of raising
        when the path is missing, is not a regular file, or is not UTF-8 text.
        """
        p = self._resolve(path)
        if not p.exists():
            return {"ok": False, "error": "not found", "path": path}
        if not p.is_file():
            return {"ok": False, "error": "not a file", "path": path}
        try:
            text = p.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Binary files (images, archives) must not crash callers that
            # read every workspace file.
            return {"ok": False, "error": "not a UTF-8 text file", "path": path}
        return {"ok": True, "path": path, "content": _clip(text)}

    def list_files(self) -> list[str]:
        """Return the sorted root-relative paths of all files in the workspace."""
        return sorted(
            str(p.relative_to(self.root))
            for p in self.root.rglob("*")
            if p.is_file()
        )

    # --- execution -------------------------------------------------------
    def run_python(self, code: str | None = None, path: str | None = None,
                   timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a workspace file, or ``code`` written to ``_snippet.py``, with python3."""
        if path:
            target = self._resolve(path)
            argv = ["python3", str(target)]
        else:
            f = self._resolve("_snippet.py")
            f.write_text(code or "", encoding="utf-8")
            argv = ["python3", str(f)]
        return self._run(argv, timeout)

    def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run ``python3 -m pytest -q`` in the workspace."""
        # Only pytest is invoked; no unittest fallback is implemented here.
        argv = ["python3", "-m", "pytest", "-q"]
        return self._run(argv, timeout)

    def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a shell command in the workspace (login shell for full PATH).

        Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
        same way run_python checks Python. Mirrors the Rust agent's run_shell and the
        eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
        """
        return self._run(["bash", "-lc", command], timeout)

    def _run(self, argv: list[str], timeout: int) -> RunResult:
        """Run ``argv`` in the workspace root and capture its outcome.

        A timeout is reported as a failed ``RunResult`` with exit code 124 and
        ``timed_out=True`` rather than raised.
        """
        env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
        try:
            proc = subprocess.run(
                argv,
                cwd=self.root,
                env=env,
                capture_output=True,
                text=True,
                # Untrusted programs may print arbitrary bytes; never let a
                # decode error turn into a tool crash.
                errors="replace",
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            partial = e.stdout
            if isinstance(partial, bytes):
                # Output captured at kill time can end mid-character.
                partial = partial.decode(errors="replace")
            return RunResult(
                ok=False,
                stdout=partial or "",
                stderr=f"timed out after {timeout}s",
                exit_code=124,
                timed_out=True,
            )
        return RunResult(
            ok=proc.returncode == 0,
            stdout=proc.stdout,
            stderr=proc.stderr,
            exit_code=proc.returncode,
        )

    def cleanup(self) -> None:
        """Delete the workspace directory, ignoring errors."""
        shutil.rmtree(self.root, ignore_errors=True)
|
engine/themes.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Web UI color themes aligned with the CLI TUI palettes."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass(frozen=True)
class WebTheme:
    """One immutable web UI palette.

    Every field except ``name`` is a color string that ``theme_css_vars``
    emits as a CSS custom property.
    """

    name: str  # value matched by the [data-theme="..."] selector
    bg: str  # --sc-bg
    panel: str  # --sc-panel
    bg_alt: str  # --sc-bg-alt
    accent: str  # --sc-accent
    fg: str  # --sc-fg
    dim: str  # --sc-dim
    ok: str  # --sc-ok
    tool: str  # --sc-tool
    border: str  # --sc-border
    hf_yellow: str = "#FFD21E"  # --hf-yellow
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
WEB_THEMES: list[WebTheme] = [
|
| 23 |
+
WebTheme("smol-dark", "#0b1020", "#111827", "#1e293b", "#7c3aed", "#e2e8f0", "#64748b", "#34d399", "#a78bfa", "#334155"),
|
| 24 |
+
WebTheme("tokyo", "#1a1b26", "#24283b", "#1f2335", "#7dcfff", "#c0caf5", "#565f89", "#bb9af7", "#7dcfff", "#414868"),
|
| 25 |
+
WebTheme("gruvbox", "#282828", "#32302f", "#3c3836", "#fe8019", "#ebdbb2", "#928374", "#b8bb26", "#83a598", "#504945"),
|
| 26 |
+
WebTheme("mono", "#161616", "#1e1e1e", "#222222", "#e0e0e0", "#c0c0c0", "#707070", "#ffffff", "#a0a0a0", "#404040"),
|
| 27 |
+
WebTheme("catppuccin", "#1e1e2e", "#313244", "#313244", "#cba6f7", "#cdd6f4", "#6c7086", "#a6e3a1", "#89b4fa", "#45475a"),
|
| 28 |
+
WebTheme("nord", "#2e3440", "#3b4252", "#3b4252", "#88c0d0", "#eceff4", "#4c566a", "#a3be8c", "#81a1c1", "#3b4252"),
|
| 29 |
+
WebTheme("dracula", "#282a36", "#44475a", "#282a36", "#bd93f9", "#f8f8f2", "#6272a4", "#50fa7b", "#8be9fd", "#44475a"),
|
| 30 |
+
WebTheme("solarized", "#002b36", "#073642", "#073642", "#268bd2", "#839496", "#586e75", "#859900", "#2aa198", "#073642"),
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def theme_names() -> list[str]:
    """Return the names of all themes in ``WEB_THEMES``, in declaration order."""
    names: list[str] = []
    for theme in WEB_THEMES:
        names.append(theme.name)
    return names
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def theme_by_name(name: str) -> WebTheme:
    """Return the theme called ``name``, or the first theme when none matches."""
    matches = (theme for theme in WEB_THEMES if theme.name == name)
    return next(matches, WEB_THEMES[0])
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def theme_at(index: int) -> WebTheme:
    """Return the theme at ``index``, wrapping around the end of ``WEB_THEMES``."""
    wrapped = index % len(WEB_THEMES)
    return WEB_THEMES[wrapped]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def theme_css_vars() -> str:
    """Per-theme CSS variable overrides for .sc-tui-shell[data-theme=...].

    Emits one rule per theme in ``WEB_THEMES``, joined by newlines.
    """

    def rule(t) -> str:
        # Declarations in the order they appear in the rule.
        pairs = (
            ("--sc-bg", t.bg),
            ("--sc-panel", t.panel),
            ("--sc-bg-alt", t.bg_alt),
            ("--sc-accent", t.accent),
            ("--sc-fg", t.fg),
            ("--sc-dim", t.dim),
            ("--sc-ok", t.ok),
            ("--sc-tool", t.tool),
            ("--sc-border", t.border),
            ("--hf-yellow", t.hf_yellow),
        )
        declarations = " ".join(f"{prop}:{value};" for prop, value in pairs)
        return f'.sc-tui-shell[data-theme="{t.name}"] {{ {declarations} }}'

    return "\n".join(rule(t) for t in WEB_THEMES)
|
engine/tools.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Coding tools exposed to the LiteForge agent.
|
| 2 |
+
|
| 3 |
+
Each tool is a Python callable registered via `liteforge.create_tool`. The agent
|
| 4 |
+
(running in Rust) decides when to call them; LiteForge invokes the callable with
|
| 5 |
+
a single `dict` of arguments and feeds the returned JSON-able dict back to the
|
| 6 |
+
model. All file/exec tools are confined to one `Workspace`.
|
| 7 |
+
|
| 8 |
+
Tool surface (kept deliberately small so a 3B model can use it reliably):
|
| 9 |
+
write_file(path, content) -> create/overwrite a file
|
| 10 |
+
read_file(path) -> read a file back
|
| 11 |
+
list_files() -> list workspace files
|
| 12 |
+
run_python(path) -> execute a file, return stdout/stderr/exit
|
| 13 |
+
run_tests() -> run pytest in the workspace
|
| 14 |
+
"""
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import liteforge as lf
|
| 18 |
+
|
| 19 |
+
from . import browsercheck
|
| 20 |
+
from .preview import inline_app
|
| 21 |
+
from .sandbox import Workspace
|
| 22 |
+
from .trace_collector import TraceCollector
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _wrap(name: str, fn, collector: TraceCollector | None):
|
| 26 |
+
if collector is None:
|
| 27 |
+
return fn
|
| 28 |
+
|
| 29 |
+
def wrapped(args: dict):
|
| 30 |
+
collector.record_tool_call(name, args)
|
| 31 |
+
result = fn(args)
|
| 32 |
+
collector.record_tool_result(name, result)
|
| 33 |
+
return result
|
| 34 |
+
|
| 35 |
+
return wrapped
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Tool names in the order _tools() returns them — lets a registry select a
|
| 39 |
+
# subset by name without relying on attributes of the opaque lf tool object.
|
| 40 |
+
_TOOL_ORDER = ("write_file", "read_file", "list_files", "run_python", "run_tests")
|
| 41 |
+
|
| 42 |
+
# Tools the web builder needs. Static apps are "verified" by rendering, not by
|
| 43 |
+
# running Python, so we drop run_python/run_tests — a smaller, less confusing
|
| 44 |
+
# surface for a 3B model that should be writing HTML, not spawning processes.
|
| 45 |
+
_WEB_TOOLS = ("write_file", "read_file", "list_files")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _registry(workspace: Workspace, names, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Build a registry with the tools from ``_tools`` whose name is in ``names``.

    Tools are paired with their names positionally through ``_TOOL_ORDER``.
    """
    reg = lf.ToolRegistry()
    named_tools = zip(_TOOL_ORDER, _tools(workspace, collector))
    chosen = [tool for tool_name, tool in named_tools if tool_name in names]
    for tool in chosen:
        reg.register(tool)
    return reg
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def build_registry(workspace: Workspace, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Return a ToolRegistry holding every tool named in ``_TOOL_ORDER``, bound to `workspace`."""
    full_registry = _registry(workspace, _TOOL_ORDER, collector)
    return full_registry
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def build_web_registry(workspace: Workspace, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Return the smolbuilder web agent's tools: the ``_WEB_TOOLS`` file ops plus ``check_app``."""
    web_registry = _registry(workspace, _WEB_TOOLS, collector)
    app_checker = _check_app_tool(workspace, collector)
    web_registry.register(app_checker)
    return web_registry
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def check_app_impl(ws: Workspace, collector: TraceCollector | None, args: dict) -> dict:
    """Run check_app logic (shared by LiteForge tool and Rust python callback).

    Every readable workspace file is collected, combined with `inline_app`, and
    handed to `browsercheck.check_html`.

    Args:
        ws: workspace providing `list_files()` and `read_file(path)`.
        collector: accepted so the signature matches the other tool helpers;
            not used in this function.
        args: tool-call arguments; check_app takes none, so they are ignored.

    Returns:
        A dict with "ok" and "errors", plus one of:
        - no extra key when index.html is missing ("ok" is False);
        - "note" when `check_html` reports ok=None (treated as passing);
        - "message" when the check passes;
        - "hint" when the check fails ("errors" carries what check_html returned).
    """
    # List the workspace once: the existence check and the read loop then work
    # from the same snapshot, and the listing is not computed twice.
    listing = list(ws.list_files())
    if "index.html" not in listing:
        return {"ok": False,
                "errors": ["index.html not found: create it first with write_file."]}
    files = {}
    for rel in listing:
        r = ws.read_file(rel)
        # Files that fail to read are skipped rather than failing the check.
        if r.get("ok"):
            files[rel] = r["content"]
    ok, errors = browsercheck.check_html(inline_app(files))
    if ok is None:
        return {"ok": True, "errors": [],
                "note": "runtime check unavailable here; assuming ok"}
    if ok:
        return {"ok": True, "errors": [],
                "message": "The app loads and every button works."}
    return {"ok": False, "errors": errors,
            "hint": "Fix these JavaScript errors in index.html, then call check_app again."}
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _check_app_tool(ws: Workspace, collector: TraceCollector | None = None):
    """Create the argument-less `check_app` tool bound to `ws`.

    The handler delegates to `check_app_impl` and is passed through `_wrap`,
    so its calls are traced when a collector is given.
    """
    def check_app(args: dict) -> dict:
        return check_app_impl(ws, collector, args)

    description = (
        "Run the current web app in a headless browser: load index.html, execute "
        "its JavaScript, click every button, and report any errors. Use this to "
        "verify the app actually works before finishing."
    )
    no_arguments = {"type": "object", "properties": {}}
    handler = _wrap("check_app", check_app, collector)
    return lf.create_tool("check_app", description, no_arguments, handler)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _tools(ws: Workspace, collector: TraceCollector | None = None) -> list:
    """Build the five coding tools bound to `ws`.

    The returned list is ordered write_file, read_file, list_files, run_python,
    run_tests. Each handler receives the tool-call argument dict and returns the
    workspace's payload; every handler is passed through `_wrap` so calls are
    traced when `collector` is given.
    """
    def write_file(args: dict) -> dict:
        # "content" is optional here (defaults to an empty file); "path" is not.
        return ws.write_file(args["path"], args.get("content", ""))

    def read_file(args: dict) -> dict:
        return ws.read_file(args["path"])

    def list_files(args: dict) -> dict:
        return {"ok": True, "files": ws.list_files()}

    def run_python(args: dict) -> dict:
        return ws.run_python(path=args["path"]).as_tool_payload()

    def run_tests(args: dict) -> dict:
        return ws.run_tests().as_tool_payload()

    # One row per tool: (name, description, JSON schema of the arguments, handler).
    specs = [
        (
            "write_file",
            "Create or overwrite a file in the workspace with the given text content.",
            {
                "type": "object",
                "properties": {
                    "path": {"type": "string", "description": "Relative path, e.g. main.py"},
                    "content": {"type": "string", "description": "Full file contents"},
                },
                "required": ["path", "content"],
            },
            write_file,
        ),
        (
            "read_file",
            "Read a file from the workspace and return its contents.",
            {
                "type": "object",
                "properties": {"path": {"type": "string"}},
                "required": ["path"],
            },
            read_file,
        ),
        (
            "list_files",
            "List all files currently in the workspace.",
            {"type": "object", "properties": {}},
            list_files,
        ),
        (
            "run_python",
            "Run a Python file in the workspace. Returns stdout, stderr and exit code.",
            {
                "type": "object",
                "properties": {"path": {"type": "string", "description": "File to run, e.g. main.py"}},
                "required": ["path"],
            },
            run_python,
        ),
        (
            "run_tests",
            "Run the test suite (pytest) in the workspace. Returns pass/fail output.",
            {"type": "object", "properties": {}},
            run_tests,
        ),
    ]
    return [
        lf.create_tool(tool_name, description, schema, _wrap(tool_name, handler, collector))
        for tool_name, description, schema, handler in specs
    ]
|
engine/trace.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shareable agent traces (Build Small "Sharing is Caring" badge).
|
| 2 |
+
|
| 3 |
+
Turns a completed smolcode run into an OpenTelemetry-style JSON trace: a root
|
| 4 |
+
span minted by LiteForge's `Tracer` plus one child span per agent step, carrying
|
| 5 |
+
the step kind, duration, and token counts read from `AgentStep`. Publish a trace
|
| 6 |
+
file to the Hub so others can see exactly how the tiny model reasoned.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
import liteforge as lf
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def build_trace(agent, task: str, final: str, *, preset: str, model: str) -> dict:
    """Build an OTel-ish trace document from a finished agent run.

    A root span is opened with LiteForge's `Tracer`; one child span dict is then
    produced per entry of the agent's history (`agent.raw_history()` when the
    agent has it, otherwise `agent.history()`, otherwise no steps).

    Args:
        agent: the finished agent whose history is read.
        task: the task text, stored on the root span and in the document.
        final: the final answer text, stored in the document.
        preset: preset name, stored on the root span and in the document.
        model: model name, stored on the root span and in the document.

    Returns:
        A dict with the trace id, the inputs above, the step count, the summed
        `total_tokens` of all steps, the root span id and the list of child spans.
    """
    tracer = lf.Tracer("smolcode")
    root = tracer.start_span("coding_task")
    try:
        root.set_attribute("preset", preset)
        root.set_attribute("model", model)
        root.set_attribute("task", task)
        trace_id = root.context.trace_id
        root_id = root.context.span_id

        spans: list[dict] = []
        total_tokens = 0
        if hasattr(agent, "raw_history"):
            history = agent.raw_history()
        else:
            history = getattr(agent, "history", lambda: [])()
        for i, s in enumerate(history):
            dur = getattr(s, "duration_ms", None) or 0
            tot = getattr(s, "total_tokens", None) or 0
            step_no = getattr(s, "step_number", getattr(s, "number", i))
            if not isinstance(step_no, int):
                # The id suffix below needs an integer; fall back to the position.
                step_no = i
            step_type = getattr(s, "step_type", getattr(s, "kind", "step"))
            result_text = getattr(s, "result", getattr(s, "detail", ""))
            total_tokens += tot
            # Child id = root id with its tail replaced by the zero-padded step
            # number. Cut exactly as many characters as the suffix adds so every
            # child id keeps the root id's length, also for step numbers >= 10.
            suffix = f"{step_no:02d}"
            spans.append({
                "trace_id": trace_id,
                "span_id": f"{root_id[:-len(suffix)]}{suffix}",
                "parent_span_id": root_id,
                "name": str(step_type),
                "duration_ms": dur,
                "attributes": {
                    "step_number": step_no,
                    "prompt_tokens": getattr(s, "prompt_tokens", None),
                    "completion_tokens": getattr(s, "completion_tokens", None),
                    "total_tokens": tot,
                    "result": str(result_text)[:200],
                },
            })
    finally:
        # Close the root span even if reading the history fails part-way.
        root.end()

    return {
        "trace_id": trace_id,
        "service": "smolcode",
        "preset": preset,
        "model": model,
        "task": task,
        "final": final,
        "n_steps": len(spans),
        "total_tokens": total_tokens,
        "root": {"span_id": root_id, "name": "coding_task"},
        "spans": spans,
    }
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def save_trace(trace: dict, out_dir: str | Path = "traces") -> Path:
    """Write `trace` as indented JSON into `out_dir` and return the file path.

    The directory is created if needed. The file is named
    `trace-<YYYYmmdd-HHMMSS>.json`; when that name is already taken (two traces
    saved within the same second) a numeric suffix `-1`, `-2`, … is added
    instead of overwriting the earlier trace.
    """
    d = Path(out_dir)
    d.mkdir(parents=True, exist_ok=True)
    stamp = time.strftime("%Y%m%d-%H%M%S")
    payload = json.dumps(trace, indent=2)
    path = d / f"trace-{stamp}.json"
    attempt = 0
    while True:
        try:
            # "x" fails if the file exists, so an existing trace is never clobbered.
            with path.open("x", encoding="utf-8") as fh:
                fh.write(payload)
            return path
        except FileExistsError:
            attempt += 1
            path = d / f"trace-{stamp}-{attempt}.json"
|
engine/trace_collector.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Append-only trace event log for live UI updates.
|
| 2 |
+
|
| 3 |
+
Tool call args/results are captured by wrapping LiteForge tool callables.
|
| 4 |
+
LiteForge's agent history only exposes step kinds, not tool I/O.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import re
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
from typing import Any, Literal
|
| 12 |
+
|
| 13 |
+
# The closed set of event categories a TraceEvent may carry.
TraceKind = Literal["tool_call", "tool_result", "tier_escalation", "final", "error"]

# Placeholder written in place of anything judged to be a secret.
_REDACTED = "[REDACTED]"
# Credential prefixes: _looks_secret flags a token that starts with one of
# these and has at least 8 further characters.
_PREFIXES = ("sk-", "ghp_", "gho_", "ghs_", "github_pat_", "xoxb-", "xoxp-", "AKIA", "AIza", "glpat-")
# Substrings that mark a key name as sensitive; _redact_line compares them
# against the lower-cased key.
_SENSITIVE_KEYS = (
    "api_key", "apikey", "token", "secret", "password", "passwd",
    "access_key", "client_secret", "private_key",
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class TraceEvent:
    """One entry of the trace log kept by TraceCollector."""

    # Event category (one of the TraceKind literals).
    kind: TraceKind
    # Tool name for tool events; the collector uses "response"/"error"/the
    # target tier for the other kinds.
    name: str
    # Display text for the event (already redacted by the collector helpers).
    detail: str
    # Tool-step counter value; only set for tool_call/tool_result events.
    step: int | None = None
    # Timing and token count; not filled in by the collector itself.
    duration_ms: int | None = None
    tokens: int | None = None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@dataclass
class TraceCollector:
    """Append-only list of TraceEvent objects for one agent run.

    No locking is done; the original author considers that sufficient for
    asyncio single-task agent runs.
    """

    events: list[TraceEvent] = field(default_factory=list)
    # Counter shared by a tool_call and its tool_result; advanced after each result.
    _tool_step: int = 0

    def record(self, kind: TraceKind, name: str, detail: str, **meta) -> None:
        """Append one event; `meta` supplies the optional TraceEvent fields."""
        event = TraceEvent(kind=kind, name=name, detail=detail, **meta)
        self.events.append(event)

    def record_tool_call(self, name: str, args: dict[str, Any]) -> None:
        """Log a tool invocation with its formatted arguments."""
        rendered = _format_payload(args)
        self.record("tool_call", name, rendered, step=self._tool_step)

    def record_tool_result(self, name: str, result: dict[str, Any]) -> None:
        """Log a tool result under the current step number, then advance it."""
        rendered = _format_payload(result)
        self.record("tool_result", name, rendered, step=self._tool_step)
        self._tool_step = self._tool_step + 1

    def record_escalation(self, from_tier: str, to_tier: str) -> None:
        """Log a tier escalation; the event is named after the target tier."""
        self.record("tier_escalation", to_tier, f"escalated from {from_tier}")

    def record_final(self, text: str) -> None:
        """Log the final response text (redacted)."""
        self.record("final", "response", redact(text))

    def record_error(self, text: str) -> None:
        """Log an error message (redacted)."""
        self.record("error", "error", redact(text))

    def snapshot(self) -> list[TraceEvent]:
        """Return a shallow copy of the events recorded so far."""
        return [*self.events]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def redact(text: str) -> str:
    """Redact secrets in `text` line by line, keeping line endings intact.

    Each line from `splitlines(keepends=True)` is passed to `_redact_line`; a
    trailing "\\n" is held back and re-attached so it is never part of the
    scanned content.
    """
    def scrub(raw: str) -> str:
        if raw.endswith("\n"):
            return _redact_line(raw[:-1]) + "\n"
        return _redact_line(raw)

    return "".join(scrub(raw) for raw in text.splitlines(keepends=True))
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _redact_line(line: str) -> str:
|
| 73 |
+
out: list[str] = []
|
| 74 |
+
i = 0
|
| 75 |
+
while i < len(line):
|
| 76 |
+
ch = line[i]
|
| 77 |
+
if ch in "\"'`" or ch.isalnum() or ch in "_-":
|
| 78 |
+
j = i
|
| 79 |
+
while j < len(line) and not line[j].isspace() and line[j] not in ",;)]}":
|
| 80 |
+
j += 1
|
| 81 |
+
token = line[i:j]
|
| 82 |
+
if _looks_secret(token):
|
| 83 |
+
out.append(_REDACTED)
|
| 84 |
+
else:
|
| 85 |
+
out.append(token)
|
| 86 |
+
i = j
|
| 87 |
+
continue
|
| 88 |
+
if ch == "=" and i + 1 < len(line):
|
| 89 |
+
key_start = i
|
| 90 |
+
while key_start > 0 and (line[key_start - 1].isalnum() or line[key_start - 1] in "_-"):
|
| 91 |
+
key_start -= 1
|
| 92 |
+
key = line[key_start:i].lower()
|
| 93 |
+
if any(s in key for s in _SENSITIVE_KEYS):
|
| 94 |
+
out.append(line[i : i + 1])
|
| 95 |
+
i += 1
|
| 96 |
+
j = i
|
| 97 |
+
while j < len(line) and not line[j].isspace():
|
| 98 |
+
j += 1
|
| 99 |
+
out.append(_REDACTED)
|
| 100 |
+
i = j
|
| 101 |
+
continue
|
| 102 |
+
out.append(ch)
|
| 103 |
+
i += 1
|
| 104 |
+
return "".join(out)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _looks_secret(token: str) -> bool:
|
| 108 |
+
for prefix in _PREFIXES:
|
| 109 |
+
if token.startswith(prefix) and len(token) >= len(prefix) + 8:
|
| 110 |
+
return True
|
| 111 |
+
if len(token) >= 32 and re.fullmatch(r"[A-Za-z0-9_\-+/=]+", token):
|
| 112 |
+
upper = sum(1 for c in token if c.isupper())
|
| 113 |
+
lower = sum(1 for c in token if c.islower())
|
| 114 |
+
digit = sum(1 for c in token if c.isdigit())
|
| 115 |
+
if upper >= 4 and lower >= 4 and digit >= 2:
|
| 116 |
+
return True
|
| 117 |
+
return False
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _format_payload(data: dict[str, Any], *, max_content: int = 600) -> str:
|
| 121 |
+
"""JSON-format tool args/results, truncating large file content."""
|
| 122 |
+
out = dict(data)
|
| 123 |
+
if "content" in out and isinstance(out["content"], str):
|
| 124 |
+
text = out["content"]
|
| 125 |
+
if len(text) > max_content:
|
| 126 |
+
out["content"] = text[:max_content] + f"\n… ({len(text)} chars total)"
|
| 127 |
+
raw = json.dumps(out, indent=2, ensure_ascii=False)
|
| 128 |
+
return redact(raw)
|
engine/ui_trace.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Trace rendering for the Gradio web UI."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from .agent import Step
|
| 5 |
+
from .trace_collector import TraceEvent
|
| 6 |
+
|
| 7 |
+
# Emoji shown next to each known tool name in the rendered trace; the
# renderers fall back to "🔧" for names missing from this table.
_TOOL_ICON = {
    "write_file": "✏️", "read_file": "📖", "list_files": "📂",
    "run_python": "▶️", "run_tests": "🧪", "check_app": "🌐",
}
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def merge_step_metadata(events: list[TraceEvent], raw_history: list) -> list[TraceEvent]:
    """Attach LiteForge timing/token stats to tool_call events.

    The n-th tool_call event receives `duration_ms` and `total_tokens` from the
    n-th entry of `raw_history`. All other events, and tool_call events beyond
    the end of `raw_history`, are passed through as the same objects.

    NOTE(review): the pairing is purely positional; it assumes `raw_history`
    holds one entry per tool call in the same order — confirm against the
    caller.

    Returns:
        `events` itself when `raw_history` is empty, otherwise a new list.
    """
    if not raw_history:
        return events
    merged: list[TraceEvent] = []
    call_idx = 0
    for ev in events:
        if ev.kind != "tool_call" or call_idx >= len(raw_history):
            merged.append(ev)
            continue
        step = raw_history[call_idx]
        call_idx += 1
        # Rebuild the event rather than mutating the collector's copy.
        merged.append(TraceEvent(
            kind=ev.kind, name=ev.name, detail=ev.detail, step=ev.step,
            duration_ms=getattr(step, "duration_ms", None),
            tokens=getattr(step, "total_tokens", None),
        ))
    return merged
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def format_trace_md(
    events: list[TraceEvent],
    *,
    steps: list[Step] | None = None,
    max_detail: int = 500,
    idle: str = "_waiting for the model…_",
) -> str:
    """Render trace events as markdown.

    Each tool_call becomes a `<details>` block holding its arguments; when the
    very next event is a tool_result, that result is appended below the block
    and consumed. Escalations, the final answer and errors become one line
    each. A tool_result without a preceding tool_call is not rendered.

    Falls back to `_steps_only_md(steps)` when there are no events but there
    are steps, and to `idle` when there is nothing to show.
    """
    if not events:
        return _steps_only_md(steps) if steps else idle

    rendered: list[str] = []
    call_count = 0          # running number shown in each tool-call summary
    consume_next = False    # set when the following tool_result was already used
    for idx, ev in enumerate(events):
        if consume_next:
            consume_next = False
            continue
        if ev.kind == "tool_call":
            icon = _TOOL_ICON.get(ev.name, "🔧")
            meta = _meta_badge(ev)
            summary = f"`{call_count}` {icon} **{ev.name}**{meta}"
            detail = _truncate(ev.detail, max_detail)
            block = f"<details><summary>{summary}</summary>\n\n```json\n{detail}\n```\n</details>"
            follower = events[idx + 1] if idx + 1 < len(events) else None
            if follower is not None and follower.kind == "tool_result":
                result = _truncate(follower.detail, max_detail)
                block += f"\n\n↳ result:\n\n```json\n{result}\n```"
                consume_next = True
            rendered.append(block)
            call_count += 1
        elif ev.kind == "tier_escalation":
            rendered.append(f"⬆️ **escalated** → `{ev.name}`: {ev.detail}")
        elif ev.kind == "final":
            rendered.append("✅ **final answer**")
        elif ev.kind == "error":
            rendered.append(f"⚠️ **error**: {_truncate(ev.detail, max_detail)}")
    if not rendered:
        return idle
    return "\n\n".join(rendered)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def format_fanout_trace_md(results) -> str:
    """Render one collapsible trace per subagent result for fan-out mode.

    Each result contributes a `<details>` block whose summary shows its 1-based
    index, model, step count and a verdict (verified, error or unverified), and
    whose body is `format_trace_md` of that result's trace events.
    """
    if not results:
        return "_no subagents_"

    def render(r) -> str:
        events = getattr(r, "trace_events", None) or []
        inner = format_trace_md(events, steps=r.steps, idle="_no steps yet_")
        if r.verified:
            verdict = "✓ verified"
        elif r.error:
            verdict = "⚠️ error"
        else:
            verdict = "· unverified"
        return (
            f"<details><summary>`{r.index + 1}` **subagent** ({r.model}): "
            f"{len(r.steps)} steps · {verdict}</summary>\n\n{inner}\n</details>"
        )

    return "\n\n".join([render(r) for r in results])
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _steps_only_md(steps: list[Step]) -> str:
|
| 91 |
+
lines = []
|
| 92 |
+
for s in steps:
|
| 93 |
+
kind = s.kind
|
| 94 |
+
if kind.startswith("tool_call:"):
|
| 95 |
+
tool = kind.split(":", 1)[1]
|
| 96 |
+
icon = _TOOL_ICON.get(tool, "🔧")
|
| 97 |
+
meta = ""
|
| 98 |
+
if s.total_tokens:
|
| 99 |
+
meta = f" · {s.total_tokens} tok"
|
| 100 |
+
lines.append(f"`{s.number}` {icon} **{tool}**{meta}")
|
| 101 |
+
elif kind == "response":
|
| 102 |
+
lines.append("✅ **final answer**")
|
| 103 |
+
else:
|
| 104 |
+
lines.append(f"• {kind}")
|
| 105 |
+
return "\n\n".join(lines) if lines else "_waiting for the model…_"
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _meta_badge(ev: TraceEvent) -> str:
|
| 109 |
+
parts = []
|
| 110 |
+
if ev.duration_ms is not None:
|
| 111 |
+
parts.append(f"{ev.duration_ms}ms")
|
| 112 |
+
if ev.tokens is not None:
|
| 113 |
+
parts.append(f"{ev.tokens} tok")
|
| 114 |
+
return f" <span class='trace-meta'>({', '.join(parts)})</span>" if parts else ""
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _truncate(text: str, limit: int) -> str:
|
| 118 |
+
text = text.strip()
|
| 119 |
+
if len(text) <= limit:
|
| 120 |
+
return text
|
| 121 |
+
return text[:limit] + f"\n… ({len(text)} chars total)"
|
engine/web_tui.py
ADDED
|
@@ -0,0 +1,471 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""CLI-shaped web UI: transcript buffer, HTML rendering, layout helpers."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import html
|
| 5 |
+
from dataclasses import dataclass, field
|
| 6 |
+
|
| 7 |
+
from .gradio_shell import UiSettings
|
| 8 |
+
from .rust_session import list_commands
|
| 9 |
+
from .themes import theme_at, theme_names
|
| 10 |
+
|
| 11 |
+
# Slash commands always offered; slash_commands() appends the workspace's
# custom commands after these.
_BUILTIN_SLASH = [
    "/help", "/mode", "/think", "/mcp", "/rules", "/skills", "/skill", "/bg",
    "/init", "/new", "/sessions", "/rename", "/fork", "/delete", "/timeline",
    "/stats", "/export", "/search", "/config", "/commit", "/agents", "/models",
    "/themes", "/files", "/clear", "/quit",
]

# Per transcript-line kind: (glyph, glyph colour, third colour). _line_html
# uses the first two and ignores the third; unknown kinds fall back to "info".
_KIND_STYLE = {
    "user": ("›", "#e2e8f0", "#1e293b"),
    "assistant": ("◆", "#c4b5fd", "#1e1b4b"),
    "tool": ("⚙", "#a78bfa", "#0f172a"),
    "result": ("·", "#94a3b8", "#0f172a"),
    "info": ("·", "#94a3b8", "#0f172a"),
    "error": ("✕", "#f87171", "#450a0a"),
    "final": ("✓", "#34d399", "#052e16"),
}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class TranscriptLine:
    """A single finished line of the transcript."""

    # Line category; selects the glyph/colour and CSS class in _line_html.
    kind: str
    # The line's text, stored already stripped by Transcript.append.
    text: str
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@dataclass
class Transcript:
    """Finished transcript lines plus the assistant text currently streaming in."""

    lines: list[TranscriptLine] = field(default_factory=list)
    # In-progress assistant text; rendered after the lines with a cursor glyph.
    partial: str = ""

    def clear(self) -> None:
        """Drop every line and the partial text."""
        self.partial = ""
        self.lines.clear()

    def append(self, kind: str, text: str) -> None:
        """Add a line of `kind`; empty or whitespace-only text is ignored."""
        cleaned = (text or "").strip()
        if cleaned:
            self.lines.append(TranscriptLine(kind=kind, text=cleaned))

    def append_user(self, text: str) -> None:
        self.append("user", text)

    def append_assistant(self, text: str) -> None:
        self.append("assistant", text)

    def append_info(self, text: str) -> None:
        self.append("info", text)

    def append_error(self, text: str) -> None:
        self.append("error", text)

    def append_tool_call(self, name: str, args: str) -> None:
        """Add a tool line showing the name and the first 200 characters of `args`."""
        shown_args = args[:200]
        self.append("tool", f"{name} {shown_args}")

    def append_tool_result(self, name: str, text: str) -> None:
        """Add a result line, clipped to 400 characters with an ellipsis when cut."""
        ellipsis = "…" if len(text) > 400 else ""
        self.append("result", f"{name}: {text[:400]}{ellipsis}")

    def set_partial(self, text: str) -> None:
        self.partial = text

    def from_stored_chat(self, stored: list[dict[str, str]]) -> None:
        """Replace the transcript with stored messages; non-user roles become assistant lines."""
        self.clear()
        for message in stored:
            is_user = message.get("role", "assistant") == "user"
            self.append("user" if is_user else "assistant", message.get("text", ""))

    def append_final(self, text: str) -> None:
        self.append("final", text)

    def plain_texts(self) -> list[str]:
        """Return the text of every line, in order."""
        return [entry.text for entry in self.lines]

    def search(self, query: str, limit: int = 20) -> list[str]:
        """Case-insensitive substring search; returns `[kind] text` previews.

        Scanning stops once `limit` hits have been collected. A blank query
        returns no hits.
        """
        if not query.strip():
            return []
        needle = query.lower()
        found: list[str] = []
        for entry in self.lines:
            if needle not in entry.text.lower():
                continue
            found.append(f"[{entry.kind}] {entry.text[:120]}")
            if len(found) >= limit:
                break
        return found

    def render_html(self, *, running: bool = False) -> str:
        """Render the transcript as HTML.

        With nothing to show and no run in progress, a placeholder is returned.
        Otherwise each line is rendered, followed by the partial text (with a
        cursor) or, when running without partial text, a "thinking…" line.
        """
        nothing_to_show = not self.lines and not self.partial and not running
        if nothing_to_show:
            return (
                '<div class="sc-transcript-wrap">'
                '<div class="sc-transcript-empty">'
                "smolcode — describe a coding task, or type <code>/help</code>"
                "</div></div>"
            )
        rows = [_line_html(entry.kind, entry.text) for entry in self.lines]
        if self.partial:
            rows.append(_line_html("assistant", self.partial + "▏"))
        elif running:
            rows.append('<div class="sc-tline sc-tline-info">· thinking…</div>')
        inner = "\n".join(['<div class="sc-transcript-inner">', *rows, "</div>"])
        return '<div class="sc-transcript-wrap">\n' + inner + "\n</div>"
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _line_html(kind: str, text: str) -> str:
|
| 117 |
+
glyph, color, _bg = _KIND_STYLE.get(kind, _KIND_STYLE["info"])
|
| 118 |
+
body = html.escape(text).replace("\n", "<br>")
|
| 119 |
+
return (
|
| 120 |
+
f'<div class="sc-tline sc-tline-{kind}">'
|
| 121 |
+
f'<span class="sc-tglyph" style="color:{color}">{glyph}</span> '
|
| 122 |
+
f'<span class="sc-ttext">{body}</span></div>'
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def slash_commands(workspace: str) -> list[str]:
    """Return the built-in slash commands followed by the workspace's custom ones.

    Custom command names come from `list_commands(workspace)` and are given a
    leading "/".
    """
    return _BUILTIN_SLASH + ["/" + f"{name}" for name in list_commands(workspace)]
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def filter_slash_commands(prefix: str, workspace: str) -> list[str]:
    """Return the slash commands that start with `prefix`.

    A leading "/" is added to `prefix` when it is missing, so "he" and "/he"
    match the same commands.
    """
    needle = prefix
    if not needle.startswith("/"):
        needle = f"/{needle}"
    matches = []
    for command in slash_commands(workspace):
        if command.startswith(needle):
            matches.append(command)
    return matches
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def header_bar_html(
    *,
    git_branch: str = "",
    git_dirty: bool = False,
    model: str = "",
    host: str = "",
    theme: str = "default",
) -> str:
    """Build the header bar HTML: brand, optional git branch, model, host and theme.

    The git span is only present when `git_branch` is non-empty (with a " ●"
    marker when dirty). An empty model is shown as "—". All caller-supplied
    text is HTML-escaped.
    """
    pieces = [
        '<div class="sc-header-bar">',
        '<span class="sc-hbrand">◆ smol<span class="hf-accent">code</span></span>',
    ]
    if git_branch:
        marker = " ●" if git_dirty else ""
        pieces.append(f'<span class="sc-hgit">⎇ {html.escape(git_branch)}{marker}</span>')
    shown_model = html.escape(model) if model else "—"
    shown_host = html.escape(host) if host else ""
    pieces.append(f'<span class="sc-hmodel">{shown_model}</span>')
    pieces.append(f'<span class="sc-hhost">@ {shown_host}</span>')
    pieces.append(f'<span class="sc-htheme">{html.escape(theme)}</span>')
    pieces.append("</div>")
    return "".join(pieces)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def status_bar_html(
    settings: UiSettings,
    *,
    session_title: str = "new session",
    model: str = "",
    running: bool = False,
) -> str:
    """Build the status bar HTML: session, workspace, agent, mode, think, model, theme chips.

    Args:
        settings: UI settings; `mode`, `think`, `workspace`, `agent` and `model`
            are read.
        session_title: session name, shown truncated to 32 characters.
        model: model name to show; falls back to `settings.model`, then "—".
        running: adds a "running" chip when true.

    The mode chip reads AUTO or PLAN for those modes and EDIT for anything
    else. The think chip appears only when `settings.think` is set and not
    "off". The workspace is truncated to 48 characters. All dynamic text is
    HTML-escaped.
    """
    # Anything that is not auto/plan (including an unset mode) is shown as EDIT.
    mode = {"auto": "AUTO", "plan": "PLAN"}.get(settings.mode, "EDIT")
    think = ""
    if settings.think and settings.think != "off":
        think = f'<span class="sc-chip sc-chip-think">think:{html.escape(str(settings.think))}</span>'
    run = '<span class="sc-chip sc-chip-run">running</span>' if running else ""
    ws = html.escape(settings.workspace[:48])
    sess = html.escape(session_title[:32])
    ag = html.escape(settings.agent)
    mdl = html.escape(model or settings.model or "—")
    return (
        '<div class="sc-status-bar">'
        '<span class="sc-chip sc-chip-brand">smolcode</span>'
        f'<span class="sc-chip">{sess}</span>'
        f'<span class="sc-chip sc-chip-dim">{ws}</span>'
        f'<button type="button" class="sc-chip sc-chip-clickable" data-picker="agents">{ag}</button>'
        f'<button type="button" class="sc-chip sc-chip-clickable sc-chip-mode" data-action="cycle-mode">{mode}</button>'
        f"{think}{run}"
        f'<button type="button" class="sc-chip sc-chip-clickable sc-chip-model" data-picker="models">{mdl}</button>'
        '<button type="button" class="sc-chip sc-chip-clickable sc-chip-dim" data-picker="themes">theme</button>'
        "</div>"
    )
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def parse_git_header(git_text: str) -> tuple[str, bool]:
    """Extract (branch, dirty) from git status text.

    NOTE(review): the `##` header and `...` upstream separator match the output
    of `git status --short --branch` — confirm against the caller.

    The branch is taken from the `## ` header line, cut at the `...` that
    precedes the upstream name. For a repository without commits git prints
    `## No commits yet on <branch>` (older versions: `## Initial commit on
    <branch>`); that prefix is removed so only the branch name is returned.
    `dirty` is true when any non-blank line that does not start with `#` is
    present.

    Returns:
        `("", False)` for empty input.
    """
    branch = ""
    dirty = False
    for line in git_text.splitlines():
        if line.startswith("##"):
            head = line[2:].strip()
            for lead in ("No commits yet on ", "Initial commit on "):
                if head.startswith(lead):
                    head = head[len(lead):]
                    break
            branch = head.split("...")[0]
        elif line.strip() and not line.startswith("#"):
            dirty = True
    return branch, dirty
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def host_from_url(base_url: str) -> str:
    """Return the host (and port, if any) of `base_url` for display.

    A leading `http://` or `https://` is removed (in any letter case), and
    everything from the first `/` on is dropped. Any `user:password@` part is
    removed as well, so credentials embedded in the URL are not shown in the UI.

    Returns:
        An empty string for blank input.
    """
    u = base_url.strip()
    for prefix in ("https://", "http://"):
        if u.lower().startswith(prefix):
            u = u[len(prefix):]
    authority = u.split("/")[0] if u else ""
    # Keep only what follows the last "@": the userinfo must not reach the header.
    return authority.rsplit("@", 1)[-1]
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def cycle_mode(current: str) -> str:
    """Return the mode after `current` in normal → auto → plan → normal.

    An unknown value resets to "normal".
    """
    order = ["normal", "auto", "plan"]
    if current not in order:
        return "normal"
    following = order.index(current) + 1
    return order[following % len(order)]
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def cycle_think(current: str) -> str:
    """Return the think level after `current` in off → low → high → xtra → off.

    An unknown value resets to "off".
    """
    levels = ["off", "low", "high", "xtra"]
    position = levels.index(current) if current in levels else None
    if position is None:
        return "off"
    return levels[(position + 1) % len(levels)]
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def cycle_agent(current: str) -> str:
    """Toggle between the "build" and "plan" agents.

    An unknown value resets to "build".
    """
    agents = ["build", "plan"]
    for position, agent_name in enumerate(agents):
        if agent_name == current:
            return agents[(position + 1) % len(agents)]
    return "build"
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def cycle_model(models: list[str], current: str) -> str:
|
| 244 |
+
if not models:
|
| 245 |
+
return current
|
| 246 |
+
try:
|
| 247 |
+
i = models.index(current)
|
| 248 |
+
except ValueError:
|
| 249 |
+
return models[0]
|
| 250 |
+
return models[(i + 1) % len(models)]
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def ingest_agent_event(transcript: Transcript, ev: dict) -> None:
    """Apply one agent event dict to ``transcript``.

    Dispatches on ``ev["kind"]``:

    - ``token``: append ``text`` to the current partial.
    - ``assistant``: replace the partial with ``text``.
    - ``tool_call``: clear the partial, then record ``name`` / ``args``.
    - ``tool_result``: record ``name`` / ``text``.
    - ``final`` / ``error``: clear the partial, then record ``text``.

    Events of any other kind are ignored. Missing fields default to ``""``;
    a field that is present but ``None`` is treated the same way, so a
    ``None`` text can no longer break the ``token`` concatenation.
    """

    def field(key: str):
        # dict.get's default only covers a missing key, not an explicit None.
        value = ev.get(key)
        return "" if value is None else value

    kind = ev.get("kind")
    if kind == "token":
        transcript.set_partial(transcript.partial + field("text"))
    elif kind == "assistant":
        transcript.set_partial(field("text"))
    elif kind == "tool_call":
        transcript.set_partial("")
        transcript.append_tool_call(field("name"), field("args"))
    elif kind == "tool_result":
        transcript.append_tool_result(field("name"), field("text"))
    elif kind == "final":
        transcript.set_partial("")
        transcript.append_final(field("text"))
    elif kind == "error":
        transcript.set_partial("")
        transcript.append_error(field("text"))
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def help_overlay_html() -> str:
    """Build the HTML body of the key-binding help overlay.

    Each shortcut description is HTML-escaped and the entries are joined with
    ``<br>`` under a bold "smolcode keys" heading.
    """
    shortcuts = (
        "Enter — run task",
        "Shift+Enter — newline",
        "/ — slash commands (Tab complete)",
        "@ — attach file",
        "! cmd — shell (no LLM)",
        "Ctrl+L — clear transcript",
        "Ctrl+X — leader key menu",
        "Tab — cycle agent",
        "Shift+Tab — cycle mode",
        "F2 — cycle model",
        "Esc — interrupt / close overlay",
    )
    escaped = [html.escape(entry) for entry in shortcuts]
    listing = "<br>".join(escaped)
    return '<div class="sc-overlay-body"><b>smolcode keys</b><br><br>' + listing + "</div>"
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def whichkey_overlay_html() -> str:
    """Build the HTML body of the Ctrl+X leader-key overlay.

    Entries are HTML-escaped and joined with ``<br>`` under a bold
    "ctrl+x leader" heading.
    """
    entries = (
        "m models",
        "a agents",
        "t themes",
        "l sessions",
        "n new session",
        "b sidebar",
        "s stats/files",
        "f focus files",
        "h help",
        "o mode",
        "e think",
        "q quit",
    )
    listing = "<br>".join(map(html.escape, entries))
    return '<div class="sc-overlay-body"><b>ctrl+x leader</b><br><br>' + listing + "</div>"
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def render_picker_html(
    kind: str,
    items: list[str],
    selected: int,
    *,
    title: str | None = None,
    window_size: int = 12,
) -> str:
    """TUI-style bordered picker list with scroll window.

    Args:
        kind: Picker kind; emitted (escaped) as the ``data-kind`` attribute and
            used as the heading when ``title`` is not given.
        items: Entries to list; each is HTML-escaped.
        selected: Index of the highlighted entry; clamped into range.
        title: Optional heading overriding ``kind``.
        window_size: Maximum number of entries rendered at once (default 12,
            the previous fixed value). Values below 1 are treated as 1.

    Returns:
        The picker markup. Each entry is a button carrying ``data-idx`` and an
        inline ``onclick`` that calls ``window.__smolcodePick`` with the
        entry's index in ``items``.
    """
    head = (
        f'<div class="sc-picker" data-kind="{html.escape(kind)}">'
        f'<div class="sc-picker-title">{html.escape(title or kind)}</div>'
    )
    if not items:
        return head + '<div class="sc-picker-empty">(empty)</div></div>'

    win = max(1, window_size)
    sel = min(max(0, selected), len(items) - 1)
    # Centre the window on the selection, then pull it back so that it stays
    # full whenever the list is long enough.
    start = max(0, sel - win // 2)
    end = min(len(items), start + win)
    start = max(0, end - win)

    rows: list[str] = []
    for i in range(start, end):
        is_sel = i == sel
        marker = "❯" if is_sel else " "
        cls = "sc-picker-item sc-picker-sel" if is_sel else "sc-picker-item"
        rows.append(
            f'<button type="button" class="{cls}" data-idx="{i}" '
            f'onclick="window.__smolcodePick && window.__smolcodePick({i})">'
            f'<span class="sc-picker-mark">{marker}</span>'
            f"<span>{html.escape(items[i])}</span></button>"
        )
    body = "\n".join(rows)
    return (
        head
        + f'<div class="sc-picker-list">{body}</div>'
        + '<div class="sc-picker-hint">↑↓ navigate · Enter select · Esc close</div>'
        + "</div>"
    )
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
def shell_theme_html(theme_idx: int) -> str:
    """Inject data-theme on the TUI shell wrapper.

    Returns a ``<script>`` snippet that looks up the ``.sc-tui-shell`` element
    and sets its ``data-theme`` attribute to the name of the theme at
    ``theme_idx``.

    The name is embedded as a JavaScript string literal. HTML entities are not
    decoded inside ``<script>``, so HTML-escaping would corrupt names
    containing ``&``, ``<`` or quotes and would not protect against
    backslashes or newlines. The value is JSON-encoded instead, with ``<``,
    ``>`` and ``&`` written as ``\\uXXXX`` escapes so it can never close the
    script element. Plain alphanumeric names produce the same output as before.
    """
    import json  # local import so the block does not depend on file-level imports

    name = theme_at(theme_idx).name
    literal = (
        json.dumps(name)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    return (
        '<script>(function(){var el=document.querySelector(".sc-tui-shell");'
        f'if(el)el.setAttribute("data-theme",{literal});}})();</script>'
    )
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def agent_choices() -> list[str]:
    """Return the selectable agent names as a fresh list."""
    choices = ["build", "plan"]
    return choices
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def theme_picker_items() -> list[str]:
    """Return the entries for the theme picker: whatever ``theme_names()`` yields."""
    names = theme_names()
    return names
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def _sorted_file_paths(files: dict[str, str] | list[str]) -> list[str]:
|
| 360 |
+
if isinstance(files, dict):
|
| 361 |
+
return sorted(files.keys())
|
| 362 |
+
return sorted(files)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def _paths_for_ui(files: dict[str, str] | list[str] | None) -> list[str]:
    """Normalise an optional file collection to a sorted list of paths.

    ``None`` (or any empty collection) is passed on as an empty list.
    """
    if not files:
        files = []
    return _sorted_file_paths(files)
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def _files_sidebar_body(paths: list[str], *, selected: int = 0, max_rows: int = 48) -> str:
|
| 370 |
+
"""Flat file list grouped by directory, matching the CLI TUI sidebar."""
|
| 371 |
+
if not paths:
|
| 372 |
+
return '<div class="sc-sb-empty">no files</div>'
|
| 373 |
+
|
| 374 |
+
rows: list[str] = []
|
| 375 |
+
sel_row: int | None = None
|
| 376 |
+
last_dir = ""
|
| 377 |
+
sel = min(selected, max(0, len(paths) - 1))
|
| 378 |
+
|
| 379 |
+
for i, path in enumerate(paths):
|
| 380 |
+
if "/" in path:
|
| 381 |
+
j = path.rfind("/")
|
| 382 |
+
dir_part, file_part = path[:j], path[j + 1 :]
|
| 383 |
+
else:
|
| 384 |
+
dir_part, file_part = "", path
|
| 385 |
+
if dir_part != last_dir:
|
| 386 |
+
last_dir = dir_part
|
| 387 |
+
label = "." if not dir_part else f"{dir_part}/"
|
| 388 |
+
rows.append(f'<div class="sc-sb-dir">▾ {html.escape(label)}</div>')
|
| 389 |
+
is_sel = i == sel
|
| 390 |
+
if is_sel:
|
| 391 |
+
sel_row = len(rows)
|
| 392 |
+
prefix = "❯" if is_sel else ""
|
| 393 |
+
cls = "sc-sb-file sc-sb-sel" if is_sel else "sc-sb-file"
|
| 394 |
+
rows.append(
|
| 395 |
+
f'<div class="{cls}">'
|
| 396 |
+
f'<span class="sc-sb-mark">{prefix}</span>'
|
| 397 |
+
f'<span class="sc-sb-glyph"> </span>'
|
| 398 |
+
f'<span class="sc-sb-name">{html.escape(file_part)}</span>'
|
| 399 |
+
f"</div>"
|
| 400 |
+
)
|
| 401 |
+
|
| 402 |
+
total = len(rows)
|
| 403 |
+
start = 0
|
| 404 |
+
if total > max_rows:
|
| 405 |
+
anchor = sel_row if sel_row is not None else 0
|
| 406 |
+
start = min(max(0, anchor - max_rows + 1), total - max_rows)
|
| 407 |
+
|
| 408 |
+
visible = rows[start : start + max_rows]
|
| 409 |
+
if total > max_rows and start + max_rows < total:
|
| 410 |
+
more = total - (start + max_rows) + 1
|
| 411 |
+
visible.append(f'<div class="sc-sb-more">… +{more} more</div>')
|
| 412 |
+
|
| 413 |
+
return "\n".join(visible)
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
def _stats_sidebar_body(
|
| 417 |
+
*,
|
| 418 |
+
session_id: str,
|
| 419 |
+
file_count: int,
|
| 420 |
+
agent: str,
|
| 421 |
+
extra_lines: list[str] | None = None,
|
| 422 |
+
) -> str:
|
| 423 |
+
parts = [
|
| 424 |
+
f'<div class="sc-sb-stat sc-sb-dim">{html.escape(session_id[:26])}</div>',
|
| 425 |
+
'<div class="sc-sb-stat"></div>',
|
| 426 |
+
]
|
| 427 |
+
for line in extra_lines or []:
|
| 428 |
+
parts.append(f'<div class="sc-sb-stat">{html.escape(line)}</div>')
|
| 429 |
+
parts.append(f'<div class="sc-sb-stat">files: {file_count}</div>')
|
| 430 |
+
parts.append(f'<div class="sc-sb-stat">agent: {html.escape(agent)}</div>')
|
| 431 |
+
return "\n".join(parts)
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
def render_sidebar_html(
    *,
    view: str = "files",
    files: dict[str, str] | list[str] | None = None,
    selected: int = 0,
    focused: bool = False,
    session_id: str = "(none)",
    agent: str = "build",
    stats_lines: list[str] | None = None,
    file_total: int | None = None,
) -> str:
    """CLI TUI-shaped sidebar panel (flat file list or stats).

    ``view == "stats"`` renders the stats rows; any other value renders the
    file list. ``file_total`` overrides the file count derived from ``files``;
    in the file view, a total larger than the number of listed paths adds a
    trailing "… N more files" row. ``focused`` adds the focused panel class
    and, in the file view, the ``▸`` marker in the title.
    """
    paths = _paths_for_ui(files)
    total = len(paths) if file_total is None else file_total

    if view == "stats":
        title = "stats"
        body = _stats_sidebar_body(
            session_id=session_id,
            file_count=total,
            agent=agent,
            extra_lines=stats_lines,
        )
    else:
        title = "files ▸" if focused else "files"
        body = _files_sidebar_body(paths, selected=selected)
        unlisted = total - len(paths)
        if unlisted > 0:
            body += f'\n<div class="sc-sb-more">… {unlisted} more files</div>'

    panel_cls = "sc-sidebar-panel sc-sidebar-focused" if focused else "sc-sidebar-panel"
    pieces = [
        f'<div class="{panel_cls}">',
        f'<div class="sc-sidebar-title">{html.escape(title)}</div>',
        f'<div class="sc-sidebar-body">{body}</div>',
        "</div>",
    ]
    return "".join(pieces)
|
| 471 |
+
|
engine/webcheck.js
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Headless smoke-check for a model-built web app, used by smolbuilder so the
|
| 2 |
+
// agent can actually *test* what it builds (the web equivalent of run_python).
|
| 3 |
+
//
|
| 4 |
+
// Loads index.html in jsdom, runs its scripts, then clicks every <button>, and
|
| 5 |
+
// reports any JavaScript errors. The goal is high precision: a correct app
|
| 6 |
+
// reports zero errors; a broken one (null element refs, undefined functions,
|
| 7 |
+
// syntax errors, exceptions on click) reports them so the agent can fix it.
|
| 8 |
+
//
|
| 9 |
+
// We stub the browser APIs jsdom doesn't implement (canvas 2d/webgl context,
|
| 10 |
+
// alert/confirm/prompt, matchMedia, media play) so apps that *use* them aren't
|
| 11 |
+
// falsely flagged — we're checking the app's own logic, not jsdom's coverage.
|
| 12 |
+
//
|
| 13 |
+
// Output: a single JSON line {ok, errors, buttons, clicked}. Exit 0 always
|
| 14 |
+
// (the verdict is in the JSON); exit 3 only if jsdom itself is missing.
|
| 15 |
+
'use strict';
|
| 16 |
+
|
| 17 |
+
let JSDOM, VirtualConsole;
|
| 18 |
+
try {
|
| 19 |
+
({ JSDOM, VirtualConsole } = require('jsdom'));
|
| 20 |
+
} catch (e) {
|
| 21 |
+
process.stdout.write(JSON.stringify({ ok: null, infra: 'jsdom not installed' }) + '\n');
|
| 22 |
+
process.exit(3);
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
const fs = require('fs');
|
| 26 |
+
|
| 27 |
+
function makeCtx() {
  // A permissive 2d/webgl context stub: method calls no-op, the few methods
  // whose *return value* is used hand back something safe to deref.
  //
  // getImageData/createImageData return a zero-filled buffer of the requested
  // size (each dimension clamped to 1..MAX_DIM), so apps that index or
  // deref pixel data don't hit `undefined` and get falsely flagged.
  const MAX_DIM = 2048;
  const toDim = (v) => {
    const n = Math.abs(Math.trunc(Number(v)));
    // NaN and 0 both fall back to 1, which keeps the old 1x1 result for
    // calls made without arguments.
    return Math.min(MAX_DIM, Math.max(1, n || 1));
  };
  const makeImageData = (w, h) => {
    const width = toDim(w);
    const height = toDim(h);
    return { data: new Uint8ClampedArray(width * height * 4), width, height };
  };

  return new Proxy({}, {
    get(_t, p) {
      if (p === 'measureText') return () => ({ width: 0 });
      if (p === 'getImageData') return (_x, _y, w, h) => makeImageData(w, h);
      if (p === 'createImageData') {
        // Accepts both createImageData(w, h) and createImageData(imageData).
        return (w, h) => (w !== null && typeof w === 'object'
          ? makeImageData(w.width, w.height)
          : makeImageData(w, h));
      }
      if (p === 'createLinearGradient' || p === 'createRadialGradient' || p === 'createPattern') {
        return () => ({ addColorStop() {} });
      }
      if (p === 'canvas') return { width: 300, height: 150 };
      return () => undefined;
    },
    // Property writes (fillStyle, font, ...) are accepted and discarded.
    set() { return true; },
  });
}
|
| 42 |
+
|
| 43 |
+
// Install no-op stand-ins on `window` for a handful of browser APIs: canvas
// getContext, alert/confirm/prompt, scroll/scrollTo, matchMedia (only when
// absent) and media play/pause. Prototype patches are best-effort: a failure
// to install one is deliberately ignored.
function stubBrowser(window) {
  const noop = () => {};
  const bestEffort = (install) => {
    try { install(); } catch (e) { /* deliberately ignored */ }
  };

  bestEffort(() => { window.HTMLCanvasElement.prototype.getContext = () => makeCtx(); });

  window.alert = noop;
  window.confirm = () => true;
  window.prompt = () => '';
  window.scrollTo = noop;
  window.scroll = noop;

  if (!window.matchMedia) {
    window.matchMedia = () => ({
      matches: false,
      media: '',
      addListener: noop,
      removeListener: noop,
      addEventListener: noop,
      removeEventListener: noop,
    });
  }

  bestEffort(() => { window.HTMLMediaElement.prototype.play = () => Promise.resolve(); });
  bestEffort(() => { window.HTMLMediaElement.prototype.pause = noop; });
}
|
| 56 |
+
|
| 57 |
+
const file = process.argv[2];
const html = fs.readFileSync(file, 'utf8');
const errors = [];

// Record an error message once. Messages are truncated to 400 characters and
// de-duplicated on the truncated text, so a long message that repeats is
// still collapsed into a single entry.
const push = (m) => {
  if (!m) return;
  const msg = String(m).slice(0, 400);
  if (!errors.includes(msg)) errors.push(msg);
};

// jsdom reports its own unimplemented features (navigation, form submission,
// ...) through the same 'jsdomError' channel as script errors. Those are gaps
// in jsdom, not bugs in the app, so they must not count against it.
// NOTE(review): the `type` marker spelling differs between jsdom releases;
// the message prefix is checked as well -- confirm against the pinned version.
const isNotImplemented = (e) => Boolean(e) && (
  e.type === 'not implemented' ||
  e.type === 'not-implemented' ||
  /^Not implemented\b/.test(String(e.message || ''))
);

const vc = new VirtualConsole();
vc.on('jsdomError', (e) => {
  if (isNotImplemented(e)) return;
  push('script error: ' + (e && e.detail ? (e.detail.message || e.detail) : (e && e.message)));
});

let dom;
try {
  dom = new JSDOM(html, {
    // Without a url the document gets an opaque origin, for which jsdom makes
    // localStorage/sessionStorage throw; apps using storage would be flagged
    // for something that works in a real browser.
    url: 'http://localhost/',
    runScripts: 'dangerously',
    pretendToBeVisual: true,
    virtualConsole: vc,
    beforeParse(window) {
      stubBrowser(window);
      window.addEventListener('error', (ev) => push('uncaught: ' + (ev.error ? (ev.error.message || ev.error) : ev.message)));
      window.addEventListener('unhandledrejection', (ev) => push('promise rejection: ' + (ev.reason && ev.reason.message ? ev.reason.message : ev.reason)));
    },
  });
} catch (e) {
  push('load failed: ' + e.message);
  process.stdout.write(JSON.stringify({ ok: false, errors, buttons: 0, clicked: 0 }) + '\n');
  process.exit(0);
}

const { window } = dom;
const doc = window.document;
|
| 85 |
+
|
| 86 |
+
// Click every button-like element in the document once and report how many
// were found (`n`) and how many clicks completed without throwing (`clicked`).
// A click that throws is recorded through push() with a short label for the
// element.
function clickAll() {
  const buttons = Array.from(doc.querySelectorAll('button, [onclick], input[type=button], input[type=submit]'));
  let clicked = 0;
  for (const el of buttons) {
    try {
      if (el.disabled) el.disabled = false; // exercise the handler regardless of initial state
      el.click();
      clicked++;
    } catch (e) {
      // First non-blank of text, value (input buttons have no text content),
      // id, tag name. Trimming happens before the fallback so that
      // whitespace-only text does not produce an empty label.
      const label = [el.textContent, el.value, el.id, el.tagName]
        .map((s) => (s == null ? '' : String(s).trim()))
        .find((s) => s !== '') || '';
      push('click "' + label.slice(0, 30) + '": ' + (e && e.message ? e.message : e));
    }
  }
  return { n: buttons.length, clicked };
}
|
| 100 |
+
|
| 101 |
+
// Let inline scripts settle, click, then let one timer tick surface late errors.
const SETTLE_MS = 50;
const LATE_ERROR_MS = 250;

// Print the single-line JSON verdict and exit 0 (the verdict is in the JSON).
function report(n, clicked) {
  const verdict = { ok: errors.length === 0, errors, buttons: n, clicked };
  process.stdout.write(JSON.stringify(verdict) + '\n');
  process.exit(0);
}

setTimeout(() => {
  const { n, clicked } = clickAll();
  setTimeout(() => report(n, clicked), LATE_ERROR_MS);
}, SETTLE_MS);
|
engine/webcheck.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Headless verification of model-built web apps (the web `run_python`).
|
| 2 |
+
|
| 3 |
+
smolbuilder's agent writes HTML/CSS/JS but, unlike the Python path, had no way
|
| 4 |
+
to *run* it — so it shipped broken apps and couldn't tell. This bridges to a
|
| 5 |
+
small Node + jsdom checker (engine/webcheck.js) that loads the page, runs its
|
| 6 |
+
scripts, clicks every button, and reports JavaScript errors.
|
| 7 |
+
|
| 8 |
+
Graceful degradation is deliberate: if Node or jsdom isn't available (e.g. a
|
| 9 |
+
minimal Space image), we return `None` ("unverifiable") rather than failing the
|
| 10 |
+
build — the agent/router fall back to the structural check.
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import shutil
|
| 16 |
+
import subprocess
|
| 17 |
+
import tempfile
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
_CHECKER = Path(__file__).with_name("webcheck.js")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def available() -> bool:
    """True if we can actually run the headless check (Node present).

    Requires both a ``node`` executable on PATH and the checker script next
    to this module.
    """
    if shutil.which("node") is None:
        return False
    return _CHECKER.exists()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def check_html(html: str, timeout: int = 20) -> tuple[bool | None, list[str]]:
    """Run the headless check on an HTML document.

    The document is written to a temporary ``.html`` file, the Node checker is
    run on it, and the last line of its stdout is parsed as JSON.

    Args:
        html: The full HTML document to check.
        timeout: Seconds to wait for the checker process.

    Returns (ok, errors):
      - (True, [])      the app loaded and all buttons clicked without error
      - (False, [...])  real JavaScript errors were found
      - (None, [...])   unverifiable (Node/jsdom missing, or the checker broke)
    """
    node = shutil.which("node")
    if not node or not _CHECKER.exists():
        return None, ["node/jsdom unavailable (skipped runtime check)"]

    path: str | None = None
    try:
        # The checker reads the file as UTF-8, so write it as UTF-8 regardless
        # of the locale. Creating the file inside the try block means a failed
        # write cannot leave the temp file behind.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".html", delete=False, encoding="utf-8", errors="replace"
        ) as f:
            path = f.name
            f.write(html)
        proc = subprocess.run(
            [node, str(_CHECKER), path],
            capture_output=True, text=True, encoding="utf-8", errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return None, [f"runtime check timed out after {timeout}s"]
    except OSError as exc:
        # Temp file or process launch failed: unverifiable, not a build failure.
        return None, [f"runtime check could not run: {exc}"]
    finally:
        if path is not None:
            Path(path).unlink(missing_ok=True)

    if proc.returncode == 3:  # jsdom not installed
        return None, ["jsdom not installed (skipped runtime check)"]
    lines = (proc.stdout or "").strip().splitlines()
    if not lines:
        return None, [f"runtime check produced no output: {(proc.stderr or '').strip()[:200]}"]
    try:
        data = json.loads(lines[-1])
    except json.JSONDecodeError:
        return None, [f"runtime check output unparseable: {lines[-1][:200]}"]
    if not isinstance(data, dict):
        # Valid JSON but not the expected object.
        return None, [f"runtime check output unparseable: {lines[-1][:200]}"]

    if data.get("ok") is None:
        return None, [data.get("infra", "unverifiable")]
    errors = data.get("errors") or []
    if not isinstance(errors, list):
        errors = [errors]
    return bool(data.get("ok")), [str(e) for e in errors]
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=5.49,<6
|
| 2 |
+
liteforge==0.2.5
|
smolcode_core-0.1.0-cp312-cp312-manylinux_2_39_x86_64.whl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d179e40e7e38999081cfdd9461c0879b1843f81ea39d4dac3262a3eab5d7931
|
| 3 |
+
size 13694530
|
static/web_tui.js
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
(function () {
|
| 2 |
+
"use strict";
|
| 3 |
+
|
| 4 |
+
if (window.__smolcodeTuiInit) return;
|
| 5 |
+
window.__smolcodeTuiInit = true;
|
| 6 |
+
|
| 7 |
+
let leaderPending = false;
|
| 8 |
+
let leaderTimer = null;
|
| 9 |
+
|
| 10 |
+
// Click the element with the given id. When that element is not itself a
// <button>, its first descendant <button> is clicked instead, falling back
// to the element itself. Unknown ids are ignored.
function click(id) {
  const host = document.getElementById(id);
  if (!host) return;
  let target = host;
  if (host.tagName !== "BUTTON") {
    target = host.querySelector("button") || host;
  }
  target.click();
}
|
| 16 |
+
|
| 17 |
+
// Set the value of the text field identified by `id` (the element itself when
// it is a <textarea>/<input>, otherwise its first such descendant) and fire a
// bubbling "input" event on it. Does nothing when no field is found.
function setHiddenValue(id, value) {
  const host = document.getElementById(id);
  if (!host) return;
  const isField = ["TEXTAREA", "INPUT"].includes(host.tagName);
  const field = isField ? host : host.querySelector("textarea, input");
  if (!field) return;
  field.value = value;
  field.dispatchEvent(new Event("input", { bubbles: true }));
}
|
| 27 |
+
|
| 28 |
+
// Global hook for picking an entry by index: stores the index (as a string)
// in the "sc-picker-pick" field, then clicks "sc-picker-confirm".
window.__smolcodePick = (idx) => {
  const picked = String(idx);
  setHiddenValue("sc-picker-pick", picked);
  click("sc-picker-confirm");
};
|
| 32 |
+
|
| 33 |
+
// Locate the prompt editor field: the "sc-editor" element itself when it is a
// <textarea>/<input>, else its first nested textarea or text input, else the
// last element on the page with data-testid="textbox". Returns null when
// nothing matches.
function editor() {
  const host = document.getElementById("sc-editor");
  if (host) {
    const isField = host.tagName === "TEXTAREA" || host.tagName === "INPUT";
    if (isField) return host;
    const nested = host.querySelector("textarea, input[type='text']");
    if (nested) return nested;
  }
  const fallback = document.querySelectorAll("[data-testid='textbox']");
  if (!fallback.length) return null;
  return fallback[fallback.length - 1];
}
|
| 43 |
+
|
| 44 |
+
/**
 * Completion popup attached to the editor: holds the current matches and the
 * selected index, renders them into `popupEl`, and writes the accepted match
 * back into the text field.
 *
 * `kind` is "slash" (the match replaces the typed command) or "file" (the
 * match replaces the trailing "@prefix" token); it defaults to "slash".
 */
class PopupController {
  constructor(popupEl, kind) {
    this.popup = popupEl;
    this.kind = kind || "slash";
    this.matches = [];
    this.sel = 0;
  }

  /** Hide the popup element and forget the current matches. */
  hide() {
    if (this.popup) this.popup.style.display = "none";
    this.matches = [];
    this.sel = 0;
  }

  /**
   * Show `matches` for the field `ta`, with the first entry selected.
   * `replaceFrom` is the offset in the field's value from which slash
   * completion keeps the remaining text.
   */
  render(matches, ta, replaceFrom) {
    const MAX_ROWS = 12;
    // Keep only what is rendered, so keyboard selection can never land on an
    // entry the user cannot see.
    this.matches = matches.slice(0, MAX_ROWS);
    this.sel = 0;
    this.replaceFrom = replaceFrom;
    this.ta = ta;
    if (!this.popup) return;
    this.popup.innerHTML = "";
    this.matches.forEach((item, i) => {
      const row = document.createElement("div");
      row.className = "sc-popup-item" + (i === 0 ? " sc-popup-sel" : "");
      row.textContent = item;
      row.onclick = () => {
        this.sel = i;
        this.accept();
      };
      this.popup.appendChild(row);
    });
    // Position the popup above the field, at least as wide as the field.
    const rect = ta.getBoundingClientRect();
    this.popup.style.display = this.matches.length ? "block" : "none";
    this.popup.style.left = rect.left + "px";
    this.popup.style.top = Math.max(0, rect.top - 160) + "px";
    this.popup.style.width = Math.max(220, rect.width) + "px";
    this._highlight();
  }

  /** Sync the "sc-popup-sel" class with the selected index. */
  _highlight() {
    if (!this.popup) return;
    const items = this.popup.querySelectorAll(".sc-popup-item");
    items.forEach((el, i) => {
      el.classList.toggle("sc-popup-sel", i === this.sel);
    });
  }

  /** Move the selection by `delta`, wrapping around at either end. */
  move(delta) {
    if (!this.matches.length) return;
    this.sel = (this.sel + delta + this.matches.length) % this.matches.length;
    this._highlight();
  }

  /**
   * Write the selected match into the field, fire a bubbling "input" event,
   * hide the popup and refocus the field.
   */
  accept() {
    if (!this.matches.length || !this.ta) return;
    const val = this.ta.value;
    const item = this.matches[this.sel];
    if (this.kind === "file") {
      const atMatch = val.match(/(?:^|\s)@(\S*)$/);
      if (!atMatch) return;
      // The match is anchored at the end of the value, so "@" sits directly
      // before the captured prefix. Deriving the offset from the prefix keeps
      // whatever whitespace (space, tab or newline) precedes the "@".
      const atPos = val.length - atMatch[1].length - 1;
      this.ta.value = val.slice(0, atPos) + "@" + item + " ";
    } else {
      const rest = val.slice(this.replaceFrom);
      this.ta.value = item + rest;
    }
    this.ta.dispatchEvent(new Event("input", { bubbles: true }));
    this.hide();
    this.ta.focus();
  }

  /** Accept the current selection; returns false when there is nothing to accept. */
  tabComplete() {
    if (!this.matches.length) return false;
    this.accept();
    return true;
  }

  /** True while the popup element is displayed and has matches. */
  visible() {
    return Boolean(this.popup && this.popup.style.display === "block" && this.matches.length > 0);
  }
}
|
| 125 |
+
|
| 126 |
+
// Return the first element with class `cls`, creating a <div> with classes
// `cls` and "sc-popup" at the end of <body> when none exists yet.
function ensurePopup(cls) {
  const existing = document.querySelector("." + cls);
  if (existing) return existing;
  const created = document.createElement("div");
  created.className = cls + " sc-popup";
  document.body.appendChild(created);
  return created;
}
|
| 135 |
+
|
| 136 |
+
// One controller per completion popup: slash commands and "@" file references.
const slashPopupEl = ensurePopup("sc-slash-popup");
const slashPopup = new PopupController(slashPopupEl, "slash");
const filePopupEl = ensurePopup("sc-file-popup");
const filePopup = new PopupController(filePopupEl, "file");
|
| 138 |
+
|
| 139 |
+
// Hide both completion popups (slash commands first, then files).
function hidePopups() {
  [slashPopup, filePopup].forEach((popup) => {
    popup.hide();
  });
}
|
| 143 |
+
|
| 144 |
+
// Refresh the completion popups after the editor text changes.
//
// - A value that starts with "/" and contains no space shows the slash popup
//   with the known commands that start with the typed text.
// - Otherwise a trailing "@prefix" token (at the start of the value or after
//   whitespace) shows the file popup with the known files that start with
//   the prefix.
// - Anything else hides both popups.
function onEditorInput(ta) {
  const val = ta.value;

  if (val.startsWith("/") && !val.includes(" ")) {
    const cmds = window.__smolcode_commands || [];
    slashPopup.render(cmds.filter((c) => c.startsWith(val)), ta, val.length);
    filePopup.hide();
    return;
  }

  slashPopup.hide();
  const atMatch = val.match(/(?:^|\s)@(\S*)$/);
  if (!atMatch) {
    filePopup.hide();
    return;
  }

  const prefix = atMatch[1];
  const files = window.__smolcode_files || [];
  // The match is anchored at the end of the value, so "@" sits directly
  // before the prefix. This stays correct when the "@" follows a newline or
  // tab rather than a space.
  const atPos = val.length - prefix.length - 1;
  filePopup.render(files.filter((f) => f.startsWith(prefix)), ta, atPos);
}
|
| 167 |
+
|
| 168 |
+
// Return whichever completion popup is currently visible (the slash popup is
// checked first), or null when neither is.
function activePopup() {
  for (const candidate of [slashPopup, filePopup]) {
    if (candidate.visible()) return candidate;
  }
  return null;
}
|
| 173 |
+
|
| 174 |
+
// Map a key to the picker button it triggers, or null for other keys.
function pickerButtonForKey(key) {
  switch (key) {
    case "ArrowDown": return "sc-picker-down";
    case "ArrowUp": return "sc-picker-up";
    case "Enter": return "sc-picker-confirm";
    default: return null;
  }
}

// Keydown handler for the prompt editor. In priority order:
//   1. an open completion popup takes arrows / Enter / Tab;
//   2. an open picker takes arrows / Enter;
//   3. Enter submits, Escape closes an overlay or interrupts;
//   4. Ctrl+L clears, Ctrl+X arms the leader key, the next plain key runs it;
//   5. Tab completes a slash command or cycles the agent, Shift+Tab cycles
//      the mode, F2 cycles the model.
function onEditorKeyDown(e) {
  const ta = e.target;
  const popup = activePopup();

  if (popup && (e.key === "ArrowDown" || e.key === "ArrowUp")) {
    e.preventDefault();
    popup.move(e.key === "ArrowDown" ? 1 : -1);
    return;
  }

  if (popup && e.key === "Enter" && !e.shiftKey) {
    e.preventDefault();
    popup.accept();
    return;
  }

  if (e.key === "Tab" && popup && !e.shiftKey) {
    e.preventDefault();
    popup.tabComplete();
    return;
  }

  // While a picker is open and the editor has focus, route its keys to the
  // picker. This used to sit at the end of the handler behind `!ta`; `ta` is
  // the event target, so that branch could never run and Enter submitted the
  // prompt instead of confirming the selection.
  if (document.querySelector(".sc-picker")) {
    const pickerButton = pickerButtonForKey(e.key);
    if (pickerButton) {
      e.preventDefault();
      click(pickerButton);
      return;
    }
  }

  if (e.key === "Enter" && !e.shiftKey && !e.altKey) {
    e.preventDefault();
    hidePopups();
    click("sc-submit");
    return;
  }

  if (e.key === "Escape") {
    hidePopups();
    if (document.querySelector(".sc-overlay")) {
      click("sc-close-overlay");
    } else {
      click("sc-interrupt");
    }
    return;
  }

  if (e.ctrlKey && (e.key === "l" || e.key === "L")) {
    e.preventDefault();
    click("sc-clear");
    return;
  }

  if (e.ctrlKey && (e.key === "x" || e.key === "X")) {
    e.preventDefault();
    // Arm the leader key; it disarms itself after two seconds.
    leaderPending = true;
    if (leaderTimer) clearTimeout(leaderTimer);
    leaderTimer = setTimeout(function () { leaderPending = false; }, 2000);
    click("sc-whichkey");
    return;
  }

  if (leaderPending && !e.ctrlKey && !e.metaKey && e.key.length === 1) {
    leaderPending = false;
    if (leaderTimer) clearTimeout(leaderTimer);
    // NOTE(review): the leader overlay text rendered by the server appears to
    // list more keys ("f", "q") than are mapped here -- confirm whether those
    // need buttons.
    const map = {
      m: "sc-open-picker-models",
      a: "sc-open-picker-agents",
      t: "sc-open-picker-themes",
      l: "sc-open-picker-sessions",
      n: "sc-new-session",
      b: "sc-toggle-sidebar",
      s: "sc-toggle-sidebar-view",
      h: "sc-help",
      o: "sc-cycle-mode",
      e: "sc-cycle-think",
    };
    const btn = map[e.key.toLowerCase()];
    if (btn) {
      e.preventDefault();
      click(btn);
    }
    return;
  }

  if (e.key === "Tab" && !e.shiftKey) {
    if (trySlashTabComplete(ta, e)) return;
  }

  if (e.key === "Tab" && !e.shiftKey && !activePopup()) {
    e.preventDefault();
    click("sc-cycle-agent");
    return;
  }

  if (e.key === "Tab" && e.shiftKey) {
    e.preventDefault();
    click("sc-cycle-mode");
    return;
  }

  if (e.key === "F2") {
    e.preventDefault();
    click("sc-cycle-model");
    return;
  }
}
|
| 286 |
+
|
| 287 |
+
// Keydown handler for when focus is outside the prompt editor: while a picker
// is open, ArrowDown / ArrowUp / Enter are forwarded to the picker's buttons.
function onGlobalKeyDown(e) {
  if (!document.querySelector(".sc-picker")) return;
  if (document.activeElement === editor()) return;

  let buttonId = null;
  switch (e.key) {
    case "ArrowDown":
      buttonId = "sc-picker-down";
      break;
    case "ArrowUp":
      buttonId = "sc-picker-up";
      break;
    case "Enter":
      buttonId = "sc-picker-confirm";
      break;
    default:
      return;
  }
  e.preventDefault();
  click(buttonId);
}
|
| 301 |
+
|
| 302 |
+
// Attach the input and keydown handlers to the prompt editor once. The
// data-sc-bound marker on the field prevents double binding when this is
// called again for the same element.
function bindEditor() {
  const ta = editor();
  if (!ta) return;
  if (ta.dataset.scBound) return;
  ta.dataset.scBound = "1";
  ta.addEventListener("input", () => {
    onEditorInput(ta);
  });
  ta.addEventListener("keydown", onEditorKeyDown);
}
|
| 309 |
+
|
| 310 |
+
/**
 * Intentionally empty.
 *
 * Chips are re-rendered together with the status HTML, so their clicks are
 * handled through event delegation set up in init() instead of being bound
 * to individual elements here.
 */
function bindChips() {
  // No per-chip binding: see delegation in init().
}
|
| 313 |
+
|
| 314 |
+
/**
 * Delegated click handler for elements rendered inside the status HTML.
 *
 * - A click inside a `[data-picker]` element forwards to `click` with the
 *   `sc-open-picker-*` name for that picker kind (unknown kinds are ignored,
 *   and no further handling happens for that click).
 * - Otherwise, a click inside `[data-action='cycle-mode']` forwards to
 *   `click("sc-cycle-mode")`.
 *
 * @param {MouseEvent} e - The click event; `e.target` is expected to be an Element.
 */
function onDocumentClick(e) {
  const pickerChip = e.target.closest("[data-picker]");

  if (!pickerChip) {
    // Not a picker chip: the only other delegated target is the mode button.
    if (e.target.closest("[data-action='cycle-mode']")) {
      e.preventDefault();
      click("sc-cycle-mode");
    }
    return;
  }

  const openerByKind = {
    models: "sc-open-picker-models",
    agents: "sc-open-picker-agents",
    themes: "sc-open-picker-themes",
    sessions: "sc-open-picker-sessions",
  };
  const opener = openerByKind[pickerChip.getAttribute("data-picker")];
  if (opener) {
    e.preventDefault();
    click(opener);
  }
}
|
| 336 |
+
|
| 337 |
+
/**
 * Return the registered slash commands that start with the typed text.
 *
 * Only a single token beginning with "/" (no spaces) is treated as a command
 * prefix; anything else yields no matches. Commands are read from
 * `window.__smolcode_commands`.
 *
 * Defensive on purpose: this runs inside keydown handling, so a missing,
 * non-array, or partially malformed command list must produce an empty or
 * filtered result instead of throwing.
 *
 * @param {string} val - Current editor text.
 * @returns {string[]} Commands having `val` as a prefix, in registry order.
 */
function slashMatches(val) {
  if (typeof val !== "string" || !val.startsWith("/") || val.includes(" ")) return [];
  const cmds = window.__smolcode_commands;
  // A truthy non-array (e.g. a string or object) has no usable .filter.
  if (!Array.isArray(cmds)) return [];
  return cmds.filter(function (c) {
    // Skip non-string entries rather than crash on c.startsWith.
    return typeof c === "string" && c.startsWith(val);
  });
}
|
| 342 |
+
|
| 343 |
+
/**
 * Attempt slash-command completion for a Tab press in the editor.
 *
 * When the editor text has no slash-command matches, nothing happens and the
 * caller is told to continue with its own Tab handling. Otherwise the event's
 * default is prevented and either the open slash popup completes (when it has
 * matches of its own), or the first match is written into the editor, an
 * `input` event is dispatched, and popups are hidden.
 *
 * @param {HTMLTextAreaElement} ta - The editor element.
 * @param {KeyboardEvent} e - The Tab keydown event.
 * @returns {boolean} True when the Tab press was consumed.
 */
function trySlashTabComplete(ta, e) {
  const candidates = slashMatches(ta.value);
  if (candidates.length === 0) return false;

  e.preventDefault();

  const openPopup = activePopup();
  const slashPopupReady =
    openPopup && openPopup.kind === "slash" && openPopup.matches.length;

  if (slashPopupReady) {
    // Let the popup apply its own completion.
    openPopup.tabComplete();
  } else {
    ta.value = candidates[0];
    // Dispatch a bubbling input event so input listeners observe the
    // programmatic value change.
    ta.dispatchEvent(new Event("input", { bubbles: true }));
    hidePopups();
  }
  return true;
}
|
| 358 |
+
|
| 359 |
+
/**
 * Install the document-level listeners and keep the editor bound.
 *
 * - Delegated clicks go to `onDocumentClick`.
 * - A click whose target is the `.sc-overlay` element itself forwards to
 *   `click("sc-close-overlay")`.
 * - Global keydown goes to `onGlobalKeyDown`.
 * - `bindEditor` is invoked on every DOM mutation under `document.body`,
 *   once immediately, and again after 300 ms and 1500 ms (presumably to
 *   catch an editor that is rendered late — bindEditor itself is a no-op
 *   once the element has been bound).
 */
function init() {
  document.addEventListener("click", onDocumentClick);
  document.addEventListener("click", (evt) => {
    const backdrop = document.querySelector(".sc-overlay");
    if (!backdrop || evt.target !== backdrop) return;
    click("sc-close-overlay");
  });
  document.addEventListener("keydown", onGlobalKeyDown);

  const editorWatcher = new MutationObserver(() => {
    bindEditor();
  });
  editorWatcher.observe(document.body, { childList: true, subtree: true });

  bindEditor();
  [300, 1500].forEach((delayMs) => {
    setTimeout(bindEditor, delayMs);
  });
}
|
| 374 |
+
|
| 375 |
+
// Start up as soon as the DOM is usable: run init() right away when parsing
// has already finished, otherwise defer it to DOMContentLoaded.
if (document.readyState !== "loading") {
  init();
} else {
  document.addEventListener("DOMContentLoaded", init);
}
|
| 380 |
+
})();
|