Commit ·
053ee0d
0
Parent(s):
Build split-brain copilot scaffold
Browse files- .env.example +3 -0
- .gitignore +5 -0
- AGENTS.md +793 -0
- README.md +92 -0
- app.py +331 -0
- modal_backend/__init__.py +1 -0
- modal_backend/sandbox.py +52 -0
- modal_backend/verifier.py +141 -0
- requirements.txt +5 -0
- static/engine.js +58 -0
- static/style.css +212 -0
- static/ui.js +87 -0
.env.example
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY=
|
| 2 |
+
MODAL_VERIFIER_URL=
|
| 3 |
+
MODAL_SANDBOX_URL=
|
.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
.env
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.py[cod]
|
| 5 |
+
.DS_Store
|
AGENTS.md
ADDED
|
@@ -0,0 +1,793 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Split-Brain Speculative Co-Pilot
|
| 2 |
+
### Build Small Hackathon — Complete Build Instructions
|
| 3 |
+
|
| 4 |
+
> **Concept:** A 1.5B model runs entirely in the user's browser via WebGPU + transformers.js, streaming code instantly. A 14B model on Modal verifies the draft in the background. When the verifier catches a bug, the UI rolls back the local generation and replaces it with the corrected cloud block — live, visually.
|
| 5 |
+
>
|
| 6 |
+
> **Models:** `Qwen2.5-Coder-1.5B-Instruct` (browser, WebGPU) + `Qwen2.5-Coder-14B-Instruct` (Modal, GGUF via llama.cpp) — combined 15.5B, well under the 32B cap.
|
| 7 |
+
>
|
| 8 |
+
> **Bonus badges targeted:** Off the Grid · Llama Champion · Off-Brand · Field Notes
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## 0. Prerequisites
|
| 13 |
+
|
| 14 |
+
- Python 3.11+
|
| 15 |
+
- Node.js 18+ (for local frontend testing)
|
| 16 |
+
- Modal account with `modal` CLI installed and authenticated (`modal token new`)
|
| 17 |
+
- Hugging Face account, joined the `build-small-hackathon` org, HF token with write access
|
| 18 |
+
- `huggingface-cli` installed and logged in (`huggingface-cli login`)
|
| 19 |
+
- Chrome 113+ (WebGPU required — Firefox and Safari are out, document this clearly)
|
| 20 |
+
- Git
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## 1. Repository Structure
|
| 25 |
+
|
| 26 |
+
Set up the project layout before writing any code.
|
| 27 |
+
|
| 28 |
+
```
|
| 29 |
+
split-brain-copilot/
|
| 30 |
+
├── app.py # Gradio app entry point (HF Space root)
|
| 31 |
+
├── modal_backend/
|
| 32 |
+
│ ├── __init__.py
|
| 33 |
+
│ ├── verifier.py # Modal app: 14B inference endpoint
|
| 34 |
+
│ └── sandbox.py # Modal app: code execution sandbox
|
| 35 |
+
├── static/
|
| 36 |
+
│ ├── engine.js # transformers.js WebGPU inference engine
|
| 37 |
+
│ ├── ui.js # Stream rendering, rollback animation, diff logic
|
| 38 |
+
│ └── style.css # Custom UI (required for Off-Brand badge)
|
| 39 |
+
├── requirements.txt
|
| 40 |
+
└── README.md # HF Space card + demo video embed
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
Initialize git and create an HF Space repo (first confirm that `huggingface-cli` is installed and that you are logged in or have a token set):
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
git init
|
| 47 |
+
huggingface-cli repo create split-brain-copilot --type space --space-sdk gradio
|
| 48 |
+
git remote add origin https://huggingface.co/spaces/YOUR_HF_USERNAME/split-brain-copilot
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
## 2. Modal Backend — 14B Verifier Endpoint
|
| 54 |
+
|
| 55 |
+
### 2.1 Download the GGUF model to a Modal Volume
|
| 56 |
+
|
| 57 |
+
The 14B model is too large to bake into the image. Use a Modal Volume for persistent storage.
|
| 58 |
+
|
| 59 |
+
```python
|
| 60 |
+
# modal_backend/verifier.py
|
| 61 |
+
import modal
|
| 62 |
+
|
| 63 |
+
app = modal.App("split-brain-verifier")
|
| 64 |
+
|
| 65 |
+
# Persistent volume — survives cold starts
|
| 66 |
+
model_volume = modal.Volume.from_name("qwen-14b-volume", create_if_missing=True)
|
| 67 |
+
|
| 68 |
+
MODEL_DIR = "/models"
|
| 69 |
+
MODEL_FILENAME = "qwen2.5-coder-14b-instruct-q4_k_m.gguf"
|
| 70 |
+
# Source: bartowski/Qwen2.5-Coder-14B-Instruct-GGUF on HuggingFace
|
| 71 |
+
MODEL_REPO = "bartowski/Qwen2.5-Coder-14B-Instruct-GGUF"
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
Create a one-time download function:
|
| 75 |
+
|
| 76 |
+
```python
|
| 77 |
+
@app.function(
|
| 78 |
+
volumes={MODEL_DIR: model_volume},
|
| 79 |
+
timeout=3600,
|
| 80 |
+
secrets=[modal.Secret.from_name("huggingface-secret")],
|
| 81 |
+
)
|
| 82 |
+
def download_model():
|
| 83 |
+
from huggingface_hub import hf_hub_download
|
| 84 |
+
import os
|
| 85 |
+
hf_hub_download(
|
| 86 |
+
repo_id=MODEL_REPO,
|
| 87 |
+
filename=MODEL_FILENAME,
|
| 88 |
+
local_dir=MODEL_DIR,
|
| 89 |
+
)
|
| 90 |
+
model_volume.commit()
|
| 91 |
+
print(f"Downloaded to {MODEL_DIR}/{MODEL_FILENAME}")
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
Run this once: `modal run modal_backend/verifier.py::download_model`
|
| 95 |
+
|
| 96 |
+
### 2.2 Build the llama.cpp image
|
| 97 |
+
|
| 98 |
+
```python
|
| 99 |
+
llama_image = (
|
| 100 |
+
modal.Image.debian_slim(python_version="3.11")
|
| 101 |
+
.apt_install("build-essential", "cmake", "git", "libgomp1")
|
| 102 |
+
.run_commands(
|
| 103 |
+
"git clone https://github.com/ggerganov/llama.cpp /llama.cpp",
|
| 104 |
+
"cd /llama.cpp && cmake -B build -DLLAMA_CURL=OFF && cmake --build build --config Release -j$(nproc)",
|
| 105 |
+
"cd /llama.cpp && pip install -e .",
|
| 106 |
+
)
|
| 107 |
+
.pip_install("llama-cpp-python==0.3.4", "fastapi", "uvicorn")
|
| 108 |
+
)
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### 2.3 Verifier inference function
|
| 112 |
+
|
| 113 |
+
The verifier receives the speculated code draft and the original prompt, and returns a JSON verdict: `PASS`, `FIX`, or `REWRITE`. `FIX` and `REWRITE` also carry the corrected code and a one-line reason.
|
| 114 |
+
|
| 115 |
+
```python
|
| 116 |
+
@app.cls(
|
| 117 |
+
image=llama_image,
|
| 118 |
+
gpu=modal.gpu.A10G(),
|
| 119 |
+
volumes={MODEL_DIR: model_volume},
|
| 120 |
+
container_idle_timeout=300,
|
| 121 |
+
allow_concurrent_inputs=10,
|
| 122 |
+
)
|
| 123 |
+
class Verifier:
|
| 124 |
+
@modal.enter()
|
| 125 |
+
def load_model(self):
|
| 126 |
+
from llama_cpp import Llama
|
| 127 |
+
self.llm = Llama(
|
| 128 |
+
model_path=f"{MODEL_DIR}/{MODEL_FILENAME}",
|
| 129 |
+
n_gpu_layers=-1, # all layers on GPU
|
| 130 |
+
n_ctx=8192,
|
| 131 |
+
n_batch=512,
|
| 132 |
+
verbose=False,
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
@modal.method()
|
| 136 |
+
def verify(self, prompt: str, draft_code: str, language: str = "python") -> dict:
|
| 137 |
+
system = f"""You are a code verifier. A smaller model drafted the following {language} code.
|
| 138 |
+
Your job:
|
| 139 |
+
1. Check for bugs, logic errors, type errors, off-by-one errors, and security issues.
|
| 140 |
+
2. If the code is correct, respond with exactly: {{"verdict": "PASS"}}
|
| 141 |
+
3. If fixable, respond with: {{"verdict": "FIX", "corrected_code": "<fixed code here>", "reason": "<one line>"}}
|
| 142 |
+
4. If fundamentally wrong, respond with: {{"verdict": "REWRITE", "corrected_code": "<rewritten code>", "reason": "<one line>"}}
|
| 143 |
+
Respond ONLY with valid JSON. No markdown, no explanation outside the JSON."""
|
| 144 |
+
|
| 145 |
+
user = f"Original prompt:\n{prompt}\n\nDrafted code:\n```{language}\n{draft_code}\n```"
|
| 146 |
+
|
| 147 |
+
response = self.llm.create_chat_completion(
|
| 148 |
+
messages=[
|
| 149 |
+
{"role": "system", "content": system},
|
| 150 |
+
{"role": "user", "content": user},
|
| 151 |
+
],
|
| 152 |
+
max_tokens=2048,
|
| 153 |
+
temperature=0.1,
|
| 154 |
+
)
|
| 155 |
+
import json
|
| 156 |
+
raw = response["choices"][0]["message"]["content"].strip()
|
| 157 |
+
try:
|
| 158 |
+
return json.loads(raw)
|
| 159 |
+
except json.JSONDecodeError:
|
| 160 |
+
# Fallback: treat as PASS if we can't parse
|
| 161 |
+
return {"verdict": "PASS"}
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
+
### 2.4 Modal Sandbox — code execution (optional but impressive)
|
| 165 |
+
|
| 166 |
+
Sandboxed execution confirms the corrected code actually runs. This is what earns you extra credibility in the demo.
|
| 167 |
+
|
| 168 |
+
```python
|
| 169 |
+
# modal_backend/sandbox.py
|
| 170 |
+
import modal
|
| 171 |
+
|
| 172 |
+
app = modal.App("split-brain-sandbox")
|
| 173 |
+
|
| 174 |
+
@app.function(timeout=30)
|
| 175 |
+
def execute_python(code: str) -> dict:
|
| 176 |
+
"""Run untrusted code in a Modal sandbox and return stdout/stderr."""
|
| 177 |
+
sandbox = modal.Sandbox.create(
|
| 178 |
+
"python3", "-c", code,
|
| 179 |
+
image=modal.Image.debian_slim().pip_install("numpy"),
|
| 180 |
+
timeout=10,
|
| 181 |
+
cpu=0.5,
|
| 182 |
+
)
|
| 183 |
+
sandbox.wait()
|
| 184 |
+
return {
|
| 185 |
+
"stdout": sandbox.stdout.read(),
|
| 186 |
+
"stderr": sandbox.stderr.read(),
|
| 187 |
+
"returncode": sandbox.returncode,
|
| 188 |
+
}
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### 2.5 Deploy the Modal backend
|
| 192 |
+
|
| 193 |
+
```bash
|
| 194 |
+
modal deploy modal_backend/verifier.py
|
| 195 |
+
modal deploy modal_backend/sandbox.py
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
Note the endpoint URLs printed after deploy. You'll need them in `app.py`.
|
| 199 |
+
|
| 200 |
+
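Store your HF token as a Modal secret named `huggingface-secret` (the `download_model` function in section 2.1 references it, so create it before running the download). Your Modal token is not stored here; it is set as HF Space secrets in section 6.3: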
|
| 201 |
+
```bash
|
| 202 |
+
modal secret create huggingface-secret HF_TOKEN=hf_xxx
|
| 203 |
+
```
|
| 204 |
+
|
| 205 |
+
---
|
| 206 |
+
|
| 207 |
+
## 3. Browser Engine — transformers.js + WebGPU
|
| 208 |
+
|
| 209 |
+
### 3.1 Model choice for the browser
|
| 210 |
+
|
| 211 |
+
Use `Qwen2.5-Coder-1.5B-Instruct` in ONNX/WebGPU format. Xenova and onnx-community maintain these on HF Hub. Target:
|
| 212 |
+
`onnx-community/Qwen2.5-Coder-1.5B-Instruct` with `dtype: "q4"` for fast WebGPU loading (~800MB, fits comfortably in browser VRAM on a modern GPU).
|
| 213 |
+
|
| 214 |
+
### 3.2 engine.js — WebGPU inference
|
| 215 |
+
|
| 216 |
+
```javascript
|
| 217 |
+
// static/engine.js
|
| 218 |
+
import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";
|
| 219 |
+
|
| 220 |
+
const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";
|
| 221 |
+
let generator = null;
|
| 222 |
+
let isLoaded = false;
|
| 223 |
+
|
| 224 |
+
export async function loadModel(onProgress) {
|
| 225 |
+
if (isLoaded) return;
|
| 226 |
+
generator = await pipeline("text-generation", MODEL_ID, {
|
| 227 |
+
dtype: "q4",
|
| 228 |
+
device: "webgpu",
|
| 229 |
+
progress_callback: onProgress,
|
| 230 |
+
});
|
| 231 |
+
isLoaded = true;
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
export async function generateCode(prompt, language, onToken, onComplete) {
|
| 235 |
+
if (!generator) throw new Error("Model not loaded");
|
| 236 |
+
|
| 237 |
+
const messages = [
|
| 238 |
+
{
|
| 239 |
+
role: "system",
|
| 240 |
+
content: `You are an expert ${language} programmer. Write clean, correct, production-ready code. Output ONLY the code block, no explanation.`
|
| 241 |
+
},
|
| 242 |
+
{ role: "user", content: prompt }
|
| 243 |
+
];
|
| 244 |
+
|
| 245 |
+
const streamer = new TextStreamer(generator.tokenizer, {
|
| 246 |
+
skip_prompt: true,
|
| 247 |
+
callback_function: (token) => {
|
| 248 |
+
onToken(token);
|
| 249 |
+
},
|
| 250 |
+
});
|
| 251 |
+
|
| 252 |
+
const result = await generator(messages, {
|
| 253 |
+
max_new_tokens: 1024,
|
| 254 |
+
temperature: 0.2,
|
| 255 |
+
do_sample: true,
|
| 256 |
+
streamer,
|
| 257 |
+
});
|
| 258 |
+
|
| 259 |
+
const fullCode = result[0].generated_text.at(-1).content;
|
| 260 |
+
onComplete(fullCode);
|
| 261 |
+
return fullCode;
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
export function isWebGPUSupported() {
|
| 265 |
+
return !!navigator.gpu;
|
| 266 |
+
}
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
### 3.3 ui.js — stream rendering + rollback animation
|
| 270 |
+
|
| 271 |
+
```javascript
|
| 272 |
+
// static/ui.js
|
| 273 |
+
|
| 274 |
+
let currentTokens = [];
|
| 275 |
+
let streamBuffer = "";
|
| 276 |
+
|
| 277 |
+
export function initEditor(containerId) {
|
| 278 |
+
// Attach to the Gradio custom HTML component
|
| 279 |
+
const container = document.getElementById(containerId);
|
| 280 |
+
container.innerHTML = `
|
| 281 |
+
<div id="stream-display" class="code-stream"></div>
|
| 282 |
+
<div id="status-bar" class="status-bar">
|
| 283 |
+
<span id="status-text">Ready</span>
|
| 284 |
+
<span id="token-count">0 tok/s</span>
|
| 285 |
+
<span id="verifier-status"></span>
|
| 286 |
+
</div>
|
| 287 |
+
`;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
export function appendToken(token) {
|
| 291 |
+
streamBuffer += token;
|
| 292 |
+
currentTokens.push(token);
|
| 293 |
+
const display = document.getElementById("stream-display");
|
| 294 |
+
if (display) display.textContent = streamBuffer;
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
export function setStatus(text, type = "neutral") {
|
| 298 |
+
const el = document.getElementById("status-text");
|
| 299 |
+
if (el) {
|
| 300 |
+
el.textContent = text;
|
| 301 |
+
el.className = `status-${type}`;
|
| 302 |
+
}
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
export function setVerifierStatus(verdict) {
|
| 306 |
+
const el = document.getElementById("verifier-status");
|
| 307 |
+
if (!el) return;
|
| 308 |
+
const icons = { PASS: "✅ Verified", FIX: "🔧 Fixed", REWRITE: "🔄 Rewritten", CHECKING: "🔍 Verifying..." };
|
| 309 |
+
el.textContent = icons[verdict] || "";
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
export async function rollbackAndReplace(correctedCode, reason) {
|
| 313 |
+
const display = document.getElementById("stream-display");
|
| 314 |
+
if (!display) return;
|
| 315 |
+
|
| 316 |
+
// Flash red to signal rollback
|
| 317 |
+
display.classList.add("rollback-flash");
|
| 318 |
+
setVerifierStatus("FIX");
|
| 319 |
+
setStatus(`Verifier corrected: ${reason}`, "warning");
|
| 320 |
+
|
| 321 |
+
await sleep(400);
|
| 322 |
+
display.classList.remove("rollback-flash");
|
| 323 |
+
|
| 324 |
+
// Type in corrected code character by character
|
| 325 |
+
display.textContent = "";
|
| 326 |
+
streamBuffer = correctedCode;
|
| 327 |
+
currentTokens = [];
|
| 328 |
+
|
| 329 |
+
for (let i = 0; i < correctedCode.length; i++) {
|
| 330 |
+
display.textContent += correctedCode[i];
|
| 331 |
+
if (i % 5 === 0) await sleep(8); // smooth typewriter
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
setVerifierStatus("PASS");
|
| 335 |
+
setStatus("Ready", "neutral");
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
export function getCurrentCode() {
|
| 339 |
+
return streamBuffer;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
export function reset() {
|
| 343 |
+
streamBuffer = "";
|
| 344 |
+
currentTokens = [];
|
| 345 |
+
const display = document.getElementById("stream-display");
|
| 346 |
+
if (display) display.textContent = "";
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
function sleep(ms) {
|
| 350 |
+
return new Promise(resolve => setTimeout(resolve, ms));
|
| 351 |
+
}
|
| 352 |
+
```
|
| 353 |
+
|
| 354 |
+
### 3.4 style.css — custom UI (Off-Brand badge)
|
| 355 |
+
|
| 356 |
+
```css
|
| 357 |
+
/* static/style.css */
|
| 358 |
+
:root {
|
| 359 |
+
--bg: #0d1117;
|
| 360 |
+
--surface: #161b22;
|
| 361 |
+
--border: #30363d;
|
| 362 |
+
--accent: #58a6ff;
|
| 363 |
+
--accent-warn: #f0883e;
|
| 364 |
+
--text: #e6edf3;
|
| 365 |
+
--text-muted: #8b949e;
|
| 366 |
+
--green: #3fb950;
|
| 367 |
+
--red: #f85149;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
body { background: var(--bg); color: var(--text); font-family: 'JetBrains Mono', monospace; }
|
| 371 |
+
|
| 372 |
+
.code-stream {
|
| 373 |
+
background: var(--surface);
|
| 374 |
+
border: 1px solid var(--border);
|
| 375 |
+
border-radius: 8px;
|
| 376 |
+
padding: 16px;
|
| 377 |
+
min-height: 300px;
|
| 378 |
+
font-family: 'JetBrains Mono', monospace;
|
| 379 |
+
font-size: 13px;
|
| 380 |
+
line-height: 1.6;
|
| 381 |
+
white-space: pre-wrap;
|
| 382 |
+
overflow-y: auto;
|
| 383 |
+
transition: border-color 0.2s;
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
+
.rollback-flash {
|
| 387 |
+
border-color: var(--red) !important;
|
| 388 |
+
background: rgba(248, 81, 73, 0.08) !important;
|
| 389 |
+
animation: flash 0.4s ease;
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
@keyframes flash {
|
| 393 |
+
0% { background: rgba(248, 81, 73, 0.25); }
|
| 394 |
+
100% { background: rgba(248, 81, 73, 0.08); }
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
.status-bar {
|
| 398 |
+
display: flex;
|
| 399 |
+
justify-content: space-between;
|
| 400 |
+
padding: 8px 12px;
|
| 401 |
+
background: var(--surface);
|
| 402 |
+
border: 1px solid var(--border);
|
| 403 |
+
border-top: none;
|
| 404 |
+
border-radius: 0 0 8px 8px;
|
| 405 |
+
font-size: 12px;
|
| 406 |
+
color: var(--text-muted);
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
.status-warning { color: var(--accent-warn); }
|
| 410 |
+
.status-success { color: var(--green); }
|
| 411 |
+
.status-neutral { color: var(--text-muted); }
|
| 412 |
+
|
| 413 |
+
/* Gradio overrides */
|
| 414 |
+
.gradio-container { background: var(--bg) !important; }
|
| 415 |
+
footer { display: none !important; }
|
| 416 |
+
|
| 417 |
+
/* WebGPU loading bar */
|
| 418 |
+
.loading-bar {
|
| 419 |
+
height: 3px;
|
| 420 |
+
background: var(--border);
|
| 421 |
+
border-radius: 2px;
|
| 422 |
+
overflow: hidden;
|
| 423 |
+
margin: 8px 0;
|
| 424 |
+
}
|
| 425 |
+
.loading-bar-fill {
|
| 426 |
+
height: 100%;
|
| 427 |
+
background: var(--accent);
|
| 428 |
+
transition: width 0.3s ease;
|
| 429 |
+
}
|
| 430 |
+
```
|
| 431 |
+
|
| 432 |
+
---
|
| 433 |
+
|
| 434 |
+
## 4. Gradio App — app.py
|
| 435 |
+
|
| 436 |
+
This is the HF Space entry point. Gradio acts as the shell; the real UI lives in the custom HTML component injected via `gr.HTML`.
|
| 437 |
+
|
| 438 |
+
```python
|
| 439 |
+
# app.py
|
| 440 |
+
import gradio as gr
|
| 441 |
+
import httpx
|
| 442 |
+
import json
|
| 443 |
+
import os
|
| 444 |
+
import asyncio
|
| 445 |
+
from pathlib import Path
|
| 446 |
+
|
| 447 |
+
MODAL_VERIFIER_URL = os.environ.get("MODAL_VERIFIER_URL") # set as HF Space secret
|
| 448 |
+
MODAL_SANDBOX_URL = os.environ.get("MODAL_SANDBOX_URL") # set as HF Space secret
|
| 449 |
+
|
| 450 |
+
LANGUAGES = ["Python", "JavaScript", "TypeScript", "Rust", "Go", "C++"]
|
| 451 |
+
|
| 452 |
+
def load_static(filename):
|
| 453 |
+
return Path(f"static/{filename}").read_text()
|
| 454 |
+
|
| 455 |
+
custom_html = f"""
|
| 456 |
+
<!DOCTYPE html>
|
| 457 |
+
<html>
|
| 458 |
+
<head>
|
| 459 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 460 |
+
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
|
| 461 |
+
<style>{load_static('style.css')}</style>
|
| 462 |
+
</head>
|
| 463 |
+
<body>
|
| 464 |
+
<div id="split-brain-root">
|
| 465 |
+
<div class="webgpu-notice" id="webgpu-warning" style="display:none; color:#f85149; padding:8px; border:1px solid #f85149; border-radius:6px; margin-bottom:12px;">
|
| 466 |
+
⚠️ WebGPU not detected. Please use Chrome 113+ on desktop for local inference.
|
| 467 |
+
</div>
|
| 468 |
+
<div id="load-section">
|
| 469 |
+
<button id="load-btn" onclick="initEngine()">⚡ Load 1.5B Model (WebGPU)</button>
|
| 470 |
+
<div class="loading-bar"><div class="loading-bar-fill" id="load-progress" style="width:0%"></div></div>
|
| 471 |
+
<span id="load-status" style="font-size:12px; color:#8b949e;"></span>
|
| 472 |
+
</div>
|
| 473 |
+
<div id="stream-display" class="code-stream" style="margin-top:12px;">Waiting for model load...</div>
|
| 474 |
+
<div class="status-bar">
|
| 475 |
+
<span id="status-text">Idle</span>
|
| 476 |
+
<span id="token-count"></span>
|
| 477 |
+
<span id="verifier-status"></span>
|
| 478 |
+
</div>
|
| 479 |
+
</div>
|
| 480 |
+
<script type="module">
|
| 481 |
+
{load_static('engine.js')}
|
| 482 |
+
{load_static('ui.js')}
|
| 483 |
+
|
| 484 |
+
// Check WebGPU on load
|
| 485 |
+
if (!isWebGPUSupported()) {{
|
| 486 |
+
document.getElementById('webgpu-warning').style.display = 'block';
|
| 487 |
+
document.getElementById('load-btn').disabled = true;
|
| 488 |
+
}}
|
| 489 |
+
|
| 490 |
+
window.initEngine = async function() {{
|
| 491 |
+
document.getElementById('load-btn').disabled = true;
|
| 492 |
+
document.getElementById('load-status').textContent = 'Loading model weights...';
|
| 493 |
+
await loadModel((progress) => {{
|
| 494 |
+
if (progress.progress) {{
|
| 495 |
+
document.getElementById('load-progress').style.width = progress.progress + '%';
|
| 496 |
+
document.getElementById('load-status').textContent = `${{progress.file || 'Loading'}} — ${{Math.round(progress.progress)}}%`;
|
| 497 |
+
}}
|
| 498 |
+
}});
|
| 499 |
+
document.getElementById('load-status').textContent = '✅ Model ready — WebGPU active';
|
| 500 |
+
document.getElementById('load-section').style.opacity = '0.5';
|
| 501 |
+
}};
|
| 502 |
+
|
| 503 |
+
// Gradio will call this via the hidden trigger
|
| 504 |
+
window.runLocalGeneration = async function(prompt, language) {{
|
| 505 |
+
reset();
|
| 506 |
+
setStatus('Generating locally (WebGPU)...', 'neutral');
|
| 507 |
+
|
| 508 |
+
let tokenCount = 0;
|
| 509 |
+
const startTime = Date.now();
|
| 510 |
+
|
| 511 |
+
const fullCode = await generateCode(prompt, language,
|
| 512 |
+
(token) => {{
|
| 513 |
+
appendToken(token);
|
| 514 |
+
tokenCount++;
|
| 515 |
+
const elapsed = (Date.now() - startTime) / 1000;
|
| 516 |
+
const tps = Math.round(tokenCount / elapsed);
|
| 517 |
+
document.getElementById('token-count').textContent = `${{tps}} tok/s`;
|
| 518 |
+
}},
|
| 519 |
+
(code) => {{
|
| 520 |
+
setStatus('Local generation complete. Verifying...', 'neutral');
|
| 521 |
+
}}
|
| 522 |
+
);
|
| 523 |
+
|
| 524 |
+
// Send to Gradio backend for verification
|
| 525 |
+
// Use the hidden Gradio state to trigger the verify function
|
| 526 |
+
document.getElementById('draft-output-hidden').value = fullCode;
|
| 527 |
+
document.getElementById('trigger-verify-btn').click();
|
| 528 |
+
}};
|
| 529 |
+
|
| 530 |
+
window.applyVerification = function(verdictJson) {{
|
| 531 |
+
const verdict = JSON.parse(verdictJson);
|
| 532 |
+
if (verdict.verdict === 'PASS') {{
|
| 533 |
+
setVerifierStatus('PASS');
|
| 534 |
+
setStatus('✅ Verified clean', 'success');
|
| 535 |
+
}} else {{
|
| 536 |
+
rollbackAndReplace(verdict.corrected_code, verdict.reason);
|
| 537 |
+
}}
|
| 538 |
+
}};
|
| 539 |
+
</script>
|
| 540 |
+
</body>
|
| 541 |
+
</html>
|
| 542 |
+
"""
|
| 543 |
+
|
| 544 |
+
async def verify_with_modal(prompt: str, draft_code: str, language: str) -> str:
|
| 545 |
+
"""Call Modal verifier endpoint and return JSON string."""
|
| 546 |
+
if not MODAL_VERIFIER_URL:
|
| 547 |
+
return json.dumps({"verdict": "PASS"})
|
| 548 |
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 549 |
+
resp = await client.post(
|
| 550 |
+
MODAL_VERIFIER_URL,
|
| 551 |
+
json={"prompt": prompt, "draft_code": draft_code, "language": language},
|
| 552 |
+
)
|
| 553 |
+
resp.raise_for_status()
|
| 554 |
+
return resp.text
|
| 555 |
+
|
| 556 |
+
async def execute_in_sandbox(code: str) -> dict:
|
| 557 |
+
"""Call Modal sandbox and return execution result."""
|
| 558 |
+
if not MODAL_SANDBOX_URL:
|
| 559 |
+
return {"stdout": "", "stderr": "Sandbox not configured", "returncode": -1}
|
| 560 |
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 561 |
+
resp = await client.post(MODAL_SANDBOX_URL, json={"code": code})
|
| 562 |
+
return resp.json()
|
| 563 |
+
|
| 564 |
+
with gr.Blocks(
|
| 565 |
+
title="Split-Brain Co-Pilot",
|
| 566 |
+
css="footer {display:none}",
|
| 567 |
+
theme=gr.themes.Base(
|
| 568 |
+
primary_hue="blue",
|
| 569 |
+
neutral_hue="slate",
|
| 570 |
+
),
|
| 571 |
+
) as demo:
|
| 572 |
+
gr.HTML("<h1 style='text-align:center; color:#58a6ff;'>⚡ Split-Brain Co-Pilot</h1>")
|
| 573 |
+
gr.HTML("<p style='text-align:center; color:#8b949e;'>1.5B model runs in your browser (WebGPU). 14B model on Modal verifies and corrects.</p>")
|
| 574 |
+
|
| 575 |
+
with gr.Row():
|
| 576 |
+
with gr.Column(scale=2):
|
| 577 |
+
prompt_input = gr.Textbox(
|
| 578 |
+
label="What do you want to build?",
|
| 579 |
+
placeholder="e.g. A function that parses a CSV and returns the top 5 rows by a given column",
|
| 580 |
+
lines=3,
|
| 581 |
+
)
|
| 582 |
+
language_select = gr.Dropdown(
|
| 583 |
+
choices=LANGUAGES,
|
| 584 |
+
value="Python",
|
| 585 |
+
label="Language",
|
| 586 |
+
)
|
| 587 |
+
generate_btn = gr.Button("⚡ Generate (WebGPU → Verify)", variant="primary")
|
| 588 |
+
|
| 589 |
+
with gr.Column(scale=3):
|
| 590 |
+
# Custom HTML component for streaming display
|
| 591 |
+
gr.HTML(custom_html)
|
| 592 |
+
|
| 593 |
+
# Hidden elements for JS ↔ Gradio bridge
|
| 594 |
+
draft_hidden = gr.Textbox(visible=False, elem_id="draft-output-hidden")
|
| 595 |
+
verify_trigger = gr.Button("verify", visible=False, elem_id="trigger-verify-btn")
|
| 596 |
+
verdict_output = gr.Textbox(visible=False, label="verdict")
|
| 597 |
+
|
| 598 |
+
with gr.Row():
|
| 599 |
+
sandbox_output = gr.Code(label="Sandbox Execution Output", language="python", visible=False)
|
| 600 |
+
|
| 601 |
+
# Gradio event: user clicks Generate → JS takes over for local inference
|
| 602 |
+
generate_btn.click(
|
| 603 |
+
fn=None,
|
| 604 |
+
inputs=[prompt_input, language_select],
|
| 605 |
+
outputs=[],
|
| 606 |
+
js="(prompt, lang) => { window.runLocalGeneration(prompt, lang); return []; }",
|
| 607 |
+
)
|
| 608 |
+
|
| 609 |
+
# Gradio event: JS triggers verify after local generation completes
|
| 610 |
+
async def run_verification(prompt, draft_code, language):
|
| 611 |
+
verdict_json = await verify_with_modal(prompt, draft_code, language)
|
| 612 |
+
return verdict_json
|
| 613 |
+
|
| 614 |
+
verify_trigger.click(
|
| 615 |
+
fn=run_verification,
|
| 616 |
+
inputs=[prompt_input, draft_hidden, language_select],
|
| 617 |
+
outputs=[verdict_output],
|
| 618 |
+
)
|
| 619 |
+
|
| 620 |
+
# Apply verdict back to JS
|
| 621 |
+
verdict_output.change(
|
| 622 |
+
fn=None,
|
| 623 |
+
inputs=[verdict_output],
|
| 624 |
+
outputs=[],
|
| 625 |
+
js="(verdict) => { window.applyVerification(verdict); return []; }",
|
| 626 |
+
)
|
| 627 |
+
|
| 628 |
+
if __name__ == "__main__":
|
| 629 |
+
demo.launch()
|
| 630 |
+
```
|
| 631 |
+
|
| 632 |
+
---
|
| 633 |
+
|
| 634 |
+
## 5. Modal Web Endpoint Wrapper
|
| 635 |
+
|
| 636 |
+
The Modal functions need to be exposed as HTTP endpoints that `app.py` can call via httpx. Add this to `verifier.py`:
|
| 637 |
+
|
| 638 |
+
```python
|
| 639 |
+
from fastapi import FastAPI
|
| 640 |
+
from pydantic import BaseModel
|
| 641 |
+
|
| 642 |
+
web_app = FastAPI()
|
| 643 |
+
|
| 644 |
+
class VerifyRequest(BaseModel):
|
| 645 |
+
prompt: str
|
| 646 |
+
draft_code: str
|
| 647 |
+
language: str = "python"
|
| 648 |
+
|
| 649 |
+
@app.function(
    image=llama_image,
    # String GPU spec and `scaledown_window` are the current spellings of the
    # older `modal.gpu.A10G()` / `container_idle_timeout` arguments.
    gpu="A10G",
    volumes={MODEL_DIR: model_volume},
    scaledown_window=300,
)
@modal.asgi_app()
def verifier_endpoint():
    """Serve `Verifier.verify` over HTTP as `POST /verify`.

    Returns the module-level FastAPI `web_app` so Modal can host it as an ASGI
    application. The request body is a `VerifyRequest`; the response is the
    verdict returned by `Verifier.verify`.

    NOTE(review): this function only forwards requests to `Verifier`, which
    declares its own GPU -- confirm whether the proxy itself needs
    `gpu`/`volumes`, or whether they can be dropped to avoid a second GPU
    container.
    """
    verifier = Verifier()

    @web_app.post("/verify")
    async def verify(req: VerifyRequest):
        # `.remote()` blocks the calling thread; in an async handler use the
        # awaitable `.remote.aio()` form so the event loop stays responsive
        # while the verifier is working (or cold starting).
        return await verifier.verify.remote.aio(req.prompt, req.draft_code, req.language)

    return web_app
|
| 665 |
+
```
|
| 666 |
+
|
| 667 |
+
After deploying, Modal gives you a URL like `https://your-username--split-brain-verifier-verifier-endpoint.modal.run`. Set this as the HF Space secret `MODAL_VERIFIER_URL`.
|
| 668 |
+
|
| 669 |
+
---
|
| 670 |
+
|
| 671 |
+
## 6. HF Space Configuration
|
| 672 |
+
|
| 673 |
+
### 6.1 README.md (Space card)
|
| 674 |
+
|
| 675 |
+
```yaml
|
| 676 |
+
---
|
| 677 |
+
title: Split-Brain Co-Pilot
|
| 678 |
+
emoji: ⚡
|
| 679 |
+
colorFrom: blue
|
| 680 |
+
colorTo: indigo
|
| 681 |
+
sdk: gradio
|
| 682 |
+
sdk_version: 5.30.0
|
| 683 |
+
app_file: app.py
|
| 684 |
+
pinned: true
|
| 685 |
+
license: apache-2.0
|
| 686 |
+
tags:
|
| 687 |
+
- code-generation
|
| 688 |
+
- webgpu
|
| 689 |
+
- speculative-decoding
|
| 690 |
+
- llama.cpp
|
| 691 |
+
- local-first
|
| 692 |
+
---
|
| 693 |
+
```
|
| 694 |
+
|
| 695 |
+
### 6.2 requirements.txt
|
| 696 |
+
|
| 697 |
+
```
|
| 698 |
+
gradio==5.30.0
|
| 699 |
+
httpx==0.27.0
|
| 700 |
+
modal==1.4.3
|
| 701 |
+
huggingface-hub==0.23.0
|
| 702 |
+
```
|
| 703 |
+
|
| 704 |
+
### 6.3 HF Space secrets
|
| 705 |
+
|
| 706 |
+
Set these in the Space Settings → Repository secrets:
|
| 707 |
+
|
| 708 |
+
| Secret name | Value |
|
| 709 |
+
|---|---|
|
| 710 |
+
| `MODAL_VERIFIER_URL` | Your Modal verifier endpoint URL |
|
| 711 |
+
| `MODAL_SANDBOX_URL` | Your Modal sandbox endpoint URL |
|
| 712 |
+
| `MODAL_TOKEN_ID` | From `modal token show` |
|
| 713 |
+
| `MODAL_TOKEN_SECRET` | From `modal token show` |
|
| 714 |
+
|
| 715 |
+
---
|
| 716 |
+
|
| 717 |
+
## 7. Cold Start Mitigation
|
| 718 |
+
|
| 719 |
+
Modal A10G containers take 10–40 seconds to cold start. Handle this gracefully:
|
| 720 |
+
|
| 721 |
+
Keep the verifier container warm with a scheduled ping. This lives in `verifier.py`; `verify_with_modal` in `app.py` does not need to change:
|
| 722 |
+
|
| 723 |
+
```python
|
| 724 |
+
@app.function(schedule=modal.Cron("*/5 * * * *"))
def keep_warm():
    """Ping the verifier every 5 minutes to avoid cold starts during the demo window.

    Sends one tiny, throwaway verification request and discards the result.

    NOTE(review): every ping is a real `Verifier.verify` call, so the schedule
    keeps consuming verifier time for as long as the app stays deployed --
    confirm that is acceptable outside the demo window.
    """
    Verifier().verify.remote("test", "print('hello')", "python")
|
| 728 |
+
```
|
| 729 |
+
|
| 730 |
+
The schedule only runs once the app is deployed, so redeploy the verifier after adding it: `modal deploy modal_backend/verifier.py`
|
| 731 |
+
|
| 732 |
+
In the UI, show "Verifier warming up..." in the status bar while the first request is in flight and display a spinner. Do not let the UI appear broken during cold start.
|
| 733 |
+
|
| 734 |
+
---
|
| 735 |
+
|
| 736 |
+
## 8. Demo Video Script
|
| 737 |
+
|
| 738 |
+
The demo video is a submission requirement. Plan it around these beats:
|
| 739 |
+
|
| 740 |
+
- Open Chrome, show the app. Explain the split-brain concept in one sentence.
|
| 741 |
+
- Click "Load 1.5B Model" — show the WebGPU loading progress bar.
|
| 742 |
+
- Type a non-trivial prompt: "Write a Python function that finds all prime numbers up to n using a segmented sieve, handling edge cases."
|
| 743 |
+
- Hit Generate — show tokens streaming at 80–120 tok/s with the token counter live.
|
| 744 |
+
- Show the "Verifying..." status kick in immediately after local generation completes.
|
| 745 |
+
- If the verifier returns FIX or REWRITE: show the red flash rollback animation and the corrected code typing in.
|
| 746 |
+
- Show the sandbox execution output (stdout) confirming the corrected code runs.
|
| 747 |
+
- End on the split status bar: "Local: WebGPU · Cloud: Modal A10G · Verdict: ✅ Verified"
|
| 748 |
+
|
| 749 |
+
Keep the video under 3 minutes. Record with OBS or Loom. No cuts during the generation — the live stream is the point.
|
| 750 |
+
|
| 751 |
+
---
|
| 752 |
+
|
| 753 |
+
## 9. Bonus Badge Checklist
|
| 754 |
+
|
| 755 |
+
| Badge | How you earn it | Status |
|
| 756 |
+
|---|---|---|
|
| 757 |
+
| **Off the Grid** | 1.5B runs 100% in browser, no cloud API for inference | ✅ Automatic |
|
| 758 |
+
| **Llama Champion** | 14B served via llama.cpp on Modal | ✅ Automatic |
|
| 759 |
+
| **Off-Brand** | Custom dark theme, rollback animation, token counter, status bar | ✅ Build it |
|
| 760 |
+
| **Field Notes** | Write a blog post on HF or Dev.to explaining the speculative split-brain architecture | ✅ Write it post-build |
|
| 761 |
+
|
| 762 |
+
---
|
| 763 |
+
|
| 764 |
+
## 10. Submission Checklist
|
| 765 |
+
|
| 766 |
+
Before June 15 deadline:
|
| 767 |
+
|
| 768 |
+
- [ ] Modal verifier deployed and endpoint URL confirmed working
|
| 769 |
+
- [ ] HF Space live and publicly accessible under `build-small-hackathon` org
|
| 770 |
+
- [ ] WebGPU model loads in Chrome without errors
|
| 771 |
+
- [ ] Token streaming visible in UI
|
| 772 |
+
- [ ] Rollback animation triggers on at least one FIX/REWRITE verdict
|
| 773 |
+
- [ ] Sandbox execution output shown in demo
|
| 774 |
+
- [ ] Demo video recorded and uploaded (YouTube unlisted or HF)
|
| 775 |
+
- [ ] Social media post published (Twitter/X or LinkedIn) with Space link and demo video
|
| 776 |
+
- [ ] README.md Space card complete with description, tags, and video embed
|
| 777 |
+
- [ ] Field Notes blog post published and linked in README
|
| 778 |
+
|
| 779 |
+
---
|
| 780 |
+
|
| 781 |
+
## 11. Known Gotchas
|
| 782 |
+
|
| 783 |
+
**WebGPU VRAM:** The 1.5B Q4 ONNX model needs ~1GB VRAM. On machines with integrated graphics sharing system RAM, this works but may be slow. Document the Chrome + dedicated GPU requirement.
|
| 784 |
+
|
| 785 |
+
**CORS:** Modal's ASGI endpoints allow cross-origin by default, but if you hit CORS errors in the browser JS, add `fastapi.middleware.cors.CORSMiddleware` to the web_app with `allow_origins=["*"]`.
|
| 786 |
+
|
| 787 |
+
**transformers.js version:** Pin to `3.5.x`. Breaking changes in 3.x are frequent. The CDN import in `engine.js` uses the pinned version — don't use `@latest`.
|
| 788 |
+
|
| 789 |
+
**Gradio JS bridge:** The `gr.Button(visible=False)` trigger pattern is the cleanest way to fire a Python function from browser JS in Gradio 5.x without websocket hacks. Do not use `gr.Request` for this — it won't work from inside a custom HTML block.
|
| 790 |
+
|
| 791 |
+
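**Modal Volume first deploy:** The volume download must complete before the verifier function can load the model. Run `download_model` manually once and confirm the GGUF file is present with `modal volume ls qwen-14b-volume` before deploying the endpoint. (`/models` is only the path where the volume is mounted inside the container; inside the volume itself the file sits at the root.)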
|
| 792 |
+
|
| 793 |
+
**HF Space cold start:** HF Spaces themselves also cold start. If the Space hasn't been visited recently, Gradio takes 20–30 seconds to boot. Add a loading spinner at the Gradio level using `gr.HTML` with a brief "Space initializing..." message that auto-hides once the page is interactive.
|
README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Split-Brain Co-Pilot
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.30.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
tags:
|
| 12 |
+
- code-generation
|
| 13 |
+
- webgpu
|
| 14 |
+
- speculative-decoding
|
| 15 |
+
- llama.cpp
|
| 16 |
+
- local-first
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
# Split-Brain Co-Pilot
|
| 20 |
+
|
| 21 |
+
A speculative coding assistant for the Build Small Hackathon: a 1.5B code model drafts locally in Chrome with WebGPU, while a 14B Qwen verifier on Modal checks the result in the background. When the verifier catches a problem, the UI flashes, rolls back, and types in the corrected cloud block.
|
| 22 |
+
|
| 23 |
+
## Architecture
|
| 24 |
+
|
| 25 |
+
- Local brain: `onnx-community/Qwen2.5-Coder-1.5B-Instruct` through transformers.js `3.5.x`, WebGPU, Q4 weights.
|
| 26 |
+
- Cloud brain: `bartowski/Qwen2.5-Coder-14B-Instruct-GGUF` (`Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf`) served on Modal A10G through llama.cpp.
|
| 27 |
+
- Shell: Gradio 5 Space with a custom HTML/CSS/JS streaming surface.
|
| 28 |
+
- Optional proof step: Modal sandbox execution endpoint for generated Python code.
|
| 29 |
+
|
| 30 |
+
## Requirements
|
| 31 |
+
|
| 32 |
+
Use Chrome 113+ on desktop. Firefox and Safari do not currently support the WebGPU path this demo needs. The browser model needs roughly 1 GB of available GPU memory, so dedicated GPU machines will feel much better than older integrated graphics.
|
| 33 |
+
|
| 34 |
+
## Local Run
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
python3 -m venv .venv
|
| 38 |
+
source .venv/bin/activate
|
| 39 |
+
pip install -r requirements.txt
|
| 40 |
+
python app.py
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
Without `MODAL_VERIFIER_URL`, the app uses a PASS fallback so the WebGPU UI can be tested locally.
|
| 44 |
+
|
| 45 |
+
Copy `.env.example` to `.env` for local secrets. The `.env` file is ignored by git.
|
| 46 |
+
|
| 47 |
+
## Modal Setup
|
| 48 |
+
|
| 49 |
+
Install and authenticate the Modal CLI:
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
pip install modal
|
| 53 |
+
modal token new
|
| 54 |
+
modal secret create huggingface-secret HF_TOKEN=hf_xxx
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Download the 14B GGUF model into the persistent volume once:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
modal run modal_backend/verifier.py::download_model
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
Deploy the verifier and sandbox:
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
modal deploy modal_backend/verifier.py
|
| 67 |
+
modal deploy modal_backend/sandbox.py
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
Set these Space secrets after deploy:
|
| 71 |
+
|
| 72 |
+
| Secret | Value |
|
| 73 |
+
| --- | --- |
|
| 74 |
+
| `MODAL_VERIFIER_URL` | Modal verifier endpoint URL, with or without `/verify` |
|
| 75 |
+
| `MODAL_SANDBOX_URL` | Modal sandbox endpoint URL, with or without `/execute` |
|
| 76 |
+
| `MODAL_TOKEN_ID` | From `modal token show` |
|
| 77 |
+
| `MODAL_TOKEN_SECRET` | From `modal token show` |
|
| 78 |
+
|
| 79 |
+
This project uses `modal==1.4.3`; older `0.73.x` clients are now rejected by Modal as deprecated.
|
| 80 |
+
|
| 81 |
+
## Demo Beat
|
| 82 |
+
|
| 83 |
+
Prompt idea: "Write a Python function that finds all prime numbers up to n using a segmented sieve, handling edge cases."
|
| 84 |
+
|
| 85 |
+
Show the model loading bar, token streaming, verifier status, rollback animation on a FIX/REWRITE verdict, and the final verified state.
|
| 86 |
+
|
| 87 |
+
## Badge Targets
|
| 88 |
+
|
| 89 |
+
- Off the Grid: local 1.5B browser inference.
|
| 90 |
+
- Llama Champion: 14B llama.cpp verifier on Modal.
|
| 91 |
+
- Off-Brand: custom UI, rollback flash, status bar, token counter.
|
| 92 |
+
- Field Notes: publish a post-build architecture writeup and link it here.
|
app.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import httpx
|
| 7 |
+
|
| 8 |
+
LANGUAGES = ["Python", "JavaScript", "TypeScript", "Rust", "Go", "C++"]
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def load_local_env() -> None:
    """Load ``KEY=VALUE`` pairs from ``./.env`` into ``os.environ``.

    Does nothing when the file is missing. Variables that are already set in
    the environment are left untouched (``setdefault``), so real environment
    variables and Space secrets always win over the local file.

    Parsing rules:
      * blank lines and ``#`` comment lines are skipped, even when indented;
      * lines without ``=`` are skipped;
      * an optional leading ``export `` on the key is dropped;
      * one pair of matching surrounding quotes is removed from the value;
      * entries with an empty key are ignored.
    """
    env_path = Path(".env")
    if not env_path.exists():
        return

    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        # Strip first so indented comments and whitespace-only lines are skipped.
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue

        key, value = line.split("=", 1)
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()
        if not key:
            # "=value" has no usable name; setting it would fail or pollute the env.
            continue

        value = value.strip()
        # KEY="some value" / KEY='some value' -> some value
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]

        os.environ.setdefault(key, value)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
load_local_env()
|
| 24 |
+
MODAL_VERIFIER_URL = os.environ.get("MODAL_VERIFIER_URL")
|
| 25 |
+
MODAL_SANDBOX_URL = os.environ.get("MODAL_SANDBOX_URL")
|
| 26 |
+
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def load_static(filename: str) -> str:
    """Return the UTF-8 text of ``static/<filename>``, relative to the working directory."""
    asset_path = Path("static") / filename
    return asset_path.read_text(encoding="utf-8")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def endpoint_url(url: str | None, path: str) -> str | None:
|
| 34 |
+
if not url:
|
| 35 |
+
return None
|
| 36 |
+
clean = url.rstrip("/")
|
| 37 |
+
if clean.endswith(path):
|
| 38 |
+
return clean
|
| 39 |
+
return f"{clean}{path}"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
custom_html = f"""
|
| 43 |
+
<div id="split-brain-root">
|
| 44 |
+
<div class="split-topline">
|
| 45 |
+
<span>Local: WebGPU 1.5B</span>
|
| 46 |
+
<span>Cloud: Modal A10G 14B</span>
|
| 47 |
+
</div>
|
| 48 |
+
<div class="webgpu-notice" id="webgpu-warning" hidden>
|
| 49 |
+
WebGPU not detected. Use Chrome 113+ on desktop for local inference.
|
| 50 |
+
</div>
|
| 51 |
+
<div id="load-section" class="load-section">
|
| 52 |
+
<button id="load-btn" class="local-button" onclick="window.initEngine()">Load 1.5B Model</button>
|
| 53 |
+
<div class="loading-bar"><div class="loading-bar-fill" id="load-progress"></div></div>
|
| 54 |
+
<span id="load-status" class="load-status">Model not loaded</span>
|
| 55 |
+
</div>
|
| 56 |
+
<pre id="stream-display" class="code-stream">Waiting for model load...</pre>
|
| 57 |
+
<div class="status-bar">
|
| 58 |
+
<span id="status-text">Idle</span>
|
| 59 |
+
<span id="token-count">0 tok/s</span>
|
| 60 |
+
<span id="verifier-status">Verifier idle</span>
|
| 61 |
+
</div>
|
| 62 |
+
</div>
|
| 63 |
+
<script type="module">
|
| 64 |
+
{load_static("engine.js")}
|
| 65 |
+
{load_static("ui.js")}
|
| 66 |
+
|
| 67 |
+
const warning = document.getElementById("webgpu-warning");
|
| 68 |
+
const loadButton = document.getElementById("load-btn");
|
| 69 |
+
|
| 70 |
+
function findGradioInput(id) {{
|
| 71 |
+
const root = document.getElementById(id);
|
| 72 |
+
if (!root) return null;
|
| 73 |
+
if (root.matches("input, textarea")) return root;
|
| 74 |
+
return root.querySelector("input, textarea");
|
| 75 |
+
}}
|
| 76 |
+
|
| 77 |
+
function findGradioButton(id) {{
|
| 78 |
+
const root = document.getElementById(id);
|
| 79 |
+
if (!root) return null;
|
| 80 |
+
if (root.matches("button")) return root;
|
| 81 |
+
return root.querySelector("button");
|
| 82 |
+
}}
|
| 83 |
+
|
| 84 |
+
if (!isWebGPUSupported()) {{
|
| 85 |
+
warning.hidden = false;
|
| 86 |
+
loadButton.disabled = true;
|
| 87 |
+
setStatus("Chrome 113+ with WebGPU required", "warning");
|
| 88 |
+
}}
|
| 89 |
+
|
| 90 |
+
window.initEngine = async function() {{
|
| 91 |
+
loadButton.disabled = true;
|
| 92 |
+
document.getElementById("load-status").textContent = "Loading model weights...";
|
| 93 |
+
try {{
|
| 94 |
+
await loadModel((progress) => {{
|
| 95 |
+
const value = progress.progress ? Math.round(progress.progress) : 0;
|
| 96 |
+
document.getElementById("load-progress").style.width = `${{value}}%`;
|
| 97 |
+
if (progress.file) {{
|
| 98 |
+
document.getElementById("load-status").textContent = `${{progress.file}} - ${{value}}%`;
|
| 99 |
+
}}
|
| 100 |
+
}});
|
| 101 |
+
document.getElementById("load-progress").style.width = "100%";
|
| 102 |
+
document.getElementById("load-status").textContent = "Model ready - WebGPU active";
|
| 103 |
+
document.getElementById("load-section").classList.add("loaded");
|
| 104 |
+
setStatus("Ready", "success");
|
| 105 |
+
}} catch (error) {{
|
| 106 |
+
loadButton.disabled = false;
|
| 107 |
+
setStatus(`Model load failed: ${{error.message}}`, "warning");
|
| 108 |
+
document.getElementById("load-status").textContent = "Load failed";
|
| 109 |
+
}}
|
| 110 |
+
}};
|
| 111 |
+
|
| 112 |
+
window.runLocalGeneration = async function(prompt, language) {{
|
| 113 |
+
if (!prompt || !prompt.trim()) {{
|
| 114 |
+
setStatus("Enter a prompt first", "warning");
|
| 115 |
+
return [];
|
| 116 |
+
}}
|
| 117 |
+
|
| 118 |
+
reset();
|
| 119 |
+
setVerifierStatus("IDLE");
|
| 120 |
+
setStatus("Generating locally (WebGPU)...", "neutral");
|
| 121 |
+
|
| 122 |
+
let tokenCount = 0;
|
| 123 |
+
const startTime = Date.now();
|
| 124 |
+
|
| 125 |
+
try {{
|
| 126 |
+
const fullCode = await generateCode(
|
| 127 |
+
prompt,
|
| 128 |
+
language,
|
| 129 |
+
(token) => {{
|
| 130 |
+
appendToken(token);
|
| 131 |
+
tokenCount += 1;
|
| 132 |
+
const elapsed = Math.max((Date.now() - startTime) / 1000, 0.1);
|
| 133 |
+
document.getElementById("token-count").textContent = `${{Math.round(tokenCount / elapsed)}} tok/s`;
|
| 134 |
+
}},
|
| 135 |
+
() => {{
|
| 136 |
+
setStatus("Local generation complete. Verifier warming up...", "neutral");
|
| 137 |
+
setVerifierStatus("CHECKING");
|
| 138 |
+
}}
|
| 139 |
+
);
|
| 140 |
+
|
| 141 |
+
const hidden = findGradioInput("draft-output-hidden");
|
| 142 |
+
const trigger = findGradioButton("trigger-verify-btn");
|
| 143 |
+
if (!hidden || !trigger) {{
|
| 144 |
+
setStatus("Gradio verification bridge not ready", "warning");
|
| 145 |
+
return [];
|
| 146 |
+
}}
|
| 147 |
+
|
| 148 |
+
hidden.value = fullCode;
|
| 149 |
+
hidden.dispatchEvent(new Event("input", {{ bubbles: true }}));
|
| 150 |
+
trigger.click();
|
| 151 |
+
}} catch (error) {{
|
| 152 |
+
setStatus(`Generation failed: ${{error.message}}`, "warning");
|
| 153 |
+
}}
|
| 154 |
+
return [];
|
| 155 |
+
}};
|
| 156 |
+
|
| 157 |
+
window.applyVerification = function(verdictJson) {{
|
| 158 |
+
if (!verdictJson) return [];
|
| 159 |
+
let verdict;
|
| 160 |
+
try {{
|
| 161 |
+
verdict = JSON.parse(verdictJson);
|
| 162 |
+
}} catch (error) {{
|
| 163 |
+
setStatus("Verifier returned invalid JSON", "warning");
|
| 164 |
+
return [];
|
| 165 |
+
}}
|
| 166 |
+
|
| 167 |
+
if (verdict.verdict === "PASS") {{
|
| 168 |
+
setVerifierStatus("PASS");
|
| 169 |
+
setStatus("Verified clean", "success");
|
| 170 |
+
}} else {{
|
| 171 |
+
rollbackAndReplace(verdict.corrected_code || "", verdict.reason || "Verifier supplied a correction", verdict.verdict);
|
| 172 |
+
}}
|
| 173 |
+
return [];
|
| 174 |
+
}};
|
| 175 |
+
</script>
|
| 176 |
+
"""
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
async def verify_with_modal(prompt: str, draft_code: str, language: str) -> str:
    """Ask the Modal verifier to review a locally drafted snippet.

    Args:
        prompt: The user's original request.
        draft_code: Code produced by the in-browser model.
        language: Language label; sent to the verifier lower-cased.

    Returns:
        The verifier's raw response body (a JSON string). When
        ``MODAL_VERIFIER_URL`` is not configured, a JSON-encoded PASS verdict
        is returned instead so the UI can be exercised without a backend.

    Raises:
        httpx.HTTPStatusError: If the verifier answers with a 4xx/5xx status.
    """
    target = endpoint_url(MODAL_VERIFIER_URL, "/verify")

    if target:
        payload = {"prompt": prompt, "draft_code": draft_code, "language": language.lower()}
        # Generous timeout: the first request may have to wait for a verifier cold start.
        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(target, json=payload)
            reply.raise_for_status()
            return reply.text

    fallback = {
        "verdict": "PASS",
        "reason": "MODAL_VERIFIER_URL is not configured; local demo fallback used.",
    }
    return json.dumps(fallback)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
async def execute_in_sandbox(code: str) -> dict:
    """POST ``code`` to the Modal sandbox endpoint and return its JSON reply.

    Returns:
        The decoded JSON response from the sandbox. When ``MODAL_SANDBOX_URL``
        is not configured, a placeholder result with ``returncode`` -1 and a
        "Sandbox not configured" message in ``stderr``.

    Raises:
        httpx.HTTPStatusError: If the sandbox answers with a 4xx/5xx status.
    """
    target = endpoint_url(MODAL_SANDBOX_URL, "/execute")

    if target:
        async with httpx.AsyncClient(timeout=30.0) as http:
            reply = await http.post(target, json={"code": code})
            reply.raise_for_status()
            return reply.json()

    return {"stdout": "", "stderr": "Sandbox not configured", "returncode": -1}
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def code_from_verdict(draft_code: str, verdict_json: str) -> str:
    """Pick the code to execute: the verifier's correction if any, else the draft.

    Args:
        draft_code: Code produced by the local model.
        verdict_json: JSON text of the verifier verdict; may be empty.

    Returns:
        ``corrected_code`` from the verdict when it is a non-blank string;
        otherwise ``draft_code``. Malformed JSON, JSON that is not an object
        (``null``, a list, a bare string, ...) and non-string corrections all
        fall back to the draft instead of raising.
    """
    if not verdict_json:
        return draft_code

    try:
        verdict = json.loads(verdict_json)
    except (json.JSONDecodeError, TypeError):
        return draft_code

    # Valid JSON is not necessarily an object; only a dict has `.get`.
    if not isinstance(verdict, dict):
        return draft_code

    corrected = verdict.get("corrected_code")
    if isinstance(corrected, str) and corrected.strip():
        return corrected
    return draft_code
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
async def run_sandbox(language: str, draft_code: str, verdict_json: str) -> str:
    """Execute the current code in the sandbox and format the outcome as text.

    The code that runs is the verifier's correction when the verdict carries
    one, otherwise the local draft. Only Python is supported; any other
    language, or an empty snippet, yields an explanatory message instead of a
    sandbox call.

    Returns:
        A plain-text report with the return code, stdout and stderr, where
        empty streams are shown as ``<empty>``.
    """
    if language.lower() != "python":
        return "Sandbox execution is currently wired for Python only."

    code = code_from_verdict(draft_code, verdict_json)
    if not code.strip():
        return "No generated code is available yet."

    outcome = await execute_in_sandbox(code)
    report = [
        f"returncode: {outcome.get('returncode', '')}",
        "",
        "stdout:",
        outcome.get("stdout", "") or "<empty>",
        "",
        "stderr:",
        outcome.get("stderr", "") or "<empty>",
    ]
    return "\n".join(report)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
with gr.Blocks(
|
| 245 |
+
title="Split-Brain Co-Pilot",
|
| 246 |
+
css=load_static("style.css"),
|
| 247 |
+
theme=gr.themes.Base(primary_hue="blue", neutral_hue="slate"),
|
| 248 |
+
) as demo:
|
| 249 |
+
gr.HTML(
|
| 250 |
+
"""
|
| 251 |
+
<section class="app-header">
|
| 252 |
+
<p class="eyebrow">Build Small Hackathon</p>
|
| 253 |
+
<h1>Split-Brain Co-Pilot</h1>
|
| 254 |
+
<p>Draft locally in Chrome with a 1.5B WebGPU model. Verify in the background with a 14B Modal brain.</p>
|
| 255 |
+
</section>
|
| 256 |
+
<div class="space-init" id="space-init">Space initializing...</div>
|
| 257 |
+
<script>
|
| 258 |
+
requestAnimationFrame(() => {
|
| 259 |
+
const el = document.getElementById("space-init");
|
| 260 |
+
if (el) el.hidden = true;
|
| 261 |
+
});
|
| 262 |
+
</script>
|
| 263 |
+
"""
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
with gr.Row(equal_height=False):
|
| 267 |
+
with gr.Column(scale=2, min_width=320):
|
| 268 |
+
prompt_input = gr.Textbox(
|
| 269 |
+
label="Prompt",
|
| 270 |
+
placeholder="Write a Python function that finds all prime numbers up to n using a segmented sieve, handling edge cases.",
|
| 271 |
+
lines=6,
|
| 272 |
+
)
|
| 273 |
+
language_select = gr.Dropdown(choices=LANGUAGES, value="Python", label="Language")
|
| 274 |
+
generate_btn = gr.Button("Generate -> Verify", variant="primary")
|
| 275 |
+
with gr.Column(scale=3, min_width=420):
|
| 276 |
+
gr.HTML(custom_html)
|
| 277 |
+
draft_hidden = gr.Textbox(
|
| 278 |
+
label="draft bridge",
|
| 279 |
+
elem_id="draft-output-hidden",
|
| 280 |
+
elem_classes=["bridge-hidden"],
|
| 281 |
+
)
|
| 282 |
+
verify_trigger = gr.Button(
|
| 283 |
+
"verify",
|
| 284 |
+
elem_id="trigger-verify-btn",
|
| 285 |
+
elem_classes=["bridge-hidden"],
|
| 286 |
+
)
|
| 287 |
+
verdict_output = gr.Textbox(
|
| 288 |
+
label="verdict",
|
| 289 |
+
elem_classes=["bridge-hidden"],
|
| 290 |
+
)
|
| 291 |
+
|
| 292 |
+
with gr.Row():
|
| 293 |
+
sandbox_btn = gr.Button("Run Python Sandbox", variant="secondary")
|
| 294 |
+
sandbox_output = gr.Code(label="Sandbox Execution Output", language="shell")
|
| 295 |
+
|
| 296 |
+
generate_btn.click(
|
| 297 |
+
fn=None,
|
| 298 |
+
inputs=[prompt_input, language_select],
|
| 299 |
+
outputs=[],
|
| 300 |
+
js="(prompt, lang) => window.runLocalGeneration(prompt, lang)",
|
| 301 |
+
)
|
| 302 |
+
|
| 303 |
+
async def run_verification(prompt: str, draft_code: str, language: str) -> str:
|
| 304 |
+
return await verify_with_modal(prompt, draft_code, language)
|
| 305 |
+
|
| 306 |
+
verify_trigger.click(
|
| 307 |
+
fn=run_verification,
|
| 308 |
+
inputs=[prompt_input, draft_hidden, language_select],
|
| 309 |
+
outputs=[verdict_output],
|
| 310 |
+
)
|
| 311 |
+
|
| 312 |
+
verdict_output.change(
|
| 313 |
+
fn=None,
|
| 314 |
+
inputs=[verdict_output],
|
| 315 |
+
outputs=[],
|
| 316 |
+
js="(verdict) => window.applyVerification(verdict)",
|
| 317 |
+
)
|
| 318 |
+
|
| 319 |
+
sandbox_btn.click(
|
| 320 |
+
fn=run_sandbox,
|
| 321 |
+
inputs=[language_select, draft_hidden, verdict_output],
|
| 322 |
+
outputs=[sandbox_output],
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
if __name__ == "__main__":
    # Host and port come from GRADIO_SERVER_NAME / GRADIO_SERVER_PORT when set;
    # otherwise the app binds to 127.0.0.1:7860. The API docs page is disabled.
    demo.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
        show_api=False,
    )
|
modal_backend/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Modal backend package for Split-Brain Co-Pilot."""
|
modal_backend/sandbox.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import modal
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
app = modal.App("split-brain-sandbox")
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@app.function(timeout=30)
def execute_python(code: str) -> dict:
    """Run ``code`` with ``python3 -c`` inside a Modal sandbox.

    The sandbox uses a Debian slim image with numpy installed, half a CPU and
    a 10 second timeout.

    Returns:
        A dict with the sandbox's ``stdout``, ``stderr`` and ``returncode``.
    """
    runtime_image = modal.Image.debian_slim().pip_install("numpy")
    command = ("python3", "-c", code)

    box = modal.Sandbox.create(*command, image=runtime_image, timeout=10, cpu=0.5)
    box.wait()

    captured_out = box.stdout.read()
    captured_err = box.stderr.read()
    return {"stdout": captured_out, "stderr": captured_err, "returncode": box.returncode}
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@app.function(image=modal.Image.debian_slim().pip_install("fastapi", "pydantic"))
@modal.asgi_app()
def sandbox_endpoint():
    """Build the FastAPI app that exposes the sandbox over HTTP.

    Routes:
        POST /execute -- body ``{"code": "<python source>"}``; runs it via
            ``execute_python`` and returns that function's result dict.
        GET /health -- returns ``{"ok": True}``.

    CORS is fully open (any origin, method and header).
    """
    # Imported inside the function: these packages are installed in this
    # function's image (see the decorator above).
    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware
    from pydantic import BaseModel

    web_app = FastAPI()
    web_app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    class ExecuteRequest(BaseModel):
        code: str

    @web_app.post("/execute")
    async def execute(req: ExecuteRequest):
        # `.remote()` is a blocking call; awaiting `.remote.aio()` keeps the
        # event loop free (e.g. for /health) while the sandbox is running.
        return await execute_python.remote.aio(req.code)

    @web_app.get("/health")
    async def health():
        return {"ok": True}

    return web_app
|
modal_backend/verifier.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
import modal
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
app = modal.App("split-brain-verifier")
|
| 7 |
+
|
| 8 |
+
model_volume = modal.Volume.from_name("qwen-14b-volume", create_if_missing=True)
|
| 9 |
+
|
| 10 |
+
MODEL_DIR = "/models"
|
| 11 |
+
MODEL_FILENAME = "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf"
|
| 12 |
+
MODEL_REPO = "bartowski/Qwen2.5-Coder-14B-Instruct-GGUF"
|
| 13 |
+
|
| 14 |
+
download_image = modal.Image.debian_slim(python_version="3.11").pip_install(
|
| 15 |
+
"huggingface-hub"
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@app.function(
    image=download_image,
    volumes={MODEL_DIR: model_volume},
    timeout=3600,
    secrets=[modal.Secret.from_name("huggingface-secret")],
)
def download_model():
    """Download the verifier's GGUF weights into the mounted model volume.

    Fetches ``MODEL_FILENAME`` from the ``MODEL_REPO`` Hugging Face repository
    into ``MODEL_DIR`` (the path where ``model_volume`` is mounted), then
    commits the volume. Runs with a one-hour timeout and the
    ``huggingface-secret`` Modal secret attached.
    """
    # Imported here rather than at module top -- presumably because the
    # package is only installed in `download_image`.
    from huggingface_hub import hf_hub_download

    hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILENAME,
        local_dir=MODEL_DIR,
    )
    # Commit the volume after the download has finished writing into it.
    model_volume.commit()
    print(f"Downloaded to {MODEL_DIR}/{MODEL_FILENAME}")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Container image for the verifier: Debian slim + Python 3.11, a source build of
# llama.cpp under /llama.cpp, and the llama-cpp-python bindings plus the web stack.
#
# NOTE(review): the clone has no tag or commit pinned, so every image rebuild
# picks up whatever llama.cpp's default branch is at that moment.
# NOTE(review): neither the cmake build nor the llama-cpp-python install passes
# any CUDA option, and the base image is debian_slim -- presumably this yields a
# CPU-only build, in which case `n_gpu_layers=-1` would not offload anything.
# Confirm GPU offload actually happens on the deployed container.
# NOTE(review): the code in this file loads the model through `llama_cpp`
# (llama-cpp-python); confirm the separate /llama.cpp source build is needed.
llama_image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("build-essential", "cmake", "git", "libgomp1")
    .run_commands(
        "git clone https://github.com/ggerganov/llama.cpp /llama.cpp",
        "cd /llama.cpp && cmake -B build -DLLAMA_CURL=OFF && cmake --build build --config Release -j$(nproc)",
        "cd /llama.cpp && pip install -e .",
    )
    .pip_install("llama-cpp-python==0.3.4", "fastapi", "uvicorn", "pydantic")
)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _parse_verdict(raw: str) -> dict:
    """Turn the verifier model's raw reply into a validated verdict dict.

    The model is told to answer with bare JSON, but replies wrapped in a
    markdown fence or surrounded by stray prose are common, so only the
    outermost ``{...}`` span is parsed.

    Returns:
        The parsed verdict when it is a JSON object whose ``verdict`` is PASS,
        FIX or REWRITE (and, for FIX/REWRITE, carries a non-blank string
        ``corrected_code``). Anything else degrades to a PASS verdict whose
        ``reason`` explains what was wrong, matching the fail-open behaviour
        callers already rely on.
    """
    text = (raw or "").strip()
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end <= start:
        return {"verdict": "PASS", "reason": "Verifier response could not be parsed."}

    try:
        parsed = json.loads(text[start : end + 1])
    except json.JSONDecodeError:
        return {"verdict": "PASS", "reason": "Verifier response could not be parsed."}

    # A tuple (not a set) so an unhashable "verdict" value cannot raise TypeError.
    if not isinstance(parsed, dict) or parsed.get("verdict") not in ("PASS", "FIX", "REWRITE"):
        return {"verdict": "PASS", "reason": "Verifier returned an unknown verdict."}

    if parsed["verdict"] != "PASS":
        corrected = parsed.get("corrected_code")
        # The UI replaces the draft with `corrected_code`; an absent or blank
        # correction would wipe the displayed code, so treat it as no verdict.
        if not isinstance(corrected, str) or not corrected.strip():
            return {"verdict": "PASS", "reason": "Verifier returned no corrected code."}

    return parsed


@app.cls(
    image=llama_image,
    gpu="A10G",
    volumes={MODEL_DIR: model_volume},
    scaledown_window=300,
)
@modal.concurrent(max_inputs=10)
class Verifier:
    """Modal class that reviews drafted code with the GGUF model in ``MODEL_DIR``."""

    @modal.enter()
    def load_model(self):
        """Load the model once per container and create the inference lock."""
        import threading

        from llama_cpp import Llama

        self.llm = Llama(
            model_path=f"{MODEL_DIR}/{MODEL_FILENAME}",
            n_gpu_layers=-1,
            n_ctx=8192,
            n_batch=512,
            verbose=False,
        )
        # The class accepts up to 10 concurrent inputs but owns a single Llama
        # instance, which is not documented as safe for simultaneous calls, so
        # inference is serialized through this lock.
        self._llm_lock = threading.Lock()

    @modal.method()
    def verify(self, prompt: str, draft_code: str, language: str = "python") -> dict:
        """Review ``draft_code`` against ``prompt`` and return a verdict dict.

        Args:
            prompt: The user's original request.
            draft_code: Code drafted by the smaller model.
            language: Language name used in the instructions and code fence.

        Returns:
            ``{"verdict": "PASS", ...}`` or a FIX/REWRITE verdict that includes
            ``corrected_code`` and ``reason``. Replies that cannot be parsed or
            validated are reported as PASS with an explanatory ``reason``.
        """
        system = f"""You are a code verifier. A smaller model drafted the following {language} code.
Your job:
1. Check for bugs, logic errors, type errors, off-by-one errors, and security issues.
2. If the code is correct, respond with exactly: {{"verdict": "PASS"}}
3. If fixable, respond with: {{"verdict": "FIX", "corrected_code": "<fixed code here>", "reason": "<one line>"}}
4. If fundamentally wrong, respond with: {{"verdict": "REWRITE", "corrected_code": "<rewritten code>", "reason": "<one line>"}}
Respond ONLY with valid JSON. No markdown, no explanation outside the JSON."""

        user = f"Original prompt:\n{prompt}\n\nDrafted code:\n```{language}\n{draft_code}\n```"

        with self._llm_lock:
            response = self.llm.create_chat_completion(
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": user},
                ],
                max_tokens=2048,
                temperature=0.1,
            )

        raw = response["choices"][0]["message"]["content"] or ""
        return _parse_verdict(raw)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# The HTTP front end reuses the inference image, which already ships
# fastapi, uvicorn and pydantic.
web_image = llama_image


# This function only forwards requests to the Verifier class, so it requests
# neither a GPU nor the model volume.
@app.function(
    image=web_image,
    scaledown_window=300,
)
@modal.asgi_app()
def verifier_endpoint():
    """Build the FastAPI app that exposes the verifier over HTTP.

    Routes:
        POST /verify: forwards ``prompt``, ``draft_code`` and ``language`` to
            ``Verifier.verify`` and returns its verdict dict.
        GET /health: returns ``{"ok": True}``.

    Returns:
        The configured FastAPI application.
    """
    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware
    from pydantic import BaseModel

    web_app = FastAPI()
    # CORS is fully open: any origin, method and header is accepted.
    web_app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    class VerifyRequest(BaseModel):
        prompt: str
        draft_code: str
        language: str = "python"

    @web_app.post("/verify")
    async def verify(req: VerifyRequest):
        # Await the async variant of the remote call; the blocking `.remote()`
        # would stall the event loop (and /health) for the whole inference.
        return await Verifier().verify.remote.aio(
            req.prompt, req.draft_code, req.language
        )

    @web_app.get("/health")
    async def health():
        return {"ok": True}

    return web_app
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@app.function(schedule=modal.Cron("*/5 * * * *"))
def keep_warm():
    """Scheduled job that sends a tiny verification request to the Verifier.

    The cron expression fires every five minutes; the result is discarded.
    """
    verifier = Verifier()
    verifier.verify.remote("test", "print('hello')", "python")
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.30.0
|
| 2 |
+
httpx==0.27.0
|
| 3 |
+
modal==1.4.3
|
| 4 |
+
cbor2==5.6.5
|
| 5 |
+
huggingface-hub==0.28.1
|
static/engine.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";
|
| 2 |
+
|
| 3 |
+
const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";

let generator = null;
let isLoaded = false;
// Promise of the load currently in flight, or null when none is running.
let loadPromise = null;

/**
 * Creates the text-generation pipeline and records it in module state.
 *
 * @param {Function} [onProgress] - Forwarded to the pipeline as `progress_callback`.
 * @returns {Promise<void>}
 */
async function initGenerator(onProgress) {
  generator = await pipeline("text-generation", MODEL_ID, {
    dtype: "q4",
    device: "webgpu",
    progress_callback: onProgress,
  });
  isLoaded = true;
}

/**
 * Loads the local drafting model once.
 *
 * Calls made while a load is already running share that load instead of
 * starting another one; their `onProgress` is ignored because only the first
 * caller's callback is wired into the pipeline. After a failed load the next
 * call starts a fresh attempt.
 *
 * @param {Function} [onProgress] - Progress callback passed to the pipeline.
 * @returns {Promise<void>} Resolves when the model is ready.
 * @throws Whatever the pipeline factory rejects with.
 */
export async function loadModel(onProgress) {
  if (isLoaded) return;

  loadPromise ??= initGenerator(onProgress);
  const pending = loadPromise;
  try {
    await pending;
  } finally {
    // Clear only our own attempt so a newer retry in flight is not dropped.
    if (loadPromise === pending) loadPromise = null;
  }
}
|
| 18 |
+
|
| 19 |
+
/**
 * Generates code for `prompt` with the loaded local model, streaming tokens.
 *
 * @param {string} prompt - The user's request.
 * @param {string} language - Language name inserted into the system prompt.
 * @param {Function} [onToken] - Passed to the streamer as `callback_function`.
 * @param {Function} [onComplete] - Called once with the full generated text.
 * @returns {Promise<string>} The full generated text ("" if nothing was produced).
 * @throws {Error} "Model not loaded" when no generator has been loaded yet.
 */
export async function generateCode(prompt, language, onToken, onComplete) {
  if (!generator) throw new Error("Model not loaded");

  const messages = [
    {
      role: "system",
      content: `You are an expert ${language} programmer. Write clean, correct, production-ready code. Output only code, with no markdown or explanation.`,
    },
    { role: "user", content: prompt },
  ];

  const streamer = new TextStreamer(generator.tokenizer, {
    skip_prompt: true,
    callback_function: onToken,
  });

  const result = await generator(messages, {
    max_new_tokens: 1024,
    temperature: 0.2,
    do_sample: true,
    streamer,
  });

  // `generated_text` is either a chat transcript (array of messages, the last
  // one being the reply) or a plain string; an empty transcript yields "".
  const generated = result?.[0]?.generated_text;
  const fullCode = Array.isArray(generated)
    ? String(generated.at(-1)?.content ?? "")
    : String(generated || "");

  // The result is also returned, so the completion callback is optional.
  onComplete?.(fullCode);
  return fullCode;
}
|
| 49 |
+
|
| 50 |
+
/**
 * Reports whether the runtime exposes `navigator.gpu`.
 *
 * Returns false instead of throwing when `navigator` itself is unavailable.
 * NOTE(review): a truthy `navigator.gpu` presumably does not guarantee that a
 * usable adapter exists -- confirm before relying on it as a capability check.
 *
 * @returns {boolean} True when `navigator.gpu` is present and truthy.
 */
export function isWebGPUSupported() {
  return typeof navigator !== "undefined" && Boolean(navigator?.gpu);
}
|
| 53 |
+
|
| 54 |
+
// Publish the engine API on `window` so non-module page scripts can call it.
const engineApi = { loadModel, generateCode, isWebGPUSupported };
for (const [name, fn] of Object.entries(engineApi)) {
  window[name] = fn;
}
|
static/style.css
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:root {
|
| 2 |
+
--bg: #0d1117;
|
| 3 |
+
--surface: #161b22;
|
| 4 |
+
--surface-2: #0f1720;
|
| 5 |
+
--border: #30363d;
|
| 6 |
+
--accent: #58a6ff;
|
| 7 |
+
--accent-warn: #f0883e;
|
| 8 |
+
--text: #e6edf3;
|
| 9 |
+
--text-muted: #8b949e;
|
| 10 |
+
--green: #3fb950;
|
| 11 |
+
--red: #f85149;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
body,
|
| 15 |
+
.gradio-container {
|
| 16 |
+
background: var(--bg) !important;
|
| 17 |
+
color: var(--text) !important;
|
| 18 |
+
font-family: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
footer {
|
| 22 |
+
display: none !important;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
.bridge-hidden {
|
| 26 |
+
display: none !important;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
.app-header {
|
| 30 |
+
margin: 0 auto 20px;
|
| 31 |
+
max-width: 980px;
|
| 32 |
+
text-align: center;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.app-header h1 {
|
| 36 |
+
margin: 4px 0 8px;
|
| 37 |
+
color: var(--text);
|
| 38 |
+
font-size: clamp(32px, 6vw, 56px);
|
| 39 |
+
letter-spacing: 0;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.app-header p {
|
| 43 |
+
margin: 0;
|
| 44 |
+
color: var(--text-muted);
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
.app-header .eyebrow {
|
| 48 |
+
color: var(--accent);
|
| 49 |
+
font-size: 12px;
|
| 50 |
+
font-weight: 700;
|
| 51 |
+
text-transform: uppercase;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
.space-init {
|
| 55 |
+
margin: 0 auto 12px;
|
| 56 |
+
max-width: 980px;
|
| 57 |
+
color: var(--text-muted);
|
| 58 |
+
text-align: center;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
#split-brain-root {
|
| 62 |
+
color: var(--text);
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.split-topline,
|
| 66 |
+
.status-bar {
|
| 67 |
+
display: flex;
|
| 68 |
+
gap: 12px;
|
| 69 |
+
justify-content: space-between;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.split-topline {
|
| 73 |
+
margin-bottom: 10px;
|
| 74 |
+
color: var(--text-muted);
|
| 75 |
+
font-size: 12px;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
.webgpu-notice {
|
| 79 |
+
margin-bottom: 12px;
|
| 80 |
+
border: 1px solid var(--red);
|
| 81 |
+
border-radius: 6px;
|
| 82 |
+
padding: 10px 12px;
|
| 83 |
+
color: var(--red);
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.load-section {
|
| 87 |
+
display: grid;
|
| 88 |
+
grid-template-columns: auto 1fr;
|
| 89 |
+
gap: 10px 12px;
|
| 90 |
+
align-items: center;
|
| 91 |
+
margin-bottom: 12px;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.load-section.loaded {
|
| 95 |
+
opacity: 0.72;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.local-button {
|
| 99 |
+
border: 1px solid var(--accent);
|
| 100 |
+
border-radius: 6px;
|
| 101 |
+
background: var(--accent);
|
| 102 |
+
color: #07111f;
|
| 103 |
+
cursor: pointer;
|
| 104 |
+
font: inherit;
|
| 105 |
+
font-weight: 700;
|
| 106 |
+
padding: 10px 14px;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
.local-button:disabled {
|
| 110 |
+
cursor: not-allowed;
|
| 111 |
+
opacity: 0.55;
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
.loading-bar {
|
| 115 |
+
height: 8px;
|
| 116 |
+
overflow: hidden;
|
| 117 |
+
border-radius: 999px;
|
| 118 |
+
background: var(--border);
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.loading-bar-fill {
|
| 122 |
+
width: 0%;
|
| 123 |
+
height: 100%;
|
| 124 |
+
background: var(--accent);
|
| 125 |
+
transition: width 0.3s ease;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.load-status {
|
| 129 |
+
grid-column: 1 / -1;
|
| 130 |
+
color: var(--text-muted);
|
| 131 |
+
font-size: 12px;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.code-stream {
|
| 135 |
+
box-sizing: border-box;
|
| 136 |
+
min-height: 390px;
|
| 137 |
+
max-height: 58vh;
|
| 138 |
+
margin: 0;
|
| 139 |
+
overflow: auto;
|
| 140 |
+
border: 1px solid var(--border);
|
| 141 |
+
border-radius: 8px 8px 0 0;
|
| 142 |
+
background: var(--surface);
|
| 143 |
+
color: var(--text);
|
| 144 |
+
font-family: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
| 145 |
+
font-size: 13px;
|
| 146 |
+
line-height: 1.6;
|
| 147 |
+
padding: 16px;
|
| 148 |
+
white-space: pre-wrap;
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
.rollback-flash {
|
| 152 |
+
border-color: var(--red) !important;
|
| 153 |
+
animation: flash 0.45s ease;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
@keyframes flash {
|
| 157 |
+
0% {
|
| 158 |
+
background: rgba(248, 81, 73, 0.24);
|
| 159 |
+
}
|
| 160 |
+
100% {
|
| 161 |
+
background: var(--surface);
|
| 162 |
+
}
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
.status-bar {
|
| 166 |
+
align-items: center;
|
| 167 |
+
border: 1px solid var(--border);
|
| 168 |
+
border-top: 0;
|
| 169 |
+
border-radius: 0 0 8px 8px;
|
| 170 |
+
background: var(--surface-2);
|
| 171 |
+
color: var(--text-muted);
|
| 172 |
+
font-size: 12px;
|
| 173 |
+
padding: 10px 12px;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.status-warning,
|
| 177 |
+
.verdict-fix,
|
| 178 |
+
.verdict-rewrite {
|
| 179 |
+
color: var(--accent-warn);
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
.status-success,
|
| 183 |
+
.verdict-pass {
|
| 184 |
+
color: var(--green);
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
.status-neutral,
|
| 188 |
+
.verdict-idle {
|
| 189 |
+
color: var(--text-muted);
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
textarea,
|
| 193 |
+
select,
|
| 194 |
+
button {
|
| 195 |
+
font-family: inherit !important;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
@media (max-width: 720px) {
|
| 199 |
+
.load-section {
|
| 200 |
+
grid-template-columns: 1fr;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
.split-topline,
|
| 204 |
+
.status-bar {
|
| 205 |
+
align-items: flex-start;
|
| 206 |
+
flex-direction: column;
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
.code-stream {
|
| 210 |
+
min-height: 320px;
|
| 211 |
+
}
|
| 212 |
+
}
|
static/ui.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Streaming state shared by the functions in this module: the text shown so
// far and the individual tokens received for the current draft.
let streamBuffer = "";
let currentTokens = [];

/**
 * Records a streamed token and refreshes the code display.
 *
 * @param {string} token - Text fragment emitted by the generator.
 */
export function appendToken(token) {
  currentTokens.push(token);
  streamBuffer = streamBuffer + token;

  const pane = document.getElementById("stream-display");
  if (!pane) return;

  pane.textContent = streamBuffer;
  // Keep the newest output in view.
  pane.scrollTop = pane.scrollHeight;
}
|
| 14 |
+
|
| 15 |
+
/**
 * Shows a message in the status element and tags it with a `status-<type>` class.
 *
 * @param {string} text - Message to display.
 * @param {string} [type="neutral"] - Suffix of the CSS class to apply.
 */
export function setStatus(text, type = "neutral") {
  const statusEl = document.getElementById("status-text");
  if (statusEl) {
    statusEl.textContent = text;
    statusEl.className = "status-" + type;
  }
}
|
| 22 |
+
|
| 23 |
+
/**
 * Updates the verifier badge: its label text and its `verdict-<name>` class.
 *
 * Unknown verdicts show an empty label; a missing verdict uses the `idle` class.
 *
 * @param {string} verdict - One of IDLE, PASS, FIX, REWRITE or CHECKING.
 */
export function setVerifierStatus(verdict) {
  const badge = document.getElementById("verifier-status");
  if (!badge) return;

  const labelByVerdict = {
    IDLE: "Verifier idle",
    PASS: "Verified",
    FIX: "Fixed",
    REWRITE: "Rewritten",
    CHECKING: "Verifying...",
  };
  const label = labelByVerdict[verdict] || "";
  const suffix = String(verdict || "idle").toLowerCase();

  badge.textContent = label;
  badge.className = "verdict-" + suffix;
}
|
| 37 |
+
|
| 38 |
+
/**
 * Flashes the code display, then replaces the draft with the verifier's
 * corrected code using a short typewriter-style replay.
 *
 * When `correctedCode` is not a string the current draft is left untouched and
 * a warning status is shown instead.
 *
 * @param {string} correctedCode - Replacement code from the verifier.
 * @param {string} [reason] - One-line explanation shown in the status bar.
 * @param {string} [verdict="FIX"] - Verdict shown while the replacement plays.
 * @returns {Promise<void>} Resolves once the replay has finished.
 */
export async function rollbackAndReplace(correctedCode, reason, verdict = "FIX") {
  const display = document.getElementById("stream-display");
  if (!display) return;

  // Validate before touching any state so a malformed verdict cannot wipe the draft.
  if (typeof correctedCode !== "string") {
    setStatus("Verifier returned no corrected code; keeping draft", "warning");
    return;
  }

  display.classList.add("rollback-flash");
  setVerifierStatus(verdict);
  setStatus(
    reason ? `Verifier corrected: ${reason}` : "Verifier corrected the draft",
    "warning",
  );

  await sleep(450);
  display.classList.remove("rollback-flash");
  display.textContent = "";
  streamBuffer = correctedCode;
  currentTokens = [];

  // Reveal a few characters per frame by assigning a growing prefix, rather
  // than appending to textContent one character at a time.
  const charsPerFrame = 5;
  for (let shown = 1; shown <= correctedCode.length; shown += charsPerFrame) {
    display.textContent = correctedCode.slice(0, shown);
    await sleep(8);
  }
  // Finish on the buffer itself so the display matches getCurrentCode().
  display.textContent = streamBuffer;

  setVerifierStatus("PASS");
  setStatus("Corrected block verified", "success");
}
|
| 60 |
+
|
| 61 |
+
/**
 * Returns the code currently held in the stream buffer.
 *
 * @returns {string} The buffered code text.
 */
export function getCurrentCode() {
  const snapshot = streamBuffer;
  return snapshot;
}
|
| 64 |
+
|
| 65 |
+
/**
 * Clears the streaming state and restores the display and the token-rate
 * readout to their initial text.
 */
export function reset() {
  streamBuffer = "";
  currentTokens = [];

  // Element id -> text it should show after a reset; missing elements are skipped.
  const initialText = [
    ["stream-display", ""],
    ["token-count", "0 tok/s"],
  ];
  for (const [id, text] of initialText) {
    const node = document.getElementById(id);
    if (node) node.textContent = text;
  }
}
|
| 75 |
+
|
| 76 |
+
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
| 79 |
+
|
| 80 |
+
// Publish the UI helpers on `window` so non-module page scripts can call them.
const uiApi = {
  appendToken,
  setStatus,
  setVerifierStatus,
  rollbackAndReplace,
  getCurrentCode,
  reset,
};
for (const [name, fn] of Object.entries(uiApi)) {
  window[name] = fn;
}
|