Spaces:

build-small-hackathon
/

split-brain-copilot

Running

+# Split-Brain Speculative Co-Pilot
+### Build Small Hackathon — Complete Build Instructions
+> **Concept:** A 1.5B model runs entirely in the user's browser via WebGPU + transformers.js, streaming code instantly. A 14B model on Modal verifies the draft in the background. When the verifier catches a bug, the UI rolls back the local generation and replaces it with the corrected cloud block — live, visually.
+>
+> **Models:** `Qwen2.5-Coder-1.5B` (browser, WebGPU) + `Qwen2.5-Coder-14B-Instruct` (Modal, GGUF via llama.cpp) — combined 15.5B, well under the 32B cap.
+>
+> **Bonus badges targeted:** Off the Grid · Llama Champion · Off-Brand · Field Notes
+---
+## 0. Prerequisites
+- Python 3.11+
+- Node.js 18+ (for local frontend testing)
+- Modal account with `modal` CLI installed and authenticated (`modal token new`)
+- Hugging Face account, joined the `build-small-hackathon` org, HF token with write access
+- `huggingface-cli` installed and logged in (`huggingface-cli login`)
+- Chrome 113+ (WebGPU required — Firefox and Safari are out, document this clearly)
+- Git
+---
+## 1. Repository Structure
+Set up the project layout before writing any code.
+```
+split-brain-copilot/
+├── app.py                  # Gradio app entry point (HF Space root)
+├── modal_backend/
+│   ├── __init__.py
+│   ├── verifier.py         # Modal app: 14B inference endpoint
+│   └── sandbox.py          # Modal app: code execution sandbox
+├── static/
+│   ├── engine.js           # transformers.js WebGPU inference engine
+│   ├── ui.js               # Stream rendering, rollback animation, diff logic
+│   └── style.css           # Custom UI (required for Off-Brand badge)
+├── requirements.txt
+└── README.md               # HF Space card + demo video embed
+```
+Initialize git and create the HF Space repo. First confirm that `huggingface-cli` is installed and authenticated (`huggingface-cli whoami` should print your username):
+```bash
+git init
+huggingface-cli repo create split-brain-copilot --type space --space-sdk gradio
+git remote add origin https://huggingface.co/spaces/YOUR_HF_USERNAME/split-brain-copilot
+```
+---
+## 2. Modal Backend — 14B Verifier Endpoint
+### 2.1 Download the GGUF model to a Modal Volume
+The 14B model is too large to bake into the image. Use a Modal Volume for persistent storage.
+```python
+# modal_backend/verifier.py
+import modal
+app = modal.App("split-brain-verifier")
+# Persistent volume — survives cold starts
+model_volume = modal.Volume.from_name("qwen-14b-volume", create_if_missing=True)
+MODEL_DIR = "/models"
+MODEL_FILENAME = "qwen2.5-coder-14b-instruct-q4_k_m.gguf"
+# Source: bartowski/Qwen2.5-Coder-14B-Instruct-GGUF on HuggingFace
+MODEL_REPO = "bartowski/Qwen2.5-Coder-14B-Instruct-GGUF"
+```
+Create a one-time download function:
+```python
+@app.function(
+    volumes={MODEL_DIR: model_volume},
+    timeout=3600,
+    secrets=[modal.Secret.from_name("huggingface-secret")],
+)
+def download_model():
+    from huggingface_hub import hf_hub_download
+    import os
+    hf_hub_download(
+        repo_id=MODEL_REPO,
+        filename=MODEL_FILENAME,
+        local_dir=MODEL_DIR,
+    )
+    model_volume.commit()
+    print(f"Downloaded to {MODEL_DIR}/{MODEL_FILENAME}")
+```
+Run this once: `modal run modal_backend/verifier.py::download_model`
+### 2.2 Build the llama.cpp image
+```python
+# Container image for the verifier: Debian slim with build tools, a llama.cpp
+# checkout built from source, and the llama-cpp-python binding plus FastAPI.
+llama_image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .apt_install("build-essential", "cmake", "git", "libgomp1")
+    .run_commands(
+        # NOTE(review): the clone is unpinned, so every image rebuild picks up
+        # whatever is on the default branch — consider pinning a tag or commit.
+        "git clone https://github.com/ggerganov/llama.cpp /llama.cpp",
+        "cd /llama.cpp && cmake -B build -DLLAMA_CURL=OFF && cmake --build build --config Release -j$(nproc)",
+        # NOTE(review): the Verifier imports llama_cpp from the pip package
+        # below, not from this checkout — confirm this step is needed at all.
+        "cd /llama.cpp && pip install -e .",
+    )
+    # NOTE(review): no CUDA toolkit or CUDA build flags are configured in this
+    # image, so this presumably installs a CPU-only build and n_gpu_layers=-1
+    # would not offload to the A10G — verify GPU offload in the container logs.
+    .pip_install("llama-cpp-python==0.3.4", "fastapi", "uvicorn")
+)
+```
+### 2.3 Verifier inference function
+The verifier receives the speculated code draft and the original prompt, and returns a verdict: `PASS`, `FIX`, or `REWRITE` with corrected code.
+```python
+@app.cls(
+    image=llama_image,
+    gpu=modal.gpu.A10G(),
+    volumes={MODEL_DIR: model_volume},
+    container_idle_timeout=300,
+    allow_concurrent_inputs=10,
+)
+class Verifier:
+    @modal.enter()
+    def load_model(self):
+        from llama_cpp import Llama
+        self.llm = Llama(
+            model_path=f"{MODEL_DIR}/{MODEL_FILENAME}",
+            n_gpu_layers=-1,      # all layers on GPU
+            n_ctx=8192,
+            n_batch=512,
+            verbose=False,
+        )
+    @modal.method()
+    def verify(self, prompt: str, draft_code: str, language: str = "python") -> dict:
+        system = f"""You are a code verifier. A smaller model drafted the following {language} code.
+Your job:
+1. Check for bugs, logic errors, type errors, off-by-one errors, and security issues.
+2. If the code is correct, respond with exactly: {{"verdict": "PASS"}}
+3. If fixable, respond with: {{"verdict": "FIX", "corrected_code": "<fixed code here>", "reason": "<one line>"}}
+4. If fundamentally wrong, respond with: {{"verdict": "REWRITE", "corrected_code": "<rewritten code>", "reason": "<one line>"}}
+Respond ONLY with valid JSON. No markdown, no explanation outside the JSON."""
+        user = f"Original prompt:\n{prompt}\n\nDrafted code:\n```{language}\n{draft_code}\n```"
+        response = self.llm.create_chat_completion(
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            max_tokens=2048,
+            temperature=0.1,
+        )
+        import json
+        raw = response["choices"][0]["message"]["content"].strip()
+        try:
+            return json.loads(raw)
+        except json.JSONDecodeError:
+            # Fallback: treat as PASS if we can't parse
+            return {"verdict": "PASS"}
+```
+### 2.4 Modal Sandbox — code execution (optional but impressive)
+Sandboxed execution confirms the corrected code actually runs. This is what earns you extra credibility in the demo.
+```python
+# modal_backend/sandbox.py
+import modal
+app = modal.App("split-brain-sandbox")
+@app.function(timeout=30)
+def execute_python(code: str) -> dict:
+    """Run untrusted code in a Modal sandbox and return stdout/stderr."""
+    sandbox = modal.Sandbox.create(
+        "python3", "-c", code,
+        image=modal.Image.debian_slim().pip_install("numpy"),
+        timeout=10,
+        cpu=0.5,
+    )
+    sandbox.wait()
+    return {
+        "stdout": sandbox.stdout.read(),
+        "stderr": sandbox.stderr.read(),
+        "returncode": sandbox.returncode,
+    }
+```
+### 2.5 Deploy the Modal backend
+```bash
+modal deploy modal_backend/verifier.py
+modal deploy modal_backend/sandbox.py
+```
+Note the endpoint URLs printed after deploy. You'll need them in `app.py`.
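+The `download_model` function from section 2.1 reads the Hugging Face token from a Modal secret named `huggingface-secret`. Create it before running that download step: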
+```bash
+modal secret create huggingface-secret HF_TOKEN=hf_xxx
+```
+---
+## 3. Browser Engine — transformers.js + WebGPU
+### 3.1 Model choice for the browser
+Use `Qwen2.5-Coder-1.5B-Instruct` in ONNX/WebGPU format. Xenova and onnx-community maintain these on HF Hub. Target:
+`onnx-community/Qwen2.5-Coder-1.5B-Instruct` with `dtype: "q4"` for fast WebGPU loading (~800MB, fits comfortably in browser VRAM on a modern GPU).
+### 3.2 engine.js — WebGPU inference
+```javascript
+// static/engine.js
+import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";
+const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";
+let generator = null;
+let isLoaded = false;
+export async function loadModel(onProgress) {
+    if (isLoaded) return;
+    generator = await pipeline("text-generation", MODEL_ID, {
+        dtype: "q4",
+        device: "webgpu",
+        progress_callback: onProgress,
+    });
+    isLoaded = true;
+}
+export async function generateCode(prompt, language, onToken, onComplete) {
+    if (!generator) throw new Error("Model not loaded");
+    const messages = [
+        {
+            role: "system",
+            content: `You are an expert ${language} programmer. Write clean, correct, production-ready code. Output ONLY the code block, no explanation.`
+        },
+        { role: "user", content: prompt }
+    ];
+    const streamer = new TextStreamer(generator.tokenizer, {
+        skip_prompt: true,
+        callback_function: (token) => {
+            onToken(token);
+        },
+    });
+    const result = await generator(messages, {
+        max_new_tokens: 1024,
+        temperature: 0.2,
+        do_sample: true,
+        streamer,
+    });
+    const fullCode = result[0].generated_text.at(-1).content;
+    onComplete(fullCode);
+    return fullCode;
+}
+export function isWebGPUSupported() {
+    return !!navigator.gpu;
+}
+```
+### 3.3 ui.js — stream rendering + rollback animation
+```javascript
+// static/ui.js
+let currentTokens = [];
+let streamBuffer = "";
+export function initEditor(containerId) {
+    // Attach to the Gradio custom HTML component
+    const container = document.getElementById(containerId);
+    container.innerHTML = `
+        <div id="stream-display" class="code-stream"></div>
+        <div id="status-bar" class="status-bar">
+            <span id="status-text">Ready</span>
+            <span id="token-count">0 tok/s</span>
+            <span id="verifier-status"></span>
+        </div>
+    `;
+}
+export function appendToken(token) {
+    streamBuffer += token;
+    currentTokens.push(token);
+    const display = document.getElementById("stream-display");
+    if (display) display.textContent = streamBuffer;
+}
+export function setStatus(text, type = "neutral") {
+    const el = document.getElementById("status-text");
+    if (el) {
+        el.textContent = text;
+        el.className = `status-${type}`;
+    }
+}
+export function setVerifierStatus(verdict) {
+    const el = document.getElementById("verifier-status");
+    if (!el) return;
+    const icons = { PASS: "✅ Verified", FIX: "🔧 Fixed", REWRITE: "🔄 Rewritten", CHECKING: "🔍 Verifying..." };
+    el.textContent = icons[verdict] || "";
+}
+export async function rollbackAndReplace(correctedCode, reason) {
+    const display = document.getElementById("stream-display");
+    if (!display) return;
+    // Flash red to signal rollback
+    display.classList.add("rollback-flash");
+    setVerifierStatus("FIX");
+    setStatus(`Verifier corrected: ${reason}`, "warning");
+    await sleep(400);
+    display.classList.remove("rollback-flash");
+    // Type in corrected code character by character
+    display.textContent = "";
+    streamBuffer = correctedCode;
+    currentTokens = [];
+    for (let i = 0; i < correctedCode.length; i++) {
+        display.textContent += correctedCode[i];
+        if (i % 5 === 0) await sleep(8); // smooth typewriter
+    }
+    setVerifierStatus("PASS");
+    setStatus("Ready", "neutral");
+}
+export function getCurrentCode() {
+    return streamBuffer;
+}
+export function reset() {
+    streamBuffer = "";
+    currentTokens = [];
+    const display = document.getElementById("stream-display");
+    if (display) display.textContent = "";
+}
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
+```
+### 3.4 style.css — custom UI (Off-Brand badge)
+```css
+/* static/style.css */
+:root {
+    --bg: #0d1117;
+    --surface: #161b22;
+    --border: #30363d;
+    --accent: #58a6ff;
+    --accent-warn: #f0883e;
+    --text: #e6edf3;
+    --text-muted: #8b949e;
+    --green: #3fb950;
+    --red: #f85149;
+}
+body { background: var(--bg); color: var(--text); font-family: 'JetBrains Mono', monospace; }
+.code-stream {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 16px;
+    min-height: 300px;
+    font-family: 'JetBrains Mono', monospace;
+    font-size: 13px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    overflow-y: auto;
+    transition: border-color 0.2s;
+}
+.rollback-flash {
+    border-color: var(--red) !important;
+    background: rgba(248, 81, 73, 0.08) !important;
+    animation: flash 0.4s ease;
+}
+@keyframes flash {
+    0%   { background: rgba(248, 81, 73, 0.25); }
+    100% { background: rgba(248, 81, 73, 0.08); }
+}
+.status-bar {
+    display: flex;
+    justify-content: space-between;
+    padding: 8px 12px;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-top: none;
+    border-radius: 0 0 8px 8px;
+    font-size: 12px;
+    color: var(--text-muted);
+}
+.status-warning { color: var(--accent-warn); }
+.status-success { color: var(--green); }
+.status-neutral { color: var(--text-muted); }
+/* Gradio overrides */
+.gradio-container { background: var(--bg) !important; }
+footer { display: none !important; }
+/* WebGPU loading bar */
+.loading-bar {
+    height: 3px;
+    background: var(--border);
+    border-radius: 2px;
+    overflow: hidden;
+    margin: 8px 0;
+}
+.loading-bar-fill {
+    height: 100%;
+    background: var(--accent);
+    transition: width 0.3s ease;
+}
+```
+---
+## 4. Gradio App — app.py
+This is the HF Space entry point. Gradio acts as the shell; the real UI lives in the custom HTML component injected via `gr.HTML`.
+```python
+# app.py
+import gradio as gr
+import httpx
+import json
+import os
+import asyncio
+from pathlib import Path
+MODAL_VERIFIER_URL = os.environ.get("MODAL_VERIFIER_URL")  # set as HF Space secret
+MODAL_SANDBOX_URL = os.environ.get("MODAL_SANDBOX_URL")    # set as HF Space secret
+LANGUAGES = ["Python", "JavaScript", "TypeScript", "Rust", "Go", "C++"]
+def load_static(filename):
+    return Path(f"static/{filename}").read_text()
+custom_html = f"""
+<!DOCTYPE html>
+<html>
+<head>
+<link rel="preconnect" href="https://fonts.googleapis.com">
+<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
+<style>{load_static('style.css')}</style>
+</head>
+<body>
+<div id="split-brain-root">
+    <div class="webgpu-notice" id="webgpu-warning" style="display:none; color:#f85149; padding:8px; border:1px solid #f85149; border-radius:6px; margin-bottom:12px;">
+        ⚠️ WebGPU not detected. Please use Chrome 113+ on desktop for local inference.
+    </div>
+    <div id="load-section">
+        <button id="load-btn" onclick="initEngine()">⚡ Load 1.5B Model (WebGPU)</button>
+        <div class="loading-bar"><div class="loading-bar-fill" id="load-progress" style="width:0%"></div></div>
+        <span id="load-status" style="font-size:12px; color:#8b949e;"></span>
+    </div>
+    <div id="stream-display" class="code-stream" style="margin-top:12px;">Waiting for model load...</div>
+    <div class="status-bar">
+        <span id="status-text">Idle</span>
+        <span id="token-count"></span>
+        <span id="verifier-status"></span>
+    </div>
+</div>
+<script type="module">
+{load_static('engine.js')}
+{load_static('ui.js')}
+// Check WebGPU on load
+if (!isWebGPUSupported()) {{
+    document.getElementById('webgpu-warning').style.display = 'block';
+    document.getElementById('load-btn').disabled = true;
+}}
+window.initEngine = async function() {{
+    document.getElementById('load-btn').disabled = true;
+    document.getElementById('load-status').textContent = 'Loading model weights...';
+    await loadModel((progress) => {{
+        if (progress.progress) {{
+            document.getElementById('load-progress').style.width = progress.progress + '%';
+            document.getElementById('load-status').textContent = `${{progress.file || 'Loading'}} — ${{Math.round(progress.progress)}}%`;
+        }}
+    }});
+    document.getElementById('load-status').textContent = '✅ Model ready — WebGPU active';
+    document.getElementById('load-section').style.opacity = '0.5';
+}};
+// Gradio will call this via the hidden trigger
+window.runLocalGeneration = async function(prompt, language) {{
+    reset();
+    setStatus('Generating locally (WebGPU)...', 'neutral');
+    let tokenCount = 0;
+    const startTime = Date.now();
+    const fullCode = await generateCode(prompt, language,
+        (token) => {{
+            appendToken(token);
+            tokenCount++;
+            const elapsed = (Date.now() - startTime) / 1000;
+            const tps = Math.round(tokenCount / elapsed);
+            document.getElementById('token-count').textContent = `${{tps}} tok/s`;
+        }},
+        (code) => {{
+            setStatus('Local generation complete. Verifying...', 'neutral');
+        }}
+    );
+    // Send to Gradio backend for verification
+    // Use the hidden Gradio state to trigger the verify function
+    document.getElementById('draft-output-hidden').value = fullCode;
+    document.getElementById('trigger-verify-btn').click();
+}};
+window.applyVerification = function(verdictJson) {{
+    const verdict = JSON.parse(verdictJson);
+    if (verdict.verdict === 'PASS') {{
+        setVerifierStatus('PASS');
+        setStatus('✅ Verified clean', 'success');
+    }} else {{
+        rollbackAndReplace(verdict.corrected_code, verdict.reason);
+    }}
+}};
+</script>
+</body>
+</html>
+"""
+async def verify_with_modal(prompt: str, draft_code: str, language: str) -> str:
+    """Call Modal verifier endpoint and return JSON string."""
+    if not MODAL_VERIFIER_URL:
+        return json.dumps({"verdict": "PASS"})
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        resp = await client.post(
+            MODAL_VERIFIER_URL,
+            json={"prompt": prompt, "draft_code": draft_code, "language": language},
+        )
+        resp.raise_for_status()
+        return resp.text
+async def execute_in_sandbox(code: str) -> dict:
+    """Call Modal sandbox and return execution result."""
+    if not MODAL_SANDBOX_URL:
+        return {"stdout": "", "stderr": "Sandbox not configured", "returncode": -1}
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        resp = await client.post(MODAL_SANDBOX_URL, json={"code": code})
+        return resp.json()
+with gr.Blocks(
+    title="Split-Brain Co-Pilot",
+    css="footer {display:none}",
+    theme=gr.themes.Base(
+        primary_hue="blue",
+        neutral_hue="slate",
+    ),
+) as demo:
+    gr.HTML("<h1 style='text-align:center; color:#58a6ff;'>⚡ Split-Brain Co-Pilot</h1>")
+    gr.HTML("<p style='text-align:center; color:#8b949e;'>1.5B model runs in your browser (WebGPU). 14B model on Modal verifies and corrects.</p>")
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt_input = gr.Textbox(
+                label="What do you want to build?",
+                placeholder="e.g. A function that parses a CSV and returns the top 5 rows by a given column",
+                lines=3,
+            )
+            language_select = gr.Dropdown(
+                choices=LANGUAGES,
+                value="Python",
+                label="Language",
+            )
+            generate_btn = gr.Button("⚡ Generate (WebGPU → Verify)", variant="primary")
+        with gr.Column(scale=3):
+            # Custom HTML component for streaming display
+            gr.HTML(custom_html)
+            # Hidden elements for JS ↔ Gradio bridge
+            draft_hidden = gr.Textbox(visible=False, elem_id="draft-output-hidden")
+            verify_trigger = gr.Button("verify", visible=False, elem_id="trigger-verify-btn")
+            verdict_output = gr.Textbox(visible=False, label="verdict")
+    with gr.Row():
+        sandbox_output = gr.Code(label="Sandbox Execution Output", language="python", visible=False)
+    # Gradio event: user clicks Generate → JS takes over for local inference
+    generate_btn.click(
+        fn=None,
+        inputs=[prompt_input, language_select],
+        outputs=[],
+        js="(prompt, lang) => { window.runLocalGeneration(prompt, lang); return []; }",
+    )
+    # Gradio event: JS triggers verify after local generation completes
+    async def run_verification(prompt, draft_code, language):
+        verdict_json = await verify_with_modal(prompt, draft_code, language)
+        return verdict_json
+    verify_trigger.click(
+        fn=run_verification,
+        inputs=[prompt_input, draft_hidden, language_select],
+        outputs=[verdict_output],
+    )
+    # Apply verdict back to JS
+    verdict_output.change(
+        fn=None,
+        inputs=[verdict_output],
+        outputs=[],
+        js="(verdict) => { window.applyVerification(verdict); return []; }",
+    )
+if __name__ == "__main__":
+    demo.launch()
+```
+---
+## 5. Modal Web Endpoint Wrapper
+The Modal functions need to be exposed as HTTP endpoints that `app.py` can call via httpx. Add this to `verifier.py`:
+```python
+from fastapi import FastAPI
+from pydantic import BaseModel
+web_app = FastAPI()
+class VerifyRequest(BaseModel):
+    prompt: str
+    draft_code: str
+    language: str = "python"
+@app.function(
+    image=llama_image,
+    gpu=modal.gpu.A10G(),
+    volumes={MODEL_DIR: model_volume},
+    container_idle_timeout=300,
+)
+@modal.asgi_app()
+def verifier_endpoint():
+    verifier = Verifier()
+    @web_app.post("/verify")
+    async def verify(req: VerifyRequest):
+        result = verifier.verify.remote(req.prompt, req.draft_code, req.language)
+        return result
+    return web_app
+```
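+After deploying, Modal gives you a URL like `https://your-username--split-brain-verifier-verifier-endpoint.modal.run`. The route is registered at `/verify`, and the `app.py` from section 4 posts to `MODAL_VERIFIER_URL` exactly as given, so set the HF Space secret `MODAL_VERIFIER_URL` to that URL with `/verify` appended.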
+---
+## 6. HF Space Configuration
+### 6.1 README.md (Space card)
+```yaml
+---
+title: Split-Brain Co-Pilot
+emoji: ⚡
+colorFrom: blue
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.30.0
+app_file: app.py
+pinned: true
+license: apache-2.0
+tags:
+  - code-generation
+  - webgpu
+  - speculative-decoding
+  - llama.cpp
+  - local-first
+---
+```
+### 6.2 requirements.txt
+```
+gradio==5.30.0
+httpx==0.27.0
+modal==0.73.0
+huggingface-hub==0.23.0
+```
+### 6.3 HF Space secrets
+Set these in the Space Settings → Repository secrets:
+| Secret name | Value |
+|---|---|
+| `MODAL_VERIFIER_URL` | Your Modal verifier endpoint URL |
+| `MODAL_SANDBOX_URL` | Your Modal sandbox endpoint URL |
+| `MODAL_TOKEN_ID` | From `modal token show` |
+| `MODAL_TOKEN_SECRET` | From `modal token show` |
+---
+## 7. Cold Start Mitigation
+Modal A10G containers take 10–40 seconds to cold start. Handle this gracefully:
+Keep a verifier container warm with a scheduled ping. Add this to `verifier.py`:
+```python
+@app.function(schedule=modal.Cron("*/5 * * * *"))
+def keep_warm():
+    """Ping the verifier every 5 minutes to avoid cold starts during the demo window."""
+    Verifier().verify.remote("test", "print('hello')", "python")
+```
+Scheduled functions only run once the app is deployed, so redeploy after adding it: `modal deploy modal_backend/verifier.py`
+In the UI, show "Verifier warming up..." in the status bar while the first request is in flight and display a spinner. Do not let the UI appear broken during cold start.
+---
+## 8. Demo Video Script
+The demo video is a submission requirement. Plan it around these beats:
+- Open Chrome, show the app. Explain the split-brain concept in one sentence.
+- Click "Load 1.5B Model" — show the WebGPU loading progress bar.
+- Type a non-trivial prompt: "Write a Python function that finds all prime numbers up to n using a segmented sieve, handling edge cases."
+- Hit Generate — show tokens streaming at 80–120 tok/s with the token counter live.
+- Show the "Verifying..." status kick in immediately after local generation completes.
+- If the verifier returns FIX or REWRITE: show the red flash rollback animation and the corrected code typing in.
+- Show the sandbox execution output (stdout) confirming the corrected code runs.
+- End on the split status bar: "Local: WebGPU · Cloud: Modal A10G · Verdict: ✅ Verified"
+Keep the video under 3 minutes. Record with OBS or Loom. No cuts during the generation — the live stream is the point.
+---
+## 9. Bonus Badge Checklist
+| Badge | How you earn it | Status |
+|---|---|---|
+| **Off the Grid** | 1.5B runs 100% in browser, no cloud API for inference | ✅ Automatic |
+| **Llama Champion** | 14B served via llama.cpp on Modal | ✅ Automatic |
+| **Off-Brand** | Custom dark theme, rollback animation, token counter, status bar | ✅ Build it |
+| **Field Notes** | Write a blog post on HF or Dev.to explaining the speculative split-brain architecture | ✅ Write it post-build |
+---
+## 10. Submission Checklist
+Before June 15 deadline:
+- [ ] Modal verifier deployed and endpoint URL confirmed working
+- [ ] HF Space live and publicly accessible under `build-small-hackathon` org
+- [ ] WebGPU model loads in Chrome without errors
+- [ ] Token streaming visible in UI
+- [ ] Rollback animation triggers on at least one FIX/REWRITE verdict
+- [ ] Sandbox execution output shown in demo
+- [ ] Demo video recorded and uploaded (YouTube unlisted or HF)
+- [ ] Social media post published (Twitter/X or LinkedIn) with Space link and demo video
+- [ ] README.md Space card complete with description, tags, and video embed
+- [ ] Field Notes blog post published and linked in README
+---
+## 11. Known Gotchas
+**WebGPU VRAM:** The 1.5B Q4 ONNX model needs ~1GB VRAM. On machines with integrated graphics sharing system RAM, this works but may be slow. Document the Chrome + dedicated GPU requirement.
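+**CORS:** In this design the browser never calls Modal directly — `app.py` makes the request server-side with httpx — so CORS does not apply. If you later call the Modal endpoint from browser JS, add `fastapi.middleware.cors.CORSMiddleware` to the `web_app` and restrict `allow_origins` to your Space's URL (use `["*"]` only for quick testing).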
+**transformers.js version:** Pin to `3.5.x`. Breaking changes in 3.x are frequent. The CDN import in `engine.js` uses the pinned version — don't use `@latest`.
+**Gradio JS bridge:** The `gr.Button(visible=False)` trigger pattern is the cleanest way to fire a Python function from browser JS in Gradio 5.x without websocket hacks. Do not use `gr.Request` for this — it won't work from inside a custom HTML block.
+**Modal Volume first deploy:** The volume download must complete before the verifier function can load the model. Run `download_model` manually once and confirm with `modal volume ls qwen-14b-volume /models` before deploying the endpoint.
+**HF Space cold start:** HF Spaces themselves also cold start. If the Space hasn't been visited recently, Gradio takes 20–30 seconds to boot. Add a loading spinner at the Gradio level using `gr.HTML` with a brief "Space initializing..." message that auto-hides once the page is interactive.

README.md ADDED Viewed

	@@ -0,0 +1,92 @@

+---
+title: Split-Brain Co-Pilot
+emoji: ⚡
+colorFrom: blue
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.30.0
+app_file: app.py
+pinned: true
+license: apache-2.0
+tags:
+  - code-generation
+  - webgpu
+  - speculative-decoding
+  - llama.cpp
+  - local-first
+---
+# Split-Brain Co-Pilot
+A speculative coding assistant for the Build Small Hackathon: a 1.5B code model drafts locally in Chrome with WebGPU, while a 14B Qwen verifier on Modal checks the result in the background. When the verifier catches a problem, the UI flashes, rolls back, and types in the corrected cloud block.
+## Architecture
+- Local brain: `onnx-community/Qwen2.5-Coder-1.5B-Instruct` through transformers.js `3.5.x`, WebGPU, Q4 weights.
+- Cloud brain: `bartowski/Qwen2.5-Coder-14B-Instruct-GGUF` (`Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf`) served on Modal A10G through llama.cpp.
+- Shell: Gradio 5 Space with a custom HTML/CSS/JS streaming surface.
+- Optional proof step: Modal sandbox execution endpoint for generated Python code.
+## Requirements
+Use Chrome 113+ on desktop. Firefox and Safari do not currently support the WebGPU path this demo needs. The browser model needs roughly 1 GB of available GPU memory, so dedicated GPU machines will feel much better than older integrated graphics.
+## Local Run
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python app.py
+```
+Without `MODAL_VERIFIER_URL`, the app uses a PASS fallback so the WebGPU UI can be tested locally.
+Copy `.env.example` to `.env` for local secrets. The `.env` file is ignored by git.
+## Modal Setup
+Install and authenticate the Modal CLI:
+```bash
+pip install modal
+modal token new
+modal secret create huggingface-secret HF_TOKEN=hf_xxx
+```
+Download the 14B GGUF model into the persistent volume once:
+```bash
+modal run modal_backend/verifier.py::download_model
+```
+Deploy the verifier and sandbox:
+```bash
+modal deploy modal_backend/verifier.py
+modal deploy modal_backend/sandbox.py
+```
+Set these Space secrets after deploy:
+| Secret | Value |
+| --- | --- |
+| `MODAL_VERIFIER_URL` | Modal verifier endpoint URL, with or without `/verify` |
+| `MODAL_SANDBOX_URL` | Modal sandbox endpoint URL, with or without `/execute` |
+| `MODAL_TOKEN_ID` | From `modal token show` |
+| `MODAL_TOKEN_SECRET` | From `modal token show` |
+This project uses `modal==1.4.3`; older `0.73.x` clients are now rejected by Modal as deprecated.
+## Demo Beat
+Prompt idea: "Write a Python function that finds all prime numbers up to n using a segmented sieve, handling edge cases."
+Show the model loading bar, token streaming, verifier status, rollback animation on a FIX/REWRITE verdict, and the final verified state.
+## Badge Targets
+- Off the Grid: local 1.5B browser inference.
+- Llama Champion: 14B llama.cpp verifier on Modal.
+- Off-Brand: custom UI, rollback flash, status bar, token counter.
+- Field Notes: publish a post-build architecture writeup and link it here.

app.py ADDED Viewed

	@@ -0,0 +1,331 @@

+import json
+import os
+from pathlib import Path
+import gradio as gr
+import httpx
+LANGUAGES = ["Python", "JavaScript", "TypeScript", "Rust", "Go", "C++"]
+def load_local_env() -> None:
+    env_path = Path(".env")
+    if not env_path.exists():
+        return
+    for line in env_path.read_text(encoding="utf-8").splitlines():
+        if not line or line.startswith("#") or "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        os.environ.setdefault(key.strip(), value.strip())
+load_local_env()
+MODAL_VERIFIER_URL = os.environ.get("MODAL_VERIFIER_URL")
+MODAL_SANDBOX_URL = os.environ.get("MODAL_SANDBOX_URL")
+os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
+def load_static(filename: str) -> str:
+    return Path("static", filename).read_text(encoding="utf-8")
+def endpoint_url(url: str | None, path: str) -> str | None:
+    if not url:
+        return None
+    clean = url.rstrip("/")
+    if clean.endswith(path):
+        return clean
+    return f"{clean}{path}"
+custom_html = f"""
+<div id="split-brain-root">
+    <div class="split-topline">
+        <span>Local: WebGPU 1.5B</span>
+        <span>Cloud: Modal A10G 14B</span>
+    </div>
+    <div class="webgpu-notice" id="webgpu-warning" hidden>
+        WebGPU not detected. Use Chrome 113+ on desktop for local inference.
+    </div>
+    <div id="load-section" class="load-section">
+        <button id="load-btn" class="local-button" onclick="window.initEngine()">Load 1.5B Model</button>
+        <div class="loading-bar"><div class="loading-bar-fill" id="load-progress"></div></div>
+        <span id="load-status" class="load-status">Model not loaded</span>
+    </div>
+    <pre id="stream-display" class="code-stream">Waiting for model load...</pre>
+    <div class="status-bar">
+        <span id="status-text">Idle</span>
+        <span id="token-count">0 tok/s</span>
+        <span id="verifier-status">Verifier idle</span>
+    </div>
+</div>
+<script type="module">
+{load_static("engine.js")}
+{load_static("ui.js")}
+const warning = document.getElementById("webgpu-warning");
+const loadButton = document.getElementById("load-btn");
+function findGradioInput(id) {{
+    const root = document.getElementById(id);
+    if (!root) return null;
+    if (root.matches("input, textarea")) return root;
+    return root.querySelector("input, textarea");
+}}
+function findGradioButton(id) {{
+    const root = document.getElementById(id);
+    if (!root) return null;
+    if (root.matches("button")) return root;
+    return root.querySelector("button");
+}}
+if (!isWebGPUSupported()) {{
+    warning.hidden = false;
+    loadButton.disabled = true;
+    setStatus("Chrome 113+ with WebGPU required", "warning");
+}}
+window.initEngine = async function() {{
+    loadButton.disabled = true;
+    document.getElementById("load-status").textContent = "Loading model weights...";
+    try {{
+        await loadModel((progress) => {{
+            const value = progress.progress ? Math.round(progress.progress) : 0;
+            document.getElementById("load-progress").style.width = `${{value}}%`;
+            if (progress.file) {{
+                document.getElementById("load-status").textContent = `${{progress.file}} - ${{value}}%`;
+            }}
+        }});
+        document.getElementById("load-progress").style.width = "100%";
+        document.getElementById("load-status").textContent = "Model ready - WebGPU active";
+        document.getElementById("load-section").classList.add("loaded");
+        setStatus("Ready", "success");
+    }} catch (error) {{
+        loadButton.disabled = false;
+        setStatus(`Model load failed: ${{error.message}}`, "warning");
+        document.getElementById("load-status").textContent = "Load failed";
+    }}
+}};
+window.runLocalGeneration = async function(prompt, language) {{
+    if (!prompt || !prompt.trim()) {{
+        setStatus("Enter a prompt first", "warning");
+        return [];
+    }}
+    reset();
+    setVerifierStatus("IDLE");
+    setStatus("Generating locally (WebGPU)...", "neutral");
+    let tokenCount = 0;
+    const startTime = Date.now();
+    try {{
+        const fullCode = await generateCode(
+            prompt,
+            language,
+            (token) => {{
+                appendToken(token);
+                tokenCount += 1;
+                const elapsed = Math.max((Date.now() - startTime) / 1000, 0.1);
+                document.getElementById("token-count").textContent = `${{Math.round(tokenCount / elapsed)}} tok/s`;
+            }},
+            () => {{
+                setStatus("Local generation complete. Verifier warming up...", "neutral");
+                setVerifierStatus("CHECKING");
+            }}
+        );
+        const hidden = findGradioInput("draft-output-hidden");
+        const trigger = findGradioButton("trigger-verify-btn");
+        if (!hidden || !trigger) {{
+            setStatus("Gradio verification bridge not ready", "warning");
+            return [];
+        }}
+        hidden.value = fullCode;
+        hidden.dispatchEvent(new Event("input", {{ bubbles: true }}));
+        trigger.click();
+    }} catch (error) {{
+        setStatus(`Generation failed: ${{error.message}}`, "warning");
+    }}
+    return [];
+}};
+window.applyVerification = function(verdictJson) {{
+    if (!verdictJson) return [];
+    let verdict;
+    try {{
+        verdict = JSON.parse(verdictJson);
+    }} catch (error) {{
+        setStatus("Verifier returned invalid JSON", "warning");
+        return [];
+    }}
+    if (verdict.verdict === "PASS") {{
+        setVerifierStatus("PASS");
+        setStatus("Verified clean", "success");
+    }} else {{
+        rollbackAndReplace(verdict.corrected_code || "", verdict.reason || "Verifier supplied a correction", verdict.verdict);
+    }}
+    return [];
+}};
+</script>
+"""
+async def verify_with_modal(prompt: str, draft_code: str, language: str) -> str:
+    verifier_url = endpoint_url(MODAL_VERIFIER_URL, "/verify")
+    if not verifier_url:
+        return json.dumps(
+            {
+                "verdict": "PASS",
+                "reason": "MODAL_VERIFIER_URL is not configured; local demo fallback used.",
+            }
+        )
+    async with httpx.AsyncClient(timeout=90.0) as client:
+        response = await client.post(
+            verifier_url,
+            json={"prompt": prompt, "draft_code": draft_code, "language": language.lower()},
+        )
+        response.raise_for_status()
+        return response.text
+async def execute_in_sandbox(code: str) -> dict:
+    sandbox_url = endpoint_url(MODAL_SANDBOX_URL, "/execute")
+    if not sandbox_url:
+        return {"stdout": "", "stderr": "Sandbox not configured", "returncode": -1}
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        response = await client.post(sandbox_url, json={"code": code})
+        response.raise_for_status()
+        return response.json()
+def code_from_verdict(draft_code: str, verdict_json: str) -> str:
+    if not verdict_json:
+        return draft_code
+    try:
+        verdict = json.loads(verdict_json)
+    except json.JSONDecodeError:
+        return draft_code
+    return verdict.get("corrected_code") or draft_code
+async def run_sandbox(language: str, draft_code: str, verdict_json: str) -> str:
+    if language.lower() != "python":
+        return "Sandbox execution is currently wired for Python only."
+    code = code_from_verdict(draft_code, verdict_json)
+    if not code.strip():
+        return "No generated code is available yet."
+    result = await execute_in_sandbox(code)
+    stdout = result.get("stdout", "")
+    stderr = result.get("stderr", "")
+    returncode = result.get("returncode", "")
+    return "\n".join(
+        [
+            f"returncode: {returncode}",
+            "",
+            "stdout:",
+            stdout or "<empty>",
+            "",
+            "stderr:",
+            stderr or "<empty>",
+        ]
+    )
+with gr.Blocks(
+    title="Split-Brain Co-Pilot",
+    css=load_static("style.css"),
+    theme=gr.themes.Base(primary_hue="blue", neutral_hue="slate"),
+) as demo:
+    gr.HTML(
+        """
+        <section class="app-header">
+            <p class="eyebrow">Build Small Hackathon</p>
+            <h1>Split-Brain Co-Pilot</h1>
+            <p>Draft locally in Chrome with a 1.5B WebGPU model. Verify in the background with a 14B Modal brain.</p>
+        </section>
+        <div class="space-init" id="space-init">Space initializing...</div>
+        <script>
+            requestAnimationFrame(() => {
+                const el = document.getElementById("space-init");
+                if (el) el.hidden = true;
+            });
+        </script>
+        """
+    )
+    with gr.Row(equal_height=False):
+        with gr.Column(scale=2, min_width=320):
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                placeholder="Write a Python function that finds all prime numbers up to n using a segmented sieve, handling edge cases.",
+                lines=6,
+            )
+            language_select = gr.Dropdown(choices=LANGUAGES, value="Python", label="Language")
+            generate_btn = gr.Button("Generate -> Verify", variant="primary")
+        with gr.Column(scale=3, min_width=420):
+            gr.HTML(custom_html)
+            draft_hidden = gr.Textbox(
+                label="draft bridge",
+                elem_id="draft-output-hidden",
+                elem_classes=["bridge-hidden"],
+            )
+            verify_trigger = gr.Button(
+                "verify",
+                elem_id="trigger-verify-btn",
+                elem_classes=["bridge-hidden"],
+            )
+            verdict_output = gr.Textbox(
+                label="verdict",
+                elem_classes=["bridge-hidden"],
+            )
+    with gr.Row():
+        sandbox_btn = gr.Button("Run Python Sandbox", variant="secondary")
+    sandbox_output = gr.Code(label="Sandbox Execution Output", language="shell")
+    generate_btn.click(
+        fn=None,
+        inputs=[prompt_input, language_select],
+        outputs=[],
+        js="(prompt, lang) => window.runLocalGeneration(prompt, lang)",
+    )
+    async def run_verification(prompt: str, draft_code: str, language: str) -> str:
+        return await verify_with_modal(prompt, draft_code, language)
+    verify_trigger.click(
+        fn=run_verification,
+        inputs=[prompt_input, draft_hidden, language_select],
+        outputs=[verdict_output],
+    )
+    verdict_output.change(
+        fn=None,
+        inputs=[verdict_output],
+        outputs=[],
+        js="(verdict) => window.applyVerification(verdict)",
+    )
+    sandbox_btn.click(
+        fn=run_sandbox,
+        inputs=[language_select, draft_hidden, verdict_output],
+        outputs=[sandbox_output],
+    )
+if __name__ == "__main__":
+    demo.launch(
+        server_name=os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1"),
+        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
+        show_api=False,
+    )

modal_backend/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Modal backend package for Split-Brain Co-Pilot."""

modal_backend/sandbox.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import modal
+app = modal.App("split-brain-sandbox")
+@app.function(timeout=30)
+def execute_python(code: str) -> dict:
+    """Run code in a Modal sandbox and return stdout/stderr."""
+    sandbox = modal.Sandbox.create(
+        "python3",
+        "-c",
+        code,
+        image=modal.Image.debian_slim().pip_install("numpy"),
+        timeout=10,
+        cpu=0.5,
+    )
+    sandbox.wait()
+    return {
+        "stdout": sandbox.stdout.read(),
+        "stderr": sandbox.stderr.read(),
+        "returncode": sandbox.returncode,
+    }
+@app.function(image=modal.Image.debian_slim().pip_install("fastapi", "pydantic"))
+@modal.asgi_app()
+def sandbox_endpoint():
+    from fastapi import FastAPI
+    from fastapi.middleware.cors import CORSMiddleware
+    from pydantic import BaseModel
+    web_app = FastAPI()
+    web_app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    class ExecuteRequest(BaseModel):
+        code: str
+    @web_app.post("/execute")
+    async def execute(req: ExecuteRequest):
+        return execute_python.remote(req.code)
+    @web_app.get("/health")
+    async def health():
+        return {"ok": True}
+    return web_app

modal_backend/verifier.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import json
+import modal
+app = modal.App("split-brain-verifier")
+# Named Modal volume that stores the model file; created if it does not exist.
+model_volume = modal.Volume.from_name("qwen-14b-volume", create_if_missing=True)
+# Path at which the volume is mounted inside the containers below.
+MODEL_DIR = "/models"
+# GGUF file name and the Hugging Face repo it is downloaded from.
+MODEL_FILENAME = "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf"
+MODEL_REPO = "bartowski/Qwen2.5-Coder-14B-Instruct-GGUF"
+# Image used by download_model; it only needs the Hub client.
+download_image = modal.Image.debian_slim(python_version="3.11").pip_install(
+    "huggingface-hub"
+)
+@app.function(
+    image=download_image,
+    volumes={MODEL_DIR: model_volume},
+    timeout=3600,
+    secrets=[modal.Secret.from_name("huggingface-secret")],
+)
+def download_model():
+    from huggingface_hub import hf_hub_download
+    hf_hub_download(
+        repo_id=MODEL_REPO,
+        filename=MODEL_FILENAME,
+        local_dir=MODEL_DIR,
+    )
+    model_volume.commit()
+    print(f"Downloaded to {MODEL_DIR}/{MODEL_FILENAME}")
+# Container image for the verifier class and its web endpoint.
+# NOTE(review): llama.cpp is cloned (unpinned) and built from source here,
+# and llama-cpp-python is also pip-installed below. The code in this file
+# only imports `llama_cpp`, so the source build looks unused — confirm.
+# NOTE(review): no CUDA base image or CUDA build flags are visible here;
+# verify that the installed llama-cpp-python actually has GPU support,
+# otherwise n_gpu_layers=-1 would presumably not offload to the A10G.
+llama_image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .apt_install("build-essential", "cmake", "git", "libgomp1")
+    .run_commands(
+        "git clone https://github.com/ggerganov/llama.cpp /llama.cpp",
+        "cd /llama.cpp && cmake -B build -DLLAMA_CURL=OFF && cmake --build build --config Release -j$(nproc)",
+        "cd /llama.cpp && pip install -e .",
+    )
+    .pip_install("llama-cpp-python==0.3.4", "fastapi", "uvicorn", "pydantic")
+)
+@app.cls(
+    image=llama_image,
+    gpu="A10G",
+    volumes={MODEL_DIR: model_volume},
+    scaledown_window=300,
+)
+@modal.concurrent(max_inputs=10)
+class Verifier:
+    @modal.enter()
+    def load_model(self):
+        from llama_cpp import Llama
+        self.llm = Llama(
+            model_path=f"{MODEL_DIR}/{MODEL_FILENAME}",
+            n_gpu_layers=-1,
+            n_ctx=8192,
+            n_batch=512,
+            verbose=False,
+        )
+    @modal.method()
+    def verify(self, prompt: str, draft_code: str, language: str = "python") -> dict:
+        system = f"""You are a code verifier. A smaller model drafted the following {language} code.
+Your job:
+1. Check for bugs, logic errors, type errors, off-by-one errors, and security issues.
+2. If the code is correct, respond with exactly: {{"verdict": "PASS"}}
+3. If fixable, respond with: {{"verdict": "FIX", "corrected_code": "<fixed code here>", "reason": "<one line>"}}
+4. If fundamentally wrong, respond with: {{"verdict": "REWRITE", "corrected_code": "<rewritten code>", "reason": "<one line>"}}
+Respond ONLY with valid JSON. No markdown, no explanation outside the JSON."""
+        user = f"Original prompt:\n{prompt}\n\nDrafted code:\n```{language}\n{draft_code}\n```"
+        response = self.llm.create_chat_completion(
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            max_tokens=2048,
+            temperature=0.1,
+        )
+        raw = response["choices"][0]["message"]["content"].strip()
+        try:
+            parsed = json.loads(raw)
+        except json.JSONDecodeError:
+            return {"verdict": "PASS", "reason": "Verifier response could not be parsed."}
+        if parsed.get("verdict") not in {"PASS", "FIX", "REWRITE"}:
+            return {"verdict": "PASS", "reason": "Verifier returned an unknown verdict."}
+        return parsed
+web_image = llama_image
+@app.function(
+    image=web_image,
+    gpu="A10G",
+    volumes={MODEL_DIR: model_volume},
+    scaledown_window=300,
+)
+@modal.asgi_app()
+def verifier_endpoint():
+    from fastapi import FastAPI
+    from fastapi.middleware.cors import CORSMiddleware
+    from pydantic import BaseModel
+    web_app = FastAPI()
+    web_app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    class VerifyRequest(BaseModel):
+        prompt: str
+        draft_code: str
+        language: str = "python"
+    @web_app.post("/verify")
+    async def verify(req: VerifyRequest):
+        return Verifier().verify.remote(req.prompt, req.draft_code, req.language)
+    @web_app.get("/health")
+    async def health():
+        return {"ok": True}
+    return web_app
+@app.function(schedule=modal.Cron("*/5 * * * *"))
+def keep_warm():
+    Verifier().verify.remote("test", "print('hello')", "python")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio==5.30.0
+httpx==0.27.0
+modal==1.4.3
+cbor2==5.6.5
+huggingface-hub==0.28.1

static/engine.js ADDED Viewed

	@@ -0,0 +1,58 @@

+import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";
+const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";
+let generator = null;
+let isLoaded = false;
+export async function loadModel(onProgress) {
+    if (isLoaded) return;
+    generator = await pipeline("text-generation", MODEL_ID, {
+        dtype: "q4",
+        device: "webgpu",
+        progress_callback: onProgress,
+    });
+    isLoaded = true;
+}
+export async function generateCode(prompt, language, onToken, onComplete) {
+    if (!generator) throw new Error("Model not loaded");
+    const messages = [
+        {
+            role: "system",
+            content: `You are an expert ${language} programmer. Write clean, correct, production-ready code. Output only code, with no markdown or explanation.`,
+        },
+        { role: "user", content: prompt },
+    ];
+    const streamer = new TextStreamer(generator.tokenizer, {
+        skip_prompt: true,
+        callback_function: onToken,
+    });
+    const result = await generator(messages, {
+        max_new_tokens: 1024,
+        temperature: 0.2,
+        do_sample: true,
+        streamer,
+    });
+    const generated = result?.[0]?.generated_text;
+    const fullCode = Array.isArray(generated)
+        ? generated.at(-1).content
+        : String(generated || "");
+    onComplete(fullCode);
+    return fullCode;
+}
+export function isWebGPUSupported() {
+    return Boolean(navigator.gpu);
+}
+// Also publish the engine API on window so code outside this module
+// (e.g. inline handlers) can reach it.
+Object.assign(window, {
+    loadModel,
+    generateCode,
+    isWebGPUSupported,
+});

static/style.css ADDED Viewed

	@@ -0,0 +1,212 @@

+/* Colour palette shared by every rule in this stylesheet. */
+:root {
+    --bg: #0d1117;
+    --surface: #161b22;
+    --surface-2: #0f1720;
+    --border: #30363d;
+    --accent: #58a6ff;
+    --accent-warn: #f0883e;
+    --text: #e6edf3;
+    --text-muted: #8b949e;
+    --green: #3fb950;
+    --red: #f85149;
+}
+body,
+.gradio-container {
+    background: var(--bg) !important;
+    color: var(--text) !important;
+    font-family: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
+}
+footer {
+    display: none !important;
+}
+/* Removes elements carrying this class from the layout entirely. */
+.bridge-hidden {
+    display: none !important;
+}
+.app-header {
+    margin: 0 auto 20px;
+    max-width: 980px;
+    text-align: center;
+}
+.app-header h1 {
+    margin: 4px 0 8px;
+    color: var(--text);
+    font-size: clamp(32px, 6vw, 56px);
+    letter-spacing: 0;
+}
+.app-header p {
+    margin: 0;
+    color: var(--text-muted);
+}
+.app-header .eyebrow {
+    color: var(--accent);
+    font-size: 12px;
+    font-weight: 700;
+    text-transform: uppercase;
+}
+.space-init {
+    margin: 0 auto 12px;
+    max-width: 980px;
+    color: var(--text-muted);
+    text-align: center;
+}
+#split-brain-root {
+    color: var(--text);
+}
+.split-topline,
+.status-bar {
+    display: flex;
+    gap: 12px;
+    justify-content: space-between;
+}
+.split-topline {
+    margin-bottom: 10px;
+    color: var(--text-muted);
+    font-size: 12px;
+}
+.webgpu-notice {
+    margin-bottom: 12px;
+    border: 1px solid var(--red);
+    border-radius: 6px;
+    padding: 10px 12px;
+    color: var(--red);
+}
+.load-section {
+    display: grid;
+    grid-template-columns: auto 1fr;
+    gap: 10px 12px;
+    align-items: center;
+    margin-bottom: 12px;
+}
+.load-section.loaded {
+    opacity: 0.72;
+}
+.local-button {
+    border: 1px solid var(--accent);
+    border-radius: 6px;
+    background: var(--accent);
+    color: #07111f;
+    cursor: pointer;
+    font: inherit;
+    font-weight: 700;
+    padding: 10px 14px;
+}
+.local-button:disabled {
+    cursor: not-allowed;
+    opacity: 0.55;
+}
+.loading-bar {
+    height: 8px;
+    overflow: hidden;
+    border-radius: 999px;
+    background: var(--border);
+}
+.loading-bar-fill {
+    width: 0%;
+    height: 100%;
+    background: var(--accent);
+    transition: width 0.3s ease;
+}
+.load-status {
+    grid-column: 1 / -1;
+    color: var(--text-muted);
+    font-size: 12px;
+}
+.code-stream {
+    box-sizing: border-box;
+    min-height: 390px;
+    max-height: 58vh;
+    margin: 0;
+    overflow: auto;
+    border: 1px solid var(--border);
+    border-radius: 8px 8px 0 0;
+    background: var(--surface);
+    color: var(--text);
+    font-family: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
+    font-size: 13px;
+    line-height: 1.6;
+    padding: 16px;
+    white-space: pre-wrap;
+}
+/* Red border plus a 0.45s red-to-surface background fade. */
+.rollback-flash {
+    border-color: var(--red) !important;
+    animation: flash 0.45s ease;
+}
+@keyframes flash {
+    0% {
+        background: rgba(248, 81, 73, 0.24);
+    }
+    100% {
+        background: var(--surface);
+    }
+}
+.status-bar {
+    align-items: center;
+    border: 1px solid var(--border);
+    border-top: 0;
+    border-radius: 0 0 8px 8px;
+    background: var(--surface-2);
+    color: var(--text-muted);
+    font-size: 12px;
+    padding: 10px 12px;
+}
+.status-warning,
+.verdict-fix,
+.verdict-rewrite {
+    color: var(--accent-warn);
+}
+.status-success,
+.verdict-pass {
+    color: var(--green);
+}
+.status-neutral,
+.verdict-idle {
+    color: var(--text-muted);
+}
+textarea,
+select,
+button {
+    font-family: inherit !important;
+}
+@media (max-width: 720px) {
+    .load-section {
+        grid-template-columns: 1fr;
+    }
+    .split-topline,
+    .status-bar {
+        align-items: flex-start;
+        flex-direction: column;
+    }
+    .code-stream {
+        min-height: 320px;
+    }
+}

static/ui.js ADDED Viewed

	@@ -0,0 +1,87 @@

+// Tokens appended since the last reset or rollback.
+let currentTokens = [];
+// Text currently represented in the stream display.
+let streamBuffer = "";
+export function appendToken(token) {
+    streamBuffer += token;
+    currentTokens.push(token);
+    const display = document.getElementById("stream-display");
+    if (display) {
+        display.textContent = streamBuffer;
+        display.scrollTop = display.scrollHeight;
+    }
+}
+export function setStatus(text, type = "neutral") {
+    const el = document.getElementById("status-text");
+    if (!el) return;
+    el.textContent = text;
+    el.className = `status-${type}`;
+}
+export function setVerifierStatus(verdict) {
+    const el = document.getElementById("verifier-status");
+    if (!el) return;
+    const labels = {
+        IDLE: "Verifier idle",
+        PASS: "Verified",
+        FIX: "Fixed",
+        REWRITE: "Rewritten",
+        CHECKING: "Verifying...",
+    };
+    el.textContent = labels[verdict] || "";
+    el.className = `verdict-${String(verdict || "idle").toLowerCase()}`;
+}
+export async function rollbackAndReplace(correctedCode, reason, verdict = "FIX") {
+    const display = document.getElementById("stream-display");
+    if (!display) return;
+    display.classList.add("rollback-flash");
+    setVerifierStatus(verdict);
+    setStatus(`Verifier corrected: ${reason}`, "warning");
+    await sleep(450);
+    display.classList.remove("rollback-flash");
+    display.textContent = "";
+    streamBuffer = correctedCode;
+    currentTokens = [];
+    for (let i = 0; i < correctedCode.length; i += 1) {
+        display.textContent += correctedCode[i];
+        if (i % 5 === 0) await sleep(8);
+    }
+    setVerifierStatus("PASS");
+    setStatus("Corrected block verified", "success");
+}
+export function getCurrentCode() {
+    return streamBuffer;
+}
+export function reset() {
+    streamBuffer = "";
+    currentTokens = [];
+    const display = document.getElementById("stream-display");
+    if (display) display.textContent = "";
+    const tokenCount = document.getElementById("token-count");
+    if (tokenCount) tokenCount.textContent = "0 tok/s";
+}
+function sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+}
+// Also publish the UI helpers on window so code outside this module can
+// call them.
+Object.assign(window, {
+    appendToken,
+    setStatus,
+    setVerifierStatus,
+    rollbackAndReplace,
+    getCurrentCode,
+    reset,
+});