""" Discode — chat your way to a live web app. Left: a slim, collapsible chat rail. Talk to the AI, ask for an app, then ask for changes ("make the snake green", "add a score counter"). Right: the generated app, rendered live with full JavaScript. Theme: Frutiger Aero / skeuomorphic glass. Model: Gemma 4 12B via llama.cpp. Local: start `llama-server -hf ggml-org/gemma-4-12B-it-GGUF:Q4_K_M --jinja -c 4096` before running this app, or install llama-cpp-python so the app can spawn it. Space: the app spawns llama_cpp.server on CPU Basic unless a server is already running. """ import os import re import sys import time import html as html_lib import subprocess import gradio as gr import requests from agno.agent import Agent from agno.models.llama_cpp import LlamaCpp MODEL_REPO = os.environ.get("MODEL_REPO", "ggml-org/gemma-4-12B-it-GGUF") MODEL_FILE = os.environ.get("MODEL_FILE", "gemma-4-12B-it-Q4_K_M.gguf") HOST = os.environ.get("LLAMACPP_HOST", "127.0.0.1") PORT = int(os.environ.get("LLAMACPP_PORT", "8080")) BASE_URL = os.environ.get("LLAMACPP_BASE_URL", f"http://{HOST}:{PORT}/v1") N_CTX = os.environ.get("LLAMACPP_CTX", "4096") N_THREADS = os.environ.get("LLAMACPP_THREADS", "2") SYSTEM_PROMPT = """You are Discode, a friendly expert front-end engineer who builds and edits ONE single-page web app for the user through conversation. On every turn where the user wants an app or a change: 1. First write ONE short, friendly sentence (what you built or changed). 2. Then output the COMPLETE, updated HTML document inside a single ```html ... ``` code block. The HTML must be: - A full self-contained document: , , , . - Inline CSS (in