Spaces:
Sleeping
Sleeping
Commit Β·
c275980
1
Parent(s): 643b9ff
Add MicroGPT Julia inference server
Browse files

Docker-based OpenAI-compatible API serving a ~5K param
character-level GPT trained on philosophy quotes.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- Dockerfile +29 -0
- Project.toml +3 -0
- README.md +32 -1
- checkpoint.jl +103 -0
- checkpoints/best_model.json +0 -0
- model.jl +355 -0
- server.jl +172 -0
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM julia:1.10-bookworm

# HuggingFace Spaces requires user ID 1000.
# Make /app writable by that user so Pkg can write Manifest.toml there.
RUN useradd -m -u 1000 user && mkdir -p /app && chown user:user /app

WORKDIR /app
COPY --chown=user Project.toml /app/

# Switch to the runtime user BEFORE installing packages. Installing as root
# would place the package depot in /root/.julia, which is unreadable once we
# drop to uid 1000 — the server would then fail to find HTTP/JSON3 at runtime.
# As `user`, the depot lands in /home/user/.julia and survives the switch.
USER user
ENV HOME=/home/user

# Install Julia packages (cached layer)
RUN julia --project=/app -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()'

# Precompile server deps for faster cold start
RUN julia --project=/app -e 'using HTTP, JSON3; println("Precompile done")'

WORKDIR /home/user/app

# Copy application code
COPY --chown=user model.jl /home/user/app/
COPY --chown=user checkpoint.jl /home/user/app/
COPY --chown=user server.jl /home/user/app/
COPY --chown=user Project.toml /home/user/app/
COPY --chown=user checkpoints/ /home/user/app/checkpoints/

# Default port for HuggingFace Spaces (override with PORT env var)
EXPOSE 7860

CMD ["julia", "--project=/app", "/home/user/app/server.jl"]
|
Project.toml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
# Julia project dependencies for the MicroGPT inference server.
# UUIDs identify packages in the General registry; versions are resolved
# by Pkg.instantiate() at build time (no [compat] bounds are pinned).
[deps]
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
|
README.md
CHANGED
|
@@ -7,6 +7,37 @@ sdk: docker
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
short_description: MicroGPT implementation in Julia
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
short_description: MicroGPT implementation in Julia
|
| 10 |
+
app_port: 7860
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# MicroJulia
|
| 14 |
+
|
| 15 |
+
A ~5K parameter character-level GPT written from scratch in Julia β no ML frameworks, just pure scalar autograd.
|
| 16 |
+
|
| 17 |
+
Trained on philosophy quotes (Marcus Aurelius, Seneca, Socrates, Buddha, Confucius, Nietzsche, etc.)
|
| 18 |
+
|
| 19 |
+
## API
|
| 20 |
+
|
| 21 |
+
OpenAI-compatible inference endpoint:
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
curl -X POST https://lisamegawatts-microjulia.hf.space/v1/chat/completions \
|
| 25 |
+
-H "Content-Type: application/json" \
|
| 26 |
+
-d '{"messages":[{"role":"user","content":"The purpose of"}],"temperature":0.8,"max_tokens":128}'
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### Endpoints
|
| 30 |
+
|
| 31 |
+
| Method | Path | Description |
|
| 32 |
+
|--------|------|-------------|
|
| 33 |
+
| GET | `/` | Health check |
|
| 34 |
+
| GET | `/v1/models` | List models |
|
| 35 |
+
| POST | `/v1/chat/completions` | Generate text |
|
| 36 |
+
|
| 37 |
+
## Architecture
|
| 38 |
+
|
| 39 |
+
- 1 transformer layer, 16-dim embeddings, 4 attention heads
|
| 40 |
+
- Custom scalar autograd engine (`Value` type)
|
| 41 |
+
- Character-level tokenizer (no BPE)
|
| 42 |
+
- KV cache for efficient inference
|
| 43 |
+
- ~5,000 parameters
|
checkpoint.jl
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#=
|
| 2 |
+
checkpoint.jl β Save/load checkpoint functions for MicroGPT
|
| 3 |
+
|
| 4 |
+
Checkpoint format: JSON with weights, optimizer state, and training metadata.
|
| 5 |
+
Used by train.jl, resume_train.jl, and server.jl.
|
| 6 |
+
=#
|
| 7 |
+
|
| 8 |
+
include("model.jl")
|
| 9 |
+
using JSON3
|
| 10 |
+
|
| 11 |
+
"""
Serialize model weights, optimizer buffers, and training history to a JSON
checkpoint at `path` (parent directories are created as needed).

Only the raw `Float64` weights are stored — autograd `Value` wrappers are
stripped. Keyword arguments default to "fresh run" values so the function can
also be used to snapshot an untrained model.

NOTE(review): JSON has no literal for `Inf`; if `best_val_loss` is still `Inf`
when saved, the round-trip through JSON3 may not preserve it — confirm before
relying on resuming from a pre-validation checkpoint.
"""
function save_checkpoint(path::String, state_dict, param_keys, uchars, hyperparams;
                         m_buf=nothing, v_buf=nothing, step::Int=0,
                         lr::Float64=0.01, β1::Float64=0.85, β2::Float64=0.99,
                         best_val_loss::Float64=Inf,
                         train_losses::Vector{Float64}=Float64[],
                         val_losses::Vector{Float64}=Float64[],
                         total_steps::Int=0, num_steps_target::Int=0)

    # Strip autograd wrappers: keep only the raw Float64 weights, in the
    # deterministic order given by param_keys.
    sd_data = Dict{String,Any}(
        k => [[v.data for v in row] for row in state_dict[k]] for k in param_keys
    )

    # Adam-style optimizer buffers; empty vectors stand in for "no state yet".
    opt_state = Dict{String,Any}(
        "m_buf" => m_buf === nothing ? Float64[] : collect(m_buf),
        "v_buf" => v_buf === nothing ? Float64[] : collect(v_buf),
        "step"  => step,
        "lr"    => lr,
        "beta1" => β1,
        "beta2" => β2
    )

    train_meta = Dict{String,Any}(
        "best_val_loss" => best_val_loss,
        "train_losses" => train_losses,
        "val_losses" => val_losses,
        "total_steps_completed" => total_steps,
        "num_steps_target" => num_steps_target
    )

    checkpoint = Dict{String,Any}(
        "uchars" => [string(c) for c in uchars],
        "hyperparams" => hyperparams,
        "state_dict" => sd_data,
        "optimizer" => opt_state,
        "training" => train_meta
    )

    mkpath(dirname(path))
    open(path, "w") do io
        JSON3.write(io, checkpoint)
    end
    vl_str = best_val_loss == Inf ? "Inf" : @sprintf("%.4f", best_val_loss)
    println("Checkpoint saved: $path (step $step, best_val_loss=$vl_str)")
end
|
| 53 |
+
|
| 54 |
+
"""
Read a JSON checkpoint written by `save_checkpoint` and rebuild everything
needed for training or inference.

Returns a NamedTuple carrying the weights (rewrapped as `Value` objects),
character vocab, hyperparameters, optimizer buffers, and training history.
"""
function load_checkpoint(path::String)
    println("Loading checkpoint from $path ...")
    raw = JSON3.read(read(path, String))

    # Character vocab; BOS is the extra token one past the last real character.
    uchars = [only(String(s)) for s in raw["uchars"]]
    BOS = length(uchars) + 1
    vocab_size = BOS

    # Model hyperparameters.
    hp = raw["hyperparams"]
    n_layer    = Int(hp["n_layer"])
    n_embd     = Int(hp["n_embd"])
    block_size = Int(hp["block_size"])
    n_head     = Int(hp["n_head"])
    head_dim   = n_embd ÷ n_head

    # Rewrap the stored floats as autograd Values.
    state_dict = Dict{String, Vector{Vector{Value}}}()
    for (key, matrix) in pairs(raw["state_dict"])
        state_dict[string(key)] = [[Value(Float64(v)) for v in row] for row in matrix]
    end

    # Optimizer state (empty buffers mean "no state saved yet").
    opt = raw["optimizer"]
    m_buf = Float64.(collect(opt["m_buf"]))
    v_buf = Float64.(collect(opt["v_buf"]))
    step  = Int(opt["step"])
    lr    = Float64(opt["lr"])
    β1    = Float64(opt["beta1"])
    β2    = Float64(opt["beta2"])

    # Training history / progress metadata.
    trn = raw["training"]
    best_val_loss    = Float64(trn["best_val_loss"])
    train_losses     = Float64.(collect(trn["train_losses"]))
    val_losses       = Float64.(collect(trn["val_losses"]))
    total_steps      = Int(trn["total_steps_completed"])
    num_steps_target = Int(trn["num_steps_target"])

    println(" vocab=$vocab_size, embd=$n_embd, layers=$n_layer, step=$step, best_val=$(round(best_val_loss, digits=4))")

    return (;
        state_dict, uchars, BOS, vocab_size,
        n_layer, n_embd, block_size, n_head, head_dim,
        m_buf, v_buf, step, lr, β1, β2,
        best_val_loss, train_losses, val_losses,
        total_steps, num_steps_target
    )
end
|
checkpoints/best_model.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.jl
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#=
|
| 2 |
+
model.jl β Shared model code for MicroGPT
|
| 3 |
+
|
| 4 |
+
Contains: Value autograd type, neural net primitives, GPT forward pass,
|
| 5 |
+
and the philosophy quotes dataset. Included by all other scripts.
|
| 6 |
+
=#
|
| 7 |
+
|
| 8 |
+
using Random
|
| 9 |
+
using Printf
|
| 10 |
+
|
| 11 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
+
# Autograd engine β scalar Value type with automatic differentiation
|
| 13 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
+
|
| 15 |
+
# A node in the computation graph: the forward value, the gradient accumulated
# during backprop, and — for non-leaf nodes — the inputs together with the
# local derivative of this node with respect to each input.
mutable struct Value
    data::Float64
    grad::Float64
    _children::Vector{Value}
    _local_grads::Vector{Float64}
end

# Leaf node: no parents, gradient starts at zero.
Value(x::Real) = Value(Float64(x), 0.0, Value[], Float64[])

# --- Core operations (each records its inputs and local derivatives) ---

Base.:+(a::Value, b::Value) = Value(a.data + b.data, 0.0, [a, b], [1.0, 1.0])
Base.:*(a::Value, b::Value) = Value(a.data * b.data, 0.0, [a, b], [b.data, a.data])
Base.:-(a::Value, b::Value) = Value(a.data - b.data, 0.0, [a, b], [1.0, -1.0])
Base.:/(a::Value, b::Value) = Value(a.data / b.data, 0.0, [a, b], [1.0 / b.data, -a.data / b.data^2])
Base.:^(a::Value, n::Real)  = Value(a.data^n, 0.0, [a], [n * a.data^(n - 1)])
Base.log(a::Value)          = Value(log(a.data), 0.0, [a], [1.0 / a.data])

function Base.exp(a::Value)
    e = exp(a.data)  # d/dx exp(x) = exp(x): reuse the forward result
    Value(e, 0.0, [a], [e])
end

# Subgradient 0 at the kink (a.data == 0), matching common practice.
relu(a::Value) = Value(max(0.0, a.data), 0.0, [a], [Float64(a.data > 0)])

# --- Scalar promotion: the plain number is a constant, so it contributes no
# --- child and the Value operand gets the whole local derivative.

function Base.:+(a::Value, b::Real)
    Value(a.data + b, 0.0, [a], [1.0])
end
function Base.:+(a::Real, b::Value)
    Value(a + b.data, 0.0, [b], [1.0])
end
function Base.:*(a::Value, b::Real)
    Value(a.data * b, 0.0, [a], [Float64(b)])
end
function Base.:*(a::Real, b::Value)
    Value(a * b.data, 0.0, [b], [Float64(a)])
end
function Base.:-(a::Value, b::Real)
    Value(a.data - b, 0.0, [a], [1.0])
end
function Base.:-(a::Real, b::Value)
    Value(a - b.data, 0.0, [b], [-1.0])
end
function Base.:/(a::Value, b::Real)
    Value(a.data / b, 0.0, [a], [1.0 / b])
end
function Base.:/(a::Real, b::Value)
    Value(a / b.data, 0.0, [b], [-a / b.data^2])
end
Base.:-(a::Value) = Value(-a.data, 0.0, [a], [-1.0])

# Enables sum() over Value vectors and comparisons by forward value.
Base.zero(::Type{Value}) = Value(0.0)
Base.isless(a::Value, b::Value) = a.data < b.data
|
| 73 |
+
|
| 74 |
+
# --- Backpropagation ---
|
| 75 |
+
|
| 76 |
+
"""
Reverse-mode autodiff: accumulate `grad` on every node reachable from `loss`.

Sets `loss.grad = 1.0`, then walks the graph in reverse topological order,
adding `local_grad * parent.grad` into each child. Gradients accumulate, so
callers must zero them between backward passes.

The topological sort is an iterative post-order DFS with an explicit stack.
The original recursive `build_topo` overflowed the call stack on deep graphs
(autograd depth grows with sequence length); iteration removes that limit
while producing the same ordering guarantees for a DAG.
"""
function backward!(loss::Value)
    topo = Value[]
    visited = Set{UInt64}()
    # Each stack entry is (node, expanded): expanded=false means "discover
    # children first", expanded=true means "all children done — emit node".
    stack = Tuple{Value,Bool}[(loss, false)]
    while !isempty(stack)
        node, expanded = pop!(stack)
        if expanded
            push!(topo, node)
            continue
        end
        id = objectid(node)
        id in visited && continue
        push!(visited, id)
        push!(stack, (node, true))  # emit after all children below are emitted
        for child in node._children
            push!(stack, (child, false))
        end
    end

    # Seed the output gradient and propagate parent→child.
    loss.grad = 1.0
    for v in reverse(topo)
        for (child, lg) in zip(v._children, v._local_grads)
            child.grad += lg * v.grad
        end
    end
end
|
| 96 |
+
|
| 97 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 98 |
+
# Neural network primitives
|
| 99 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 100 |
+
|
| 101 |
+
# Matrix–vector product: `w` is a list of weight rows; returns the vector
# whose i-th entry is the dot product of row i with `x`. No bias term.
function linear(x::Vector{Value}, w::Vector{Vector{Value}})
    out = Vector{Value}(undef, length(w))
    for (i, row) in enumerate(w)
        out[i] = sum(wi * xi for (wi, xi) in zip(row, x))
    end
    return out
end
|
| 104 |
+
|
| 105 |
+
# Numerically stable softmax over a vector of Values. Subtracting the
# (constant) max before exponentiating prevents overflow without changing
# the result; the max is taken from raw .data so it adds no graph edges.
function softmax_v(logits::Vector{Value})
    shift = maximum(v.data for v in logits)
    exps = map(v -> exp(v - shift), logits)
    denom = sum(exps)
    return [e / denom for e in exps]
end
|
| 111 |
+
|
| 112 |
+
# RMS normalization without a learned gain: x / sqrt(mean(x.^2) + ε).
# The 1e-5 epsilon keeps the scale finite for an all-zero input.
function rmsnorm(x::Vector{Value})
    mean_sq = sum(xi * xi for xi in x) / length(x)
    inv_rms = (mean_sq + 1e-5) ^ (-0.5)
    return [xi * inv_rms for xi in x]
end
|
| 117 |
+
|
| 118 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
+
# GPT forward pass β one token at a time with KV cache
|
| 120 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
+
|
| 122 |
+
"""
Single-token forward pass of the transformer with a KV cache.

`keys`/`values` hold, per layer, the cached key/value vectors of every
previously processed position; this call appends the current position's
entries before attending, so attention always covers positions 1..current.
Returns the unnormalized logits over the vocabulary.
"""
function gpt(token_id::Int, pos_id::Int,
             keys::Vector{Vector{Vector{Value}}},
             values::Vector{Vector{Vector{Value}}},
             state_dict::Dict{String, Vector{Vector{Value}}},
             n_layer::Int, n_head::Int, head_dim::Int)

    # Embed: token embedding + learned position embedding, then normalize.
    emb_tok = state_dict["wte"][token_id]
    emb_pos = state_dict["wpe"][pos_id]
    x = [t + p for (t, p) in zip(emb_tok, emb_pos)]
    x = rmsnorm(x)

    for li in 0:n_layer-1
        # --- Attention sub-block (pre-norm residual) ---
        residual = x
        xn = rmsnorm(x)
        q = linear(xn, state_dict["layer$(li).attn_wq"])
        k = linear(xn, state_dict["layer$(li).attn_wk"])
        v = linear(xn, state_dict["layer$(li).attn_wv"])
        # Cache this position's key/value for layer li (1-based layer index).
        push!(keys[li+1], k)
        push!(values[li+1], v)

        x_attn = Value[]
        for h in 0:n_head-1
            # Slice out this head's channels.
            lo = h * head_dim + 1
            hi = lo + head_dim - 1
            q_h = q[lo:hi]
            k_h = [ki[lo:hi] for ki in keys[li+1]]
            v_h = [vi[lo:hi] for vi in values[li+1]]
            T = length(k_h)
            scale = sqrt(Float64(head_dim))
            # Scaled dot-product attention over all cached positions.
            attn_logits = [sum(q_h[j] * k_h[t][j] for j in 1:head_dim) / scale
                           for t in 1:T]
            attn_weights = softmax_v(attn_logits)
            head_out = [sum(attn_weights[t] * v_h[t][j] for t in 1:T)
                        for j in 1:head_dim]
            append!(x_attn, head_out)
        end
        projected = linear(x_attn, state_dict["layer$(li).attn_wo"])
        x = [a + b for (a, b) in zip(projected, residual)]

        # --- MLP sub-block (pre-norm residual) ---
        residual = x
        xn = rmsnorm(x)
        hidden = linear(xn, state_dict["layer$(li).mlp_fc1"])
        hidden = [relu(u) for u in hidden]
        mlp_out = linear(hidden, state_dict["layer$(li).mlp_fc2"])
        x = [a + b for (a, b) in zip(mlp_out, residual)]
    end

    # Project to vocabulary logits.
    return linear(x, state_dict["lm_head"])
end
|
| 170 |
+
|
| 171 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 172 |
+
# Dataset β philosophy quotes (all ancient/classical, public domain)
|
| 173 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 174 |
+
|
| 175 |
+
const PHILOSOPHY_QUOTES = [
|
| 176 |
+
"waste no more time arguing about what a good man should be",
|
| 177 |
+
"the happiness of your life depends upon the quality of your thoughts",
|
| 178 |
+
"you have power over your mind not outside events",
|
| 179 |
+
"very little is needed to make a happy life",
|
| 180 |
+
"the soul becomes dyed with the color of its thoughts",
|
| 181 |
+
"when you arise in the morning think of what a privilege it is to be alive",
|
| 182 |
+
"the best revenge is to be unlike him who performed the injury",
|
| 183 |
+
"accept the things to which fate binds you",
|
| 184 |
+
"if it is not right do not do it if it is not true do not say it",
|
| 185 |
+
"look well into thyself there is a source of strength",
|
| 186 |
+
"do every act of your life as though it were the very last act of your life",
|
| 187 |
+
"it is not death that a man should fear but never beginning to live",
|
| 188 |
+
"we suffer more often in imagination than in reality",
|
| 189 |
+
"true happiness is to enjoy the present without anxious dependence upon the future",
|
| 190 |
+
"it is not because things are difficult that we do not dare",
|
| 191 |
+
"it is because we do not dare that they are difficult",
|
| 192 |
+
"luck is what happens when preparation meets opportunity",
|
| 193 |
+
"begin at once to live and count each separate day as a separate life",
|
| 194 |
+
"the whole future lies in uncertainty live immediately",
|
| 195 |
+
"sometimes even to live is an act of courage",
|
| 196 |
+
"if a man knows not which port he sails no wind is favorable",
|
| 197 |
+
"he who is brave is free",
|
| 198 |
+
"difficulties strengthen the mind as labor does the body",
|
| 199 |
+
"first say to yourself what you would be and then do what you have to do",
|
| 200 |
+
"no man is free who is not master of himself",
|
| 201 |
+
"only the educated are free",
|
| 202 |
+
"man is not worried by real problems so much as by his imagined anxieties",
|
| 203 |
+
"wealth consists not in having great possessions but in having few wants",
|
| 204 |
+
"make the mind tougher by exposing it to adversity",
|
| 205 |
+
"the mind that is anxious about future events is miserable",
|
| 206 |
+
"the unexamined life is not worth living",
|
| 207 |
+
"i know that i know nothing",
|
| 208 |
+
"be kind for everyone you meet is fighting a hard battle",
|
| 209 |
+
"the only true wisdom is in knowing you know nothing",
|
| 210 |
+
"wonder is the beginning of wisdom",
|
| 211 |
+
"education is the kindling of a flame not the filling of a vessel",
|
| 212 |
+
"strong minds discuss ideas average minds discuss events weak minds discuss people",
|
| 213 |
+
"the secret of change is to focus all of your energy on building the new",
|
| 214 |
+
"no man ever steps in the same river twice",
|
| 215 |
+
"character is fate",
|
| 216 |
+
"the only constant in life is change",
|
| 217 |
+
"much learning does not teach understanding",
|
| 218 |
+
"knowing yourself is the beginning of all wisdom",
|
| 219 |
+
"happiness depends upon ourselves",
|
| 220 |
+
"it is the mark of an educated mind to entertain a thought without accepting it",
|
| 221 |
+
"the more you know the more you realize you do not know",
|
| 222 |
+
"patience is bitter but its fruit is sweet",
|
| 223 |
+
"we are what we repeatedly do excellence then is not an act but a habit",
|
| 224 |
+
"courage is the first of human qualities because it guarantees the others",
|
| 225 |
+
"quality is not an act it is a habit",
|
| 226 |
+
"pleasure in the job puts perfection in the work",
|
| 227 |
+
"the whole is greater than the sum of its parts",
|
| 228 |
+
"nature does nothing in vain",
|
| 229 |
+
"what is honored in a country is cultivated there",
|
| 230 |
+
"the roots of education are bitter but the fruit is sweet",
|
| 231 |
+
"to perceive is to suffer",
|
| 232 |
+
"the journey of a thousand miles begins with a single step",
|
| 233 |
+
"knowing others is intelligence knowing yourself is true wisdom",
|
| 234 |
+
"mastering others is strength mastering yourself is true power",
|
| 235 |
+
"when i let go of what i am i become what i might be",
|
| 236 |
+
"nature does not hurry yet everything is accomplished",
|
| 237 |
+
"silence is a source of great strength",
|
| 238 |
+
"a good traveler has no fixed plans and is not intent on arriving",
|
| 239 |
+
"be content with what you have rejoice in the way things are",
|
| 240 |
+
"he who conquers himself is the mightiest warrior",
|
| 241 |
+
"real knowledge is to know the extent of ones ignorance",
|
| 242 |
+
"it does not matter how slowly you go so long as you do not stop",
|
| 243 |
+
"our greatest glory is not in never falling but in rising every time we fall",
|
| 244 |
+
"before you embark on a journey of revenge dig two graves",
|
| 245 |
+
"the man who moves a mountain begins by carrying away small stones",
|
| 246 |
+
"to see what is right and not do it is a want of courage",
|
| 247 |
+
"study the past if you would define the future",
|
| 248 |
+
"wherever you go go with all your heart",
|
| 249 |
+
"to be wronged is nothing unless you continue to remember it",
|
| 250 |
+
"the wise man is one who knows what he does not know",
|
| 251 |
+
"he who learns but does not think is lost",
|
| 252 |
+
"i think therefore i am",
|
| 253 |
+
"the heart has its reasons which reason knows nothing of",
|
| 254 |
+
"all of mans misfortune comes from not knowing how to sit quietly in a room",
|
| 255 |
+
"the more i read the more certain i am that i know nothing",
|
| 256 |
+
"there is nothing either good or bad but thinking makes it so",
|
| 257 |
+
"one cannot step twice in the same river",
|
| 258 |
+
"man is born free and everywhere he is in chains",
|
| 259 |
+
"life must be understood backward but it must be lived forward",
|
| 260 |
+
"anxiety is the dizziness of freedom",
|
| 261 |
+
"you will never be happy if you continue to search for what happiness consists of",
|
| 262 |
+
"act only according to that maxim which you can will to become universal law",
|
| 263 |
+
"he who thinks great thoughts often makes great errors",
|
| 264 |
+
"to live is to suffer to survive is to find some meaning in the suffering",
|
| 265 |
+
"without music life would be a mistake",
|
| 266 |
+
"he who has a why to live for can bear almost any how",
|
| 267 |
+
"that which does not kill us makes us stronger",
|
| 268 |
+
"there are no facts only interpretations",
|
| 269 |
+
"you must have chaos within you to give birth to a dancing star",
|
| 270 |
+
"whoever fights monsters should see to it that he does not become a monster",
|
| 271 |
+
"when you gaze long into an abyss the abyss also gazes into you",
|
| 272 |
+
"the individual has always had to struggle to keep from being overwhelmed by the tribe",
|
| 273 |
+
"there is always some madness in love but there is also always some reason in madness",
|
| 274 |
+
"the snake which cannot cast its skin has to die",
|
| 275 |
+
"in the middle of difficulty lies opportunity",
|
| 276 |
+
"the mind is everything what you think you become",
|
| 277 |
+
"peace comes from within do not seek it without",
|
| 278 |
+
"all that we are is the result of what we have thought",
|
| 279 |
+
"three things cannot be long hidden the sun the moon and the truth",
|
| 280 |
+
"the only way to do great work is to love what you do",
|
| 281 |
+
"virtue is not given by money but from virtue comes money",
|
| 282 |
+
"the measure of a man is what he does with power",
|
| 283 |
+
"no great mind has ever existed without a touch of madness",
|
| 284 |
+
"the energy of the mind is the essence of life",
|
| 285 |
+
"those who know do not speak those who speak do not know",
|
| 286 |
+
"the flame that burns twice as bright burns half as long",
|
| 287 |
+
"what we achieve inwardly will change outer reality",
|
| 288 |
+
"the only thing i know is that i know nothing and i am not quite sure that i know that",
|
| 289 |
+
"everything has beauty but not everyone sees it",
|
| 290 |
+
"the greatest wealth is to live content with little",
|
| 291 |
+
"it is during our darkest moments that we must focus to see the light",
|
| 292 |
+
"where there is love there is life",
|
| 293 |
+
"the mind is not a vessel to be filled but a fire to be kindled",
|
| 294 |
+
]
|
| 295 |
+
|
| 296 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 297 |
+
# Helpers used by training and inference scripts
|
| 298 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 299 |
+
|
| 300 |
+
# Deterministic parameter key ordering (must match across all scripts).
# Globals first, then per-layer weights in a fixed attention/MLP order.
function get_param_keys(n_layer::Int)
    ks = ["wte", "wpe", "lm_head"]
    per_layer = ["attn_wq", "attn_wk", "attn_wv", "attn_wo", "mlp_fc1", "mlp_fc2"]
    for i in 0:n_layer-1, name in per_layer
        push!(ks, "layer$i.$name")
    end
    return ks
end
|
| 311 |
+
|
| 312 |
+
# Random weight matrix: `nout` rows of `nin` entries, each drawn from a
# normal distribution with standard deviation `std` (uses the global RNG).
init_matrix(nout::Int, nin::Int; std=0.08) =
    [[Value(randn() * std) for _ in 1:nin] for _ in 1:nout]
|
| 316 |
+
|
| 317 |
+
# Flatten state_dict into a single params vector, visiting matrices in the
# deterministic `param_keys` order and rows top-to-bottom within each matrix.
function flatten_params(state_dict, param_keys)
    flat = Value[]
    for key in param_keys, row in state_dict[key]
        append!(flat, row)
    end
    return flat
end
|
| 327 |
+
|
| 328 |
+
"""
Sample text autoregressively from a trained model; returns the generated
`String`. Generation starts from BOS, draws at most
`min(max_tokens, block_size)` characters (position embeddings only cover
`block_size` slots), and stops early if BOS is sampled. `temperature` scales
the logits before softmax (lower = greedier). Uses the global RNG.
"""
function generate(state_dict, uchars, BOS, n_layer, n_head, head_dim, block_size;
                  temperature=0.8, max_tokens=128)
    kv_keys = [Vector{Vector{Value}}() for _ in 1:n_layer]
    kv_vals = [Vector{Vector{Value}}() for _ in 1:n_layer]
    token_id = BOS
    sample = Char[]
    limit = min(max_tokens, block_size)
    for pos in 1:limit
        logits = gpt(token_id, pos, kv_keys, kv_vals, state_dict, n_layer, n_head, head_dim)
        scaled = [l / temperature for l in logits]
        probs = softmax_v(scaled)
        weights = [p.data for p in probs]
        # Inverse-CDF sampling. FIX: the fallback (when floating-point rounding
        # leaves the cumulative sum slightly below r) is now the LAST index;
        # the original fell back to token 1, silently biasing those rare draws
        # toward the first vocab character.
        r = rand()
        cum = 0.0
        token_id = length(weights)
        for (idx, w) in enumerate(weights)
            cum += w
            if r <= cum
                token_id = idx
                break
            end
        end
        token_id == BOS && break  # BOS doubles as end-of-sequence
        push!(sample, uchars[token_id])
    end
    return String(sample)
end
|
server.jl
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#=
|
| 2 |
+
server.jl β OpenAI/OpenRouter-compatible inference server for MicroGPT
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
GET / β health check / API info
|
| 6 |
+
GET /v1/models β list available models
|
| 7 |
+
POST /v1/chat/completions β generate philosophy text (OpenAI format)
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
julia --project=. server.jl
|
| 11 |
+
WEIGHTS_PATH=checkpoints/checkpoint_step400.json julia --project=. server.jl
|
| 12 |
+
PORT=8080 julia --project=. server.jl
|
| 13 |
+
=#
|
| 14 |
+
|
| 15 |
+
include("checkpoint.jl")
|
| 16 |
+
using HTTP
|
| 17 |
+
using UUIDs
|
| 18 |
+
using Sockets
|
| 19 |
+
|
| 20 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
# Load model at startup
|
| 22 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 23 |
+
|
| 24 |
+
const WEIGHTS_PATH = get(ENV, "WEIGHTS_PATH", "checkpoints/best_model.json")
|
| 25 |
+
const PORT = parse(Int, get(ENV, "PORT", "7860"))
|
| 26 |
+
|
| 27 |
+
const ckpt = load_checkpoint(WEIGHTS_PATH)
|
| 28 |
+
const STATE_DICT = ckpt.state_dict
|
| 29 |
+
const UCHARS = ckpt.uchars
|
| 30 |
+
const BOS_TOKEN = ckpt.BOS
|
| 31 |
+
const N_LAYER = ckpt.n_layer
|
| 32 |
+
const N_EMBD = ckpt.n_embd
|
| 33 |
+
const BLOCK_SIZE = ckpt.block_size
|
| 34 |
+
const N_HEAD = ckpt.n_head
|
| 35 |
+
const HEAD_DIM = ckpt.head_dim
|
| 36 |
+
const MODEL_CREATED_AT = Int(floor(time()))
|
| 37 |
+
|
| 38 |
+
println("Model ready: vocab=$(ckpt.vocab_size), embd=$N_EMBD, layers=$N_LAYER")
|
| 39 |
+
|
| 40 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
+
# API handlers
|
| 42 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
|
| 44 |
+
# Build an HTTP.Response with a JSON-serialized body and permissive CORS
# headers (any origin; GET/POST/OPTIONS; Content-Type and Authorization).
function json_response(status::Int, body)
    cors_headers = [
        "Content-Type" => "application/json",
        "Access-Control-Allow-Origin" => "*",
        "Access-Control-Allow-Methods" => "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers" => "Content-Type, Authorization",
    ]
    return HTTP.Response(status, cors_headers, JSON3.write(body))
end
|
| 52 |
+
|
| 53 |
+
# GET / — health check: returns static service metadata as JSON.
function handle_root(req::HTTP.Request)
    info = Dict(
        "name" => "MicroGPT Philosophy",
        "version" => "1.0.0",
        "description" => "A ~5K parameter character-level GPT trained on philosophy quotes",
        "endpoints" => ["/v1/models", "/v1/chat/completions"],
        "compatible_with" => ["OpenAI API", "OpenRouter"],
    )
    return json_response(200, info)
end
|
| 62 |
+
|
| 63 |
+
"""
    handle_models(req) -> HTTP.Response

`GET /v1/models` — OpenAI-compatible model listing. Exactly one model is
served, with `created` fixed at server start time (`MODEL_CREATED_AT`).
"""
function handle_models(req::HTTP.Request)
    model_entry = Dict(
        "id" => "microgpt-philosophy",
        "object" => "model",
        "created" => MODEL_CREATED_AT,
        "owned_by" => "microgpt",
    )
    listing = Dict(
        "object" => "list",
        "data" => [model_entry],
    )
    return json_response(200, listing)
end
|
| 76 |
+
|
| 77 |
+
"""
    handle_chat_completions(req) -> HTTP.Response

`POST /v1/chat/completions` — OpenAI-compatible chat completion handler.

Parses the JSON body, samples `n` completions from the model, and returns a
`chat.completion` payload. Token counts are character counts (the model is
character-level, so one character == one token).

NOTE(review): `generate` is invoked without the prompt, so completions are
unconditioned free samples; the last message's content is used only for the
`prompt_tokens` usage figure — confirm this is intended for this tiny model.

Returns 400 (`invalid_request_error`) for malformed JSON or non-numeric
sampling parameters.
"""
function handle_chat_completions(req::HTTP.Request)
    # ---- Parse request body -------------------------------------------------
    local body
    try
        body = JSON3.read(String(req.body))
    catch e
        return json_response(400, Dict(
            "error" => Dict(
                "message" => "Invalid JSON in request body",
                "type" => "invalid_request_error",
                "code" => "invalid_json"
            )
        ))
    end

    # ---- Extract and validate sampling parameters ---------------------------
    # JSON numbers may arrive as Float64 (e.g. "max_tokens": 128.0); `Int(…)`
    # would raise InexactError and surface as a 500. Use `round(Int, …)` and
    # turn any conversion failure (e.g. a string value) into a 400.
    local temperature, max_tokens, n_completions
    try
        temperature   = Float64(get(body, :temperature, 0.8))
        max_tokens    = round(Int, get(body, :max_tokens, 128))
        n_completions = round(Int, get(body, :n, 1))
    catch e
        return json_response(400, Dict(
            "error" => Dict(
                "message" => "temperature, max_tokens and n must be numbers",
                "type" => "invalid_request_error",
                "code" => "invalid_parameter"
            )
        ))
    end

    # Clamp to safe ranges. `n` is bounded so a single request cannot trigger
    # unbounded autoregressive sampling (each completion is a full sample).
    temperature   = clamp(temperature, 0.01, 2.0)
    max_tokens    = clamp(max_tokens, 1, BLOCK_SIZE)
    n_completions = clamp(n_completions, 1, 10)

    # ---- Prompt text (usage accounting only; see docstring) -----------------
    messages = get(body, :messages, [])
    prompt_text = ""
    if !isempty(messages)
        last_msg = messages[end]
        prompt_text = string(get(last_msg, :content, ""))
    end

    # ---- Sample completions -------------------------------------------------
    choices = []
    total_completion_tokens = 0
    for i in 1:n_completions
        text = generate(STATE_DICT, UCHARS, BOS_TOKEN, N_LAYER, N_HEAD, HEAD_DIM, BLOCK_SIZE;
                        temperature=temperature, max_tokens=max_tokens)
        # Character-level model: hitting the length cap means truncation.
        finish_reason = length(text) >= max_tokens ? "length" : "stop"
        push!(choices, Dict(
            "index" => i - 1,          # OpenAI choices are zero-indexed
            "message" => Dict(
                "role" => "assistant",
                "content" => text
            ),
            "finish_reason" => finish_reason
        ))
        total_completion_tokens += length(text)
    end

    prompt_tokens = length(prompt_text)
    completion_id = "chatcmpl-" * string(uuid4())

    json_response(200, Dict(
        "id" => completion_id,
        "object" => "chat.completion",
        "created" => Int(floor(time())),
        "model" => "microgpt-philosophy",
        "choices" => choices,
        "usage" => Dict(
            "prompt_tokens" => prompt_tokens,
            "completion_tokens" => total_completion_tokens,
            "total_tokens" => prompt_tokens + total_completion_tokens
        ),
        "system_fingerprint" => "microgpt-philosophy-v1"
    ))
end
|
| 143 |
+
|
| 144 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
+
# Router + CORS
|
| 146 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 147 |
+
|
| 148 |
+
"""
    cors_preflight(req) -> HTTP.Response

Answer a CORS `OPTIONS` preflight with 204 No Content and the same
permissive CORS headers used by `json_response`.
"""
function cors_preflight(req::HTTP.Request)
    headers = [
        "Access-Control-Allow-Origin" => "*",
        "Access-Control-Allow-Methods" => "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers" => "Content-Type, Authorization",
    ]
    return HTTP.Response(204, headers)
end
|
| 154 |
+
|
| 155 |
+
# Route table. OPTIONS preflight is registered for the two API endpoints;
# the actual CORS response headers are attached by `json_response`.
# NOTE(review): no OPTIONS route is registered for "/" — confirm whether
# browser clients ever preflight the root endpoint.
const ROUTER = HTTP.Router()
HTTP.register!(ROUTER, "GET", "/", handle_root)
HTTP.register!(ROUTER, "GET", "/v1/models", handle_models)
HTTP.register!(ROUTER, "POST", "/v1/chat/completions", handle_chat_completions)
HTTP.register!(ROUTER, "OPTIONS", "/v1/chat/completions", cors_preflight)
HTTP.register!(ROUTER, "OPTIONS", "/v1/models", cors_preflight)
|
| 161 |
+
|
| 162 |
+
# ──────────────────────────────────────────────────────────────────────────────
# Start server
# ──────────────────────────────────────────────────────────────────────────────

# Announce the listening address and the available routes on stdout.
println("\nMicroGPT server starting on 0.0.0.0:$PORT ...")
println("  GET  http://localhost:$PORT/")
println("  GET  http://localhost:$PORT/v1/models")
println("  POST http://localhost:$PORT/v1/chat/completions")
println()

# Bind to 0.0.0.0 so the containerized server is reachable from outside
# (HuggingFace Spaces proxies to this port). Blocks until shutdown.
HTTP.serve(ROUTER, "0.0.0.0", PORT)
|