Spaces:
Sleeping
Sleeping
Commit Β·
c275980
1
Parent(s): 643b9ff
Add MicroGPT Julia inference server
Browse files

Docker-based OpenAI-compatible API serving a ~5K param
character-level GPT trained on philosophy quotes.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- Dockerfile +29 -0
- Project.toml +3 -0
- README.md +32 -1
- checkpoint.jl +103 -0
- checkpoints/best_model.json +0 -0
- model.jl +355 -0
- server.jl +172 -0
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM julia:1.10-bookworm

# HuggingFace Spaces requires user ID 1000.
# Make /app writable by that user so Pkg can write Manifest.toml there.
RUN useradd -m -u 1000 user && mkdir -p /app && chown user:user /app

WORKDIR /app
COPY --chown=user Project.toml /app/

# Switch to the runtime user BEFORE installing packages. Installing as root
# would place the package depot in /root/.julia, which is unreadable once we
# drop to uid 1000 — the server would then fail to find HTTP/JSON3 at runtime.
# As `user`, the depot lands in /home/user/.julia and survives the switch.
USER user
ENV HOME=/home/user

# Install Julia packages (cached layer)
RUN julia --project=/app -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()'

# Precompile server deps for faster cold start
RUN julia --project=/app -e 'using HTTP, JSON3; println("Precompile done")'

WORKDIR /home/user/app

# Copy application code
COPY --chown=user model.jl /home/user/app/
COPY --chown=user checkpoint.jl /home/user/app/
COPY --chown=user server.jl /home/user/app/
COPY --chown=user Project.toml /home/user/app/
COPY --chown=user checkpoints/ /home/user/app/checkpoints/

# Default port for HuggingFace Spaces (override with PORT env var)
EXPOSE 7860

CMD ["julia", "--project=/app", "/home/user/app/server.jl"]
|
Project.toml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
# Julia project dependencies for the MicroGPT inference server.
# UUIDs identify packages in the General registry; versions are resolved
# by Pkg.instantiate() at build time (no [compat] bounds are pinned).
[deps]
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
|
README.md
CHANGED
|
@@ -7,6 +7,37 @@ sdk: docker
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
short_description: MicroGPT implementation in Julia
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
short_description: MicroGPT implementation in Julia
|
| 10 |
+
app_port: 7860
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# MicroJulia
|
| 14 |
+
|
| 15 |
+
A ~5K parameter character-level GPT written from scratch in Julia β no ML frameworks, just pure scalar autograd.
|
| 16 |
+
|
| 17 |
+
Trained on philosophy quotes (Marcus Aurelius, Seneca, Socrates, Buddha, Confucius, Nietzsche, etc.)
|
| 18 |
+
|
| 19 |
+
## API
|
| 20 |
+
|
| 21 |
+
OpenAI-compatible inference endpoint:
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
curl -X POST https://lisamegawatts-microjulia.hf.space/v1/chat/completions \
|
| 25 |
+
-H "Content-Type: application/json" \
|
| 26 |
+
-d '{"messages":[{"role":"user","content":"The purpose of"}],"temperature":0.8,"max_tokens":128}'
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### Endpoints
|
| 30 |
+
|
| 31 |
+
| Method | Path | Description |
|
| 32 |
+
|--------|------|-------------|
|
| 33 |
+
| GET | `/` | Health check |
|
| 34 |
+
| GET | `/v1/models` | List models |
|
| 35 |
+
| POST | `/v1/chat/completions` | Generate text |
|
| 36 |
+
|
| 37 |
+
## Architecture
|
| 38 |
+
|
| 39 |
+
- 1 transformer layer, 16-dim embeddings, 4 attention heads
|
| 40 |
+
- Custom scalar autograd engine (`Value` type)
|
| 41 |
+
- Character-level tokenizer (no BPE)
|
| 42 |
+
- KV cache for efficient inference
|
| 43 |
+
- ~5,000 parameters
|
checkpoint.jl
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#=
|
| 2 |
+
checkpoint.jl β Save/load checkpoint functions for MicroGPT
|
| 3 |
+
|
| 4 |
+
Checkpoint format: JSON with weights, optimizer state, and training metadata.
|
| 5 |
+
Used by train.jl, resume_train.jl, and server.jl.
|
| 6 |
+
=#
|
| 7 |
+
|
| 8 |
+
include("model.jl")
|
| 9 |
+
using JSON3
|
| 10 |
+
|
| 11 |
+
"""
Serialize model weights, optimizer buffers, and training history to a JSON
checkpoint at `path` (parent directories are created as needed).

Only the raw `Float64` weights are stored — autograd `Value` wrappers are
stripped. Keyword arguments default to "fresh run" values so the function can
also be used to snapshot an untrained model.

NOTE(review): JSON has no literal for `Inf`; if `best_val_loss` is still `Inf`
when saved, the round-trip through JSON3 may not preserve it — confirm before
relying on resuming from a pre-validation checkpoint.
"""
function save_checkpoint(path::String, state_dict, param_keys, uchars, hyperparams;
                         m_buf=nothing, v_buf=nothing, step::Int=0,
                         lr::Float64=0.01, β1::Float64=0.85, β2::Float64=0.99,
                         best_val_loss::Float64=Inf,
                         train_losses::Vector{Float64}=Float64[],
                         val_losses::Vector{Float64}=Float64[],
                         total_steps::Int=0, num_steps_target::Int=0)

    # Strip autograd wrappers: keep only the raw Float64 weights, in the
    # deterministic order given by param_keys.
    sd_data = Dict{String,Any}(
        k => [[v.data for v in row] for row in state_dict[k]] for k in param_keys
    )

    # Adam-style optimizer buffers; empty vectors stand in for "no state yet".
    opt_state = Dict{String,Any}(
        "m_buf" => m_buf === nothing ? Float64[] : collect(m_buf),
        "v_buf" => v_buf === nothing ? Float64[] : collect(v_buf),
        "step"  => step,
        "lr"    => lr,
        "beta1" => β1,
        "beta2" => β2
    )

    train_meta = Dict{String,Any}(
        "best_val_loss" => best_val_loss,
        "train_losses" => train_losses,
        "val_losses" => val_losses,
        "total_steps_completed" => total_steps,
        "num_steps_target" => num_steps_target
    )

    checkpoint = Dict{String,Any}(
        "uchars" => [string(c) for c in uchars],
        "hyperparams" => hyperparams,
        "state_dict" => sd_data,
        "optimizer" => opt_state,
        "training" => train_meta
    )

    mkpath(dirname(path))
    open(path, "w") do io
        JSON3.write(io, checkpoint)
    end
    vl_str = best_val_loss == Inf ? "Inf" : @sprintf("%.4f", best_val_loss)
    println("Checkpoint saved: $path (step $step, best_val_loss=$vl_str)")
end
|
| 53 |
+
|
| 54 |
+
"""
Read a JSON checkpoint written by `save_checkpoint` and rebuild everything
needed for training or inference.

Returns a NamedTuple carrying the weights (rewrapped as `Value` objects),
character vocab, hyperparameters, optimizer buffers, and training history.
"""
function load_checkpoint(path::String)
    println("Loading checkpoint from $path ...")
    raw = JSON3.read(read(path, String))

    # Character vocab; BOS is the extra token one past the last real character.
    uchars = [only(String(s)) for s in raw["uchars"]]
    BOS = length(uchars) + 1
    vocab_size = BOS

    # Model hyperparameters.
    hp = raw["hyperparams"]
    n_layer    = Int(hp["n_layer"])
    n_embd     = Int(hp["n_embd"])
    block_size = Int(hp["block_size"])
    n_head     = Int(hp["n_head"])
    head_dim   = n_embd ÷ n_head

    # Rewrap the stored floats as autograd Values.
    state_dict = Dict{String, Vector{Vector{Value}}}()
    for (key, matrix) in pairs(raw["state_dict"])
        state_dict[string(key)] = [[Value(Float64(v)) for v in row] for row in matrix]
    end

    # Optimizer state (empty buffers mean "no state saved yet").
    opt = raw["optimizer"]
    m_buf = Float64.(collect(opt["m_buf"]))
    v_buf = Float64.(collect(opt["v_buf"]))
    step  = Int(opt["step"])
    lr    = Float64(opt["lr"])
    β1    = Float64(opt["beta1"])
    β2    = Float64(opt["beta2"])

    # Training history / progress metadata.
    trn = raw["training"]
    best_val_loss    = Float64(trn["best_val_loss"])
    train_losses     = Float64.(collect(trn["train_losses"]))
    val_losses       = Float64.(collect(trn["val_losses"]))
    total_steps      = Int(trn["total_steps_completed"])
    num_steps_target = Int(trn["num_steps_target"])

    println(" vocab=$vocab_size, embd=$n_embd, layers=$n_layer, step=$step, best_val=$(round(best_val_loss, digits=4))")

    return (;
        state_dict, uchars, BOS, vocab_size,
        n_layer, n_embd, block_size, n_head, head_dim,
        m_buf, v_buf, step, lr, β1, β2,
        best_val_loss, train_losses, val_losses,
        total_steps, num_steps_target
    )
end
|
checkpoints/best_model.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.jl
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#=
|
| 2 |
+
model.jl β Shared model code for MicroGPT
|
| 3 |
+
|
| 4 |
+
Contains: Value autograd type, neural net primitives, GPT forward pass,
|
| 5 |
+
and the philosophy quotes dataset. Included by all other scripts.
|
| 6 |
+
=#
|
| 7 |
+
|
| 8 |
+
using Random
|
| 9 |
+
using Printf
|
| 10 |
+
|
| 11 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
+
# Autograd engine β scalar Value type with automatic differentiation
|
| 13 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
+
|
| 15 |
+
# A node in the computation graph: the forward value, the gradient accumulated
# during backprop, and — for non-leaf nodes — the inputs together with the
# local derivative of this node with respect to each input.
mutable struct Value
    data::Float64
    grad::Float64
    _children::Vector{Value}
    _local_grads::Vector{Float64}
end

# Leaf node: no parents, gradient starts at zero.
Value(x::Real) = Value(Float64(x), 0.0, Value[], Float64[])

# --- Core operations (each records its inputs and local derivatives) ---

Base.:+(a::Value, b::Value) = Value(a.data + b.data, 0.0, [a, b], [1.0, 1.0])
Base.:*(a::Value, b::Value) = Value(a.data * b.data, 0.0, [a, b], [b.data, a.data])
Base.:-(a::Value, b::Value) = Value(a.data - b.data, 0.0, [a, b], [1.0, -1.0])
Base.:/(a::Value, b::Value) = Value(a.data / b.data, 0.0, [a, b], [1.0 / b.data, -a.data / b.data^2])
Base.:^(a::Value, n::Real)  = Value(a.data^n, 0.0, [a], [n * a.data^(n - 1)])
Base.log(a::Value)          = Value(log(a.data), 0.0, [a], [1.0 / a.data])

function Base.exp(a::Value)
    e = exp(a.data)  # d/dx exp(x) = exp(x): reuse the forward result
    Value(e, 0.0, [a], [e])
end

# Subgradient 0 at the kink (a.data == 0), matching common practice.
relu(a::Value) = Value(max(0.0, a.data), 0.0, [a], [Float64(a.data > 0)])

# --- Scalar promotion: the plain number is a constant, so it contributes no
# --- child and the Value operand gets the whole local derivative.

function Base.:+(a::Value, b::Real)
    Value(a.data + b, 0.0, [a], [1.0])
end
function Base.:+(a::Real, b::Value)
    Value(a + b.data, 0.0, [b], [1.0])
end
function Base.:*(a::Value, b::Real)
    Value(a.data * b, 0.0, [a], [Float64(b)])
end
function Base.:*(a::Real, b::Value)
    Value(a * b.data, 0.0, [b], [Float64(a)])
end
function Base.:-(a::Value, b::Real)
    Value(a.data - b, 0.0, [a], [1.0])
end
function Base.:-(a::Real, b::Value)
    Value(a - b.data, 0.0, [b], [-1.0])
end
function Base.:/(a::Value, b::Real)
    Value(a.data / b, 0.0, [a], [1.0 / b])
end
function Base.:/(a::Real, b::Value)
    Value(a / b.data, 0.0, [b], [-a / b.data^2])
end
Base.:-(a::Value) = Value(-a.data, 0.0, [a], [-1.0])

# Enables sum() over Value vectors and comparisons by forward value.
Base.zero(::Type{Value}) = Value(0.0)
Base.isless(a::Value, b::Value) = a.data < b.data
|
| 73 |
+
|
| 74 |
+
# --- Backpropagation ---
|
| 75 |
+
|
| 76 |
+
"""
Reverse-mode autodiff: accumulate `grad` on every node reachable from `loss`.

Sets `loss.grad = 1.0`, then walks the graph in reverse topological order,
adding `local_grad * parent.grad` into each child. Gradients accumulate, so
callers must zero them between backward passes.

The topological sort is an iterative post-order DFS with an explicit stack.
The original recursive `build_topo` overflowed the call stack on deep graphs
(autograd depth grows with sequence length); iteration removes that limit
while producing the same ordering guarantees for a DAG.
"""
function backward!(loss::Value)
    topo = Value[]
    visited = Set{UInt64}()
    # Each stack entry is (node, expanded): expanded=false means "discover
    # children first", expanded=true means "all children done — emit node".
    stack = Tuple{Value,Bool}[(loss, false)]
    while !isempty(stack)
        node, expanded = pop!(stack)
        if expanded
            push!(topo, node)
            continue
        end
        id = objectid(node)
        id in visited && continue
        push!(visited, id)
        push!(stack, (node, true))  # emit after all children below are emitted
        for child in node._children
            push!(stack, (child, false))
        end
    end

    # Seed the output gradient and propagate parent→child.
    loss.grad = 1.0
    for v in reverse(topo)
        for (child, lg) in zip(v._children, v._local_grads)
            child.grad += lg * v.grad
        end
    end
end
|
| 96 |
+
|
| 97 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 98 |
+
# Neural network primitives
|
| 99 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 100 |
+
|
| 101 |
+
# Matrix–vector product: `w` is a list of weight rows; returns the vector
# whose i-th entry is the dot product of row i with `x`. No bias term.
function linear(x::Vector{Value}, w::Vector{Vector{Value}})
    out = Vector{Value}(undef, length(w))
    for (i, row) in enumerate(w)
        out[i] = sum(wi * xi for (wi, xi) in zip(row, x))
    end
    return out
end
|
| 104 |
+
|
| 105 |
+
# Numerically stable softmax over a vector of Values. Subtracting the
# (constant) max before exponentiating prevents overflow without changing
# the result; the max is taken from raw .data so it adds no graph edges.
function softmax_v(logits::Vector{Value})
    shift = maximum(v.data for v in logits)
    exps = map(v -> exp(v - shift), logits)
    denom = sum(exps)
    return [e / denom for e in exps]
end
|
| 111 |
+
|
| 112 |
+
# RMS normalization without a learned gain: x / sqrt(mean(x.^2) + ε).
# The 1e-5 epsilon keeps the scale finite for an all-zero input.
function rmsnorm(x::Vector{Value})
    mean_sq = sum(xi * xi for xi in x) / length(x)
    inv_rms = (mean_sq + 1e-5) ^ (-0.5)
    return [xi * inv_rms for xi in x]
end
|
| 117 |
+
|
| 118 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
+
# GPT forward pass β one token at a time with KV cache
|
| 120 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
+
|
| 122 |
+
"""
Single-token forward pass of the transformer with a KV cache.

`keys`/`values` hold, per layer, the cached key/value vectors of every
previously processed position; this call appends the current position's
entries before attending, so attention always covers positions 1..current.
Returns the unnormalized logits over the vocabulary.
"""
function gpt(token_id::Int, pos_id::Int,
             keys::Vector{Vector{Vector{Value}}},
             values::Vector{Vector{Vector{Value}}},
             state_dict::Dict{String, Vector{Vector{Value}}},
             n_layer::Int, n_head::Int, head_dim::Int)

    # Embed: token embedding + learned position embedding, then normalize.
    emb_tok = state_dict["wte"][token_id]
    emb_pos = state_dict["wpe"][pos_id]
    x = [t + p for (t, p) in zip(emb_tok, emb_pos)]
    x = rmsnorm(x)

    for li in 0:n_layer-1
        # --- Attention sub-block (pre-norm residual) ---
        residual = x
        xn = rmsnorm(x)
        q = linear(xn, state_dict["layer$(li).attn_wq"])
        k = linear(xn, state_dict["layer$(li).attn_wk"])
        v = linear(xn, state_dict["layer$(li).attn_wv"])
        # Cache this position's key/value for layer li (1-based layer index).
        push!(keys[li+1], k)
        push!(values[li+1], v)

        x_attn = Value[]
        for h in 0:n_head-1
            # Slice out this head's channels.
            lo = h * head_dim + 1
            hi = lo + head_dim - 1
            q_h = q[lo:hi]
            k_h = [ki[lo:hi] for ki in keys[li+1]]
            v_h = [vi[lo:hi] for vi in values[li+1]]
            T = length(k_h)
            scale = sqrt(Float64(head_dim))
            # Scaled dot-product attention over all cached positions.
            attn_logits = [sum(q_h[j] * k_h[t][j] for j in 1:head_dim) / scale
                           for t in 1:T]
            attn_weights = softmax_v(attn_logits)
            head_out = [sum(attn_weights[t] * v_h[t][j] for t in 1:T)
                        for j in 1:head_dim]
            append!(x_attn, head_out)
        end
        projected = linear(x_attn, state_dict["layer$(li).attn_wo"])
        x = [a + b for (a, b) in zip(projected, residual)]

        # --- MLP sub-block (pre-norm residual) ---
        residual = x
        xn = rmsnorm(x)
        hidden = linear(xn, state_dict["layer$(li).mlp_fc1"])
        hidden = [relu(u) for u in hidden]
        mlp_out = linear(hidden, state_dict["layer$(li).mlp_fc2"])
        x = [a + b for (a, b) in zip(mlp_out, residual)]
    end

    # Project to vocabulary logits.
    return linear(x, state_dict["lm_head"])
end
|
| 170 |
+
|
| 171 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 172 |
+
# Dataset β philosophy quotes (all ancient/classical, public domain)
|
| 173 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 174 |
+
|
| 175 |
+
const PHILOSOPHY_QUOTES = [
|
| 176 |
+
"waste no more time arguing about what a good man should be",
|
| 177 |
+
"the happiness of your life depends upon the quality of your thoughts",
|
| 178 |
+
"you have power over your mind not outside events",
|
| 179 |
+
"very little is needed to make a happy life",
|
| 180 |
+
"the soul becomes dyed with the color of its thoughts",
|
| 181 |
+
"when you arise in the morning think of what a privilege it is to be alive",
|
| 182 |
+
"the best revenge is to be unlike him who performed the injury",
|
| 183 |
+
"accept the things to which fate binds you",
|
| 184 |
+
"if it is not right do not do it if it is not true do not say it",
|
| 185 |
+
"look well into thyself there is a source of strength",
|
| 186 |
+
"do every act of your life as though it were the very last act of your life",
|
| 187 |
+
"it is not death that a man should fear but never beginning to live",
|
| 188 |
+
"we suffer more often in imagination than in reality",
|
| 189 |
+
"true happiness is to enjoy the present without anxious dependence upon the future",
|
| 190 |
+
"it is not because things are difficult that we do not dare",
|
| 191 |
+
"it is because we do not dare that they are difficult",
|
| 192 |
+
"luck is what happens when preparation meets opportunity",
|
| 193 |
+
"begin at once to live and count each separate day as a separate life",
|
| 194 |
+
"the whole future lies in uncertainty live immediately",
|
| 195 |
+
"sometimes even to live is an act of courage",
|
| 196 |
+
"if a man knows not which port he sails no wind is favorable",
|
| 197 |
+
"he who is brave is free",
|
| 198 |
+
"difficulties strengthen the mind as labor does the body",
|
| 199 |
+
"first say to yourself what you would be and then do what you have to do",
|
| 200 |
+
"no man is free who is not master of himself",
|
| 201 |
+
"only the educated are free",
|
| 202 |
+
"man is not worried by real problems so much as by his imagined anxieties",
|
| 203 |
+
"wealth consists not in having great possessions but in having few wants",
|
| 204 |
+
"make the mind tougher by exposing it to adversity",
|
| 205 |
+
"the mind that is anxious about future events is miserable",
|
| 206 |
+
"the unexamined life is not worth living",
|
| 207 |
+
"i know that i know nothing",
|
| 208 |
+
"be kind for everyone you meet is fighting a hard battle",
|
| 209 |
+
"the only true wisdom is in knowing you know nothing",
|
| 210 |
+
"wonder is the beginning of wisdom",
|
| 211 |
+
"education is the kindling of a flame not the filling of a vessel",
|
| 212 |
+
"strong minds discuss ideas average minds discuss events weak minds discuss people",
|
| 213 |
+
"the secret of change is to focus all of your energy on building the new",
|
| 214 |
+
"no man ever steps in the same river twice",
|
| 215 |
+
"character is fate",
|
| 216 |
+
"the only constant in life is change",
|
| 217 |
+
"much learning does not teach understanding",
|
| 218 |
+
"knowing yourself is the beginning of all wisdom",
|
| 219 |
+
"happiness depends upon ourselves",
|
| 220 |
+
"it is the mark of an educated mind to entertain a thought without accepting it",
|
| 221 |
+
"the more you know the more you realize you do not know",
|
| 222 |
+
"patience is bitter but its fruit is sweet",
|
| 223 |
+
"we are what we repeatedly do excellence then is not an act but a habit",
|
| 224 |
+
"courage is the first of human qualities because it guarantees the others",
|
| 225 |
+
"quality is not an act it is a habit",
|
| 226 |
+
"pleasure in the job puts perfection in the work",
|
| 227 |
+
"the whole is greater than the sum of its parts",
|
| 228 |
+
"nature does nothing in vain",
|
| 229 |
+
"what is honored in a country is cultivated there",
|
| 230 |
+
"the roots of education are bitter but the fruit is sweet",
|
| 231 |
+
"to perceive is to suffer",
|
| 232 |
+
"the journey of a thousand miles begins with a single step",
|
| 233 |
+
"knowing others is intelligence knowing yourself is true wisdom",
|
| 234 |
+
"mastering others is strength mastering yourself is true power",
|
| 235 |
+
"when i let go of what i am i become what i might be",
|
| 236 |
+
"nature does not hurry yet everything is accomplished",
|
| 237 |
+
"silence is a source of great strength",
|
| 238 |
+
"a good traveler has no fixed plans and is not intent on arriving",
|
| 239 |
+
"be content with what you have rejoice in the way things are",
|
| 240 |
+
"he who conquers himself is the mightiest warrior",
|
| 241 |
+
"real knowledge is to know the extent of ones ignorance",
|
| 242 |
+
"it does not matter how slowly you go so long as you do not stop",
|
| 243 |
+
"our greatest glory is not in never falling but in rising every time we fall",
|
| 244 |
+
"before you embark on a journey of revenge dig two graves",
|
| 245 |
+
"the man who moves a mountain begins by carrying away small stones",
|
| 246 |
+
"to see what is right and not do it is a want of courage",
|
| 247 |
+
"study the past if you would define the future",
|
| 248 |
+
"wherever you go go with all your heart",
|
| 249 |
+
"to be wronged is nothing unless you continue to remember it",
|
| 250 |
+
"the wise man is one who knows what he does not know",
|
| 251 |
+
"he who learns but does not think is lost",
|
| 252 |
+
"i think therefore i am",
|
| 253 |
+
"the heart has its reasons which reason knows nothing of",
|
| 254 |
+
"all of mans misfortune comes from not knowing how to sit quietly in a room",
|
| 255 |
+
"the more i read the more certain i am that i know nothing",
|
| 256 |
+
"there is nothing either good or bad but thinking makes it so",
|
| 257 |
+
"one cannot step twice in the same river",
|
| 258 |
+
"man is born free and everywhere he is in chains",
|
| 259 |
+
"life must be understood backward but it must be lived forward",
|
| 260 |
+
"anxiety is the dizziness of freedom",
|
| 261 |
+
"you will never be happy if you continue to search for what happiness consists of",
|
| 262 |
+
"act only according to that maxim which you can will to become universal law",
|
| 263 |
+
"he who thinks great thoughts often makes great errors",
|
| 264 |
+
"to live is to suffer to survive is to find some meaning in the suffering",
|
| 265 |
+
"without music life would be a mistake",
|
| 266 |
+
"he who has a why to live for can bear almost any how",
|
| 267 |
+
"that which does not kill us makes us stronger",
|
| 268 |
+
"there are no facts only interpretations",
|
| 269 |
+
"you must have chaos within you to give birth to a dancing star",
|
| 270 |
+
"whoever fights monsters should see to it that he does not become a monster",
|
| 271 |
+
"when you gaze long into an abyss the abyss also gazes into you",
|
| 272 |
+
"the individual has always had to struggle to keep from being overwhelmed by the tribe",
|
| 273 |
+
"there is always some madness in love but there is also always some reason in madness",
|
| 274 |
+
"the snake which cannot cast its skin has to die",
|
| 275 |
+
"in the middle of difficulty lies opportunity",
|
| 276 |
+
"the mind is everything what you think you become",
|
| 277 |
+
"peace comes from within do not seek it without",
|
| 278 |
+
"all that we are is the result of what we have thought",
|
| 279 |
+
"three things cannot be long hidden the sun the moon and the truth",
|
| 280 |
+
"the only way to do great work is to love what you do",
|
| 281 |
+
"virtue is not given by money but from virtue comes money",
|
| 282 |
+
"the measure of a man is what he does with power",
|
| 283 |
+
"no great mind has ever existed without a touch of madness",
|
| 284 |
+
"the energy of the mind is the essence of life",
|
| 285 |
+
"those who know do not speak those who speak do not know",
|
| 286 |
+
"the flame that burns twice as bright burns half as long",
|
| 287 |
+
"what we achieve inwardly will change outer reality",
|
| 288 |
+
"the only thing i know is that i know nothing and i am not quite sure that i know that",
|
| 289 |
+
"everything has beauty but not everyone sees it",
|
| 290 |
+
"the greatest wealth is to live content with little",
|
| 291 |
+
"it is during our darkest moments that we must focus to see the light",
|
| 292 |
+
"where there is love there is life",
|
| 293 |
+
"the mind is not a vessel to be filled but a fire to be kindled",
|
| 294 |
+
]
|
| 295 |
+
|
| 296 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 297 |
+
# Helpers used by training and inference scripts
|
| 298 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 299 |
+
|
| 300 |
+
# Deterministic parameter key ordering (must match across all scripts).
# Globals first, then per-layer weights in a fixed attention/MLP order.
function get_param_keys(n_layer::Int)
    ks = ["wte", "wpe", "lm_head"]
    per_layer = ["attn_wq", "attn_wk", "attn_wv", "attn_wo", "mlp_fc1", "mlp_fc2"]
    for i in 0:n_layer-1, name in per_layer
        push!(ks, "layer$i.$name")
    end
    return ks
end
|
| 311 |
+
|
| 312 |
+
# Random weight matrix: `nout` rows of `nin` entries, each drawn from a
# normal distribution with standard deviation `std` (uses the global RNG).
init_matrix(nout::Int, nin::Int; std=0.08) =
    [[Value(randn() * std) for _ in 1:nin] for _ in 1:nout]
|
| 316 |
+
|
| 317 |
+
# Flatten state_dict into a single params vector, visiting matrices in the
# deterministic `param_keys` order and rows top-to-bottom within each matrix.
function flatten_params(state_dict, param_keys)
    flat = Value[]
    for key in param_keys, row in state_dict[key]
        append!(flat, row)
    end
    return flat
end
|
| 327 |
+
|
| 328 |
+
"""
Sample text autoregressively from a trained model; returns the generated
`String`. Generation starts from BOS, draws at most
`min(max_tokens, block_size)` characters (position embeddings only cover
`block_size` slots), and stops early if BOS is sampled. `temperature` scales
the logits before softmax (lower = greedier). Uses the global RNG.
"""
function generate(state_dict, uchars, BOS, n_layer, n_head, head_dim, block_size;
                  temperature=0.8, max_tokens=128)
    kv_keys = [Vector{Vector{Value}}() for _ in 1:n_layer]
    kv_vals = [Vector{Vector{Value}}() for _ in 1:n_layer]
    token_id = BOS
    sample = Char[]
    limit = min(max_tokens, block_size)
    for pos in 1:limit
        logits = gpt(token_id, pos, kv_keys, kv_vals, state_dict, n_layer, n_head, head_dim)
        scaled = [l / temperature for l in logits]
        probs = softmax_v(scaled)
        weights = [p.data for p in probs]
        # Inverse-CDF sampling. FIX: the fallback (when floating-point rounding
        # leaves the cumulative sum slightly below r) is now the LAST index;
        # the original fell back to token 1, silently biasing those rare draws
        # toward the first vocab character.
        r = rand()
        cum = 0.0
        token_id = length(weights)
        for (idx, w) in enumerate(weights)
            cum += w
            if r <= cum
                token_id = idx
                break
            end
        end
        token_id == BOS && break  # BOS doubles as end-of-sequence
        push!(sample, uchars[token_id])
    end
    return String(sample)
end
|
server.jl
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#=
|
| 2 |
+
server.jl β OpenAI/OpenRouter-compatible inference server for MicroGPT
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
GET / β health check / API info
|
| 6 |
+
GET /v1/models β list available models
|
| 7 |
+
POST /v1/chat/completions β generate philosophy text (OpenAI format)
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
julia --project=. server.jl
|
| 11 |
+
WEIGHTS_PATH=checkpoints/checkpoint_step400.json julia --project=. server.jl
|
| 12 |
+
PORT=8080 julia --project=. server.jl
|
| 13 |
+
=#
|
| 14 |
+
|
| 15 |
+
include("checkpoint.jl")
|
| 16 |
+
using HTTP
|
| 17 |
+
using UUIDs
|
| 18 |
+
using Sockets
|
| 19 |
+
|
| 20 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
# Load model at startup
|
| 22 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 23 |
+
|
| 24 |
+
const WEIGHTS_PATH = get(ENV, "WEIGHTS_PATH", "checkpoints/best_model.json")
|
| 25 |
+
const PORT = parse(Int, get(ENV, "PORT", "7860"))
|
| 26 |
+
|
| 27 |
+
const ckpt = load_checkpoint(WEIGHTS_PATH)
|
| 28 |
+
const STATE_DICT = ckpt.state_dict
|
| 29 |
+
const UCHARS = ckpt.uchars
|
| 30 |
+
const BOS_TOKEN = ckpt.BOS
|
| 31 |
+
const N_LAYER = ckpt.n_layer
|
| 32 |
+
const N_EMBD = ckpt.n_embd
|
| 33 |
+
const BLOCK_SIZE = ckpt.block_size
|
| 34 |
+
const N_HEAD = ckpt.n_head
|
| 35 |
+
const HEAD_DIM = ckpt.head_dim
|
| 36 |
+
const MODEL_CREATED_AT = Int(floor(time()))
|
| 37 |
+
|
| 38 |
+
println("Model ready: vocab=$(ckpt.vocab_size), embd=$N_EMBD, layers=$N_LAYER")
|
| 39 |
+
|
| 40 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
+
# API handlers
|
| 42 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
|
| 44 |
+
# Build an HTTP.Response with a JSON-serialized body and permissive CORS
# headers (any origin; GET/POST/OPTIONS; Content-Type and Authorization).
function json_response(status::Int, body)
    cors_headers = [
        "Content-Type" => "application/json",
        "Access-Control-Allow-Origin" => "*",
        "Access-Control-Allow-Methods" => "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers" => "Content-Type, Authorization",
    ]
    return HTTP.Response(status, cors_headers, JSON3.write(body))
end
|
| 52 |
+
|
| 53 |
+
# GET / — health check: returns static service metadata as JSON.
function handle_root(req::HTTP.Request)
    info = Dict(
        "name" => "MicroGPT Philosophy",
        "version" => "1.0.0",
        "description" => "A ~5K parameter character-level GPT trained on philosophy quotes",
        "endpoints" => ["/v1/models", "/v1/chat/completions"],
        "compatible_with" => ["OpenAI API", "OpenRouter"],
    )
    return json_response(200, info)
end
|
| 62 |
+
|
| 63 |
+
"""
    handle_models(req) -> HTTP.Response

`GET /v1/models` — OpenAI-compatible model listing. Exactly one model is
served, with `created` fixed at server start time (`MODEL_CREATED_AT`).
"""
function handle_models(req::HTTP.Request)
    model_entry = Dict(
        "id" => "microgpt-philosophy",
        "object" => "model",
        "created" => MODEL_CREATED_AT,
        "owned_by" => "microgpt",
    )
    listing = Dict(
        "object" => "list",
        "data" => [model_entry],
    )
    return json_response(200, listing)
end
|
| 76 |
+
|
| 77 |
+
"""
    handle_chat_completions(req) -> HTTP.Response

`POST /v1/chat/completions` — OpenAI-compatible chat completion handler.

Parses the JSON body, samples `n` completions from the model, and returns a
`chat.completion` payload. Token counts are character counts (the model is
character-level, so one character == one token).

NOTE(review): `generate` is invoked without the prompt, so completions are
unconditioned free samples; the last message's content is used only for the
`prompt_tokens` usage figure — confirm this is intended for this tiny model.

Returns 400 (`invalid_request_error`) for malformed JSON or non-numeric
sampling parameters.
"""
function handle_chat_completions(req::HTTP.Request)
    # ---- Parse request body -------------------------------------------------
    local body
    try
        body = JSON3.read(String(req.body))
    catch e
        return json_response(400, Dict(
            "error" => Dict(
                "message" => "Invalid JSON in request body",
                "type" => "invalid_request_error",
                "code" => "invalid_json"
            )
        ))
    end

    # ---- Extract and validate sampling parameters ---------------------------
    # JSON numbers may arrive as Float64 (e.g. "max_tokens": 128.0); `Int(…)`
    # would raise InexactError and surface as a 500. Use `round(Int, …)` and
    # turn any conversion failure (e.g. a string value) into a 400.
    local temperature, max_tokens, n_completions
    try
        temperature   = Float64(get(body, :temperature, 0.8))
        max_tokens    = round(Int, get(body, :max_tokens, 128))
        n_completions = round(Int, get(body, :n, 1))
    catch e
        return json_response(400, Dict(
            "error" => Dict(
                "message" => "temperature, max_tokens and n must be numbers",
                "type" => "invalid_request_error",
                "code" => "invalid_parameter"
            )
        ))
    end

    # Clamp to safe ranges. `n` is bounded so a single request cannot trigger
    # unbounded autoregressive sampling (each completion is a full sample).
    temperature   = clamp(temperature, 0.01, 2.0)
    max_tokens    = clamp(max_tokens, 1, BLOCK_SIZE)
    n_completions = clamp(n_completions, 1, 10)

    # ---- Prompt text (usage accounting only; see docstring) -----------------
    messages = get(body, :messages, [])
    prompt_text = ""
    if !isempty(messages)
        last_msg = messages[end]
        prompt_text = string(get(last_msg, :content, ""))
    end

    # ---- Sample completions -------------------------------------------------
    choices = []
    total_completion_tokens = 0
    for i in 1:n_completions
        text = generate(STATE_DICT, UCHARS, BOS_TOKEN, N_LAYER, N_HEAD, HEAD_DIM, BLOCK_SIZE;
                        temperature=temperature, max_tokens=max_tokens)
        # Character-level model: hitting the length cap means truncation.
        finish_reason = length(text) >= max_tokens ? "length" : "stop"
        push!(choices, Dict(
            "index" => i - 1,          # OpenAI choices are zero-indexed
            "message" => Dict(
                "role" => "assistant",
                "content" => text
            ),
            "finish_reason" => finish_reason
        ))
        total_completion_tokens += length(text)
    end

    prompt_tokens = length(prompt_text)
    completion_id = "chatcmpl-" * string(uuid4())

    json_response(200, Dict(
        "id" => completion_id,
        "object" => "chat.completion",
        "created" => Int(floor(time())),
        "model" => "microgpt-philosophy",
        "choices" => choices,
        "usage" => Dict(
            "prompt_tokens" => prompt_tokens,
            "completion_tokens" => total_completion_tokens,
            "total_tokens" => prompt_tokens + total_completion_tokens
        ),
        "system_fingerprint" => "microgpt-philosophy-v1"
    ))
end
|
| 143 |
+
|
| 144 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
+
# Router + CORS
|
| 146 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 147 |
+
|
| 148 |
+
"""
    cors_preflight(req) -> HTTP.Response

Answer a CORS `OPTIONS` preflight with 204 No Content and the same
permissive CORS headers used by `json_response`.
"""
function cors_preflight(req::HTTP.Request)
    headers = [
        "Access-Control-Allow-Origin" => "*",
        "Access-Control-Allow-Methods" => "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers" => "Content-Type, Authorization",
    ]
    return HTTP.Response(204, headers)
end
|
| 154 |
+
|
| 155 |
+
# Route table. OPTIONS preflight is registered for the two API endpoints;
# the actual CORS response headers are attached by `json_response`.
# NOTE(review): no OPTIONS route is registered for "/" — confirm whether
# browser clients ever preflight the root endpoint.
const ROUTER = HTTP.Router()
HTTP.register!(ROUTER, "GET", "/", handle_root)
HTTP.register!(ROUTER, "GET", "/v1/models", handle_models)
HTTP.register!(ROUTER, "POST", "/v1/chat/completions", handle_chat_completions)
HTTP.register!(ROUTER, "OPTIONS", "/v1/chat/completions", cors_preflight)
HTTP.register!(ROUTER, "OPTIONS", "/v1/models", cors_preflight)
|
| 161 |
+
|
| 162 |
+
# ──────────────────────────────────────────────────────────────────────────────
# Start server
# ──────────────────────────────────────────────────────────────────────────────

# Announce the listening address and the available routes on stdout.
println("\nMicroGPT server starting on 0.0.0.0:$PORT ...")
println("  GET  http://localhost:$PORT/")
println("  GET  http://localhost:$PORT/v1/models")
println("  POST http://localhost:$PORT/v1/chat/completions")
println()

# Bind to 0.0.0.0 so the containerized server is reachable from outside
# (HuggingFace Spaces proxies to this port). Blocks until shutdown.
HTTP.serve(ROUTER, "0.0.0.0", PORT)
|