LisaMegaWatts Claude Opus 4.6 committed on
Commit
c275980
Β·
1 Parent(s): 643b9ff

Add MicroGPT Julia inference server

Browse files

Docker-based OpenAI-compatible API serving a ~5K param
character-level GPT trained on philosophy quotes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (7) hide show
  1. Dockerfile +29 -0
  2. Project.toml +3 -0
  3. README.md +32 -1
  4. checkpoint.jl +103 -0
  5. checkpoints/best_model.json +0 -0
  6. model.jl +355 -0
  7. server.jl +172 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM julia:1.10-bookworm

# HuggingFace Spaces requires user ID 1000
RUN useradd -m -u 1000 user

# Switch to the non-root user BEFORE installing packages. Julia installs into
# the depot at $HOME/.julia; instantiating as root would place the packages
# under /root/.julia, which the server process (running as `user`) never reads.
USER user
ENV HOME=/home/user
RUN mkdir -p /home/user/app
WORKDIR /home/user/app

# Install Julia packages (cached layer: only re-runs when Project.toml changes)
COPY --chown=user Project.toml /home/user/app/
RUN julia --project=/home/user/app -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()'

# Precompile server deps for faster cold start
RUN julia --project=/home/user/app -e 'using HTTP, JSON3; println("Precompile done")'

# Copy application code
COPY --chown=user model.jl /home/user/app/
COPY --chown=user checkpoint.jl /home/user/app/
COPY --chown=user server.jl /home/user/app/
COPY --chown=user checkpoints/ /home/user/app/checkpoints/

# Default port for HuggingFace Spaces (override with PORT env var)
EXPOSE 7860

# Use the same project the packages were instantiated into
CMD ["julia", "--project=/home/user/app", "/home/user/app/server.jl"]
Project.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Third-party packages: HTTP is loaded by server.jl, JSON3 by checkpoint.jl.
[deps]
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
README.md CHANGED
@@ -7,6 +7,37 @@ sdk: docker
7
  pinned: false
8
  license: mit
9
  short_description: MicroGPT implementation in Julia
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  pinned: false
8
  license: mit
9
  short_description: MicroGPT implementation in Julia
10
+ app_port: 7860
11
  ---
12
 
13
+ # MicroJulia
14
+
15
+ A ~5K parameter character-level GPT written from scratch in Julia — no ML frameworks, just pure scalar autograd.
16
+
17
+ Trained on philosophy quotes (Marcus Aurelius, Seneca, Socrates, Buddha, Confucius, Nietzsche, etc.)
18
+
19
+ ## API
20
+
21
+ OpenAI-compatible inference endpoint:
22
+
23
+ ```bash
24
+ curl -X POST https://lisamegawatts-microjulia.hf.space/v1/chat/completions \
25
+ -H "Content-Type: application/json" \
26
+ -d '{"messages":[{"role":"user","content":"The purpose of"}],"temperature":0.8,"max_tokens":128}'
27
+ ```
28
+
29
+ ### Endpoints
30
+
31
+ | Method | Path | Description |
32
+ |--------|------|-------------|
33
+ | GET | `/` | Health check |
34
+ | GET | `/v1/models` | List models |
35
+ | POST | `/v1/chat/completions` | Generate text |
36
+
37
+ ## Architecture
38
+
39
+ - 1 transformer layer, 16-dim embeddings, 4 attention heads
40
+ - Custom scalar autograd engine (`Value` type)
41
+ - Character-level tokenizer (no BPE)
42
+ - KV cache for efficient inference
43
+ - ~5,000 parameters
checkpoint.jl ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #=
2
+ checkpoint.jl — Save/load checkpoint functions for MicroGPT
3
+
4
+ Checkpoint format: JSON with weights, optimizer state, and training metadata.
5
+ Used by train.jl, resume_train.jl, and server.jl.
6
+ =#
7
+
8
+ include("model.jl")
9
+ using JSON3
10
+
11
# Serialize model weights, optimizer state and training metadata to `path` as JSON.
#
# Arguments:
#   path         destination file; its parent directory is created if missing
#   state_dict   maps parameter name => matrix (vector of rows) of Value objects
#   param_keys   names of the entries of `state_dict` to write
#   uchars       character vocabulary; each entry is stored as a one-element string
#   hyperparams  written through unchanged under "hyperparams"
# Keywords: optimizer buffers (`m_buf`, `v_buf`; `nothing` is stored as an empty
# array), optimizer scalars (`step`, `lr`, `β1`, `β2`) and training bookkeeping
# (`best_val_loss`, loss histories, step counters).
function save_checkpoint(path::String, state_dict, param_keys, uchars, hyperparams;
                         m_buf=nothing, v_buf=nothing, step::Int=0,
                         lr::Float64=0.01, β1::Float64=0.85, β2::Float64=0.99,
                         best_val_loss::Float64=Inf,
                         train_losses::Vector{Float64}=Float64[],
                         val_losses::Vector{Float64}=Float64[],
                         total_steps::Int=0, num_steps_target::Int=0)

    # Extract .data from Value objects (gradients and graph links are not saved)
    sd_data = Dict{String,Any}()
    for k in param_keys
        sd_data[k] = [[v.data for v in row] for row in state_dict[k]]
    end

    checkpoint = Dict{String,Any}(
        "uchars" => [string(c) for c in uchars],
        "hyperparams" => hyperparams,
        "state_dict" => sd_data,
        "optimizer" => Dict{String,Any}(
            "m_buf" => (m_buf === nothing ? Float64[] : collect(m_buf)),
            "v_buf" => (v_buf === nothing ? Float64[] : collect(v_buf)),
            "step" => step,
            "lr" => lr,
            "beta1" => β1,
            "beta2" => β2
        ),
        "training" => Dict{String,Any}(
            "best_val_loss" => best_val_loss,
            "train_losses" => train_losses,
            "val_losses" => val_losses,
            "total_steps_completed" => total_steps,
            "num_steps_target" => num_steps_target
        )
    )

    mkpath(dirname(path))
    open(path, "w") do f
        # `best_val_loss` defaults to Inf (and losses may be NaN); JSON3 refuses
        # to write non-finite floats unless allow_inf is set.
        JSON3.write(f, checkpoint; allow_inf=true)
    end
    vl_str = best_val_loss == Inf ? "Inf" : @sprintf("%.4f", best_val_loss)
    println("Checkpoint saved: $path (step $step, best_val_loss=$vl_str)")
end
53
+
54
# Read a JSON checkpoint from `path` and rebuild everything needed to run or
# resume the model.
#
# Returns a NamedTuple with the weights (`state_dict`, as Value matrices), the
# vocabulary (`uchars`, `BOS`, `vocab_size`), the hyperparameters (`n_layer`,
# `n_embd`, `block_size`, `n_head`, `head_dim`), the optimizer state (`m_buf`,
# `v_buf`, `step`, `lr`, `β1`, `β2`) and the training metadata.
function load_checkpoint(path::String)
    println("Loading checkpoint from $path ...")
    # allow_inf: a checkpoint may carry Inf/NaN (e.g. an unset best_val_loss)
    raw = JSON3.read(read(path, String); allow_inf=true)

    # Reconstruct character vocab; BOS is the extra id after the last character
    uchars = [only(String(s)) for s in raw["uchars"]]
    BOS = length(uchars) + 1
    vocab_size = BOS

    # Hyperparameters
    hp = raw["hyperparams"]
    n_layer = Int(hp["n_layer"])
    n_embd = Int(hp["n_embd"])
    block_size = Int(hp["block_size"])
    n_head = Int(hp["n_head"])
    head_dim = n_embd ÷ n_head
    if n_embd % n_head != 0
        # Integer division drops the remainder, so trailing embedding
        # dimensions would never be assigned to any attention head.
        @warn "n_embd is not divisible by n_head; trailing dimensions are ignored by attention" n_embd n_head
    end

    # Reconstruct state_dict as Value objects
    state_dict = Dict{String, Vector{Vector{Value}}}()
    for (key, matrix) in pairs(raw["state_dict"])
        state_dict[string(key)] = [[Value(Float64(v)) for v in row] for row in matrix]
    end

    # Optimizer state
    opt = raw["optimizer"]
    m_buf = Float64.(collect(opt["m_buf"]))
    v_buf = Float64.(collect(opt["v_buf"]))
    step = Int(opt["step"])
    lr = Float64(opt["lr"])
    β1 = Float64(opt["beta1"])
    β2 = Float64(opt["beta2"])

    # Training metadata
    trn = raw["training"]
    raw_best = trn["best_val_loss"]
    # A JSON null here means "no validation loss recorded yet"
    best_val_loss = raw_best === nothing ? Inf : Float64(raw_best)
    train_losses = Float64.(collect(trn["train_losses"]))
    val_losses = Float64.(collect(trn["val_losses"]))
    total_steps = Int(trn["total_steps_completed"])
    num_steps_target = Int(trn["num_steps_target"])

    println("  vocab=$vocab_size, embd=$n_embd, layers=$n_layer, step=$step, best_val=$(round(best_val_loss, digits=4))")

    return (;
        state_dict, uchars, BOS, vocab_size,
        n_layer, n_embd, block_size, n_head, head_dim,
        m_buf, v_buf, step, lr, β1, β2,
        best_val_loss, train_losses, val_losses,
        total_steps, num_steps_target
    )
end
checkpoints/best_model.json ADDED
The diff for this file is too large to render. See raw diff
 
model.jl ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #=
2
+ model.jl — Shared model code for MicroGPT
3
+
4
+ Contains: Value autograd type, neural net primitives, GPT forward pass,
5
+ and the philosophy quotes dataset. Included by all other scripts.
6
+ =#
7
+
8
+ using Random
9
+ using Printf
10
+
11
+ # ═══════════════════════════════════════════════════════════════════════════════
12
+ # Autograd engine — scalar Value type with automatic differentiation
13
+ # ═══════════════════════════════════════════════════════════════════════════════
14
+
15
# One scalar node of the computation graph.
#   data          forward value
#   grad          d(loss)/d(this node), accumulated by backward!
#   _children     the operands this node was computed from
#   _local_grads  d(this node)/d(child), one entry per child, same order
mutable struct Value
    data::Float64
    grad::Float64
    _children::Vector{Value}
    _local_grads::Vector{Float64}
end

# Leaf node: no children, zero gradient.
Value(x::Real) = Value(Float64(x), 0.0, Value[], Float64[])

# --- Core operations ---
# Each operation returns a new node that records its operands and the local
# partial derivatives evaluated at the operands' current data.

function Base.:+(a::Value, b::Value)
    return Value(a.data + b.data, 0.0, [a, b], [1.0, 1.0])
end

function Base.:*(a::Value, b::Value)
    return Value(a.data * b.data, 0.0, [a, b], [b.data, a.data])
end

function Base.:-(a::Value, b::Value)
    return Value(a.data - b.data, 0.0, [a, b], [1.0, -1.0])
end

function Base.:/(a::Value, b::Value)
    return Value(a.data / b.data, 0.0, [a, b], [1.0 / b.data, -a.data / b.data^2])
end

# Power with a constant (non-differentiated) exponent.
function Base.:^(a::Value, n::Real)
    return Value(a.data^n, 0.0, [a], [n * a.data^(n - 1)])
end

function Base.log(a::Value)
    return Value(log(a.data), 0.0, [a], [1.0 / a.data])
end

function Base.exp(a::Value)
    e = exp(a.data)
    return Value(e, 0.0, [a], [e])
end

# Gradient is 1 for strictly positive input and 0 otherwise (including at 0).
function relu(a::Value)
    return Value(max(0.0, a.data), 0.0, [a], [Float64(a.data > 0)])
end

# --- Scalar promotion ---
# Plain numbers are treated as constants: only the Value operand is a child.

Base.:+(a::Value, b::Real) = Value(a.data + b, 0.0, [a], [1.0])
Base.:+(a::Real, b::Value) = Value(a + b.data, 0.0, [b], [1.0])
Base.:*(a::Value, b::Real) = Value(a.data * b, 0.0, [a], [Float64(b)])
Base.:*(a::Real, b::Value) = Value(a * b.data, 0.0, [b], [Float64(a)])
Base.:-(a::Value, b::Real) = Value(a.data - b, 0.0, [a], [1.0])
Base.:-(a::Real, b::Value) = Value(a - b.data, 0.0, [b], [-1.0])
Base.:/(a::Value, b::Real) = Value(a.data / b, 0.0, [a], [1.0 / b])
Base.:/(a::Real, b::Value) = Value(a / b.data, 0.0, [b], [-a / b.data^2])
Base.:-(a::Value) = Value(-a.data, 0.0, [a], [-1.0])

Base.zero(::Type{Value}) = Value(0.0)
Base.isless(a::Value, b::Value) = a.data < b.data

# --- Backpropagation ---

# Accumulate d(loss)/d(node) into `.grad` of every node reachable from `loss`.
#
# Gradients are added to whatever `.grad` already holds, so callers that reuse
# parameter nodes across steps must reset their grads themselves.
#
# The topological order is built with an explicit stack instead of recursion:
# graph depth grows with the amount of computation, and a recursive walk can
# overflow the call stack on deep graphs. The visiting order (children left to
# right, node appended after its children) matches a recursive post-order walk.
function backward!(loss::Value)
    topo = Value[]
    visited = Set{UInt64}()
    push!(visited, objectid(loss))
    # Each frame is (node, index of the next child still to visit)
    stack = Tuple{Value,Int}[(loss, 1)]
    while !isempty(stack)
        node, next_child = stack[end]
        children = node._children
        if next_child > length(children)
            # All children done: emit the node (post-order)
            pop!(stack)
            push!(topo, node)
        else
            stack[end] = (node, next_child + 1)
            child = children[next_child]
            id = objectid(child)
            if !(id in visited)
                push!(visited, id)
                push!(stack, (child, 1))
            end
        end
    end

    loss.grad = 1.0
    # Reverse topological order: a node's grad is final before it is propagated
    for v in Iterators.reverse(topo)
        for (child, lg) in zip(v._children, v._local_grads)
            child.grad += lg * v.grad
        end
    end
    return nothing
end
96
+
97
+ # ═══════════════════════════════════════════════════════════════════════════════
98
+ # Neural network primitives
99
+ # ═══════════════════════════════════════════════════════════════════════════════
100
+
101
# Matrix-vector product: one output per row of `w`, each the dot product of
# that row with `x`. `zip` stops at the shorter of row and input.
function linear(x::Vector{Value}, w::Vector{Vector{Value}})
    return map(w) do row
        sum(weight * input for (weight, input) in zip(row, x))
    end
end
104
+
105
# Softmax over a vector of Value logits. The largest logit is subtracted as a
# plain number first, so the largest exponent is exp(0).
function softmax_v(logits::Vector{Value})
    peak = maximum(l -> l.data, logits)
    exps = map(l -> exp(l - peak), logits)
    total = sum(exps)
    return map(e -> e / total, exps)
end
111
+
112
# Scale `x` by the inverse root of its mean square (plus 1e-5 to avoid
# dividing by zero). There is no learned gain.
function rmsnorm(x::Vector{Value})
    mean_sq = sum(v * v for v in x) / length(x)
    inv_rms = (mean_sq + 1e-5) ^ (-0.5)
    return map(v -> v * inv_rms, x)
end
117
+
118
+ # ═══════════════════════════════════════════════════════════════════════════════
119
+ # GPT forward pass — one token at a time with KV cache
120
+ # ═══════════════════════════════════════════════════════════════════════════════
121
+
122
# Forward pass for a single token.
#
# `token_id` and `pos_id` index rows of the "wte" and "wpe" tables. `keys` and
# `values` hold, per layer, the key/value vectors of every position processed
# so far; this call appends the current position's vectors to them (mutating
# the caller's caches) and attends over the whole history. Returns the output
# of the "lm_head" projection, one logit per row of that matrix.
function gpt(token_id::Int, pos_id::Int,
             keys::Vector{Vector{Vector{Value}}},
             values::Vector{Vector{Vector{Value}}},
             state_dict::Dict{String, Vector{Vector{Value}}},
             n_layer::Int, n_head::Int, head_dim::Int)

    wte_row = state_dict["wte"][token_id]
    wpe_row = state_dict["wpe"][pos_id]
    x = rmsnorm([tok + pos for (tok, pos) in zip(wte_row, wpe_row)])
    attn_scale = sqrt(Float64(head_dim))

    for layer in 1:n_layer
        # Parameter names are 0-based ("layer0.…"), caches are 1-based
        prefix = "layer$(layer - 1)"
        layer_keys = keys[layer]
        layer_values = values[layer]

        # --- Self-attention sub-block with residual connection ---
        residual = x
        normed = rmsnorm(x)
        q = linear(normed, state_dict[prefix * ".attn_wq"])
        k = linear(normed, state_dict[prefix * ".attn_wk"])
        v = linear(normed, state_dict[prefix * ".attn_wv"])
        push!(layer_keys, k)
        push!(layer_values, v)

        x_attn = Value[]
        for head in 1:n_head
            # Contiguous slice of the embedding owned by this head
            cols = ((head - 1) * head_dim + 1):(head * head_dim)
            q_h = q[cols]
            k_h = [past[cols] for past in layer_keys]
            v_h = [past[cols] for past in layer_values]
            attn_logits = [sum(q_h[j] * k_t[j] for j in 1:head_dim) / attn_scale
                           for k_t in k_h]
            attn_weights = softmax_v(attn_logits)
            head_out = [sum(attn_weights[t] * v_h[t][j] for t in 1:length(v_h))
                        for j in 1:head_dim]
            append!(x_attn, head_out)
        end
        projected = linear(x_attn, state_dict[prefix * ".attn_wo"])
        x = [a + b for (a, b) in zip(projected, residual)]

        # --- MLP sub-block with residual connection ---
        residual = x
        hidden = linear(rmsnorm(x), state_dict[prefix * ".mlp_fc1"])
        hidden = [relu(h) for h in hidden]
        hidden = linear(hidden, state_dict[prefix * ".mlp_fc2"])
        x = [a + b for (a, b) in zip(hidden, residual)]
    end

    return linear(x, state_dict["lm_head"])
end
170
+
171
+ # ═══════════════════════════════════════════════════════════════════════════════
172
+ # Dataset — philosophy quotes (ancient through modern sources, lowercased with punctuation removed; believed public domain)
173
+ # ═══════════════════════════════════════════════════════════════════════════════
174
+
175
+ const PHILOSOPHY_QUOTES = [
176
+ "waste no more time arguing about what a good man should be",
177
+ "the happiness of your life depends upon the quality of your thoughts",
178
+ "you have power over your mind not outside events",
179
+ "very little is needed to make a happy life",
180
+ "the soul becomes dyed with the color of its thoughts",
181
+ "when you arise in the morning think of what a privilege it is to be alive",
182
+ "the best revenge is to be unlike him who performed the injury",
183
+ "accept the things to which fate binds you",
184
+ "if it is not right do not do it if it is not true do not say it",
185
+ "look well into thyself there is a source of strength",
186
+ "do every act of your life as though it were the very last act of your life",
187
+ "it is not death that a man should fear but never beginning to live",
188
+ "we suffer more often in imagination than in reality",
189
+ "true happiness is to enjoy the present without anxious dependence upon the future",
190
+ "it is not because things are difficult that we do not dare",
191
+ "it is because we do not dare that they are difficult",
192
+ "luck is what happens when preparation meets opportunity",
193
+ "begin at once to live and count each separate day as a separate life",
194
+ "the whole future lies in uncertainty live immediately",
195
+ "sometimes even to live is an act of courage",
196
+ "if a man knows not which port he sails no wind is favorable",
197
+ "he who is brave is free",
198
+ "difficulties strengthen the mind as labor does the body",
199
+ "first say to yourself what you would be and then do what you have to do",
200
+ "no man is free who is not master of himself",
201
+ "only the educated are free",
202
+ "man is not worried by real problems so much as by his imagined anxieties",
203
+ "wealth consists not in having great possessions but in having few wants",
204
+ "make the mind tougher by exposing it to adversity",
205
+ "the mind that is anxious about future events is miserable",
206
+ "the unexamined life is not worth living",
207
+ "i know that i know nothing",
208
+ "be kind for everyone you meet is fighting a hard battle",
209
+ "the only true wisdom is in knowing you know nothing",
210
+ "wonder is the beginning of wisdom",
211
+ "education is the kindling of a flame not the filling of a vessel",
212
+ "strong minds discuss ideas average minds discuss events weak minds discuss people",
213
+ "the secret of change is to focus all of your energy on building the new",
214
+ "no man ever steps in the same river twice",
215
+ "character is fate",
216
+ "the only constant in life is change",
217
+ "much learning does not teach understanding",
218
+ "knowing yourself is the beginning of all wisdom",
219
+ "happiness depends upon ourselves",
220
+ "it is the mark of an educated mind to entertain a thought without accepting it",
221
+ "the more you know the more you realize you do not know",
222
+ "patience is bitter but its fruit is sweet",
223
+ "we are what we repeatedly do excellence then is not an act but a habit",
224
+ "courage is the first of human qualities because it guarantees the others",
225
+ "quality is not an act it is a habit",
226
+ "pleasure in the job puts perfection in the work",
227
+ "the whole is greater than the sum of its parts",
228
+ "nature does nothing in vain",
229
+ "what is honored in a country is cultivated there",
230
+ "the roots of education are bitter but the fruit is sweet",
231
+ "to perceive is to suffer",
232
+ "the journey of a thousand miles begins with a single step",
233
+ "knowing others is intelligence knowing yourself is true wisdom",
234
+ "mastering others is strength mastering yourself is true power",
235
+ "when i let go of what i am i become what i might be",
236
+ "nature does not hurry yet everything is accomplished",
237
+ "silence is a source of great strength",
238
+ "a good traveler has no fixed plans and is not intent on arriving",
239
+ "be content with what you have rejoice in the way things are",
240
+ "he who conquers himself is the mightiest warrior",
241
+ "real knowledge is to know the extent of ones ignorance",
242
+ "it does not matter how slowly you go so long as you do not stop",
243
+ "our greatest glory is not in never falling but in rising every time we fall",
244
+ "before you embark on a journey of revenge dig two graves",
245
+ "the man who moves a mountain begins by carrying away small stones",
246
+ "to see what is right and not do it is a want of courage",
247
+ "study the past if you would define the future",
248
+ "wherever you go go with all your heart",
249
+ "to be wronged is nothing unless you continue to remember it",
250
+ "the wise man is one who knows what he does not know",
251
+ "he who learns but does not think is lost",
252
+ "i think therefore i am",
253
+ "the heart has its reasons which reason knows nothing of",
254
+ "all of mans misfortune comes from not knowing how to sit quietly in a room",
255
+ "the more i read the more certain i am that i know nothing",
256
+ "there is nothing either good or bad but thinking makes it so",
257
+ "one cannot step twice in the same river",
258
+ "man is born free and everywhere he is in chains",
259
+ "life must be understood backward but it must be lived forward",
260
+ "anxiety is the dizziness of freedom",
261
+ "you will never be happy if you continue to search for what happiness consists of",
262
+ "act only according to that maxim which you can will to become universal law",
263
+ "he who thinks great thoughts often makes great errors",
264
+ "to live is to suffer to survive is to find some meaning in the suffering",
265
+ "without music life would be a mistake",
266
+ "he who has a why to live for can bear almost any how",
267
+ "that which does not kill us makes us stronger",
268
+ "there are no facts only interpretations",
269
+ "you must have chaos within you to give birth to a dancing star",
270
+ "whoever fights monsters should see to it that he does not become a monster",
271
+ "when you gaze long into an abyss the abyss also gazes into you",
272
+ "the individual has always had to struggle to keep from being overwhelmed by the tribe",
273
+ "there is always some madness in love but there is also always some reason in madness",
274
+ "the snake which cannot cast its skin has to die",
275
+ "in the middle of difficulty lies opportunity",
276
+ "the mind is everything what you think you become",
277
+ "peace comes from within do not seek it without",
278
+ "all that we are is the result of what we have thought",
279
+ "three things cannot be long hidden the sun the moon and the truth",
280
+ "the only way to do great work is to love what you do",
281
+ "virtue is not given by money but from virtue comes money",
282
+ "the measure of a man is what he does with power",
283
+ "no great mind has ever existed without a touch of madness",
284
+ "the energy of the mind is the essence of life",
285
+ "those who know do not speak those who speak do not know",
286
+ "the flame that burns twice as bright burns half as long",
287
+ "what we achieve inwardly will change outer reality",
288
+ "the only thing i know is that i know nothing and i am not quite sure that i know that",
289
+ "everything has beauty but not everyone sees it",
290
+ "the greatest wealth is to live content with little",
291
+ "it is during our darkest moments that we must focus to see the light",
292
+ "where there is love there is life",
293
+ "the mind is not a vessel to be filled but a fire to be kindled",
294
+ ]
295
+
296
+ # ═══════════════════════════════════════════════════════════════════════════════
297
+ # Helpers used by training and inference scripts
298
+ # ═══════════════════════════════════════════════════════════════════════════════
299
+
300
# Deterministic parameter key ordering (must match across all scripts):
# the three global tables first, then six matrices per layer, layers 0-based.
function get_param_keys(n_layer::Int)
    names = ["wte", "wpe", "lm_head"]
    suffixes = ("attn_wq", "attn_wk", "attn_wv", "attn_wo", "mlp_fc1", "mlp_fc2")
    for layer in 0:n_layer-1, suffix in suffixes
        push!(names, "layer$(layer).$(suffix)")
    end
    return names
end
311
+
312
# Initialize a weight matrix as `nout` rows of `nin` Values drawn from a
# normal distribution with standard deviation `std`. Rows are filled in order.
function init_matrix(nout::Int, nin::Int; std=0.08)
    new_row() = [Value(randn() * std) for _ in 1:nin]
    return [new_row() for _ in 1:nout]
end
316
+
317
# Flatten state_dict into a single params vector. Order is deterministic:
# keys in the order given by `param_keys`, rows in order within each matrix.
# The returned vector holds the same Value objects, not copies.
function flatten_params(state_dict, param_keys)
    flat = Value[]
    for key in param_keys, row in state_dict[key]
        append!(flat, row)
    end
    return flat
end
327
+
328
# Pick an index from `weights` (assumed to sum to ~1) by inverse-CDF sampling
# with the uniform draw `r`. If rounding leaves the cumulative sum just below
# `r`, the last index is returned rather than biasing towards the first one.
function _sample_index(weights, r)
    cum = 0.0
    for (idx, w) in enumerate(weights)
        cum += w
        r <= cum && return idx
    end
    return length(weights)
end

# Generate text from trained model.
#
# Starts from the BOS token and samples one character per position until BOS
# is sampled again or `min(max_tokens, block_size)` characters were produced.
# Logits are divided by `temperature` before the softmax, so it must be
# positive; an ArgumentError is thrown otherwise.
function generate(state_dict, uchars, BOS, n_layer, n_head, head_dim, block_size;
                  temperature=0.8, max_tokens=128)
    temperature > 0 ||
        throw(ArgumentError("temperature must be positive, got $temperature"))

    kv_keys = [Vector{Vector{Value}}() for _ in 1:n_layer]
    kv_vals = [Vector{Vector{Value}}() for _ in 1:n_layer]
    token_id = BOS
    sample = Char[]
    # Positions index the "wpe" table, so never run past block_size
    limit = min(max_tokens, block_size)
    for pos in 1:limit
        logits = gpt(token_id, pos, kv_keys, kv_vals, state_dict, n_layer, n_head, head_dim)
        scaled = [l / temperature for l in logits]
        probs = softmax_v(scaled)
        weights = [p.data for p in probs]
        token_id = _sample_index(weights, rand())
        # BOS doubles as the end-of-sequence marker
        token_id == BOS && break
        push!(sample, uchars[token_id])
    end
    return String(sample)
end
server.jl ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #=
2
+ server.jl — OpenAI/OpenRouter-compatible inference server for MicroGPT
3
+
4
+ Endpoints:
5
+ GET / → health check / API info
6
+ GET /v1/models → list available models
7
+ POST /v1/chat/completions → generate philosophy text (OpenAI format)
8
+
9
+ Usage:
10
+ julia --project=. server.jl
11
+ WEIGHTS_PATH=checkpoints/checkpoint_step400.json julia --project=. server.jl
12
+ PORT=8080 julia --project=. server.jl
13
+ =#
14
+
15
+ include("checkpoint.jl")
16
+ using HTTP
17
+ using UUIDs
18
+ using Sockets
19
+
20
+ # ═══════════════════════════════════════════════════════════════════════════════
21
+ # Load model at startup
22
+ # ═══════════════════════════════════════════════════════════════════════════════
23
+
24
# Checkpoint location. The default is anchored to this file's directory (like
# `include`), so the server also starts when launched from another working
# directory. A WEIGHTS_PATH from the environment is used exactly as given.
const WEIGHTS_PATH = get(ENV, "WEIGHTS_PATH",
                         joinpath(@__DIR__, "checkpoints", "best_model.json"))
const PORT = parse(Int, get(ENV, "PORT", "7860"))

# Load once at startup; every request handler reads these constants
const ckpt = load_checkpoint(WEIGHTS_PATH)
const STATE_DICT = ckpt.state_dict
const UCHARS = ckpt.uchars
const BOS_TOKEN = ckpt.BOS
const N_LAYER = ckpt.n_layer
const N_EMBD = ckpt.n_embd
const BLOCK_SIZE = ckpt.block_size
const N_HEAD = ckpt.n_head
const HEAD_DIM = ckpt.head_dim
# Unix timestamp (seconds) of server start, reported as the model's "created"
const MODEL_CREATED_AT = floor(Int, time())

println("Model ready: vocab=$(ckpt.vocab_size), embd=$N_EMBD, layers=$N_LAYER")
39
+
40
+ # ═══════════════════════════════════════════════════════════════════════════════
41
+ # API handlers
42
+ # ═══════════════════════════════════════════════════════════════════════════════
43
+
44
# Build an HTTP response with the given status whose body is `body` serialized
# as JSON, with permissive CORS headers attached.
function json_response(status::Int, body)
    headers = [
        "Content-Type" => "application/json",
        "Access-Control-Allow-Origin" => "*",
        "Access-Control-Allow-Methods" => "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers" => "Content-Type, Authorization",
    ]
    payload = JSON3.write(body)
    return HTTP.Response(status, headers, payload)
end
52
+
53
# GET / — static description of the service; doubles as a health check.
function handle_root(req::HTTP.Request)
    info = Dict(
        "name" => "MicroGPT Philosophy",
        "version" => "1.0.0",
        "description" => "A ~5K parameter character-level GPT trained on philosophy quotes",
        "endpoints" => ["/v1/models", "/v1/chat/completions"],
        "compatible_with" => ["OpenAI API", "OpenRouter"]
    )
    return json_response(200, info)
end
62
+
63
# GET /v1/models — list containing the single model this server exposes.
function handle_models(req::HTTP.Request)
    model_entry = Dict(
        "id" => "microgpt-philosophy",
        "object" => "model",
        "created" => MODEL_CREATED_AT,
        "owned_by" => "microgpt"
    )
    listing = Dict(
        "object" => "list",
        "data" => [model_entry]
    )
    return json_response(200, listing)
end
76
+
77
# Upper bound on the `n` request parameter. Each completion is a full
# generation pass, so an unbounded `n` would let one request tie up the server.
const MAX_COMPLETIONS_PER_REQUEST = 8

# 400 response in the OpenAI error envelope.
function _invalid_request(message::String, code::String)
    return json_response(400, Dict(
        "error" => Dict(
            "message" => message,
            "type" => "invalid_request_error",
            "code" => code
        )
    ))
end

# Read a numeric request field. Returns `default` when the field is absent or
# JSON null, and `nothing` when it is present but not a number.
function _numeric_param(body, key::Symbol, default::Real)
    raw = get(body, key, nothing)
    raw === nothing && return default
    (raw isa Real && !(raw isa Bool)) || return nothing
    return raw
end

# POST /v1/chat/completions — generate `n` samples and return them in the
# OpenAI chat-completion format.
#
# Honored request fields: `temperature` (clamped to 0.01–2.0), `max_tokens`
# (clamped to 1–BLOCK_SIZE) and `n` (clamped to 1–MAX_COMPLETIONS_PER_REQUEST).
# The content of the last message is only used for the `prompt_tokens` count;
# `generate` is always started from the BOS token and is not conditioned on it.
# Malformed JSON, non-object bodies and non-numeric or fractional parameters
# produce a 400 response.
function handle_chat_completions(req::HTTP.Request)
    local body
    try
        body = JSON3.read(String(req.body))
    catch e
        return _invalid_request("Invalid JSON in request body", "invalid_json")
    end
    body isa AbstractDict ||
        return _invalid_request("Request body must be a JSON object", "invalid_body")

    # Extract parameters with defaults
    temperature = _numeric_param(body, :temperature, 0.8)
    max_tokens = _numeric_param(body, :max_tokens, 128)
    n_completions = _numeric_param(body, :n, 1)
    if temperature === nothing || max_tokens === nothing || n_completions === nothing
        return _invalid_request(
            "temperature, max_tokens and n must be numbers", "invalid_parameter")
    end
    if !isinteger(max_tokens) || !isinteger(n_completions)
        return _invalid_request("max_tokens and n must be integers", "invalid_parameter")
    end

    # Clamp before converting to Int so huge values cannot raise InexactError
    temperature = clamp(Float64(temperature), 0.01, 2.0)
    max_tokens = Int(clamp(max_tokens, 1, BLOCK_SIZE))
    n_completions = Int(clamp(n_completions, 1, MAX_COMPLETIONS_PER_REQUEST))

    # Extract prompt text for token counting
    prompt_text = ""
    messages = get(body, :messages, nothing)
    if messages isa AbstractVector && !isempty(messages)
        last_msg = messages[end]
        if last_msg isa AbstractDict
            content = get(last_msg, :content, "")
            # Non-string content (null, structured parts) counts as empty
            content isa AbstractString && (prompt_text = String(content))
        end
    end

    # Generate completions
    choices = Dict{String,Any}[]
    total_completion_tokens = 0
    for i in 1:n_completions
        text = generate(STATE_DICT, UCHARS, BOS_TOKEN, N_LAYER, N_HEAD, HEAD_DIM, BLOCK_SIZE;
                        temperature=temperature, max_tokens=max_tokens)
        # The tokenizer is character-level, so characters == tokens
        finish_reason = length(text) >= max_tokens ? "length" : "stop"
        push!(choices, Dict{String,Any}(
            "index" => i - 1,
            "message" => Dict(
                "role" => "assistant",
                "content" => text
            ),
            "finish_reason" => finish_reason
        ))
        total_completion_tokens += length(text)
    end

    prompt_tokens = length(prompt_text)
    completion_id = "chatcmpl-" * string(uuid4())

    return json_response(200, Dict(
        "id" => completion_id,
        "object" => "chat.completion",
        "created" => Int(floor(time())),
        "model" => "microgpt-philosophy",
        "choices" => choices,
        "usage" => Dict(
            "prompt_tokens" => prompt_tokens,
            "completion_tokens" => total_completion_tokens,
            "total_tokens" => prompt_tokens + total_completion_tokens
        ),
        "system_fingerprint" => "microgpt-philosophy-v1"
    ))
end
143
+
144
+ # ═══════════════════════════════════════════════════════════════════════════════
145
+ # Router + CORS
146
+ # ═══════════════════════════════════════════════════════════════════════════════
147
+
148
# Answer a CORS preflight (OPTIONS) request: 204 with the allow-* headers and
# no body.
function cors_preflight(req::HTTP.Request)
    allow_headers = [
        "Access-Control-Allow-Origin" => "*",
        "Access-Control-Allow-Methods" => "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers" => "Content-Type, Authorization",
    ]
    return HTTP.Response(204, allow_headers)
end
154
+
155
# Route table: (method, path, handler)
const ROUTER = HTTP.Router()
for (method, path, handler) in (
        ("GET", "/", handle_root),
        ("GET", "/v1/models", handle_models),
        ("POST", "/v1/chat/completions", handle_chat_completions),
        ("OPTIONS", "/v1/chat/completions", cors_preflight),
        ("OPTIONS", "/v1/models", cors_preflight),
    )
    HTTP.register!(ROUTER, method, path, handler)
end

# ═══════════════════════════════════════════════════════════════════════════════
# Start server
# ═══════════════════════════════════════════════════════════════════════════════

# Startup banner listing the reachable endpoints
println("\nMicroGPT server starting on 0.0.0.0:$PORT ...")
for endpoint in (
        " GET http://localhost:$PORT/",
        " GET http://localhost:$PORT/v1/models",
        " POST http://localhost:$PORT/v1/chat/completions",
    )
    println(endpoint)
end
println()

# Blocks for the lifetime of the process; listens on all interfaces
HTTP.serve(ROUTER, "0.0.0.0", PORT)