Spaces · Running
Commit: "Fix completion_tokens: count tokens not decoded characters"
Browse files
server.jl — CHANGED
@@ -270,12 +270,12 @@ function handle_request(request::HTTP.Request)

Before:

  270 |         for i in 1:n_completions
  271 |             text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
  272 |                 max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
- 273 |             finish_reason = length
  274 |             push!(choices, Dict(
  275 |                 "index" => i - 1,
  276 |                 "message" => Dict("role" => "assistant", "content" => text),
  277 |                 "finish_reason" => finish_reason))
- 278 |             total_completion_tokens += …   [right-hand side lost in extraction; per the commit message it counted decoded characters, presumably length(text) — verify against repo history]
  279 |         end
  280 |
  281 |         prompt_tokens = length(encode(TOKENIZER, prompt_text))
After:

  270 |         for i in 1:n_completions
  271 |             text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
  272 |                 max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
+ 273 |             finish_reason = "length" # generate_streaming always produces exactly max_tokens tokens
  274 |             push!(choices, Dict(
  275 |                 "index" => i - 1,
  276 |                 "message" => Dict("role" => "assistant", "content" => text),
  277 |                 "finish_reason" => finish_reason))
+ 278 |             total_completion_tokens += max_tokens # count tokens, not decoded chars
  279 |         end
  280 |
  281 |         prompt_tokens = length(encode(TOKENIZER, prompt_text))