LisaMegaWatts committed on
Commit
f0aedd4
·
verified ·
1 Parent(s): 03128a8

Fix completion_tokens: count tokens not decoded characters

Browse files
Files changed (1) hide show
  1. server.jl +2 -2
server.jl CHANGED
@@ -270,12 +270,12 @@ function handle_request(request::HTTP.Request)
270
  for i in 1:n_completions
271
  text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
272
  max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
273
- finish_reason = length(text) >= max_tokens ? "length" : "stop"
274
  push!(choices, Dict(
275
  "index" => i - 1,
276
  "message" => Dict("role" => "assistant", "content" => text),
277
  "finish_reason" => finish_reason))
278
- total_completion_tokens += length(text)
279
  end
280
 
281
  prompt_tokens = length(encode(TOKENIZER, prompt_text))
 
270
  for i in 1:n_completions
271
  text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
272
  max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
273
+ finish_reason = "length" # generate_streaming always produces exactly max_tokens tokens
274
  push!(choices, Dict(
275
  "index" => i - 1,
276
  "message" => Dict("role" => "assistant", "content" => text),
277
  "finish_reason" => finish_reason))
278
+ total_completion_tokens += max_tokens # count tokens, not decoded chars
279
  end
280
 
281
  prompt_tokens = length(encode(TOKENIZER, prompt_text))