LisaMegaWatts committed on
Commit
d167fb8
·
verified ·
1 Parent(s): 62c96ea

Fix completion_tokens: count tokens not decoded characters

Browse files
Files changed (1) hide show
  1. server.jl +2 -2
server.jl CHANGED
@@ -278,12 +278,12 @@ function handle_request(request::HTTP.Request)
278
  for i in 1:n_completions
279
  text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
280
  max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
281
- finish_reason = length(text) >= max_tokens ? "length" : "stop"
282
  push!(choices, Dict(
283
  "index" => i - 1,
284
  "message" => Dict("role" => "assistant", "content" => text),
285
  "finish_reason" => finish_reason))
286
- total_completion_tokens += length(text)
287
  end
288
 
289
  prompt_tokens = length(encode(TOKENIZER, prompt_text))
 
278
  for i in 1:n_completions
279
  text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
280
  max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
281
+ finish_reason = "length" # generate_streaming always produces exactly max_tokens tokens
282
  push!(choices, Dict(
283
  "index" => i - 1,
284
  "message" => Dict("role" => "assistant", "content" => text),
285
  "finish_reason" => finish_reason))
286
+ total_completion_tokens += max_tokens # count tokens, not decoded chars
287
  end
288
 
289
  prompt_tokens = length(encode(TOKENIZER, prompt_text))