Spaces · Running
Commit: "Fix completion_tokens: count tokens not decoded characters"
Browse files
server.jl — CHANGED
@@ -270,12 +270,12 @@ function handle_request(request::HTTP.Request)

Before:

  270 |         for i in 1:n_completions
  271 |             text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
  272 |                 max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
- 273 |             finish_reason = length
  274 |             push!(choices, Dict(
  275 |                 "index" => i - 1,
  276 |                 "message" => Dict("role" => "assistant", "content" => text),
  277 |                 "finish_reason" => finish_reason))
- 278 |             total_completion_tokens += …   [right-hand side lost in extraction; per the commit message it counted decoded characters, presumably length(text) — verify against repo history]
  279 |         end
  280 |
  281 |         prompt_tokens = length(encode(TOKENIZER, prompt_text))
After:

  270 |         for i in 1:n_completions
  271 |             text = generate_streaming(CONFIG, PS, TOKENIZER, prompt_text;
  272 |                 max_tokens, temperature, top_k=top_k_val, top_p=top_p_val)
+ 273 |             finish_reason = "length" # generate_streaming always produces exactly max_tokens tokens
  274 |             push!(choices, Dict(
  275 |                 "index" => i - 1,
  276 |                 "message" => Dict("role" => "assistant", "content" => text),
  277 |                 "finish_reason" => finish_reason))
+ 278 |             total_completion_tokens += max_tokens # count tokens, not decoded chars
  279 |         end
  280 |
  281 |         prompt_tokens = length(encode(TOKENIZER, prompt_text))