File size: 2,972 Bytes
332826f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3634ca6
 
332826f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include "request_parsing.h"

#include <algorithm>
#include <sstream>

// Very rough token estimate: roughly one token per 4 bytes of text
// (ceiling division), with a floor of one token for any non-empty string.
// Empty text is zero tokens.
static int estimate_text_tokens_rough(const std::string &text) {
    const std::size_t len = text.size();
    if (len == 0) return 0;
    const int approx = static_cast<int>((len + 3) / 4);
    return approx < 1 ? 1 : approx;
}

// Collapse a chat-message "content" field into plain text.
// A string passes through unchanged. An array is joined with '\n',
// accepting raw string items or object items carrying a string "text"
// member; empty pieces are skipped. Any other JSON shape flattens to "".
static std::string flatten_json_content(const json &content) {
    if (content.is_string()) return content.get<std::string>();
    if (!content.is_array()) return "";

    std::string joined;
    for (const auto &item : content) {
        std::string piece;
        if (item.is_string()) {
            piece = item.get<std::string>();
        } else if (item.is_object() && item.contains("text") && item["text"].is_string()) {
            piece = item["text"].get<std::string>();
        }
        if (piece.empty()) continue;
        // joined is non-empty iff at least one piece was appended already,
        // so this reproduces the original first/rest separator behavior.
        if (!joined.empty()) joined += '\n';
        joined += piece;
    }
    return joined;
}

std::optional<TokenEstimate> estimate_chat_tokens(
    const json &payload,
    const LimitsConfig &limits,
    std::string &error) {
    if (!payload.is_object()) {
        error = "Expected JSON object";
        return std::nullopt;
    }
    if (!payload.contains("messages") || !payload["messages"].is_array()) {
        error = "Expected 'messages' array";
        return std::nullopt;
    }

    TokenEstimate estimate;
    estimate.requested_max_tokens = limits.default_max_tokens;
    if (payload.contains("max_tokens")) {
        if (!payload["max_tokens"].is_number_integer()) {
            error = "Expected integer 'max_tokens'";
            return std::nullopt;
        }
        estimate.requested_max_tokens = payload["max_tokens"].get<int>();
    }

    if (estimate.requested_max_tokens <= 0) {
        // -1 (and other non-positive values) is a sentinel meaning "unlimited" — use the default
        estimate.requested_max_tokens = limits.default_max_tokens;
    }

    for (const auto &message : payload["messages"]) {
        if (!message.is_object()) continue;
        if (message.contains("role") && message["role"].is_string()) {
            estimate.prompt_tokens += estimate_text_tokens_rough(message["role"].get<std::string>());
        }
        if (message.contains("content")) {
            estimate.prompt_tokens += estimate_text_tokens_rough(flatten_json_content(message["content"]));
        }
        estimate.prompt_tokens += 4;
    }

    estimate.estimated_total_tokens = estimate.prompt_tokens + estimate.requested_max_tokens;
    if (estimate.estimated_total_tokens > limits.max_tokens_per_request) {
        error = "Estimated request tokens exceed configured limit";
        return std::nullopt;
    }

    error.clear();
    return estimate;
}

bool request_stream_enabled(const json &payload) {
    if (!payload.is_object() || !payload.contains("stream")) return false;
    return payload["stream"].is_boolean() && payload["stream"].get<bool>();
}