// File size: 29,951 Bytes

// 67068c5

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <initializer_list>
#include <iostream>
#include <map>
#include <memory>
#include <regex>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "json.hpp" // nlohmann/json library

// using nlohmann/json
using json = nlohmann::json;

// Define Postprocessor class to encapsulate all logic
class Postprocessor {
private:
    // All original global variables are now private members of the class
    std::map<std::string, std::string> cls_map;
    std::map<std::string, std::vector<std::string>> area2NL;
    std::map<std::string, std::string> NL2area;
    std::map<std::string, std::map<std::string, std::string>> maxmin_val_map;
    std::set<std::string> uncommon_area_NL;
    std::set<std::string> st_open;
    std::set<std::string> st_close;
    std::map<std::string, std::string> default_value;
    std::vector<std::string> switch_keywords;
    std::vector<std::string> level_keywords;
    std::vector<std::string> str2remove;
    std::vector<std::string> max_level_keys;
    std::vector<std::string> min_level_keys;
    std::string exclude_pattern_str;

    /**
    * @brief Splits a UTF-8 string into its encoded characters.
    *
    * The length of each character is taken from its lead byte:
    * 0xxxxxxx -> 1 byte, 110xxxxx -> 2 bytes, 1110xxxx -> 3 bytes, 11110xxx -> 4 bytes.
    * Bytes that cannot start a sequence (stray continuation bytes, 0xF8..0xFF) and
    * sequences cut off by the end of the string are emitted one byte at a time, so no
    * input byte is ever dropped. Continuation bytes themselves are not validated.
    *
    * @param text The input string, expected to be UTF-8 encoded.
    * @return One element per character; concatenating all elements reproduces text.
    */
    std::vector<std::string> split_utf8_string(const std::string& text) {
        std::vector<std::string> utf8_chars;
        size_t i = 0;
        while (i < text.length()) {
            const unsigned char lead = static_cast<unsigned char>(text[i]);

            size_t len = 1;
            if (lead >= 0xF8) len = 1;      // never a valid UTF-8 lead byte
            else if (lead >= 0xF0) len = 4; // 4-byte character (emoji, rare CJK)
            else if (lead >= 0xE0) len = 3; // 3-byte character (most CJK)
            else if (lead >= 0xC0) len = 2; // 2-byte character

            // Never read past the end of the string: a truncated sequence is
            // emitted byte by byte instead.
            if (i + len > text.length()) {
                len = 1;
            }

            utf8_chars.push_back(text.substr(i, len));
            i += len;
        }
        return utf8_chars;
    }
    /**
    * @brief Replaces every non-overlapping occurrence of a substring.
    *
    * The search proceeds left to right and resumes after each inserted replacement,
    * so text produced by a replacement is never searched again.
    *
    * @param str  The text to work on (taken by value; the modified copy is returned).
    * @param from The substring to look for. If empty, str is returned unchanged.
    * @param to   The replacement text.
    * @return str with every occurrence of from replaced by to.
    */
    std::string ReplaceAll(std::string str, const std::string& from, const std::string& to) {
        // An empty pattern matches at every position and would never terminate.
        if (from.empty()) {
            return str;
        }

        size_t start_pos = 0;
        while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
            str.replace(start_pos, from.length(), to);
            // Skip over the inserted text; this keeps the loop finite even when
            // 'from' occurs inside 'to' (e.g. replacing "a" with "aa").
            start_pos += to.length();
        }
        return str;
    }

    /**
    * @brief Converts a run of English number words into an integer.
    *
    * Parsing starts at start_index and stops at the first word that is not part of a
    * number. Matching is case-insensitive. Recognised words are the units
    * ("zero".."nine"), the teens, the tens, hyphenated tens-units compounds such as
    * "twenty-one", and the magnitudes "hundred", "thousand", "million", "billion".
    * The connector "and" is skipped only when it sits between two number words
    * ("one hundred and five"); a leading or trailing "and" is left for the caller.
    *
    * @param words The word tokens to parse.
    * @param start_index Index of the first word to examine.
    * @return A pair: the parsed value and the index of the first unconsumed word.
    *         When no number word was found the pair is {0, start_index}.
    */
    std::pair<long long, size_t> parse_english_numbers(const std::vector<std::string>& words, size_t start_index) {
        static const std::map<std::string, long long> ONES = {
            {"zero", 0}, {"one", 1}, {"two", 2}, {"three", 3}, {"four", 4},
            {"five", 5}, {"six", 6}, {"seven", 7}, {"eight", 8}, {"nine", 9}
        };
        static const std::map<std::string, long long> TEENS = {
            {"ten", 10}, {"eleven", 11}, {"twelve", 12}, {"thirteen", 13},
            {"fourteen", 14}, {"fifteen", 15}, {"sixteen", 16},
            {"seventeen", 17}, {"eighteen", 18}, {"nineteen", 19}
        };
        static const std::map<std::string, long long> TENS = {
            {"twenty", 20}, {"thirty", 30}, {"forty", 40}, {"fifty", 50},
            {"sixty", 60}, {"seventy", 70}, {"eighty", 80}, {"ninety", 90}
        };
        static const std::map<std::string, long long> MAGNITUDES = {
            {"hundred", 100}, {"thousand", 1000}, {"million", 1000000}, {"billion", 1000000000}
        };

        const auto to_lower = [](std::string s) {
            std::transform(s.begin(), s.end(), s.begin(),
                           [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
            return s;
        };

        // Value of a hyphenated compound such as "twenty-one", or -1 if the word is
        // not a tens-units compound. The split is made at the first hyphen.
        const auto compound_value = [&](const std::string& w) -> long long {
            const size_t dash = w.find('-');
            if (dash == std::string::npos) {
                return -1;
            }
            const auto tens = TENS.find(w.substr(0, dash));
            const auto ones = ONES.find(w.substr(dash + 1));
            if (tens == TENS.end() || ones == ONES.end()) {
                return -1;
            }
            return tens->second + ones->second;
        };

        const auto is_number_word = [&](const std::string& w) {
            return compound_value(w) >= 0 || ONES.count(w) > 0 || TEENS.count(w) > 0 ||
                   TENS.count(w) > 0 || MAGNITUDES.count(w) > 0;
        };

        long long result = 0;  // sum of the completed thousand/million/billion groups
        long long segment = 0; // value accumulated since the last such magnitude word

        size_t i = start_index;
        while (i < words.size()) {
            const std::string word = to_lower(words[i]);
            const long long compound = compound_value(word);

            if (compound >= 0) {
                segment += compound;
            } else if (const auto one = ONES.find(word); one != ONES.end()) {
                segment += one->second;
            } else if (const auto teen = TEENS.find(word); teen != TEENS.end()) {
                segment += teen->second;
            } else if (const auto ten = TENS.find(word); ten != TENS.end()) {
                segment += ten->second;
            } else if (const auto mag = MAGNITUDES.find(word); mag != MAGNITUDES.end()) {
                if (mag->second == 100) {
                    // A bare "hundred" means "one hundred".
                    if (segment == 0) segment = 1;
                    segment *= 100;
                } else {
                    // A bare "thousand"/"million"/"billion" means "one ...".
                    result += (segment == 0 ? 1 : segment) * mag->second;
                    segment = 0;
                }
            } else if (word == "and" && i > start_index && i + 1 < words.size() &&
                       is_number_word(to_lower(words[i + 1]))) {
                // Connector inside a number ("one hundred and five"): contributes nothing.
            } else {
                // Not part of a number; stop without consuming this word.
                break;
            }
            ++i;
        }

        result += segment;

        // When nothing was consumed, result is still 0 and i == start_index.
        return {result, i};
    }

    /**
    * @brief Rewrites Chinese numerals and English number words in text as Arabic digits.
    *
    * The text is walked one UTF-8 character at a time (via split_utf8_string):
    *  - Chinese digit/unit characters are accumulated into a pending number, which is
    *    written out as soon as any other kind of character is reached. The
    *    accumulation rules are intentionally kept exactly as they were.
    *  - Runs of ASCII digits are copied through unchanged.
    *  - Runs of ASCII letters and '-' form one token that is handed to
    *    parse_english_numbers; tokens that are not number words are copied through.
    *  - Every other character is copied through unchanged.
    *
    * Known limitations — NOTE(review): confirm whether these are intended:
    *  - A pending Chinese number is only written when it is greater than zero, so a
    *    standalone "零"/"〇" (or a lone "萬"/"億") disappears from the output.
    *  - Consecutive Chinese digits without units are summed rather than read
    *    positionally.
    *  - Whitespace ends the pending English token, so multi-word phrases are converted
    *    word by word ("twenty one" becomes "20 1"); only hyphenated compounds such as
    *    "twenty-one" are combined.
    *
    * @param text The input string containing mixed Chinese/English text.
    * @return The text with recognised number words replaced by Arabic numerals.
    */
    std::string cn2an_transform(const std::string& text) {
        // The lookup tables never change, so build them once instead of on every call.
        // Digit map for Chinese characters (e.g., "一" -> 1)
        static const std::map<std::string, int> digit_map = {
            {"零", 0}, {"一", 1}, {"二", 2}, {"三", 3}, {"四", 4}, {"五", 5}, {"六", 6}, {"七", 7}, {"八", 8}, {"九", 9},
            {"〇", 0}, {"壹", 1}, {"貳", 2}, {"參", 3}, {"肆", 4}, {"伍", 5}, {"陸", 6}, {"柒", 7}, {"捌", 8}, {"玖", 9},
            {"兩", 2}
        };
        // Unit map for Chinese magnitude characters (e.g., "十" -> 10)
        static const std::map<std::string, int> unit_map = {
            {"十", 10}, {"百", 100}, {"千", 1000}, {"萬", 10000}, {"億", 100000000}
        };

        // The <cctype> classifiers have undefined behaviour for negative char values,
        // which a stray non-ASCII single byte would produce; always classify through
        // unsigned char.
        const auto is_ascii_digit = [](const std::string& ch) {
            return ch.length() == 1 && std::isdigit(static_cast<unsigned char>(ch[0])) != 0;
        };
        const auto is_word_char = [](const std::string& ch) {
            return ch.length() == 1 &&
                   (std::isalpha(static_cast<unsigned char>(ch[0])) != 0 || ch[0] == '-');
        };

        const std::vector<std::string> utf8_chars = split_utf8_string(text);
        std::string result_string;

        // Pending Chinese number.
        long long total_val = 0;   // completed 萬/億 groups
        long long section_val = 0; // digit still waiting for its unit
        long long current_val = 0; // value of the group currently being built

        // Pending English token(s) awaiting conversion.
        std::vector<std::string> english_words;

        // Writes the pending Chinese number (if it is non-zero) and resets it.
        const auto flush_chinese = [&]() {
            if (total_val > 0 || current_val > 0 || section_val > 0) {
                result_string += std::to_string(total_val + current_val + section_val);
                total_val = current_val = section_val = 0;
            }
        };

        // Converts the pending English words: the leading number words become one
        // Arabic number, any remaining words are copied through as they were.
        const auto flush_english = [&]() {
            if (english_words.empty()) {
                return;
            }
            const auto [num, end_idx] = parse_english_numbers(english_words, 0);
            if (end_idx > 0) {
                result_string += std::to_string(num);
            }
            for (size_t k = end_idx; k < english_words.size(); ++k) {
                result_string += english_words[k];
            }
            english_words.clear();
        };

        for (size_t j = 0; j < utf8_chars.size(); ++j) {
            const std::string& c = utf8_chars[j];

            // 1. Chinese digit or unit character.
            const auto digit_it = digit_map.find(c);
            const auto unit_it = unit_map.find(c);
            if (digit_it != digit_map.end() || unit_it != unit_map.end()) {
                flush_english(); // Chinese interrupts a pending English token

                if (digit_it != digit_map.end()) {
                    section_val = digit_it->second;
                    // A digit not followed by a unit is added to the group right away.
                    const bool unit_follows =
                        j + 1 < utf8_chars.size() && unit_map.count(utf8_chars[j + 1]) > 0;
                    if (!unit_follows) {
                        current_val += section_val;
                        section_val = 0;
                    }
                } else {
                    const int unit_val = unit_it->second;

                    // A leading "十"/"百"/"千" implies a "一" in front of it.
                    const bool digit_precedes =
                        j > 0 && digit_map.count(utf8_chars[j - 1]) > 0;
                    if (unit_val < 10000 && !digit_precedes && section_val == 0 && current_val == 0) {
                        section_val = 1;
                    }

                    if (unit_val < 10000) {
                        // Ten, hundred, thousand: scale the waiting digit.
                        current_val += section_val * unit_val;
                    } else {
                        // Ten thousand, hundred million: close the current group.
                        total_val += (current_val + section_val) * unit_val;
                        current_val = 0;
                    }
                    section_val = 0;
                }
                continue;
            }

            // 2. Existing Arabic numerals are copied through as one run.
            if (is_ascii_digit(c)) {
                flush_chinese();
                flush_english();
                while (j < utf8_chars.size() && is_ascii_digit(utf8_chars[j])) {
                    result_string += utf8_chars[j];
                    ++j;
                }
                --j; // the for-loop increment moves on to the first character after the run
                continue;
            }

            // 3. ASCII letters and hyphens form one English token.
            if (is_word_char(c)) {
                flush_chinese();
                std::string token;
                while (j < utf8_chars.size() && is_word_char(utf8_chars[j])) {
                    token += utf8_chars[j];
                    ++j;
                }
                --j; // stay on the last character of the token
                english_words.push_back(token);
                continue;
            }

            // 4. Anything else (space, punctuation, other scripts) ends both pending
            //    numbers and is copied through.
            flush_chinese();
            flush_english();
            result_string += c;
        }

        flush_chinese();
        flush_english();

        return result_string;
    }

    /**
    * @brief Returns every run of decimal digits found in text, in order of appearance.
    *
    * Before searching, every match of the configured exclusion pattern
    * (exclude_pattern_str) is removed from the text, so digits inside excluded
    * spans are not reported.
    *
    * @param text The text to scan.
    * @return The digit runs as strings; empty when there are none.
    * @throws std::regex_error if exclude_pattern_str is not a valid regular expression.
    */
    std::vector<std::string> extract_numbers_from_string(const std::string& text) {
        // The digit pattern is constant: compile it once, not on every call.
        static const std::regex num_pattern(R"(\d+)");

        std::string cleaned_text = text;
        // An empty exclusion pattern removes nothing, so skip compiling it.
        if (!this->exclude_pattern_str.empty()) {
            const std::regex exclude_pattern(this->exclude_pattern_str);
            cleaned_text = std::regex_replace(text, exclude_pattern, "");
        }

        std::vector<std::string> numbers;
        const std::sregex_iterator end;
        for (std::sregex_iterator it(cleaned_text.begin(), cleaned_text.end(), num_pattern); it != end; ++it) {
            numbers.push_back(it->str());
        }
        return numbers;
    }

    // Scans the query for known keywords and numbers.
    // The returned object has four array members:
    //   "area_id" - every area phrase from area2NL that occurs in the text,
    //   "switch"  - every entry of switch_keywords that occurs in the text,
    //   "level"   - every entry of level_keywords that occurs in the text,
    //   "num"     - the digit runs reported by extract_numbers_from_string.
    json get_keywords(const std::string& text) {
        const auto occurs = [&text](const std::string& needle) {
            return text.find(needle) != std::string::npos;
        };

        // Collects, in list order, the candidates that occur in the text.
        const auto hits_from = [&occurs](const std::vector<std::string>& candidates) {
            json hits = json::array();
            for (const auto& candidate : candidates) {
                if (occurs(candidate)) {
                    hits.push_back(candidate);
                }
            }
            return hits;
        };

        json area_hits = json::array();
        for (const auto& area_entry : area2NL) {
            for (const auto& phrase : area_entry.second) {
                if (occurs(phrase)) {
                    area_hits.push_back(phrase);
                }
            }
        }

        json found;
        found["area_id"] = area_hits;
        found["switch"] = hits_from(switch_keywords);
        found["level"] = hits_from(level_keywords);
        found["num"] = extract_numbers_from_string(text);
        return found;
    }

    // A utility function to find the longest string in a JSON array.
    std::string find_longest_string(const json& arr) {
        if (arr.empty()) {
            return "";
        }
        std::string longest_str = arr[0].get<std::string>();
        for (const auto& val : arr) {
            std::string current_str = val.get<std::string>();
            if (current_str.length() > longest_str.length()) {
                longest_str = current_str;
            }
        }
        return longest_str;
    }

public:
    // This is the main public method of the Postprocessor
    void load_data() {
        // Load response_template.json
        std::ifstream cls_file("response_template.json");
        if (!cls_file.is_open()) {
            std::cerr << "[ERROR] response_template.json file not found. Please ensure it's in the same directory." << std::endl;
            return;
        }

        json temp_json;
        try {
            cls_file >> temp_json;
            for (json::iterator it = temp_json.begin(); it != temp_json.end(); ++it) {
                cls_map[it.key()] = it.value().get<std::string>();
            }
        } catch (const json::parse_error& e) {
            std::cerr << "[ERROR] JSON parse error in response_template.json: " << e.what() << std::endl;
        }

        // Load keywords_data.json
        std::ifstream data_file("keywords_data.json");
        if (!data_file.is_open()) {
            std::cerr << "[ERROR] keywords_data.json file not found. Please ensure it's in the same directory." << std::endl;
            return;
        }

        json data_json;
        try {
            data_file >> data_json;

            // Read and populate area2NL
            for (const auto& [key, value] : data_json["area2NL"].items()) {
                area2NL[key] = value.get<std::vector<std::string>>();
            }
            // Read and populate maxmin_val_map
            for (const auto& [key, value] : data_json["maxmin_val_map"].items()) {
                maxmin_val_map[key] = value.get<std::map<std::string, std::string>>();
            }
            // Read and populate uncommon_area_NL
            for (const auto& val : data_json["uncommon_area_NL"]) {
                uncommon_area_NL.insert(val.get<std::string>());
            }
            // Read and populate st_open
            for (const auto& val : data_json["st_open"]) {
                st_open.insert(val.get<std::string>());
            }
            // Read and populate st_close
            for (const auto& val : data_json["st_close"]) {
                st_close.insert(val.get<std::string>());
            }
            // Read and populate default_value
            for (const auto& [key, value] : data_json["default_value"].items()) {
                default_value[key] = value.get<std::string>();
            }
            // Read and populate switch_keywords
            switch_keywords = data_json["switch_keywords"].get<std::vector<std::string>>();
            // Read and populate level_keywords
            level_keywords = data_json["level_keywords"].get<std::vector<std::string>>();
            // Read and populate exclude_pattern_str
            exclude_pattern_str = data_json["exclude_pattern_str"].get<std::string>();
            str2remove = data_json["str2remove"].get<std::vector<std::string>>();
            max_level_keys = data_json["max_level"].get<std::vector<std::string>>();
            min_level_keys = data_json["min_level"].get<std::vector<std::string>>();

            // Convert and populate NL2area based on area2NL
            for (const auto& [area_id, nl_phrases] : area2NL) {
                for (const auto& nl_phrase : nl_phrases) {
                    NL2area[nl_phrase] = area_id;
                }
            }

        } catch (const json::parse_error& e) {
            std::cerr << "[ERROR] JSON parse error in keywords_data.json: " << e.what() << std::endl;
        }
    }
    // Set verbose variable to public
    bool verbose = false;
    
    // This is the main public method of the Postprocessor
    std::string postprocess(const std::string& query, const std::string& pred_class) {
        if (verbose) {
            std::cerr << "[DEBUG] input query: " << query << ", pred_class: " << pred_class << std::endl;
        }
        
        std::string ori_func_name = pred_class.substr(0, pred_class.find('%'));
    
        if (cls_map.find(pred_class) == cls_map.end()) {
            // std::cerr << "[ERROR] Key not found in cls_map: " << pred_class << std::endl;
            json empty_json;
            return empty_json.dump();
        }
    
        std::string func_tmp_str = cls_map[pred_class];
        std::string ori_query = query;
        std::string new_query = cn2an_transform(query);

        // query transform
        std::transform(new_query.begin(), new_query.end(), new_query.begin(), 
                   [](unsigned char c){ return std::tolower(c); });
        
        for (auto s:str2remove){
            size_t pos = new_query.find(s);
            if (pos != std::string::npos) {
                new_query.erase(pos, s.length());
            }
        }
        std::replace(new_query.begin(), new_query.end(), '-', ' ');
        new_query = ReplaceAll(new_query, std::string("first level"), std::string("1"));
        new_query = ReplaceAll(new_query, std::string("second level"), std::string("2"));
        new_query = ReplaceAll(new_query, std::string("third level"), std::string("3"));
        new_query = ReplaceAll(new_query, std::string("fourth level"), std::string("4"));
        new_query = ReplaceAll(new_query, std::string("fifth level"), std::string("5"));
        new_query = ReplaceAll(new_query, std::string("sixth level"), std::string("6"));
        new_query = ReplaceAll(new_query, std::string("seventh level"), std::string("7"));
        new_query = ReplaceAll(new_query, std::string("eighth level"), std::string("8"));
        
    
        if (verbose) {
            std::cerr << "[DEBUG] new_query: " << new_query << std::endl;
        }

        json keywords = get_keywords(new_query);
        if (verbose) {
            std::cerr << "[DEBUG] keywords: " << keywords.dump() << std::endl;
        }
        
        json func_tmp = json::parse(func_tmp_str);

        std::set<std::string> set_keywords;
        for (const auto& kw : keywords["area_id"]) {
            set_keywords.insert(kw.get<std::string>());
        }

        if (!keywords["area_id"].empty()) {
            std::string area_kw = find_longest_string(keywords["area_id"]);
            if (verbose) {
                std::cerr << "[DEBUG] area_kw: " << area_kw << std::endl;
            }
            if (pred_class.find("SLIDING_DOOR") != std::string::npos) {
                if (set_keywords.count("左邊") || set_keywords.count("左側") || set_keywords.count("左") || set_keywords.count("left")) {
                    func_tmp["areaId"] = NL2area["左邊"];
                } else if (set_keywords.count("右邊") || set_keywords.count("右側") || set_keywords.count("右") || set_keywords.count("right")) {
                    func_tmp["areaId"] = NL2area["右邊"];
                }
            } else if (pred_class.find("HVAC_DEFROSTER") != std::string::npos) {
                if (set_keywords.count("前除霜") || set_keywords.count("front defroster")) {
                    func_tmp["areaId"] = NL2area["前除霜"];
                } else if (set_keywords.count("後除霜") || set_keywords.count("rear defroster")) {
                    func_tmp["areaId"] = NL2area["後除霜"];
                }
            } else if (pred_class.find("POWER_SUNSHADE") != std::string::npos) {
                if (set_keywords.count("頂棚") || set_keywords.count("roof")) {
                    func_tmp["areaId"] = NL2area["頂棚"];
                } else if (set_keywords.count("右邊") || set_keywords.count("右側") || set_keywords.count("右") || set_keywords.count("right")) {
                    func_tmp["areaId"] = "SEAT_ROW_2_RIGHT";
                } else if (set_keywords.count("左邊") || set_keywords.count("左側") || set_keywords.count("左") || set_keywords.count("left")) {
                    func_tmp["areaId"] = "SEAT_ROW_2_LEFT";
                }
            } 
            bool is_uncommon = false;
            for (const auto& uc_kw : uncommon_area_NL) {
                if (area_kw == uc_kw) {
                    is_uncommon = true;
                    break;
                }
            }
            if (!is_uncommon) {
                if (NL2area.count(area_kw) && func_tmp["areaId"]=="") {
                    func_tmp["areaId"] = NL2area[area_kw];
                }
            }
            if (func_tmp["areaId"]=="SEAT_ROW_1" || func_tmp["areaId"]=="SEAT_ROW_2" || func_tmp["areaId"]=="SEAT_ROW_3"){
                if (new_query.find("left") != std::string::npos){
                    func_tmp["areaId"] = std::string(func_tmp["areaId"])+"_LEFT";
                }
                else if (new_query.find("right") != std::string::npos){
                    func_tmp["areaId"] = std::string(func_tmp["areaId"])+"_RIGHT";
                }
            }
            
        }

        if (!keywords["num"].empty() && func_tmp.count("value") && func_tmp["value"].is_string() && func_tmp["value"].get<std::string>() == "") {
            func_tmp["value"] = keywords["num"][0].get<std::string>();
        } else if (!keywords["level"].empty() && func_tmp.count("value") && func_tmp["value"].is_string() && func_tmp["value"].get<std::string>() == "" && maxmin_val_map.count(ori_func_name)) {
            for (auto &m:max_level_keys){
                if(std::find(keywords["level"].begin(), keywords["level"].end(), m) != keywords["level"].end())
                    func_tmp["value"] = maxmin_val_map[ori_func_name]["max"];
            }
            for (auto &m:min_level_keys) {
                if(std::find(keywords["level"].begin(), keywords["level"].end(), m) != keywords["level"].end())
                    func_tmp["value"] = maxmin_val_map[ori_func_name]["min"];
            }
        } else if (!keywords["switch"].empty() && ori_func_name == "POWER_SUNSHADE" && func_tmp.count("value") && func_tmp["value"].is_string() && func_tmp["value"].get<std::string>() == "") {
            bool open_found = false;
            bool close_found = false;
            for (const auto& op : keywords["switch"]) {
                if (st_open.count(op)) open_found = true;
                if (st_close.count(op)) close_found = true;
            }
            if (open_found && new_query.find("開大") == std::string::npos) {
                func_tmp["value"] = "100";
            } else if (close_found && new_query.find("關小") == std::string::npos) {
                func_tmp["value"] = "0";
            }
        }

        if (func_tmp.count("value") && func_tmp["value"].is_string() && func_tmp["value"].get<std::string>() == "") {
            if (pred_class.find("increas") != std::string::npos || pred_class.find("decreas") != std::string::npos || pred_class.find("reduc") != std::string::npos || pred_class.find("reduc") != std::string::npos) {
                func_tmp["value"] = "1";
            } else if (ori_func_name == "HVAC_TEMPERATURE_SET") {
                if (new_query.find("熱") != std::string::npos || new_query.find("hot") != std::string::npos) {
                    func_tmp["value"] = maxmin_val_map[ori_func_name]["min"];
                } else if (new_query.find("冷") != std::string::npos || new_query.find("凍") != std::string::npos || new_query.find("cold") != std::string::npos) {
                    func_tmp["value"] = maxmin_val_map[ori_func_name]["max"];
                }
            } else if (default_value.count(ori_func_name)) {
                func_tmp["value"] = default_value[ori_func_name];
            }
        }

        json final_json;
        if (ori_func_name == "set_seat_mode") {
            final_json.push_back({{"name", ori_func_name}, {"arguments", func_tmp}});
        } else if (ori_func_name == "get_hhtd_info" || ori_func_name == "get_vehicle_info") {
            func_tmp["query"] = ori_query;
            final_json.push_back({{"name", ori_func_name}, {"arguments", func_tmp}});
        } else {
            final_json.push_back({{"name", "control_car_properties"}, {"arguments", func_tmp}});
        }

        std::string result = final_json.dump(4);
        if (verbose) {
            std::cerr << "[DEBUG] Final output: " << result << std::endl;
        }

        return result;
    }
};

// C ABI entry points for ctypes (or any other FFI) callers.
extern "C" {
    /**
     * @brief Runs Postprocessor::postprocess on C strings.
     *
     * The processor is created and its data loaded on the first call.
     *
     * @param query_c      NUL-terminated user query; must not be null.
     * @param pred_class_c NUL-terminated predicted class; must not be null.
     * @return A NUL-terminated JSON string allocated with malloc, or nullptr on
     *         failure (null argument, exception, or out of memory; the reason is
     *         written to stderr). The caller owns the buffer and releases it with
     *         postprocess_free_c() or the C library's free().
     */
    const char* postprocess_c(const char* query_c, const char* pred_class_c) {
        // Constructing std::string from a null pointer is undefined behaviour.
        if (query_c == nullptr || pred_class_c == nullptr) {
            std::cerr << "ERROR: Null argument passed to postprocess_c." << std::endl;
            return nullptr;
        }

        // No exception may unwind through a C ABI frame.
        try {
            static Postprocessor processor;
            static bool data_loaded = false;

            // Load data only on the first call
            if (!data_loaded) {
                processor.load_data();
                data_loaded = true;
            }

            const std::string result = processor.postprocess(query_c, pred_class_c);

            // malloc rather than new[]: it reports failure by returning null and the
            // buffer can be released from C or Python with plain free().
            char* c_str = static_cast<char*>(std::malloc(result.size() + 1));
            if (c_str == nullptr) {
                std::cerr << "ERROR: Memory allocation failed in postprocess_c." << std::endl;
                return nullptr;
            }
            std::memcpy(c_str, result.c_str(), result.size() + 1); // includes the NUL
            return c_str;
        } catch (const std::exception& e) {
            std::cerr << "ERROR: postprocess_c failed: " << e.what() << std::endl;
        } catch (...) {
            std::cerr << "ERROR: postprocess_c failed with an unknown exception." << std::endl;
        }
        return nullptr;
    }

    /**
     * @brief Releases a string returned by postprocess_c. Passing nullptr is allowed.
     */
    void postprocess_free_c(const char* ptr) {
        std::free(const_cast<char*>(ptr));
    }
}