Upload ChatIPC.cpp
Browse files- ChatIPC.cpp +43 -8
ChatIPC.cpp
CHANGED
|
@@ -265,16 +265,51 @@ static std::vector<std::string> tokenize_whitespace(const std::string &s){
|
|
| 265 |
return out;
|
| 266 |
}
|
| 267 |
|
| 268 |
-
static std::vector<std::string>
|
| 269 |
-
std::vector<std::string> out;
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
} else {
|
| 274 |
-
|
| 275 |
}
|
| 276 |
}
|
| 277 |
-
|
|
|
|
| 278 |
return out;
|
| 279 |
}
|
| 280 |
|
|
@@ -367,7 +402,7 @@ static void build_def_tokens_cache(){
|
|
| 367 |
|
| 368 |
auto &defs = global_def_tokens_cache[key];
|
| 369 |
for (const auto &def : entry.definitions){
|
| 370 |
-
auto toks =
|
| 371 |
defs.insert(defs.end(), toks.begin(), toks.end());
|
| 372 |
}
|
| 373 |
}
|
|
|
|
| 265 |
return out;
|
| 266 |
}
|
| 267 |
|
| 268 |
+
/// Splits a dictionary definition string into lower-cased alphanumeric tokens.
///
/// Token boundaries are:
///   * every byte for which std::isalnum() is false -- this covers '_', '-',
///     '/', whitespace and all other punctuation; such bytes are dropped;
///   * camelCase / PascalCase transitions: an upper-case letter starts a new
///     token when the previous character is a lower-case letter or a digit
///     ("camelCase" -> "camel", "case"; "abc123Def" -> "abc123", "def");
///   * the end of an acronym run: an upper-case letter that follows another
///     upper-case letter and precedes a lower-case letter starts a new token
///     ("HTTPServer" -> "http", "server").
///
/// @param s  Text to tokenize; it is examined one byte at a time.
/// @return   The tokens in order of appearance, each non-empty and lower-cased
///           with std::tolower(). An empty or separator-only input yields an
///           empty vector.
static std::vector<std::string> tokenize_dictionary_expansion(const std::string &s) {
    std::vector<std::string> out;
    std::string cur;

    // Emit the token collected so far (if any) and start a fresh one.
    auto flush = [&]() {
        if (!cur.empty()) {
            out.push_back(cur);
            cur.clear();
        }
    };

    for (size_t i = 0; i < s.size(); ++i) {
        // <cctype> classifiers require a value representable as unsigned char.
        const unsigned char uc = static_cast<unsigned char>(s[i]);

        // Anything that is not a letter or digit separates tokens and is not
        // part of any token itself.
        if (std::isalnum(uc) == 0) {
            flush();
            continue;
        }

        // `cur` is non-empty only if s[i - 1] was appended to it on the
        // previous iteration, so reading s[i - 1] here is always in bounds.
        if (std::isupper(uc) != 0 && !cur.empty()) {
            const unsigned char prev = static_cast<unsigned char>(s[i - 1]);
            const bool prev_lower_or_digit =
                (std::islower(prev) != 0) || (std::isdigit(prev) != 0);
            const bool prev_upper = std::isupper(prev) != 0;
            const bool next_lower =
                (i + 1 < s.size()) &&
                (std::islower(static_cast<unsigned char>(s[i + 1])) != 0);

            // "fooBar" splits before 'B'; "HTTPServer" splits before 'S'.
            if (prev_lower_or_digit || (prev_upper && next_lower)) {
                flush();
            }
        }

        cur.push_back(static_cast<char>(std::tolower(uc)));
    }

    // Emit the trailing token, if the input did not end on a separator.
    flush();
    return out;
}
|
| 315 |
|
|
|
|
| 402 |
|
| 403 |
auto &defs = global_def_tokens_cache[key];
|
| 404 |
for (const auto &def : entry.definitions){
|
| 405 |
+
auto toks = tokenize_dictionary_expansion(def);
|
| 406 |
defs.insert(defs.end(), toks.begin(), toks.end());
|
| 407 |
}
|
| 408 |
}
|