technician1 committed on
Commit
fada704
·
verified ·
1 Parent(s): f68f6fa

Upload ChatIPC.cpp

Browse files
Files changed (1) hide show
  1. ChatIPC.cpp +43 -8
ChatIPC.cpp CHANGED
@@ -265,16 +265,51 @@ static std::vector<std::string> tokenize_whitespace(const std::string &s){
265
  return out;
266
  }
267
 
268
- static std::vector<std::string> tokenize_non_alnum(const std::string &s){
269
- std::vector<std::string> out; std::string cur;
270
- for (char ch : s){
271
- if (std::isalnum(static_cast<unsigned char>(ch)) || ch=='-' || ch=='\''){
272
- cur.push_back(to_low(ch));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  } else {
274
- if (!cur.empty()){ out.push_back(cur); cur.clear(); }
275
  }
276
  }
277
- if (!cur.empty()) out.push_back(cur);
 
278
  return out;
279
  }
280
 
@@ -367,7 +402,7 @@ static void build_def_tokens_cache(){
367
 
368
  auto &defs = global_def_tokens_cache[key];
369
  for (const auto &def : entry.definitions){
370
- auto toks = tokenize_non_alnum(def);
371
  defs.insert(defs.end(), toks.begin(), toks.end());
372
  }
373
  }
 
265
  return out;
266
  }
267
 
268
// Splits a string into lower-cased alphanumeric tokens, additionally breaking
// identifiers written in camelCase / PascalCase.
//
// Token boundaries:
//   * every non-alphanumeric character ends the current token and is dropped
//     (this covers '_', '-', '/', whitespace, apostrophes and punctuation);
//   * an uppercase letter starts a new token when the previous character is a
//     lowercase letter or a digit:        "fooBar"     -> "foo", "bar"
//                                         "v2Beta"     -> "v2", "beta"
//   * an uppercase letter starts a new token when the previous character is
//     uppercase and the next one is lowercase, so acronym runs stay whole:
//                                         "HTTPServer" -> "http", "server"
//
// Classification and case folding use the <cctype> functions, so only
// single-byte characters recognised by the current C locale are kept.
//
// @param s  Text to tokenize.
// @return   Non-empty, lower-cased tokens in order of appearance.
static std::vector<std::string> tokenize_dictionary_expansion(const std::string &s) {
    std::vector<std::string> out;
    std::string cur;

    // Moves the token under construction (if any) into the result.
    auto flush = [&]() {
        if (!cur.empty()) {
            out.push_back(cur);
            cur.clear();
        }
    };

    for (std::size_t i = 0; i < s.size(); ++i) {
        // <cctype> functions require a value representable as unsigned char.
        const unsigned char uc = static_cast<unsigned char>(s[i]);

        // Separators never become part of a token; they only terminate one.
        if (std::isalnum(uc) == 0) {
            flush();
            continue;
        }

        // `cur` being non-empty guarantees i >= 1 and that s[i - 1] was an
        // alphanumeric character appended to the current token.
        if (std::isupper(uc) != 0 && !cur.empty()) {
            const unsigned char prev = static_cast<unsigned char>(s[i - 1]);
            const bool prev_lower_or_digit =
                (std::islower(prev) != 0) || (std::isdigit(prev) != 0);
            const bool prev_upper = std::isupper(prev) != 0;
            const bool next_lower =
                (i + 1 < s.size()) &&
                (std::islower(static_cast<unsigned char>(s[i + 1])) != 0);

            // "aB" / "1B" boundary, or the last capital of an acronym run
            // that begins the following word ("HTTPServer": split before 'S').
            if (prev_lower_or_digit || (prev_upper && next_lower)) {
                flush();
            }
        }

        cur.push_back(static_cast<char>(std::tolower(uc)));
    }

    flush();
    return out;
}
315
 
 
402
 
403
  auto &defs = global_def_tokens_cache[key];
404
  for (const auto &def : entry.definitions){
405
+ auto toks = tokenize_dictionary_expansion(def);
406
  defs.insert(defs.end(), toks.begin(), toks.end());
407
  }
408
  }