technician1 committed on
Commit
275585d
verified
1 Parent(s): 8195d90

Upload 2 files

Browse files
Files changed (2) hide show
  1. ChatIPC.cpp +522 -0
  2. ChatIPC.exe +3 -0
ChatIPC.cpp ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // chatipc_modular.cpp
2
+ // Compile: g++ -std=c++17 -O2 -fopenmp -o chatipc_modular chatipc_modular.cpp
3
+ // Requires dictionary.cpp providing: extern unsigned char dictionary_json[]; extern unsigned int dictionary_json_len;
4
+
5
+ #include <algorithm>
6
+ #include <atomic>
7
+ #include <cctype>
8
+ #include <cinttypes>
9
+ #include <cstring>
10
+ #include <fstream>
11
+ #include <iostream>
12
+ #include <iterator>
13
+ #include <map>
14
+ #include <mutex>
15
+ #include <optional>
16
+ #include <sstream>
17
+ #include <stdexcept>
18
+ #include <string>
19
+ #include <thread>
20
+ #include <unordered_map>
21
+ #include <unordered_set>
22
+ #include <vector>
23
+
24
#ifdef _OPENMP
#include <omp.h>
#else
// Serial stand-ins so the rest of the file can call the OpenMP query
// functions even when the compiler was not invoked with OpenMP enabled.

/// Reports a single available thread.
inline int omp_get_max_threads()
{
    return 1;
}

/// Reports thread id 0.
inline int omp_get_thread_num()
{
    return 0;
}
#endif
30
+
31
+ extern unsigned char dictionary_json[]; // provide dictionary.cpp to embed dictionary JSON bytes
32
+ extern unsigned int dictionary_json_len;
33
+
34
+ // --------------------------- Short utility functions ----------------------
35
+
36
/// Returns true when `c` is classified as whitespace by std::isspace.
/// The char is widened through unsigned char before the call.
static inline bool is_space(char c)
{
    const unsigned char as_uchar = static_cast<unsigned char>(c);
    return std::isspace(as_uchar) != 0;
}

/// Returns the std::tolower mapping of `c`, narrowed back to char.
static inline char to_low(char c)
{
    const int lowered = std::tolower(static_cast<unsigned char>(c));
    return static_cast<char>(lowered);
}

/// Flushes the given output stream.
static inline void safe_flush(std::ostream &os)
{
    os.flush();
}
39
+
40
+ // Tokenize by whitespace
41
/// Splits `s` into whitespace-separated tokens using stream extraction.
/// Returns the tokens in order of appearance; an empty or all-whitespace
/// input yields an empty vector.
static std::vector<std::string> tokenize_whitespace(const std::string &s)
{
    std::istringstream stream(s);
    const std::istream_iterator<std::string> first(stream);
    const std::istream_iterator<std::string> last;
    return std::vector<std::string>(first, last);
}
48
+
49
+ // Tokenize by non-alphanumeric characters (for definitions)
50
/// Splits `s` into lower-cased words.
/// A word is a maximal run of characters that are alphanumeric (per
/// std::isalnum), '-' or '\''; every other character acts as a separator.
static std::vector<std::string> tokenize_non_alnum(const std::string &s)
{
    std::vector<std::string> words;
    std::string pending;

    // Emits the word collected so far, if any.
    const auto flush_pending = [&words, &pending]() {
        if (pending.empty()) return;
        words.push_back(pending);
        pending.clear();
    };

    for (const char ch : s) {
        const unsigned char uc = static_cast<unsigned char>(ch);
        const bool is_word_char = std::isalnum(uc) || ch == '-' || ch == '\'';
        if (is_word_char) {
            pending.push_back(static_cast<char>(std::tolower(uc)));
        } else {
            flush_pending();
        }
    }
    flush_pending();
    return words;
}
62
+
63
+ // --------------------------- String interning (short methods) --------------
64
+
65
/// Pool of unique strings. `intern` hands out a pointer to the pooled copy,
/// so equal strings always map to the same address. Element addresses of
/// std::unordered_set stay valid while the element is in the set.
struct StringInterner {
    std::unordered_set<std::string> pool;
    std::mutex m;

    /// Returns a pointer to the pooled copy of `s`, inserting it on first use.
    /// Serialized through `m`.
    const std::string* intern(const std::string &s){
        std::lock_guard<std::mutex> lk(m);
        // insert() returns the existing element when `s` is already pooled.
        return &*pool.insert(s).first;
    }
};

// --------------------------- Knowledge base (short methods) --------------

using StrPtr = const std::string*;

/// Hashes a string pointer by the content of the pointed-to string.
struct PtrHash { size_t operator()(StrPtr p) const noexcept { return std::hash<std::string>()(*p); } };

/// Compares string pointers by the content of the pointed-to strings.
struct PtrEq { bool operator()(StrPtr a, StrPtr b) const noexcept { return *a == *b; } };

using NextSet = std::vector<StrPtr>;

/// Maps each token to the list of distinct tokens that have been seen to
/// follow it (insertion order is preserved in the list).
struct KnowledgeBase {
    StringInterner interner;
    std::unordered_map<StrPtr, NextSet, PtrHash, PtrEq> next;
    std::mutex m;

    /// Records that `v` may follow `k`. Duplicates (by string content) are
    /// ignored. Serialized through `m`.
    void add_pair_interned(StrPtr k, StrPtr v){
        std::lock_guard<std::mutex> lk(m);
        auto &followers = next[k];
        for (StrPtr existing : followers) if (*existing == *v) return;
        followers.push_back(v);
    }

    /// Interns both strings and records that `v` may follow `k`.
    void add_pair(const std::string &k, const std::string &v){
        StrPtr kp = interner.intern(k);
        StrPtr vp = interner.intern(v);
        add_pair_interned(kp, vp);
    }

    /// Returns a copy of the follower list for `k`, or std::nullopt when `k`
    /// has no entry. Does not take `m`.
    std::optional<NextSet> lookup_by_string(const std::string &k) const {
        // The map hashes and compares keys by string content, so the address
        // of the caller's string is a valid probe; no linear scan is needed.
        return lookup_by_ptr(&k);
    }

    /// Returns a copy of the follower list for the string `*k`, or
    /// std::nullopt when there is no entry. Does not take `m`.
    std::optional<NextSet> lookup_by_ptr(StrPtr k) const {
        const auto it = next.find(k);
        if (it == next.end()) return std::nullopt;
        return it->second;
    }
};
110
+
111
+ // --------------------------- Small JSON parse helpers ----------------------
112
+
113
/// True when `i` is a valid index into a sequence of length `n`.
static inline bool json_valid_index(size_t i, size_t n){ return i < n; }

/// Appends the UTF-8 encoding of code point `cp` to `out`.
static void json_append_utf8(std::string &out, unsigned long cp){
    if (cp < 0x80){
        out.push_back(static_cast<char>(cp));
    } else if (cp < 0x800){
        out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000){
        out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
        out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

/// Reads exactly four hex digits starting at `pos` into `value`.
/// Returns false (leaving the input position to the caller) when fewer than
/// four characters remain or any of them is not a hex digit.
static bool json_read_hex4(const std::string &text, size_t pos, unsigned long &value){
    if (pos > text.size() || text.size() - pos < 4) return false;
    unsigned long acc = 0;
    for (size_t k = 0; k < 4; ++k){
        const unsigned char c = static_cast<unsigned char>(text[pos + k]);
        if (!std::isxdigit(c)) return false;
        const int digit = std::isdigit(c) ? (c - '0') : (std::tolower(c) - 'a' + 10);
        acc = acc * 16 + static_cast<unsigned long>(digit);
    }
    value = acc;
    return true;
}

/// Parses a double-quoted string starting at text[i].
///
/// On entry `i` must index the opening quote; on return `i` is one past the
/// closing quote (or at end of text when the string is unterminated, in which
/// case whatever was collected is returned).
///
/// Escapes: \n \t \r \b \f map to their control characters, \uXXXX (including
/// surrogate pairs) is decoded to UTF-8, and any other escaped character is
/// taken literally (covers \" \\ \/). A \u that is not followed by four hex
/// digits is kept lenient and yields a literal 'u'.
///
/// @throws std::runtime_error when text[i] is not a double quote.
static std::string parse_quoted_string(const std::string &text, size_t &i){
    const size_t n = text.size();
    if (!json_valid_index(i, n) || text[i] != '"') throw std::runtime_error("expected '\"'");
    ++i;
    std::string out;
    while (json_valid_index(i, n)){
        const char c = text[i++];
        if (c == '"') break;
        if (c != '\\'){ out.push_back(c); continue; }
        if (!json_valid_index(i, n)) break; // dangling backslash at end of input
        const char e = text[i++];
        switch (e){
            case 'n': out.push_back('\n'); break;
            case 't': out.push_back('\t'); break;
            case 'r': out.push_back('\r'); break;
            case 'b': out.push_back('\b'); break;
            case 'f': out.push_back('\f'); break;
            case 'u': {
                unsigned long cp = 0;
                if (!json_read_hex4(text, i, cp)){ out.push_back(e); break; }
                i += 4;
                // A high surrogate followed by \uDC00..\uDFFF forms one code point.
                if (cp >= 0xD800 && cp <= 0xDBFF && i + 1 < n && text[i] == '\\' && text[i + 1] == 'u'){
                    unsigned long lo = 0;
                    if (json_read_hex4(text, i + 2, lo) && lo >= 0xDC00 && lo <= 0xDFFF){
                        cp = 0x10000 + ((cp - 0xD800) << 10) + (lo - 0xDC00);
                        i += 6;
                    }
                }
                // Unpaired surrogates are not encodable; use U+FFFD instead.
                if (cp >= 0xD800 && cp <= 0xDFFF) cp = 0xFFFD;
                json_append_utf8(out, cp);
                break;
            }
            default: out.push_back(e); break;
        }
    }
    return out;
}
132
+
133
/// Advances `i` past any run of whitespace (per std::isspace) in `s`.
/// Leaves `i` untouched when it is already out of range or points at a
/// non-whitespace character.
static void skip_spaces(const std::string &s, size_t &i)
{
    const size_t len = s.size();
    for (; i < len; ++i) {
        if (std::isspace(static_cast<unsigned char>(s[i])) == 0) return;
    }
}
136
+
137
+ // Very small JSON-like parser tailored to dictionary_json structure
138
+ static std::unordered_map<std::string,std::string> parse_dictionary_json(){
139
+ std::unordered_map<std::string,std::string> dict;
140
+ if (dictionary_json_len == 0) return dict;
141
+ std::string text; text.reserve(dictionary_json_len + 1);
142
+ for (unsigned int b=0; b < dictionary_json_len; ++b) text.push_back(static_cast<char>(dictionary_json[b]));
143
+ size_t i = 0;
144
+ skip_spaces(text,i);
145
+ if (!json_valid_index(i,text.size()) || text[i] != '{') return dict;
146
+ ++i;
147
+ while (true){
148
+ skip_spaces(text,i);
149
+ if (!json_valid_index(i,text.size())) break;
150
+ if (text[i] == '}'){ ++i; break; }
151
+ std::string key = parse_quoted_string(text,i);
152
+ skip_spaces(text,i);
153
+ if (!json_valid_index(i,text.size()) || text[i] != ':') break;
154
+ ++i;
155
+ skip_spaces(text,i);
156
+ std::string val;
157
+ if (json_valid_index(i,text.size()) && text[i] == '"') val = parse_quoted_string(text,i);
158
+ else {
159
+ size_t start = i;
160
+ while (json_valid_index(i,text.size()) && text[i] != ',' && text[i] != '}') ++i;
161
+ val = text.substr(start, i-start);
162
+ }
163
+ dict.emplace(std::move(key), std::move(val));
164
+ skip_spaces(text,i);
165
+ if (json_valid_index(i,text.size()) && text[i] == ','){ ++i; continue; }
166
+ if (json_valid_index(i,text.size()) && text[i] == '}'){ ++i; break; }
167
+ }
168
+ return dict;
169
+ }
170
+
171
+ // --------------------------- Build definition index (small funcs) ---------
172
+
173
+ static std::unordered_set<std::string> def_tokens_from_text(const std::string &s){
174
+ auto toks = tokenize_non_alnum(s);
175
+ return std::unordered_set<std::string>(toks.begin(), toks.end());
176
+ }
177
+
178
/// Builds, for every word in `direct`, the set of tokens reachable by
/// following definitions up to `depth` levels, and stores it in `out`.
///
/// Level 1 is the word's own definition tokens; each further level adds the
/// definition tokens of the tokens discovered at the previous level. With
/// `depth <= 1` the direct sets are copied unchanged. Words already present
/// in `out` are left as they are (emplace does not overwrite).
static void expand_def_index(const std::unordered_map<std::string,std::unordered_set<std::string>> &direct,
                             std::unordered_map<std::string,std::unordered_set<std::string>> &out,
                             int depth)
{
    for (const auto &entry : direct) {
        std::unordered_set<std::string> reachable = entry.second;

        // Tokens discovered at the most recent level; only these need expanding.
        std::vector<std::string> wave(reachable.begin(), reachable.end());
        int level = 1;
        while (level < depth && !wave.empty()) {
            std::vector<std::string> discovered;
            for (const std::string &w : wave) {
                const auto hit = direct.find(w);
                if (hit == direct.end()) continue;
                for (const std::string &tok : hit->second) {
                    const bool is_new = reachable.insert(tok).second;
                    if (is_new) discovered.push_back(tok);
                }
            }
            wave = std::move(discovered);
            ++level;
        }

        out.emplace(entry.first, std::move(reachable));
    }
}
203
+
204
+ static std::unordered_map<std::string,std::unordered_set<std::string>>
205
+ build_definition_index(int depth)
206
+ {
207
+ std::unordered_map<std::string,std::unordered_set<std::string>> out;
208
+ if (depth <= 0) return out;
209
+ auto raw = parse_dictionary_json();
210
+ std::unordered_map<std::string,std::unordered_set<std::string>> direct;
211
+ for (auto &pr : raw) direct.emplace(pr.first, def_tokens_from_text(pr.second));
212
+ expand_def_index(direct, out, depth);
213
+ return out;
214
+ }
215
+
216
+ // --------------------------- Similarity helpers (very small) ----------------
217
+
218
/// Jaccard similarity |A ∩ B| / |A ∪ B| of two string sets.
/// Two empty sets are defined to have similarity 1.0.
static double jaccard_similarity(const std::unordered_set<std::string> &A,
                                 const std::unordered_set<std::string> &B)
{
    if (A.empty() && B.empty()) return 1.0;

    // Walk the smaller set and probe the larger one.
    const bool a_is_smaller = A.size() < B.size();
    const std::unordered_set<std::string> &walked = a_is_smaller ? A : B;
    const std::unordered_set<std::string> &probed = a_is_smaller ? B : A;

    const size_t inter = static_cast<size_t>(
        std::count_if(walked.begin(), walked.end(),
                      [&probed](const std::string &x){ return probed.count(x) != 0; }));

    const size_t uni = A.size() + B.size() - inter;
    return (uni == 0) ? 0.0
                      : static_cast<double>(inter) / static_cast<double>(uni);
}
232
+
233
/// Returns the union of `tokens` and, for every token that has an entry in
/// `def_index`, that entry's definition tokens.
static std::unordered_set<std::string>
aggregate_sets(const std::vector<std::string> &tokens,
               const std::unordered_map<std::string,std::unordered_set<std::string>> &def_index)
{
    std::unordered_set<std::string> merged;
    for (const std::string &tok : tokens) {
        merged.insert(tok);
        const auto entry = def_index.find(tok);
        if (entry == def_index.end()) continue;
        merged.insert(entry->second.begin(), entry->second.end());
    }
    return merged;
}
247
+
248
+ // --------------------------- Candidate selection (short funcs) ---------------
249
+
250
+ static std::string best_candidate_by_similarity(const NextSet &cands,
251
+ const std::vector<std::string> &prompt_toks,
252
+ const std::vector<std::string> &resp_toks,
253
+ const std::unordered_map<std::string,std::unordered_set<std::string>> &def_index,
254
+ const std::unordered_map<std::string,int> &recent_counts,
255
+ double repeat_penalty)
256
+ {
257
+ if (cands.empty()) return std::string();
258
+ if (cands.size() == 1) return *cands[0];
259
+
260
+ auto agg = aggregate_sets(prompt_toks, def_index);
261
+ for (auto &r : resp_toks){
262
+ auto it = def_index.find(r);
263
+ if (it != def_index.end()) for (auto &d : it->second) agg.insert(d);
264
+ }
265
+
266
+ double best = -1e9;
267
+ std::string best_tok;
268
+ size_t M = cands.size();
269
+ std::vector<double> scores(M, 0.0);
270
+
271
+ #pragma omp parallel for schedule(static)
272
+ for (ptrdiff_t i=0;i<static_cast<ptrdiff_t>(M);++i){
273
+ std::unordered_set<std::string> candset;
274
+ candset.insert(*cands[(size_t)i]);
275
+ auto it = def_index.find(*cands[(size_t)i]);
276
+ if (it != def_index.end()) for (auto &d : it->second) candset.insert(d);
277
+ double s = jaccard_similarity(agg, candset);
278
+ scores[(size_t)i] = s;
279
+ }
280
+
281
+ for (size_t i=0;i<M;++i){
282
+ const std::string &tok = *cands[i];
283
+ double s = scores[i];
284
+ auto rc_it = recent_counts.find(tok);
285
+ int cnt = (rc_it==recent_counts.end()? 0 : rc_it->second);
286
+ double adjusted = s - repeat_penalty * static_cast<double>(cnt);
287
+ if (adjusted > best || (adjusted == best && tok < best_tok)){
288
+ best = adjusted;
289
+ best_tok = tok;
290
+ }
291
+ }
292
+ return best_tok;
293
+ }
294
+
295
+ // --------------------------- Response generator (short units) ---------------
296
+
297
+ static std::vector<std::string> generate_response(KnowledgeBase &kb,
298
+ const std::vector<std::string> &prompt_toks,
299
+ size_t maxlen,
300
+ const std::unordered_map<std::string,std::unordered_set<std::string>> &def_index,
301
+ double repeat_penalty)
302
+ {
303
+ std::vector<std::string> resp;
304
+ if (prompt_toks.empty() || maxlen == 0) return resp;
305
+ std::unordered_map<std::string,int> recent_counts;
306
+
307
+ auto would_create_2_cycle = [&](const std::string &cand) -> bool {
308
+ if (resp.size() < 2) return false;
309
+ // check alternation: X Y X Y ... then candidate == X and last == Y
310
+ const std::string &last = resp.back();
311
+ const std::string &prev = resp[resp.size()-2];
312
+ return (cand == prev && last == resp[resp.size()-3 < resp.size() ? resp.size()-3 : 0]);
313
+ // this is a cheap conservative check; main guard is repeat_penalty + single-candidate rule
314
+ };
315
+
316
+ std::string last_printed;
317
+ for (size_t step=0; step<maxlen; ++step){
318
+ NextSet candidates;
319
+ bool found = false;
320
+ if (step==0){
321
+ for (ssize_t p = static_cast<ssize_t>(prompt_toks.size())-1; p>=0; --p){
322
+ auto opt = kb.lookup_by_string(prompt_toks[(size_t)p]);
323
+ if (opt){ candidates = *opt; found = true; break; }
324
+ }
325
+ } else {
326
+ auto opt = kb.lookup_by_string(last_printed);
327
+ if (opt){ candidates = *opt; found = true; }
328
+ else {
329
+ for (ssize_t p = static_cast<ssize_t>(prompt_toks.size())-1; p>=0; --p){
330
+ auto opt2 = kb.lookup_by_string(prompt_toks[(size_t)p]);
331
+ if (opt2){ candidates = *opt2; found = true; break; }
332
+ }
333
+ }
334
+ }
335
+ if (!found || candidates.empty()) break;
336
+
337
+ // If only one candidate and it already appeared, stop to avoid 1-cycle.
338
+ if (candidates.size()==1){
339
+ std::string only = *candidates[0];
340
+ if (recent_counts[only] > 0) break;
341
+ resp.push_back(only);
342
+ recent_counts[only] += 1;
343
+ last_printed = only;
344
+ continue;
345
+ }
346
+
347
+ // choose best with repeat penalty
348
+ std::string chosen = best_candidate_by_similarity(candidates, prompt_toks, resp, def_index, recent_counts, repeat_penalty);
349
+ if (chosen.empty()) break;
350
+
351
+ // cheap 2-cycle avoider: if this would continue a trivial alternation, stop
352
+ if (would_create_2_cycle(chosen)) break;
353
+
354
+ resp.push_back(chosen);
355
+ recent_counts[chosen] += 1;
356
+ last_printed = chosen;
357
+ }
358
+ return resp;
359
+ }
360
+
361
+ // --------------------------- Learning from files (short) -------------------
362
+
363
+ static void learn_from_file(KnowledgeBase &kb, const std::string &fname){
364
+ std::ifstream ifs(fname);
365
+ if (!ifs) return;
366
+ std::string tok;
367
+ std::string prev;
368
+ bool have_prev = false;
369
+ while (ifs >> tok){
370
+ if (have_prev) kb.add_pair(prev, tok);
371
+ prev = tok; have_prev = true;
372
+ }
373
+ }
374
+
375
+ static void learn_files_parallel(KnowledgeBase &kb, const std::vector<std::string> &files){
376
+ #pragma omp parallel for schedule(dynamic)
377
+ for (ptrdiff_t i=0;i<static_cast<ptrdiff_t>(files.size());++i) learn_from_file(kb, files[(size_t)i]);
378
+ }
379
+
380
+ // --------------------------- Serialization (short functions) ----------------
381
+
382
+ // File format documented in comments near functions
383
+ static void save_kb_binary(const KnowledgeBase &kb, const std::string &fname){
384
+ std::ofstream ofs(fname, std::ios::binary);
385
+ if (!ofs) throw std::runtime_error("cannot open save file");
386
+ std::vector<const std::string*> interned;
387
+ interned.reserve(kb.interner.pool.size());
388
+ for (auto &s : kb.interner.pool) interned.push_back(&s);
389
+ uint64_t N = interned.size();
390
+ ofs.write(reinterpret_cast<const char*>(&N), sizeof(N));
391
+ for (auto p : interned){
392
+ uint64_t L = p->size();
393
+ ofs.write(reinterpret_cast<const char*>(&L), sizeof(L));
394
+ ofs.write(p->data(), static_cast<std::streamsize>(L));
395
+ }
396
+ uint64_t E = kb.next.size();
397
+ ofs.write(reinterpret_cast<const char*>(&E), sizeof(E));
398
+ for (auto &pr : kb.next){
399
+ // find index of key
400
+ const std::string &key = *pr.first;
401
+ auto it = std::find_if(interned.begin(), interned.end(), [&](const std::string* s){ return *s == key; });
402
+ if (it == interned.end()) throw std::runtime_error("save index error");
403
+ uint64_t key_idx = static_cast<uint64_t>(std::distance(interned.begin(), it));
404
+ ofs.write(reinterpret_cast<const char*>(&key_idx), sizeof(key_idx));
405
+ uint64_t M = pr.second.size();
406
+ ofs.write(reinterpret_cast<const char*>(&M), sizeof(M));
407
+ for (auto nxt : pr.second){
408
+ auto it2 = std::find_if(interned.begin(), interned.end(), [&](const std::string* s){ return *s == *nxt; });
409
+ if (it2 == interned.end()) throw std::runtime_error("save index error2");
410
+ uint64_t v_idx = static_cast<uint64_t>(std::distance(interned.begin(), it2));
411
+ ofs.write(reinterpret_cast<const char*>(&v_idx), sizeof(v_idx));
412
+ }
413
+ }
414
+ safe_flush(ofs);
415
+ }
416
+
417
+ static void load_kb_binary(KnowledgeBase &kb, const std::string &fname){
418
+ std::ifstream ifs(fname, std::ios::binary);
419
+ if (!ifs) throw std::runtime_error("cannot open load file");
420
+ uint64_t N;
421
+ ifs.read(reinterpret_cast<char*>(&N), sizeof(N));
422
+ std::vector<std::string> strings; strings.reserve((size_t)N);
423
+ for (uint64_t i=0;i<N;++i){
424
+ uint64_t L; ifs.read(reinterpret_cast<char*>(&L), sizeof(L));
425
+ std::string s; s.resize((size_t)L);
426
+ ifs.read(&s[0], static_cast<std::streamsize>(L));
427
+ strings.push_back(std::move(s));
428
+ }
429
+ std::vector<StrPtr> ptrs; ptrs.reserve(strings.size());
430
+ for (auto &s : strings) ptrs.push_back(kb.interner.intern(s));
431
+ uint64_t E; ifs.read(reinterpret_cast<char*>(&E), sizeof(E));
432
+ for (uint64_t i=0;i<E;++i){
433
+ uint64_t key_idx; ifs.read(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
434
+ uint64_t M; ifs.read(reinterpret_cast<char*>(&M), sizeof(M));
435
+ StrPtr key_ptr = ptrs.at((size_t)key_idx);
436
+ NextSet vec; vec.reserve((size_t)M);
437
+ for (uint64_t j=0;j<M;++j){
438
+ uint64_t v_idx; ifs.read(reinterpret_cast<char*>(&v_idx), sizeof(v_idx));
439
+ vec.push_back(ptrs.at((size_t)v_idx));
440
+ }
441
+ kb.next.emplace(key_ptr, std::move(vec));
442
+ }
443
+ }
444
+
445
+ // --------------------------- CLI + Interactive loop (shorters) -----------
446
+
447
/// Prints the command-line synopsis to standard output.
/// @param p program name shown in the synopsis (normally argv[0])
static void print_usage(const char *p){
    // Lists every option that main() parses, including --help and --repeat-penalty.
    std::cout << "Usage: " << p
              << " [--help] [--maxlen N] [--save FILE] [--load-kb FILE] [--dict-depth D]"
                 " [--repeat-penalty P] [--learn f1 f2 ...]\n";
}
450
+
451
+ int main(int argc, char **argv){
452
+ size_t maxlen = 100;
453
+ std::string savefile;
454
+ std::string load_txt;
455
+ std::string load_kb;
456
+ int dict_depth = 2;
457
+ double repeat_penalty = 0.7; // default 位
458
+ std::vector<std::string> learn_files;
459
+
460
+ for (int i=1;i<argc;++i){
461
+ std::string a = argv[i];
462
+ if (a=="--help"){ print_usage(argv[0]); return 0; }
463
+ if (a=="--maxlen" && i+1<argc){ maxlen = std::stoul(argv[++i]); continue; }
464
+ if (a=="--save" && i+1<argc){ savefile = argv[++i]; continue; }
465
+ if (a=="--load-kb" && i+1<argc){ load_kb = argv[++i]; continue; }
466
+ if (a=="--dict-depth" && i+1<argc){ dict_depth = std::stoi(argv[++i]); continue; }
467
+ if (a=="--repeat-penalty" && i+1<argc){ repeat_penalty = std::stod(argv[++i]); continue; }
468
+ if (a=="--learn"){
469
+ ++i;
470
+ for (; i<argc; ++i){
471
+ if (!argv[i]) break;
472
+ std::string s = argv[i];
473
+ if (!s.empty() && s[0]=='-'){ --i; break; }
474
+ learn_files.push_back(s);
475
+ }
476
+ continue;
477
+ }
478
+ learn_files.push_back(a);
479
+ }
480
+
481
+ KnowledgeBase kb;
482
+
483
+ if (!load_kb.empty()){
484
+ try { load_kb_binary(kb, load_kb); std::cerr << "Loaded KB: " << load_kb << "\n"; }
485
+ catch (const std::exception &e){ std::cerr << "Load KB error: " << e.what() << "\n"; }
486
+ }
487
+
488
+ if (!learn_files.empty()){
489
+ std::cerr << "Learning from file/s (" << learn_files.size() << ") using threads=" << omp_get_max_threads() << "\n";
490
+ learn_files_parallel(kb, learn_files);
491
+ }
492
+
493
+ auto def_index = build_definition_index(dict_depth);
494
+ if (!def_index.empty()) std::cerr << "Dictionary depth " << dict_depth << " loaded (" << def_index.size() << " words)\n";
495
+
496
+ std::string line;
497
+ std::cout << "Ready. Enter prompts.\n";
498
+ while (std::cout << "> " , std::getline(std::cin, line)){
499
+ if (line.empty()){ std::cout << "\n"; continue; }
500
+ auto prompt_toks = tokenize_whitespace(line);
501
+ for (size_t i=1;i<prompt_toks.size();++i) kb.add_pair(prompt_toks[i-1], prompt_toks[i]);
502
+ auto resp = generate_response(kb, prompt_toks, maxlen, def_index, repeat_penalty);
503
+ for (size_t i=0;i<resp.size();++i){ std::cout << resp[i]; if (i+1<resp.size()) std::cout << ' '; }
504
+ std::cout << "\n";
505
+ if (!resp.empty()){
506
+ std::vector<std::string> combined = prompt_toks;
507
+ combined.insert(combined.end(), resp.begin(), resp.end());
508
+ for (size_t i=1;i<combined.size();++i) kb.add_pair(combined[i-1], combined[i]);
509
+ }
510
+ if (!savefile.empty()){
511
+ try { save_kb_binary(kb, savefile); std::cerr << "Saved KB: " << savefile << "\n"; }
512
+ catch (const std::exception &e){ std::cerr << "Save KB error: " << e.what() << "\n"; }
513
+ }
514
+ }
515
+
516
+ if (!savefile.empty()){
517
+ try { save_kb_binary(kb, savefile); std::cerr << "Saved KB: " << savefile << "\n"; }
518
+ catch (const std::exception &e){ std::cerr << "Save KB error: " << e.what() << "\n"; }
519
+ }
520
+
521
+ return 0;
522
+ }
ChatIPC.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f67f38cd598779cf67847c62456380856ded672c2a93c8b57a1ae53900a6bef3
3
+ size 24020480