Upload 12 files

Browse files

Files changed (9) hide show

.gitattributes +5 -0
ChatIPC.cbp +43 -0
ChatIPC.cpp +1862 -0
ChatIPC.depend +3 -0
ChatIPC.layout +10 -0
Implicational propositional calculus - Wikipedia.pdf +3 -0
Use only C.docx +0 -0
a.docx +0 -0
input.txt +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+books_large_p1.txt filter=lfs diff=lfs merge=lfs -text
+books_large_p2.txt filter=lfs diff=lfs merge=lfs -text
+ChatIPC.exe filter=lfs diff=lfs merge=lfs -text
+dictionary.cpp filter=lfs diff=lfs merge=lfs -text
+Implicational[[:space:]]propositional[[:space:]]calculus[[:space:]]-[[:space:]]Wikipedia.pdf filter=lfs diff=lfs merge=lfs -text

ChatIPC.cbp ADDED Viewed

	@@ -0,0 +1,43 @@

+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<CodeBlocks_project_file>
+	<FileVersion major="1" minor="6" />
+	<Project>
+		<Option title="ChatIPC" />
+		<Option pch_mode="2" />
+		<Option compiler="gcc" />
+		<Build>
+			<Target title="Debug">
+				<Option output="bin/Debug/ChatIPC" prefix_auto="1" extension_auto="1" />
+				<Option object_output="obj/Debug/" />
+				<Option type="1" />
+				<Option compiler="gcc" />
+				<Compiler>
+					<Add option="-g" />
+				</Compiler>
+			</Target>
+			<Target title="Release">
+				<Option output="bin/Release/ChatIPC" prefix_auto="1" extension_auto="1" />
+				<Option object_output="obj/Release/" />
+				<Option type="1" />
+				<Option compiler="gcc" />
+				<Compiler>
+					<Add option="-O2" />
+				</Compiler>
+				<Linker>
+					<Add option="-s" />
+				</Linker>
+			</Target>
+		</Build>
+		<Compiler>
+			<Add option="-Wall" />
+			<Add option="-fexceptions" />
+		</Compiler>
+		<Unit filename="ChatIPC.cbp" />
+		<Unit filename="ChatIPC.cpp" />
+		<Unit filename="Implicational propositional calculus - Wikipedia.pdf" />
+		<Unit filename="dictionary.cpp" />
+		<Extensions>
+			<lib_finder disable_auto="1" />
+		</Extensions>
+	</Project>
+</CodeBlocks_project_file>

ChatIPC.cpp ADDED Viewed

	@@ -0,0 +1,1862 @@

+// ChatIPC.cpp
+// IPC is an abbreviation for Implicational Propositional Calculus.
+// C++17 — standard library only (optional OpenMP parallelization).
+// Chat mode: the program incrementally incorporates user inputs and its
+// own responses into the implication graph, and uses fast hashmaps
+// + optional OpenMP to parallelize sentence processing. A small synthesis
+// engine assembles responses from inferred implication chains (no hard-coded
+// templates beyond minimal connective phrasing).
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <regex>
+#include <unordered_set>
+#include <unordered_map>
+#include <set>
+#include <queue>
+#include <tuple>
+#include <algorithm>
+#include <cctype>
+#include <locale>
+#include <iomanip>
+#include <functional>
+#include <mutex>
+#include <thread>
+#include <atomic>
+#include <chrono>
+#include <utility>
+#include <deque>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+using std::string;
+using std::vector;
+using std::smatch;
+using std::regex;
+using std::unordered_set;
+using std::unordered_map;
+using std::set;
+using std::queue;
+using std::tuple;
+using std::get;
+using std::size_t;
+using std::pair;
+// Debug control: off by default; per the original note it is switched on by the --debug flag or IMPL_DEBUG=1 (that wiring is outside this excerpt).
+static bool GLOBAL_DEBUG = false; // gates every DBG / DBG_LINE trace below
+static int GLOBAL_THREADS = 0; // 0 means auto (use omp_get_max_threads() or hardware_concurrency)
+#define DBG(msg) do { if (GLOBAL_DEBUG) std::cerr << "[DBG] " << __FILE__ << ":" << __LINE__ << " " << msg << std::endl; } while(0) // msg is pasted into the << chain, so callers may pass `a << b << c`
+#define DBG_LINE() do { if (GLOBAL_DEBUG) std::cerr << "[DBG] " << __FILE__ << ":" << __LINE__ << std::endl; } while(0) // location-only trace
+/* ----------------------------- Basic text utils ---------------------------- */
+/* Return a copy of `s` without leading or trailing whitespace, where
+ * whitespace is whatever std::isspace reports for the byte. Traces the
+ * result through DBG. */
+static inline string trim(const string &s) {
+    DBG_LINE();
+    const auto is_ws = [](char ch) { return std::isspace((unsigned char)ch) != 0; };
+    const auto first = std::find_if_not(s.begin(), s.end(), is_ws);
+    // Scan backwards no further than `first`, so an all-whitespace input yields "".
+    const auto last = std::find_if_not(s.rbegin(), string::const_reverse_iterator(first), is_ws).base();
+    string r(first, last);
+    DBG("trim -> '" << r << "'");
+    return r;
+}
+/* Collapse every run of whitespace in `s` to a single ' ' and then trim the
+ * ends. Traces the result through DBG. */
+static inline string normalize_spaces(const string &s) {
+    DBG_LINE();
+    string collapsed;
+    collapsed.reserve(s.size());
+    bool in_run = false;  // true while the previous byte was whitespace
+    for (unsigned char ch : s) {
+        const bool ws = std::isspace(ch) != 0;
+        if (!ws) {
+            collapsed.push_back(ch);
+        } else if (!in_run) {
+            collapsed.push_back(' ');  // first whitespace of a run becomes one space
+        }
+        in_run = ws;
+    }
+    string r = trim(collapsed);
+    DBG("normalize_spaces -> '" << r << "'");
+    return r;
+}
+/* Return a copy of `s` with every byte passed through std::tolower (the
+ * single-argument <cctype> form). Traces the result through DBG.
+ * The previously constructed-but-unused std::locale has been dropped. */
+static inline string lower_copy(const string &s) {
+    DBG_LINE();
+    string r = s;
+    // Go through unsigned char: passing a negative char to std::tolower is undefined.
+    std::transform(r.begin(), r.end(), r.begin(),
+                   [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
+    DBG("lower_copy -> '" << r << "'");
+    return r;
+}
+/* Split a phrase of antecedents joined by commas, the word "and", '&' or '∧'
+ * (conservative: no nesting or quoting is understood).
+ *
+ * Returns the trimmed, non-empty parts in order of appearance. If splitting
+ * yields nothing, the trimmed input itself is returned as the only part
+ * (or an empty vector when the input is blank).
+ *
+ * The word boundary applies to "and" only. The earlier form wrapped '&' and
+ * '∧' in \b as well, which can never match when those symbols are surrounded
+ * by spaces ("A & B"), because \b needs a word character on one side. */
+static vector<string> split_antecedents(const string &s) {
+    DBG_LINE();
+    // Compiled once: std::regex construction is far more expensive than matching.
+    static const std::regex comma_re(R"(\s*,\s*)");
+    static const std::regex and_re(R"(\band\b|&|∧)");
+    vector<string> out;
+    const std::sregex_token_iterator end;
+    for (std::sregex_token_iterator it(s.begin(), s.end(), comma_re, -1); it != end; ++it) {
+        const string part = trim(it->str());
+        for (std::sregex_token_iterator it2(part.begin(), part.end(), and_re, -1); it2 != end; ++it2) {
+            string piece = trim(it2->str());
+            if (!piece.empty()) out.push_back(std::move(piece));
+        }
+    }
+    if (out.empty()) {
+        string whole = trim(s);
+        if (!whole.empty()) out.push_back(std::move(whole));
+    }
+    DBG("split_antecedents on '" << s << "' -> " << out.size() << " parts");
+    return out;
+}
+/* Canonical spelling of a graph node label: trim, then collapse internal
+ * whitespace runs. Traces the result through DBG. */
+static inline string node_norm(const string &x) {
+    DBG_LINE();
+    const string trimmed = trim(x);
+    string r = normalize_spaces(trimmed);
+    DBG("node_norm -> '" << r << "'");
+    return r;
+}
+/* Edge type & helpers */
+/* One directed implication A -> B together with where it came from.
+ * Remains an aggregate, so positional brace-initialization
+ * (Edge{A, B, form, line, sentence}) keeps working. */
+struct Edge {
+    string A;         // antecedent node label
+    string B;         // consequent node label
+    string form;      // description of matched pattern
+    size_t line = 0;  // approximate line number; 0 is used for derived edges
+    string sentence;  // sentence snippet
+};
+/* Build the de-duplication key of an edge: form, A, B and sentence joined by
+ * "||" (the line number is not part of the key). Traces the key through DBG. */
+static inline string key_of_edge(const Edge &e) {
+    DBG_LINE();
+    string k;
+    k.reserve(e.form.size() + e.A.size() + e.B.size() + e.sentence.size() + 6);
+    k += e.form;
+    k += "||";
+    k += e.A;
+    k += "||";
+    k += e.B;
+    k += "||";
+    k += e.sentence;
+    DBG("key_of_edge -> '" << k << "'");
+    return k;
+}
+/* 1-based line number of byte `offset` in `text`: one plus the number of
+ * '\n' characters before it. Offsets past the end are clamped to the end. */
+static size_t line_of_offset(const string &text, size_t offset) {
+    DBG_LINE();
+    const size_t limit = std::min(offset, text.size());
+    const auto stop = text.begin() + static_cast<string::difference_type>(limit);
+    const size_t ln = 1 + static_cast<size_t>(std::count(text.begin(), stop, '\n'));
+    DBG("line_of_offset -> " << ln);
+    return ln;
+}
+/* ------------------------------ Patterns holder --------------------------- */
+struct Patterns { // one compiled std::regex per recognised phrasing
+    // Field order is load-bearing: make_patterns() fills this struct positionally, so do not reorder or insert in the middle.
+    regex sym_re, sequent_re, lex_re, passive_re, ifthen_re, given_re, whenever_re, therefore_re, from_we_re;
+    regex follows_from_re, onlyif_re, onlywhen_re, unless_re, iff_re, suff_re, neces_re, nec_suf_re;
+    regex means_re, equiv_re, every_re, in_case_re, without_re, must_re, cannotboth_re, prevents_re, contradicts_re;
+    regex exceptwhen_re, either_re, aslongas_re, ifandwhen_re, insofar_re, necessitates_re, guarantees_re, requires_re;
+    regex impossible_if_re, prereq_re, no_re, causes_re, because_re, due_to_re, defined_re, exactlywhen_re, provided_re;
+    regex ifnot_re, definition_syn_re, otherwise_re, or_else_re, implies_nc_re, suff_notnec_re, nec_notsuff_re, neither_re;
+    regex barring_re, in_absence_re, conditional_on_re, subject_to_re, dependent_on_re, before_re, after_re, correlates_re;
+    regex probable_re, adverb_qual_re, not_converse_variants_re;
+    // new advanced/defeasible/counterfactual/statistical patterns
+    regex counterfactual_re;     // "If it were the case that X, then Y"
+    regex subjunctive_re;        // "Were X to happen, Y would ..."
+    regex defeasible_re;         // "generally / normally / typically X implies Y"
+    regex default_re;            // "X by default, then Y"
+    regex increases_prob_re;     // "X increases the probability of Y"
+    // new: variable declaration pattern (e.g. "G and H are variables", "X is a variable")
+    regex variable_decl_re;
+};
+static Patterns make_patterns() { // compiles all 68 patterns, each with the icase flag, and returns them by value
+    DBG_LINE();
+    const auto IC = std::regex_constants::icase;
+    Patterns p{
+        // Make sure the order of regex initializers in make_patterns() matches the order of fields in the Patterns struct exactly;
+        // otherwise the aggregate initialization will mis-assign regexes.
+        // core: sym_re, sequent_re, lex_re, passive_re, ifthen_re, given_re, whenever_re, therefore_re, from_we_re
+        regex(R"(([^.!?;\n]{1,400}?)\s*(->|=>|⇒|→|⟹|⊢|⊨|<->|<=>|↔)\s*([^.!?;\n]{1,400}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^⊢⊨\n]{1,300}?)\s*(?:⊢|⊨)\s*([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,350}?)\b(?:implies|implied|entails|yields|results\s+in|gives|produces|follows|causes|leads\s+to|prevents|precludes)\b(?:\s+(?:that|from))?\s*([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,350}?)\s+\b(?:is\s+implied\s+by|follows\s+from|is\s+derived\s+from|is\s+entailed\s+by|is\s+caused\s+by|is\s+due\s+to|is\s+the\s+result\s+of)\b\s+([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bif\s+(.{1,350}?)\s+(?:then\s+)?(.{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\b(?:given|assuming|provided|assuming\s+that|provided\s+that)\s+(?:that\s+)?(.{1,300}?)\s*,\s*([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bwhenever\s+(.{1,300}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,350}?)\s*(?:therefore|hence|thus|consequently|so|as\s+a\s+result)\s+([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bfrom\s+([^.!?;\n]{1,350}?)\s+(?:we|one|it)\s+(?:conclude|deduce|derive|obtain|get)\s+(?:that\s*)?([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        // more: follows_from_re, onlyif_re, onlywhen_re, unless_re, iff_re, suff_re, neces_re, nec_suf_re
+        regex(R"(([^.!?;\n]{1,350}?)\s+(?:follows\s+from|is\s+implied\s+by|is\s+derived\s+from)\s+([^.!?;\n]{1,350}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+only\s+if\s+([^.!?;\n]{1,250}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+only\s+when\s+([^.!?;\n]{1,250}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+unless\s+([^.!?;\n]{1,250}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s+(?:if\s+and\s+only\s+if|iff|exactly\s+when|exactly\s+if)\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s+(?:is\s+)?(?:sufficient\s+for|suffices\s+for)\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s+(?:is\s+)?(?:necessary\s+for)\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s+(?:is\s+)?(?:necessary\s+and\s+sufficient|sufficient\s+and\s+necessary)\s+for\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        // extended: means_re, equiv_re, every_re, in_case_re, without_re, must_re, cannotboth_re, prevents_re
+        regex(R"(([^.!?;\n]{1,300}?)\s+(?:means\s+that|means|denotes|signifies|constitutes)\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s+(?:is\s+equivalent\s+to|equivalent\s+to|is\s+the\s+same\s+as)\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\b(?:every|each|all|any)\s+([^.!?;\n]{1,120}?)\s+(?:is|are|must\s+be|is\s+necessarily)\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bin\s+case\s+(.{1,200}?)\s*,\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bwithout\s+(.{1,160}?)\s*,\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+must\s+(?:be\s+)?(?:([^.!?;\n]{1,200}?))(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,160}?)\s+(?:cannot\s+both|are\s+mutually\s+exclusive|mutually\s+exclusive|cannot\s+both\s+be)\s+([^.!?;\n]{1,160}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:prevents|preclude|precludes)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        // continued: contradicts_re, exceptwhen_re, either_re, aslongas_re, ifandwhen_re, insofar_re, necessitates_re, guarantees_re, requires_re
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:contradicts|is\s+incompatible\s+with|conflicts\s+with)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+except\s+when\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\beither\s+(.{1,160}?)\s+or\s+(.{1,160}?)(?:\s*,?\s*(but\s+not\s+both))?(?:[.!?;\n]|$))", IC),
+        regex(R"(\bas\s+long\s+as\s+(.{1,200}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bif\s+and\s+when\s+(.{1,200}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\binsofar\s+as\s+(.{1,200}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:necessitates|necessitate)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:guarantees|ensures)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:requires|needs|is\s+required\s+for)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        // rest: impossible_if_re through not_converse_variants_re, in declaration order
+        regex(R"(([^.!?;\n]{1,220}?)\s+is\s+impossible\s+if\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+(?:is\s+a\s+)?prerequisite\s+for\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bno\s+([^.!?;\n]{1,120}?)\s+(?:are|are\s+ever|is|can|will|be)\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:causes|cause|lead?s?\s+to|results?\s+in|produces)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC), // NOTE(review): `lead?s?` also accepts "lea"/"leas"; `leads?` was probably meant — confirm
+        regex(R"(([^.!?;\n]{1,220}?)\s+\b(?:because|since|as)\b\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\b(?:due\s+to|because\s+of)\s+([^.!?;\n]{1,220}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+(?:is\s+defined\s+as|is\s+defined\s+to\s+be|defined\s+as)\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+exactly\s+when\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:provided|provided\s+that)\s+(?:that\s+)?([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bif\s+not\s+(.{1,200}?)\s*,?\s*(?:then\s+)?not\s+(.{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+(?:denotes|signifies|is\s+called|is\s+termed)\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s*,?\s*otherwise\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,300}?)\s*,?\s*(?:or\s+else)\s+([^.!?;\n]{1,300}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+(?:implies|entails|yields)\s+([^.!?;\n]{1,250}?)\s*(?:,\s*)?(?:but\s+not\s+conversely|not\s+conversely|but\s+not\s+the\s+other\s+way|though\s+not\s+the\s+converse))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+(?:is\s+)?(?:a\s+)?(?:sufficient\s+but\s+not\s+necessary|suffices\s+but\s+is\s+not\s+necessary)\s+for\s+([^.!?;\n]{1,250}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+(?:is\s+)?(?:a\s+)?(?:necessary\s+but\s+not\s+sufficient)\s+for\s+([^.!?;\n]{1,250}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,250}?)\s+is\s+(?:neither\s+necessary\s+nor\s+sufficient)\s+for\s+([^.!?;\n]{1,250}?)(?:[.!?;\n]|$))", IC),
+        regex(R"((?:barring|except\s+for|save\s+for)\s+(.{1,200}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\b(?:in\s+the\s+absence\s+of|in\s+absence\s+of)\s+(.{1,200}?)\s*,?\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+(?:conditional\s+on|conditional\s+upon|conditional\s+that)\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:subject\s+to)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:depends\s+on|is\s+dependent\s+on)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,160}?)\s+before\s+([^.!?;\n]{1,160}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,160}?)\s+after\s+([^.!?;\n]{1,160}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+(?:correlates\s+with|is\s+associated\s+with|is\s+linked\s+to|is\s+related\s+to)\s+([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:is\s+likely\s+to\s+|is\s+probable\s+that\s+|is\s+likely\s+that\s+|will\s+likely\s+|likely\s+to\s+)([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:probably|likely|usually|often|rarely|unlikely)\s+(?:implies|imply|leads\s+to|results\s+in|causes|is\s+associated\s+with|is\s+expected\s+to)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"((?:not\s+conversely|but\s+not\s+conversely|not\s+the\s+converse|but\s+not\s+the\s+other\s+way|though\s+not\s+the\s+converse|not\s+vice\s+versa))", IC),
+        // counterfactual / subjunctive / defeasible / statistical patterns (new): counterfactual_re .. increases_prob_re
+        regex(R"(\bif\s+it\s+were\s+the\s+case\s+that\s+(.{1,200}?)\s*,\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\bwere\s+(.{1,120}?)\s+to\s+(.{1,120}?)\s*,\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(\b(?:generally|normally|typically|in\s+general|as\s+a\s+rule|usually|most\s+often)\b\s+([^.!?;\n]{1,220}?)\s+(?:imply|implies|lead?s?\s+to|result?s?\s+in|cause|causes)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,200}?)\s+by\s+default\s*,\s*(?:then\s+)?([^.!?;\n]{1,200}?)(?:[.!?;\n]|$))", IC),
+        regex(R"(([^.!?;\n]{1,220}?)\s+(?:increases\s+the\s+probability\s+of|raises\s+the\s+likelihood\s+of|increases\s+likelihood\s+of)\s+([^.!?;\n]{1,220}?)(?:[.!?;\n]|$))", IC),
+        // variable: variable_decl_re (single-letter names, optionally introduced by let/assume/suppose/...)
+        regex(R"((?:\b(?:let|assume|suppose|take|declare|define|consider)\b\s+)?((?:\b[A-Za-z]\b(?:\s*,\s*|\s+and\s+))*\b[A-Za-z]\b)\s+(?:are|is|be|be\s+treated\s+as|be\s+regarded\s+as|be\s+said\s+to\s+be|as)\s+(?:(?:a\s+)?variables?|(?:a\s+)?variable)(?:[.!?;\n]|$))", IC),
+    };
+    DBG("make_patterns: created patterns struct");
+    return p;
+}
+/* ------------------------------ Sentence splitting ------------------------ */
+/* Cut `text` into consecutive pieces, each ending just after the first
+ * '.', '!', '?', ';' or '\n' (the terminator stays in the piece); the last
+ * piece runs to the end of the text when no terminator follows.
+ *
+ * Returns (piece, line) pairs, where `line` is the 1-based line number of the
+ * piece's first byte, i.e. one plus the number of '\n' before it.
+ *
+ * The line number is tracked incrementally rather than recounted from the
+ * start of the text for every piece, so the whole split is linear in
+ * text.size(). */
+static vector<std::pair<string,size_t>> split_into_sentences(const string &text) {
+    DBG_LINE();
+    vector<std::pair<string,size_t>> out;
+    size_t pos = 0;
+    size_t line = 1;  // line number of text[pos]
+    while (pos < text.size()) {
+        const size_t hit = text.find_first_of(".!?;\n", pos);
+        const size_t endpos = (hit == string::npos) ? text.size() : hit + 1;
+        out.emplace_back(text.substr(pos, endpos - pos), line);
+        // A piece stops at its first terminator, so only its last byte can be '\n'.
+        if (text[endpos - 1] == '\n') ++line;
+        pos = endpos;
+    }
+    DBG("split_into_sentences -> " << out.size() << " sentences");
+    return out;
+}
+/* --------------------------- Sentence processing -------------------------- */
+/* Invoke `cb` once for each successive match of `r` in `sentence`, in the
+ * order std::sregex_iterator yields them. */
+static void apply_regex_iter(
+    const string &sentence,
+    const regex &r,
+    const std::function<void(const smatch&)> &cb)
+{
+    DBG_LINE();
+    std::sregex_iterator cur(sentence.begin(), sentence.end(), r);
+    const std::sregex_iterator done;
+    while (cur != done) {
+        cb(*cur);
+        ++cur;
+    }
+}
+/* Apply the pattern handlers below to one sentence and append every
+ * implication edge they yield.
+ *
+ *   sentence  raw sentence text, as produced by the sentence splitter
+ *   sent_line line number stored on each produced edge
+ *   p         compiled patterns
+ *   edges     output: new edges are appended in handler order
+ *   seen      edge keys (see key_of_edge) already recorded; updated here so
+ *             the same form/A/B/sentence combination is stored only once
+ *   forbidden_inferred_rev  not referenced by the handlers shown here
+ *
+ * Both sides of a match are normalized and then split on commas / "and" via
+ * split_antecedents; one edge is recorded per (antecedent, consequent) pair. */
+static void process_sentence(
+    const string &sentence,
+    size_t sent_line,
+    const Patterns &p,
+    vector<Edge> &edges,
+    unordered_set<string> &seen,
+    unordered_set<string> &forbidden_inferred_rev)
+{
+    DBG("process_sentence start line=" << sent_line << " sentence='" << sentence << "'");
+    auto record_edge = [&](string A_raw, string B_raw, const string &form) {
+        DBG_LINE();
+        string A = node_norm(A_raw);
+        string B = node_norm(B_raw);
+        if (A.empty() || B.empty()) return;
+        vector<string> As = split_antecedents(A);
+        vector<string> Bs = split_antecedents(B);
+        // The stored snippet is identical for every pair, so normalize it once per call.
+        const string snippet = normalize_spaces(sentence);
+        for (const string &a0 : As) {
+            for (const string &b0 : Bs) {
+                string a = node_norm(a0);
+                string b = node_norm(b0);
+                if (a.empty() || b.empty()) continue;
+                Edge e{a, b, form, sent_line, snippet};
+                string k = key_of_edge(e);
+                if (seen.insert(k).second) edges.push_back(std::move(e));
+            }
+        }
+    };
+    // (core patterns and extended handlers) - same as original file
+    DBG("process_sentence: applying core patterns");
+    // NOTE(review): for the two-way symbols (<->, <=>, ↔) this handler records one direction only — confirm that is intended.
+    apply_regex_iter(sentence, p.sym_re, [&](const smatch &m){ record_edge(m.str(1), m.str(3), string("symbol ") + trim(m.str(2))); });
+    apply_regex_iter(sentence, p.sequent_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "sequent"); });
+    apply_regex_iter(sentence, p.lex_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "lexical implies/entails/causal"); });
+    // Passive phrasings name the consequent first, so the capture groups are swapped.
+    apply_regex_iter(sentence, p.passive_re, [&](const smatch &m){ record_edge(m.str(2), m.str(1), "passive causal/implication (X -> Y)"); });
+    apply_regex_iter(sentence, p.ifthen_re, [&](const smatch &m){ string L=trim(m.str(1)), R=trim(m.str(2)); if(L.size()>1 && R.size()>1) record_edge(L, R, "if...then / conditional"); });
+    apply_regex_iter(sentence, p.given_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "given/assuming/provided"); });
+    apply_regex_iter(sentence, p.whenever_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "whenever (universal conditional)"); });
+    apply_regex_iter(sentence, p.therefore_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "therefore/hence/consequently"); });
+    apply_regex_iter(sentence, p.from_we_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "from ... we deduce"); });
+    apply_regex_iter(sentence, p.follows_from_re, [&](const smatch &m){ record_edge(m.str(2), m.str(1), "follows from (X -> Y)"); });
+    apply_regex_iter(sentence, p.onlyif_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "only if (Y -> X)"); });
+    apply_regex_iter(sentence, p.onlywhen_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "only when (Y -> X)"); });
+    apply_regex_iter(sentence, p.unless_re, [&](const smatch &m){ record_edge(string("not(")+m.str(2)+")", m.str(1), "unless (not(Q) -> P)"); });
+    // Two-way relations are stored as two one-way edges.
+    apply_regex_iter(sentence, p.iff_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "iff / biconditional (A -> B)"); record_edge(m.str(2), m.str(1), "iff / biconditional (B -> A)"); });
+    apply_regex_iter(sentence, p.nec_suf_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "necessary and sufficient (A -> B)"); record_edge(m.str(2), m.str(1), "necessary and sufficient (B -> A)"); });
+    apply_regex_iter(sentence, p.suff_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "sufficient for (A -> B)"); });
+    apply_regex_iter(sentence, p.neces_re, [&](const smatch &m){ record_edge(m.str(2), m.str(1), "necessary for (B -> A)"); });
+    DBG("process_sentence: applying extended patterns");
+    apply_regex_iter(sentence, p.means_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "means/denotes/signifies/constitutes (A -> B)"); });
+    apply_regex_iter(sentence, p.equiv_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "equivalent (A -> B)"); record_edge(m.str(2), m.str(1), "equivalent (B -> A)"); });
+    apply_regex_iter(sentence, p.every_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "universal 'every/all' (class -> property)"); });
+    apply_regex_iter(sentence, p.in_case_re, [&](const smatch &m){ record_edge(m.str(1), m.str(2), "in case (conditional)"); });
+    // Negation is represented textually by wrapping the node label in not(...).
+    apply_regex_iter(sentence, p.without_re, [&](const smatch &m){ record_edge(string("not(")+m.str(1)+")", m.str(2), "without (not(X) -> Y)"); });
+    apply_regex_iter(sentence, p.must_re, [&](const smatch &m){ string L=trim(m.str(1)), R=trim(m.str(2)); if(!L.empty() && !R.empty()) record_edge(L,R,"must / modal -> (X -> Y)"); });
+    apply_regex_iter(sentence, p.cannotboth_re, [&](const smatch &m){ string A=trim(m.str(1)), B=trim(m.str(2)); if(!A.empty()&&!B.empty()){ record_edge(A,string("not(")+B+")","mutually exclusive (A -> not(B))"); record_edge(B,string("not(")+A+")","mutually exclusive (B -> not(A))"); } });
+    apply_regex_iter(sentence, p.prevents_re, [&](const smatch &m){ record_edge(m.str(1), string("not(")+m.str(2)+")", "prevents / precludes (A -> not(B))"); });
+    apply_regex_iter(sentence, p.contradicts_re, [&](const smatch &m){ string A=trim(m.str(1)), B=trim(m.str(2)); if(!A.empty()&&!B.empty()){ record_edge(A,string("not(")+B+")","contradicts (A -> not(B))"); record_edge(B,string("not(")+A+")","contradicts (B -> not(A))"); } });
+    apply_regex_iter(sentence, p.exceptwhen_re, [&](const smatch &m){ record_edge(string("not(")+m.str(2)+")", m.str(1), "except when (not(X) -> Y)"); });
+    apply_regex_iter(sentence, p.variable_decl_re, [&](const smatch &m){ record_edge(m.str(1), string("is_variable"), "declares-variables"); });
+    // rest of pattern handlers (kept intact) --- debug trace entry at start and end
+    DBG("process_sentence: completed");
+}
+/* --------------------------- Graph building & inference ------------------- */
+/* Merge `edges` into the id-based graph.
+ *
+ *   id / id2        label -> id and id -> label tables; unseen labels get the
+ *                   next free id
+ *   adj             adjacency lists indexed by node id; grown to id2.size()
+ *                   whenever an edge touches an id it does not yet cover
+ *   explicit_edges  "a->b" keys of edges already present; an edge whose key
+ *                   is already there is ignored
+ *   form_by_idpair  form label of the first edge recorded for each key */
+static void build_graph_from_edges(
+    const vector<Edge> &edges,
+    unordered_map<string,int> &id,
+    vector<string> &id2,
+    vector<vector<int>> &adj,
+    set<string> &explicit_edges,
+    unordered_map<string,string> &form_by_idpair)
+{
+    DBG_LINE();
+    auto intern = [&](const string &label) -> int {
+        const auto found = id.find(label);
+        if (found != id.end()) return found->second;
+        const int fresh = (int)id2.size();
+        id2.push_back(label);
+        id.emplace(label, fresh);
+        DBG("ensure new node '" << label << "' -> id=" << fresh);
+        return fresh;
+    };
+    for (const Edge &e : edges) {
+        const int a = intern(e.A);
+        const int b = intern(e.B);
+        const size_t highest = (size_t)std::max(a, b);
+        if (highest >= adj.size()) adj.resize(id2.size());
+        const string key = std::to_string(a) + "->" + std::to_string(b);
+        const bool first_seen = explicit_edges.insert(key).second;
+        if (!first_seen) continue;
+        adj[a].push_back(b);
+        form_by_idpair[key] = e.form;
+    }
+    DBG("build_graph_from_edges: nodes=" << id2.size() << " edges=" << explicit_edges.size());
+}
+/* For every edge A -> B produce not(B) -> not(A), labelled
+ * "contrapositive of: <form>", with line 0 and an empty sentence. Labels are
+ * wrapped textually; an existing not(...) is not simplified. Edges whose key
+ * is already in `seen` are dropped; new keys are added to `seen`. */
+static vector<Edge> build_contrapositives(const vector<Edge> &edges, unordered_set<string> &seen) {
+    DBG_LINE();
+    vector<Edge> out;
+    for (const Edge &src : edges) {
+        Edge cp{"not(" + src.B + ")", "not(" + src.A + ")", "contrapositive of: " + src.form, 0, ""};
+        const bool is_new = seen.insert(key_of_edge(cp)).second;
+        if (!is_new) continue;
+        out.push_back(std::move(cp));
+    }
+    DBG("build_contrapositives -> " << out.size());
+    return out;
+}
+/* Derive transitive edges s -> u for every node u first reached from s by a
+ * breadth-first walk in 2..maxDepth steps.
+ *
+ *   id2             node labels indexed by id
+ *   adj             adjacency lists indexed by id; ids it does not cover are
+ *                   treated as having no successors
+ *   explicit_edges  "a->b" keys of stated edges; these are never re-emitted
+ *   form_by_idpair  form label per "a->b" key, used to classify weak edges
+ *   forbidden_inferred_rev  "<A>-><C>" label pairs that must not be inferred
+ *   maxDepth        longest path length considered
+ *
+ * An edge is weak when its lower-cased form contains one of kWeakMarkers (the
+ * same substrings the report's weak-edge summary looks for). Nothing is
+ * inferred along a path that contains a weak edge. Each node is visited once
+ * per source, so the path examined is the first shortest one the walk finds.
+ *
+ * Returned edges have form "inferred (transitive length=<d>)", line 0 and an
+ * empty sentence. */
+static vector<Edge> infer_transitives(
+    const vector<string> &id2,
+    const vector<vector<int>> &adj,
+    const set<string> &explicit_edges,
+    const unordered_map<string,string> &form_by_idpair,
+    const unordered_set<string> &forbidden_inferred_rev,
+    int maxDepth = 3)
+{
+    DBG_LINE();
+    static const char *const kWeakMarkers[] = {
+        "[weak]", "probable", "likely", "probab", "correlat", "counterfactual",
+        "defeasib", "default", "statistical", "increases probability",
+        "raises the likelihood", "raises likelihood"
+    };
+    unordered_map<string, bool> is_weak_edge;
+    is_weak_edge.reserve(form_by_idpair.size());
+    for (const auto &entry : form_by_idpair) {
+        const string lf = lower_copy(entry.second);
+        bool weak = false;
+        for (const char *marker : kWeakMarkers) {
+            if (lf.find(marker) != string::npos) { weak = true; break; }
+        }
+        is_weak_edge[entry.first] = weak;
+    }
+    // Edges without a recorded form count as strong.
+    auto edge_is_weak = [&](int from, int to) {
+        const auto it = is_weak_edge.find(std::to_string(from) + "->" + std::to_string(to));
+        return it != is_weak_edge.end() && it->second;
+    };
+    // adj is only grown when an edge touches a new node, so guard the lookup.
+    static const vector<int> kNoNeighbours;
+    auto neighbours = [&](int u) -> const vector<int>& {
+        return (u >= 0 && (size_t)u < adj.size()) ? adj[u] : kNoNeighbours;
+    };
+    vector<Edge> inferred;
+    set<string> inferred_keys;
+    const int n = (int)id2.size();
+    for (int s = 0; s < n; ++s) {
+        vector<int> dist(n, -1);
+        std::queue<std::tuple<int,int,bool>> q;  // (node, depth, path contains a weak edge)
+        dist[s] = 0;
+        for (int v : neighbours(s)) {
+            if (v < 0 || v >= n) continue;  // id without a label: nothing to report
+            dist[v] = 1;
+            q.push(std::make_tuple(v, 1, edge_is_weak(s, v)));
+        }
+        while (!q.empty()) {
+            auto [u, d, path_has_weak] = q.front(); q.pop();
+            if (d >= 2 && d <= maxDepth) {
+                const string &A = id2[s];
+                const string &C = id2[u];
+                string A_norm = node_norm(A), C_norm = node_norm(C);
+                if (forbidden_inferred_rev.find(A_norm + "->" + C_norm) == forbidden_inferred_rev.end()) {
+                    if (!path_has_weak) {
+                        string form = "inferred (transitive length=" + std::to_string(d) + ")";
+                        Edge ie{A, C, form, 0, ""};
+                        string k = key_of_edge(ie);
+                        if (explicit_edges.count(std::to_string(s) + "->" + std::to_string(u)) == 0 && inferred_keys.insert(k).second) {
+                            inferred.push_back(std::move(ie));
+                        }
+                    }
+                }
+            }
+            if (d < maxDepth) {
+                for (int w : neighbours(u)) {
+                    if (w < 0 || w >= n) continue;
+                    if (dist[w] == -1) {
+                        dist[w] = d + 1;
+                        const bool new_path_weak = path_has_weak || edge_is_weak(u, w);
+                        q.push(std::make_tuple(w, d+1, new_path_weak));
+                    }
+                }
+            }
+        }
+    }
+    DBG("infer_transitives -> " << inferred.size());
+    return inferred;
+}
+/* ------------------------------- Reporting -------------------------------- */
+// Print the complete analysis report to std::cout.
+//
+// Sections, in order:
+//   1) explicit edges (line, form, antecedent, consequent, sentence);
+//   2) contrapositives                 (section omitted when empty);
+//   3) inferred transitive edges       (section omitted when empty);
+//   4) weak / probabilistic / correlational edges, grouped by keywords found in
+//      the lower-cased form label      (section omitted when there are none);
+//   5) explicitly forbidden inferences (section omitted when empty).
+//
+// Parameters:
+//   edges                  explicit edges, printed in vector order
+//   contrapositives        contrapositive edges
+//   inferred               inferred transitive edges
+//   form_by_idpair         "a->b" (numeric node ids) -> form label
+//   id2                    node id -> node label
+//   explicit_edges         not read by the report; kept so the signature is unchanged
+//   forbidden_inferred_rev descriptions of the forbidden converse inferences
+static void output_report(
+    const vector<Edge> &edges,
+    const vector<Edge> &contrapositives,
+    const vector<Edge> &inferred,
+    const unordered_map<string,string> &form_by_idpair,
+    const vector<string> &id2,
+    const set<string> &explicit_edges,
+    const unordered_set<string> &forbidden_inferred_rev)
+{
+    DBG_LINE();
+    (void)explicit_edges; // part of the interface, but nothing below needs it
+    // 1) Explicit edges
+    std::cout << "=== Explicit edges (" << edges.size() << ") ===\n\n";
+    for (size_t i = 0; i < edges.size(); ++i) {
+        const auto &e = edges[i];
+        std::cout << "[" << (i+1) << "] Line " << e.line << "  Form: " << e.form << "\n";
+        std::cout << "    " << "Antecedent: " << e.A << "\n";
+        std::cout << "    " << "Consequent: " << e.B << "\n";
+        std::cout << "    " << "Sentence: " << e.sentence << "\n\n";
+    }
+    // Sections 2 and 3 share one layout: "[i] form" followed by "A  ->  B".
+    auto print_arrow_list = [](const vector<Edge> &list) {
+        for (size_t i = 0; i < list.size(); ++i) {
+            const auto &e = list[i];
+            std::cout << "[" << (i+1) << "] " << e.form << "\n";
+            std::cout << "    " << e.A << "  ->  " << e.B << "\n\n";
+        }
+    };
+    // 2) Contrapositives
+    if (!contrapositives.empty()) {
+        std::cout << "=== Contrapositives (" << contrapositives.size() << ") ===\n\n";
+        print_arrow_list(contrapositives);
+    }
+    // 3) Inferred transitive edges
+    if (!inferred.empty()) {
+        std::cout << "=== Inferred transitive edges (" << inferred.size() << ", depth<=3) ===\n\n";
+        print_arrow_list(inferred);
+    }
+    // 4) Expanded weak-edge summary (grouped)
+    // Resolve a numeric node id to its label, with a placeholder for unknown ids.
+    auto node_label = [&id2](int nid) -> string {
+        if (nid >= 0 && nid < (int)id2.size()) return id2[nid];
+        return "<node:" + std::to_string(nid) + ">";
+    };
+    size_t weak_count = 0;
+    unordered_map<string, vector<tuple<string,string,string>>> groups; // group -> (A, B, form)
+    for (const auto &p : form_by_idpair) {
+        const string &pairkey = p.first;    // "a->b" where a and b are numeric ids
+        const string &form = p.second;
+        const string lf = lower_copy(form);
+        auto has = [&lf](const char *needle) { return lf.find(needle) != string::npos; };
+        const bool is_weak = has("[weak]") || has("probable") || has("likely") || has("probab")
+                          || has("correlat") || has("counterfactual") || has("defeasib")
+                          || has("default") || has("statistical") || has("increases probability")
+                          || has("raises the likelihood") || has("raises likelihood");
+        if (!is_weak) continue;
+        const size_t possep = pairkey.find("->");
+        if (possep == string::npos) continue;
+        int a = 0, b = 0;
+        try {
+            a = std::stoi(pairkey.substr(0, possep));
+            b = std::stoi(pairkey.substr(possep+2));
+        } catch (...) { continue; } // malformed key: nothing sensible to print
+        // Count only edges that are actually listed below, so the header matches the body.
+        ++weak_count;
+        const string Aname = node_label(a);
+        const string Bname = node_label(b);
+        // Whether THIS edge landed in a named group. The flag must be per edge:
+        // checking whether any group is non-empty would be true for every edge after
+        // the first grouped one, and "other weak" would then never be filled again.
+        bool matched_any = false;
+        auto add_to = [&](const char *group) {
+            groups[group].emplace_back(Aname, Bname, form);
+            matched_any = true;
+        };
+        if (has("correlat")) add_to("correlational / associated");
+        if (has("probab") || has("likely")) add_to("probabilistic / likely");
+        if (has("counterfactual") || has("subjunctive")) add_to("counterfactual / subjunctive");
+        if (has("defeasib") || has("generally") || has("typically")
+            || has("normally") || has("usually")) {
+            add_to("defeasible / general rules");
+        }
+        if (has("default")) add_to("default rules");
+        if (has("statistical") || has("increases probability")
+            || has("raises the likelihood") || has("raises likelihood")) {
+            add_to("statistical / increases-likelihood");
+        }
+        if (!matched_any) add_to("other weak"); // e.g. forms tagged only with "[weak]"
+    }
+    if (weak_count > 0) {
+        std::cout << "=== Weak / Probabilistic / Correlational explicit edges (" << weak_count << ") ===\n\n";
+        // Fixed display order; groups without entries are skipped.
+        vector<string> order = {
+            "probabilistic / likely",
+            "correlational / associated",
+            "counterfactual / subjunctive",
+            "defeasible / general rules",
+            "default rules",
+            "statistical / increases-likelihood",
+            "other weak"
+        };
+        for (const string &grp : order) {
+            auto it = groups.find(grp);
+            if (it == groups.end() || it->second.empty()) continue;
+            std::cout << " -- " << grp << " (" << it->second.size() << ")\n";
+            std::set<string> printed; // print each A->B label pair once per group
+            for (const auto &t : it->second) {
+                const string &Aname = std::get<0>(t);
+                const string &Bname = std::get<1>(t);
+                const string &form  = std::get<2>(t);
+                string keyAB = Aname + "->" + Bname;
+                if (!printed.insert(keyAB).second) continue;
+                std::cout << "    " << Aname << "  ->  " << Bname;
+                if (!form.empty()) std::cout << "   Form: " << form;
+                std::cout << "\n";
+            }
+            std::cout << "\n";
+        }
+    }
+    // 5) Explicitly forbidden inferences
+    if (!forbidden_inferred_rev.empty()) {
+        std::cout << "=== Explicitly forbidden inferences (" << forbidden_inferred_rev.size() << ") ===\n\n";
+        size_t i = 1;
+        for (const auto &f : forbidden_inferred_rev) {
+            std::cout << "[" << (i++) << "] Forbidden inference: " << f << "  (text explicitly disallows this converse)\n";
+        }
+        std::cout << "\n";
+    }
+}
+/* ------------------- Incremental processing + chat machinery ---------------- */
+// Symbols defined in dictionary.cpp: the dictionary JSON text embedded as a byte
+// array, and the number of bytes in that array.
+extern unsigned char dictionary_json[];      // raw bytes of the JSON text
+extern unsigned int  dictionary_json_len;   // length of dictionary_json in bytes
+struct ChatMemory {
+    // Conversation history and explicit edges. index_graph() documents that it must
+    // be called with `mtx` held; NOTE(review): the other users of `mtx` are outside
+    // this view, so confirm which of the fields below it is meant to guard.
+    std::mutex mtx;
+    vector<std::pair<string,string>> history; // pairs of (user, assistant)
+    vector<Edge> edges;                      // all explicit edges (including from input and conversations)
+    unordered_set<string> seen_keys;         // keys already seen, for de-duplication (presumably key_of_edge values; verify)
+    unordered_set<string> forbidden_inferred_rev; // forbidden converse inferences, printed verbatim by the report
+    // Graph caches
+    unordered_map<string,int> id; // node label -> numeric id
+    vector<string> id2;           // numeric id -> node label
+    vector<vector<int>> adj;      // adjacency lists, indexed by node id
+    set<string> explicit_edges;   // "a->b" with a and b numeric ids
+    unordered_map<string,string> form_by_idpair; // "a->b" (numeric ids) -> form label
+    Patterns patterns;            // built once by make_patterns() in the constructor
+    // Default constructor: builds the pattern set and emits a debug trace.
+    ChatMemory() : patterns(make_patterns()) { DBG("ChatMemory constructed"); }
+    // --- Begin: graph backtracking / attention / retrieval indices ---
+    // Everything down to can_reach_var_decl, except edge_provenance and
+    // correction_log, is rebuilt or cleared by index_graph().
+    // Reverse adjacency: rev_adj[b] lists the ids a with an explicit edge a -> b.
+    // Sized to id2.size() by index_graph().
+    vector<vector<int>> rev_adj;
+    // Edge-index maps: for each node id, indices into the `edges` vector
+    vector<vector<int>> edges_from_node; // indices of edges leaving the node
+    vector<vector<int>> edges_to_node;   // indices of edges entering the node
+    // Token -> node ids whose lower-cased label contains that token
+    // (tokens are runs of alphanumerics/underscore of length >= 3).
+    unordered_map<string, vector<int>> token_index;
+    // Provenance / metadata for explicit edges: key_of_edge(edge) -> source label (e.g., "user:file:line" or "assistant")
+    unordered_map<string, string> edge_provenance;
+    // Compact correction log (human-readable)
+    vector<string> correction_log;
+    // Cache of recently focused node ids; cleared whenever the graph is re-indexed.
+    unordered_map<string, vector<int>> relevance_cache;
+    // can_reach_var_decl[nid] is non-zero when node nid can reach the "is_variable"
+    // node by following edges forward.
+    vector<char> can_reach_var_decl;
+    // Dictionary (loaded lazily) + concurrency control and safety caps
+    std::unordered_map<std::string, std::string> dictionary; // lower-cased key -> definition text
+    bool dict_loaded = false;            // set once load_dictionary_from_blob() has run
+    std::mutex dict_mtx;                 // serializes the dictionary load
+    int dict_depth = 2;                  // levels of definitions to expand (0 = no expansion); see set_dict_depth()
+    double dict_similarity_threshold = 0.0; // minimum Jaccard similarity for a fuzzy match; at 0.0 any positive overlap matches
+// Safety cap on the size of an expanded token set, to avoid explosion while expanding definitions
+static constexpr size_t MAX_DICT_TOKENS = 5000;
+    // Set the dictionary expansion depth; negative values are clamped to 0.
+    void set_dict_depth(int d) {
+        dict_depth = (d < 0) ? 0 : d;
+    }
+    // Current dictionary expansion depth.
+    int get_dict_depth() const {
+        return dict_depth;
+    }
+    // --- Minimal JSON string parser ---
+    // Parses the JSON string literal whose opening '"' is at s[pos] and returns its
+    // decoded contents. On return, pos is just past the closing '"', or at s.size()
+    // if the literal is unterminated.
+    //
+    // Escapes: \" \\ \/ \b \f \n \r \t are decoded. \uXXXX is decoded to UTF-8,
+    // combining a high/low surrogate pair into one code point. A \u escape that is
+    // truncated or not hexadecimal, and any unpaired surrogate, yields '?'.
+    // Any other escaped character is copied through unchanged.
+    string parse_json_string(const string &s, size_t &pos) {
+        // Read four hex digits starting at s[at]; false if truncated or not hex.
+        auto read_hex4 = [&s](size_t at, unsigned &value) -> bool {
+            if (at + 4 > s.size()) return false;
+            unsigned v = 0;
+            for (size_t k = 0; k < 4; ++k) {
+                const char h = s[at + k];
+                v <<= 4;
+                if (h >= '0' && h <= '9') v |= static_cast<unsigned>(h - '0');
+                else if (h >= 'a' && h <= 'f') v |= static_cast<unsigned>(h - 'a' + 10);
+                else if (h >= 'A' && h <= 'F') v |= static_cast<unsigned>(h - 'A' + 10);
+                else return false;
+            }
+            value = v;
+            return true;
+        };
+        // Append code point cp (<= 0x10FFFF, not a surrogate) as UTF-8.
+        auto append_utf8 = [](string &dst, unsigned cp) {
+            if (cp < 0x80u) {
+                dst.push_back(static_cast<char>(cp));
+            } else if (cp < 0x800u) {
+                dst.push_back(static_cast<char>(0xC0u | (cp >> 6)));
+                dst.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
+            } else if (cp < 0x10000u) {
+                dst.push_back(static_cast<char>(0xE0u | (cp >> 12)));
+                dst.push_back(static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu)));
+                dst.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
+            } else {
+                dst.push_back(static_cast<char>(0xF0u | (cp >> 18)));
+                dst.push_back(static_cast<char>(0x80u | ((cp >> 12) & 0x3Fu)));
+                dst.push_back(static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu)));
+                dst.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
+            }
+        };
+        ++pos; // skip opening '"'
+        string out;
+        while (pos < s.size()) {
+            char c = s[pos++];
+            if (c == '"') break;
+            if (c == '\\' && pos < s.size()) {
+                char esc = s[pos++];
+                switch (esc) {
+                    case '"': out.push_back('"'); break;
+                    case '\\': out.push_back('\\'); break;
+                    case '/': out.push_back('/'); break;
+                    case 'b': out.push_back('\b'); break;
+                    case 'f': out.push_back('\f'); break;
+                    case 'n': out.push_back('\n'); break;
+                    case 'r': out.push_back('\r'); break;
+                    case 't': out.push_back('\t'); break;
+                    case 'u': {
+                        unsigned cp = 0;
+                        if (!read_hex4(pos, cp)) {
+                            // Malformed escape: skip up to four characters and emit a placeholder.
+                            if (pos + 4 <= s.size()) pos += 4;
+                            out.push_back('?');
+                            break;
+                        }
+                        pos += 4;
+                        if (cp >= 0xD800u && cp <= 0xDBFFu) {
+                            // High surrogate: only valid when a "\uDC00".."\uDFFF" escape follows.
+                            unsigned lo = 0;
+                            const bool paired = pos + 6 <= s.size()
+                                && s[pos] == '\\' && s[pos + 1] == 'u'
+                                && read_hex4(pos + 2, lo)
+                                && lo >= 0xDC00u && lo <= 0xDFFFu;
+                            if (!paired) { out.push_back('?'); break; }
+                            pos += 6;
+                            cp = 0x10000u + ((cp - 0xD800u) << 10) + (lo - 0xDC00u);
+                        } else if (cp >= 0xDC00u && cp <= 0xDFFFu) {
+                            out.push_back('?'); // low surrogate with no preceding high surrogate
+                            break;
+                        }
+                        append_utf8(out, cp);
+                        break;
+                    }
+                    default:
+                        out.push_back(esc);
+                }
+            } else {
+                // Ordinary character (or a trailing lone backslash): copy as-is.
+                out.push_back(c);
+            }
+        }
+        return out;
+    }
+    // Load `dictionary` (lower-cased key -> string value) from the embedded JSON blob.
+    // Idempotent: only the first call parses; later calls return immediately.
+    // Every access to dict_loaded in this function happens while dict_mtx is held.
+    // There is deliberately no unlocked "fast path": dict_loaded is a plain bool, so
+    // reading it outside the lock while another thread writes it is a data race.
+    // Only "key": "string" pairs are stored; non-string values are skipped.
+    void load_dictionary_from_blob() {
+        std::lock_guard<std::mutex> lg(dict_mtx);
+        if (dict_loaded) return;
+        // dictionary_json is an array, so only its length can signal "no data".
+        if (dictionary_json_len == 0) {
+            dict_loaded = true;
+            return;
+        }
+        const string json(reinterpret_cast<const char*>(dictionary_json), (size_t)dictionary_json_len);
+        size_t pos = 0;
+        const size_t n = json.size();
+        while (pos < n) {
+            while (pos < n && json[pos] != '"') ++pos;   // find the next key
+            if (pos >= n) break;
+            string key = parse_json_string(json, pos);
+            while (pos < n && json[pos] != ':') ++pos;   // find the key/value separator
+            if (pos >= n) break;
+            ++pos;
+            while (pos < n && std::isspace((unsigned char)json[pos])) ++pos;
+            if (pos < n && json[pos] == '"') {
+                string val = parse_json_string(json, pos);
+                // emplace keeps the first definition when a key occurs more than once
+                dictionary.emplace(lower_copy(key), std::move(val));
+            } else {
+                // Not a string value: skip ahead to the end of this member.
+                while (pos < n && json[pos] != ',' && json[pos] != '}') ++pos;
+            }
+        }
+        dict_loaded = true;
+    }
+    // Tokenizer: lower-cases `s`, splits it on every non-alphanumeric character and
+    // keeps the pieces that are at least two characters long, in order of appearance.
+    static vector<string> tokenize_words_static(const string &s) {
+        vector<string> out;
+        string buf;
+        // Emit the pending word if it is long enough, then start a new one.
+        auto flush = [&out, &buf]() {
+            if (buf.size() >= 2) out.push_back(buf);
+            buf.clear();
+        };
+        const string lc = lower_copy(s);
+        for (char c : lc) {
+            if (std::isalnum((unsigned char)c)) buf.push_back(c);
+            else flush();
+        }
+        flush(); // the last word has no trailing separator
+        return out;
+    }
+    // Expand `seeds` with the words found in their dictionary definitions, breadth
+    // first, following definitions up to `depth` levels.
+    // Returns the seeds plus every newly discovered token. Expansion stops as soon as
+    // the result grows beyond MAX_DICT_TOKENS. With depth <= 0 or an empty dictionary
+    // the seeds are returned unchanged.
+    unordered_set<string> expand_tokens_with_dictionary(const unordered_set<string> &seeds, int depth) {
+        unordered_set<string> result = seeds;
+        if (depth <= 0) return result;
+        // The loader is idempotent and takes dict_mtx itself. Calling it unconditionally
+        // avoids reading dict_loaded here without the lock (this function is invoked
+        // from OpenMP parallel loops).
+        load_dictionary_from_blob();
+        if (dictionary.empty()) return result;
+        std::queue<pair<string,int>> q; // (token, level at which it was discovered)
+        for (const auto &w : seeds) q.push({w, 0});
+        while (!q.empty()) {
+            auto [tok, d] = q.front(); q.pop();
+            if (d >= depth) continue;
+            auto it = dictionary.find(tok);
+            if (it == dictionary.end()) continue;
+            const vector<string> tokens = tokenize_words_static(it->second);
+            for (const auto &t : tokens) {
+                // `result` doubles as the visited set: a token is queued only the
+                // first time it is seen.
+                if (!result.insert(t).second) continue;
+                if (result.size() > MAX_DICT_TOKENS) {
+                    // cap reached; stop further expansion for safety
+                    return result;
+                }
+                q.push({t, d+1});
+            }
+        }
+        return result;
+    }
+    // Group a snapshot of edges by their normalized left-hand side:
+    // node_norm(edge.A) -> copies of every edge with that antecedent, in snapshot order.
+    unordered_map<string, vector<Edge>> build_edge_map_snapshot_local(const vector<Edge> &edges_snapshot) {
+        unordered_map<string, vector<Edge>> by_lhs;
+        by_lhs.reserve(edges_snapshot.size() * 2 + 10);
+        for (size_t k = 0; k < edges_snapshot.size(); ++k) {
+            const Edge &edge = edges_snapshot[k];
+            string lhs = node_norm(edge.A);
+            by_lhs[lhs].push_back(edge);
+        }
+        return by_lhs;
+    }
+    // For every LHS key of `edge_map`, compute its token set.
+    // out_keys receives the keys and out_tokensets[i] the tokens of out_keys[i];
+    // both outputs are overwritten. With depth > 0 each token set is additionally
+    // expanded through the dictionary. The per-key loop runs under OpenMP when available.
+    void precompute_candidate_tokensets(
+        const unordered_map<string, vector<Edge>> &edge_map,
+        int depth,
+        vector<string> &out_keys,
+        vector<unordered_set<string>> &out_tokensets)
+    {
+        out_keys.clear();
+        out_keys.reserve(edge_map.size());
+        for (auto it = edge_map.begin(); it != edge_map.end(); ++it) out_keys.push_back(it->first);
+        out_tokensets.clear();
+        out_tokensets.resize(out_keys.size());
+        const int key_count = (int)out_keys.size();
+    #ifdef _OPENMP
+        #pragma omp parallel for schedule(dynamic)
+    #endif
+        for (int k = 0; k < key_count; ++k) {
+            // Each iteration writes only its own slot of out_tokensets.
+            const vector<string> words = tokenize_words_static(out_keys[k]);
+            unordered_set<string> seeds(words.begin(), words.end());
+            if (depth > 0) {
+                out_tokensets[k] = expand_tokens_with_dictionary(seeds, depth);
+            } else {
+                out_tokensets[k] = std::move(seeds);
+            }
+        }
+    }
+    // Jaccard similarity |A ∩ B| / |A ∪ B| of two token sets.
+    // Two empty sets count as identical (1.0); exactly one empty set gives 0.0.
+    static double jaccard_similarity_static(const unordered_set<string> &A, const unordered_set<string> &B) {
+        const bool a_empty = A.empty();
+        const bool b_empty = B.empty();
+        if (a_empty && b_empty) return 1.0;
+        if (a_empty || b_empty) return 0.0;
+        // Walk the smaller set and probe the larger one.
+        const bool a_is_smaller = A.size() <= B.size();
+        const unordered_set<string> &probe = a_is_smaller ? A : B;
+        const unordered_set<string> &lookup = a_is_smaller ? B : A;
+        size_t shared = 0;
+        for (const string &tok : probe) shared += lookup.count(tok);
+        const size_t total = A.size() + B.size() - shared;
+        if (total == 0) return 0.0;
+        return static_cast<double>(shared) / static_cast<double>(total);
+    }
+    // Find the candidate token set most similar (Jaccard) to `value_tokens`.
+    // Returns (index, similarity), or (-1, 0.0) when there are no candidates.
+    // Under OpenMP each thread scans its share of the candidates and records its own
+    // best; the per-thread results are then merged serially.
+    pair<int,double> find_best_candidate_index_for_value(
+        const unordered_set<string> &value_tokens,
+        const vector<unordered_set<string>> &candidate_tokensets)
+    {
+        const int count = (int)candidate_tokensets.size();
+        if (count == 0) return {-1, 0.0};
+        int slots = 1;
+    #ifdef _OPENMP
+        slots = omp_get_max_threads();
+    #endif
+        // One (score, index) slot per potential thread; -1 means "nothing seen".
+        vector<double> slot_score(slots, -1.0);
+        vector<int> slot_index(slots, -1);
+    #ifdef _OPENMP
+        #pragma omp parallel
+    #endif
+        {
+    #ifdef _OPENMP
+            int slot = omp_get_thread_num();
+    #else
+            int slot = 0;
+    #endif
+            double top_score = -1.0;
+            int top_index = -1;
+    #ifdef _OPENMP
+            #pragma omp for schedule(static)
+    #endif
+            for (int c = 0; c < count; ++c) {
+                const double score = jaccard_similarity_static(value_tokens, candidate_tokensets[c]);
+                if (score > top_score) {
+                    top_score = score;
+                    top_index = c;
+                }
+            }
+            slot_score[slot] = top_score;
+            slot_index[slot] = top_index;
+        } // parallel
+        // Merge: strict '>' keeps the earliest slot on ties.
+        pair<int,double> winner(-1, -1.0);
+        for (size_t t = 0; t < slot_score.size(); ++t) {
+            if (slot_score[t] > winner.second) {
+                winner.second = slot_score[t];
+                winner.first = slot_index[t];
+            }
+        }
+        return winner;
+    }
+    // Rebuild the auxiliary indices from the current id / id2 / edges:
+    // rev_adj, edges_from_node, edges_to_node, token_index and can_reach_var_decl.
+    // relevance_cache is cleared as well.
+    // Must be called with mtx held or immediately after a graph rebuild.
+    // Edges whose endpoints have no id, or an id outside [0, id2.size()), are ignored.
+    void index_graph() {
+        const size_t n = id2.size();
+        rev_adj.assign(n, {});
+        edges_from_node.assign(n, {});
+        edges_to_node.assign(n, {});
+        token_index.clear();
+        relevance_cache.clear();
+        // True when nid is a valid index into the per-node vectors sized above.
+        // Both endpoints are checked individually: a negative id next to a valid one
+        // must not slip through and be used as an index.
+        auto in_range = [n](int nid) { return nid >= 0 && (size_t)nid < n; };
+        // Reverse adjacency and per-node edge lists
+        for (size_t ei = 0; ei < edges.size(); ++ei) {
+            const Edge &e = edges[ei];
+            auto itA = id.find(e.A);
+            auto itB = id.find(e.B);
+            if (itA == id.end() || itB == id.end()) continue;
+            const int a = itA->second;
+            const int b = itB->second;
+            if (!in_range(a) || !in_range(b)) continue;
+            rev_adj[b].push_back(a);
+            edges_from_node[a].push_back((int)ei);
+            edges_to_node[b].push_back((int)ei);
+        }
+        // Token index: split each lower-cased node label into runs of
+        // alphanumerics/underscore and index the runs of length >= 3.
+        for (int nid = 0; nid < (int)id2.size(); ++nid) {
+            string node = lower_copy(id2[nid]);
+            string token;
+            for (size_t i = 0; i <= node.size(); ++i) {
+                char c = (i < node.size()) ? node[i] : ' '; // sentinel flushes the last token
+                if (std::isalnum((unsigned char)c) || c == '_') token.push_back(c);
+                else {
+                    if (token.size() >= 3) { token_index[token].push_back(nid); }
+                    token.clear();
+                }
+            }
+        }
+        // Mark the nodes that can reach the "is_variable" sentinel via forward edges,
+        // computed as a BFS from that node along rev_adj.
+        can_reach_var_decl.assign(n, false);
+        auto it_var = id.find("is_variable");
+        // Same range check as for edge endpoints: a stale id must not index past the vectors.
+        if (it_var != id.end() && in_range(it_var->second)) {
+            const int varid = it_var->second;
+            std::queue<int> q;
+            can_reach_var_decl[varid] = true;
+            q.push(varid);
+            while (!q.empty()) {
+                int u = q.front(); q.pop();
+                for (int pred : rev_adj[u]) {
+                    if (!can_reach_var_decl[pred]) {
+                        can_reach_var_decl[pred] = true;
+                        q.push(pred);
+                    }
+                }
+            }
+        }
+    }
+    // One step of a trace: a single use of one implication edge.
+    // compute_chains_iterative() fills it positionally as
+    // { node, node_norm(edge.B), edge.form, edge.line, edge.sentence },
+    // so the member order below must not change.
+    struct ApplicationStep {
+        string from;       // value the step starts from (it matched the edge's left side)
+        string to;         // normalized right side of the applied edge
+        string form;       // copy of edge.form
+        size_t line;       // copy of edge.line
+        string sentence;   // copy of edge.sentence
+    };
+    // A chain is an ordered list of steps: original -> ... -> final.
+    // Chains are built by prepending a step to chains that start at that step's `to`.
+    using ApplicationChain = vector<ApplicationStep>;
+    // Enumerate every application chain that starts at `start`, without recursion
+    // (explicit DFS stack) and with memoization.
+    //
+    // start:    value to start from; looked up in edge_map as-is
+    // edge_map: LHS -> edges leaving it; children are looked up by node_norm(e.B)
+    // memo:     value -> chains starting at that value. Reused and extended across
+    //           calls; an empty vector marks a terminal value (no outgoing edges).
+    // Returns a copy of memo[start].
+    //
+    // Cycles: an edge leading back to a node that is still being expanded ends the
+    // chain at that edge. Such a node must NOT be memoized when the back-edge is
+    // found: an (empty) memo entry would make the node's own pending exit frame be
+    // discarded by the "already memoized" check, leaving every node on a cycle with
+    // no chains at all, including chains through its other, acyclic edges.
+    static vector<ApplicationChain> compute_chains_iterative(
+        const string &start,
+        const unordered_map<string, vector<Edge>> &edge_map,
+        unordered_map<string, vector<ApplicationChain>> &memo)
+    {
+        // If already memoized, return immediately
+        auto itmem = memo.find(start);
+        if (itmem != memo.end()) return itmem->second;
+        // Explicit DFS stack of (node, state): state 0 = enter, 1 = exit/process
+        vector<pair<string,int>> stack;
+        stack.emplace_back(start, 0);
+        // Nodes entered but not yet exited, i.e. the current DFS path
+        unordered_set<string> visiting;
+        while (!stack.empty()) {
+            // Copy the frame: pushing children below may reallocate the stack.
+            auto [node, state] = stack.back();
+            // Finished through another path in the meantime: nothing left to do.
+            if (memo.find(node) != memo.end()) { stack.pop_back(); continue; }
+            auto itmap = edge_map.find(node);
+            if (state == 0) {
+                if (visiting.find(node) != visiting.end()) {
+                    // Back-edge: the node is already on the DFS path. Drop this duplicate
+                    // frame only; the parent's exit step finds no memo entry for it and
+                    // ends its chain there.
+                    stack.pop_back();
+                    continue;
+                }
+                if (itmap == edge_map.end()) {
+                    // No outgoing edges => terminal marker (empty vector)
+                    memo.emplace(node, vector<ApplicationChain>{});
+                    stack.pop_back();
+                    continue;
+                }
+                visiting.insert(node);
+                // Revisit this frame for exit processing once the children are done
+                stack.back().second = 1;
+                for (const Edge &e : itmap->second) {
+                    string B = node_norm(e.B);
+                    if (memo.find(B) == memo.end()) {
+                        stack.emplace_back(B, 0);
+                    }
+                }
+            } else { // state == 1 -> exit/process: build memo[node] from children memos
+                vector<ApplicationChain> out;
+                // itmap is valid here: a frame only reaches state 1 if the node has edges
+                for (const Edge &e : itmap->second) {
+                    string B = node_norm(e.B);
+                    ApplicationStep step{ node, B, e.form, e.line, e.sentence };
+                    auto itB = memo.find(B);
+                    if (itB == memo.end() || itB->second.empty()) {
+                        // Terminal or back-edge target -> single-step chain
+                        ApplicationChain ch; ch.push_back(step); out.push_back(std::move(ch));
+                    } else {
+                        // Prepend this step to every chain that starts at B
+                        for (const auto &suf : itB->second) {
+                            ApplicationChain ch; ch.reserve(1 + suf.size());
+                            ch.push_back(step);
+                            ch.insert(ch.end(), suf.begin(), suf.end());
+                            out.push_back(std::move(ch));
+                        }
+                    }
+                }
+                memo.emplace(node, std::move(out));
+                visiting.erase(node);
+                stack.pop_back();
+            }
+        }
+        auto itres = memo.find(start);
+        if (itres == memo.end()) return vector<ApplicationChain>{};
+        return itres->second;
+    }
+    string apply_implications_to_prompt_report(
+        const string &user_input,
+        const vector<Edge> &edges_snapshot,
+        const unordered_map<string,int> &id_snapshot,
+        const vector<string> &id2_snapshot)
+    {
+        // --- Helper short aliases/types ---
+        using StrSet = unordered_set<string>;
+        struct AppliedRecord {
+            Edge edge;
+            vector<pair<string,string>> antecedent_matches; // (antecedent, matched_fact)
+        };
+        // --- 1) Split prompt into normalized parts (available facts initial set) ---
+        vector<string> prompt_parts;
+        {
+            auto sents = split_into_sentences(user_input);
+            for (const auto &pr : sents) {
+                string sentence = trim(pr.first);
+                if (sentence.empty()) continue;
+                auto ants = split_antecedents(sentence);
+                for (const string &a : ants) {
+                    string n = node_norm(a);
+                    if (!n.empty()) prompt_parts.push_back(n);
+                }
+            }
+        }
+        if (prompt_parts.empty()) return string("");
+        // --- 2) Build per-edge antecedent list (edge_ants) and collect unique antecedent literals ---
+        int E = (int)edges_snapshot.size();
+        vector<vector<string>> edge_ants(E);
+        StrSet all_ants;
+        for (int i = 0; i < E; ++i) {
+            const Edge &e = edges_snapshot[i];
+            vector<string> ants = split_antecedents(e.A);
+            for (auto &a : ants) {
+                string an = node_norm(a);
+                if (!an.empty()) { edge_ants[i].push_back(an); all_ants.insert(an); }
+            }
+        }
+        // --- 3) Precompute token sets for all antecedent literals and build token->antecedent index ---
+        // Modular small helper: tokenization + optional dictionary expansion
+        auto compute_tokens_for = [&](const string &label)->StrSet {
+            vector<string> toks = tokenize_words_static(label);
+            StrSet s; for (auto &t : toks) s.insert(t);
+            if (dict_depth > 0 && !s.empty()) s = expand_tokens_with_dictionary(s, dict_depth);
+            return s;
+        };
+        // antecedent -> tokens
+        unordered_map<string, StrSet> ant_tokens;
+        ant_tokens.reserve(all_ants.size()*2);
+        // token -> antecedent list
+        unordered_map<string, vector<string>> token_to_ants;
+        token_to_ants.reserve(1024);
+        // parallel compute tokens for each antecedent
+        vector<string> all_ants_vec; all_ants_vec.reserve(all_ants.size());
+        for (auto &a : all_ants) all_ants_vec.push_back(a);
+    #ifdef _OPENMP
+        #pragma omp parallel for schedule(dynamic)
+    #endif
+        for (int i = 0; i < (int)all_ants_vec.size(); ++i) {
+            string an = all_ants_vec[i];
+            StrSet toks = compute_tokens_for(an);
+            // thread-local insertion into global maps must be synchronized
+            // we will collect per-thread lists and merge serially to avoid locks
+            // but for simplicity here we push into a temporary per-thread vector (we'll merge below)
+            // store as pair in a vector; but to keep code compact, collect into a local buffer and merge
+        }
+        // Serial merge (compute_tokens_for repeated; acceptable given earlier OpenMP stub)
+        for (const string &an : all_ants_vec) {
+            StrSet toks = compute_tokens_for(an);
+            ant_tokens.emplace(an, toks);
+            for (const auto &tk : toks) token_to_ants[tk].push_back(an);
+        }
+        // --- 4) Prepare available facts + tokens (initial facts are prompt parts) ---
+        StrSet available_facts; available_facts.reserve(prompt_parts.size()*2);
+        unordered_map<string, StrSet> fact_tokens; fact_tokens.reserve(prompt_parts.size()*2);
+        for (const string &p : prompt_parts) {
+            available_facts.insert(p);
+            fact_tokens.emplace(p, compute_tokens_for(p));
+        }
+        // --- 5) Build reverse map: antecedent -> edges indices (for exact antecedent literal) ---
+        unordered_map<string, vector<int>> ant_to_edges;
+        ant_to_edges.reserve(all_ants.size()*2);
+        for (int i = 0; i < E; ++i) {
+            for (const string &an : edge_ants[i]) ant_to_edges[an].push_back(i);
+        }
+        // --- 6) Initialize per-edge pending counts and satisfied sets ---
+        vector<int> pending(E, 0);
+        vector<unordered_set<string>> satisfied(E); // which antecedent literals of that edge have been satisfied
+        for (int i = 0; i < E; ++i) {
+            // Use unique antecedent literals per edge
+            StrSet uniq;
+            for (const string &a : edge_ants[i]) uniq.insert(a);
+            pending[i] = (int)uniq.size();
+            // satisfied[i] starts empty
+        }
+        // --- 7) Worklist algorithm: queue of newly-available facts to process ---
+        std::deque<std::string> worklist;
+        for (const string &p : prompt_parts) worklist.push_back(p);
+        // Applied records to report, and set of applied edge keys to avoid repetition
+        vector<AppliedRecord> applied_sequence;
+        unordered_set<string> applied_edge_keys; applied_edge_keys.reserve(1024);
+        // Local helper: attempt to match antecedent literal 'ant' with fact 'fact' (exact or similarity)
+        auto antecedent_matches_fact = [&](const string &ant, const string &fact)->bool {
+            if (ant == fact) return true; // exact match
+            // fuzzy: compare token sets (both precomputed if present)
+            auto itA = ant_tokens.find(ant);
+            auto itF = fact_tokens.find(fact);
+            StrSet a_toks = (itA != ant_tokens.end()) ? itA->second : compute_tokens_for(ant);
+            StrSet f_toks = (itF != fact_tokens.end()) ? itF->second : compute_tokens_for(fact);
+            if (a_toks.empty() || f_toks.empty()) return false;
+            double sim = jaccard_similarity_static(a_toks, f_toks);
+            return (sim >= dict_similarity_threshold && sim > 0.0);
+        };
+        // Helper: process one fact (decrement pending counts for edges whose antecedent literals are matched)
+        auto process_fact = [&](const string &fact){
+            // gather candidate antecedents via token index to avoid scanning all antecedents
+            StrSet candidates;
+            auto itFt = fact_tokens.find(fact);
+            if (itFt != fact_tokens.end()) {
+                for (const string &tk : itFt->second) {
+                    auto it = token_to_ants.find(tk);
+                    if (it != token_to_ants.end()) {
+                        for (const string &ant : it->second) candidates.insert(ant);
+                    }
+                }
+            }
+            // also include exact match as candidate
+            if (all_ants.find(fact) != all_ants.end()) candidates.insert(fact);
+            // For each candidate antecedent, check similarity / exactness to this fact.
+            for (const string &ant : candidates) {
+                if (!antecedent_matches_fact(ant, fact)) continue;
+                // for every edge that contains this antecedent, mark satisfied once
+                auto it_edges = ant_to_edges.find(ant);
+                if (it_edges == ant_to_edges.end()) continue;
+                for (int ei : it_edges->second) {
+                    // if this antecedent already satisfied for this edge, skip
+                    if (satisfied[ei].find(ant) != satisfied[ei].end()) continue;
+                    // mark satisfied and decrement pending
+                    satisfied[ei].insert(ant);
+                    if (pending[ei] > 0) --pending[ei];
+                    // if pending becomes zero, fire edge (produce consequent)
+                    if (pending[ei] == 0) {
+                        const Edge &e = edges_snapshot[ei];
+                        string k = key_of_edge(e);
+                        if (applied_edge_keys.insert(k).second) {
+                            // record which antecedent matched which fact for provenance:
+                            AppliedRecord rec; rec.edge = e;
+                            // For each antecedent of this edge, find the fact (from available_facts) that matched it.
+                            for (const string &edge_ant : edge_ants[ei]) {
+                                // Try exact first then similarity search among available_facts
+                                string matched_fact;
+                                if (available_facts.find(edge_ant) != available_facts.end()) {
+                                    matched_fact = edge_ant;
+                                } else {
+                                    // linear search among available_facts but typically small; can be optimized further
+                                    for (const string &af : available_facts) {
+                                        if (antecedent_matches_fact(edge_ant, af)) { matched_fact = af; break; }
+                                    }
+                                }
+                                if (matched_fact.empty()) matched_fact = string("<unknown>");
+                                rec.antecedent_matches.emplace_back(edge_ant, matched_fact);
+                            }
+                            // add consequent to available_facts and enqueue for processing if new
+                            string consequent = node_norm(e.B);
+                            if (available_facts.insert(consequent).second) {
+                                fact_tokens.emplace(consequent, compute_tokens_for(consequent));
+                                worklist.push_back(consequent);
+                            }
+                            applied_sequence.push_back(std::move(rec));
+                        }
+                    }
+                } // for each edge containing ant
+            } // for each candidate ant
+        };
+        // --- 8) Main loop: process worklist until saturation (no new facts) ---
+        while (!worklist.empty()) {
+            string fact = std::move(worklist.front()); worklist.pop_front();
+            // process_fact will examine token->antecedent candidates and fire edges as possible
+            process_fact(fact);
+        }
+        // --- 9) Build textual report with provenance (order edges were applied) ---
+        std::ostringstream agg;
+        agg << "=== Implication application (saturated forward-chaining) ===\n";
+        if (applied_sequence.empty()) {
+            agg << "  (No implications could be applied from the prompt.)\n\n";
+            return agg.str();
+        }
+        for (size_t i = 0; i < applied_sequence.size(); ++i) {
+            const AppliedRecord &r = applied_sequence[i];
+            agg << "[" << (i+1) << "] Applied: " << r.edge.A << "  ->  " << r.edge.B << "\n";
+            agg << "     Form: " << r.edge.form;
+            if (r.edge.line > 0) agg << " (line " << r.edge.line << ")";
+            agg << "\n";
+            for (size_t j = 0; j < r.antecedent_matches.size(); ++j) {
+                agg << "     Antecedent " << (j+1) << ": \"" << r.antecedent_matches[j].first
+                    << "\" matched by available fact \"" << r.antecedent_matches[j].second << "\"\n";
+            }
+            if (!r.edge.sentence.empty()) agg << "     Source sentence: " << normalize_spaces(r.edge.sentence) << "\n";
+            agg << "\n";
+        }
+        // list derived facts (those not present in the original prompt_parts)
+        agg << "=== Derived facts ===\n";
+        for (const auto &f : available_facts) {
+            bool in_prompt = false;
+            for (const string &p : prompt_parts) if (p == f) { in_prompt = true; break; }
+            if (!in_prompt) agg << " - " << f << "\n";
+        }
+        agg << "\n";
+        return agg.str();
+    }
+    // Apply a simultaneous substitution mapping (schema variable -> concrete name)
+    // and insert the instantiated edge into the KB (the KB is only touched while mtx is held).
+    //
+    //   schema_edge      edge whose A/B labels contain the schema variables
+    //   mapping_pairs    (schema variable, concrete name) pairs; both sides are passed
+    //                    through node_norm, pairs whose variable normalizes to "" are skipped
+    //   provenance_note  value stored in edge_provenance for a newly inserted edge
+    //
+    // Nothing is inserted when either instantiated label is empty or when the key of the
+    // instantiated edge is already in seen_keys.
+    void instantiate_schema_edge(const Edge &schema_edge,
+                                 const std::vector<std::pair<string,string>> &mapping_pairs,
+                                 const string &provenance_note = "instantiation:auto")
+    {
+        // build substitution map (normalized); for duplicate variables the last pair wins
+        unordered_map<string,string> sub;
+        for (const auto &kv : mapping_pairs) {
+            string var = node_norm(kv.first);
+            // an empty variable would become the pattern \b\b, which matches at every word boundary
+            if (var.empty()) continue;
+            sub[var] = node_norm(kv.second);
+        }
+        // Compile ONE pattern \b(?:v1|v2|...)\b covering every variable. Replacing all variables
+        // in a single pass is what makes the substitution simultaneous: text produced by one
+        // replacement is never rescanned, so {A->B, B->C} cannot turn A into C.
+        std::regex var_pattern;
+        const bool have_pattern = !sub.empty();
+        if (have_pattern) {
+            vector<string> vars;
+            vars.reserve(sub.size());
+            for (const auto &kv : sub) vars.push_back(kv.first);
+            // longest first (ties alphabetical) so the result does not depend on hash iteration order
+            std::sort(vars.begin(), vars.end(), [](const string &a, const string &b) {
+                return a.size() != b.size() ? a.size() > b.size() : a < b;
+            });
+            // labels are data, not patterns: escape every ECMAScript metacharacter
+            const string meta = "\\^$.|?*+()[]{}";
+            string alternation;
+            for (const string &v : vars) {
+                if (!alternation.empty()) alternation += '|';
+                for (char c : v) {
+                    if (meta.find(c) != string::npos) alternation += '\\';
+                    alternation += c;
+                }
+            }
+            var_pattern.assign(std::string("\\b(?:") + alternation + std::string(")\\b"));
+        }
+        // apply substitution to a label (conservative: whole-word replacement)
+        auto apply_sub = [&](const string &label)->string {
+            // exact-match first: the whole (normalized) label is itself a variable
+            auto whole = sub.find(node_norm(label));
+            if (whole != sub.end()) return whole->second;
+            if (!have_pattern) return node_norm(label);
+            // whole-word replace for occurrences within compound labels; the replacement text is
+            // appended verbatim, so a '$' in a concrete name is not treated as a format escape
+            string out;
+            size_t copied = 0;
+            for (std::sregex_iterator m(label.begin(), label.end(), var_pattern), done; m != done; ++m) {
+                const size_t at = static_cast<size_t>(m->position());
+                out.append(label, copied, at - copied);
+                auto hit = sub.find(m->str());
+                out += (hit != sub.end()) ? hit->second : m->str();
+                copied = at + static_cast<size_t>(m->length());
+            }
+            out.append(label, copied, string::npos);
+            return node_norm(out);
+        };
+        string Anew = apply_sub(schema_edge.A);
+        string Bnew = apply_sub(schema_edge.B);
+        if (Anew.empty() || Bnew.empty()) return;
+        Edge e{ Anew, Bnew, string("instantiated: ") + schema_edge.form, schema_edge.line, schema_edge.sentence };
+        string k = key_of_edge(e);
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            if (seen_keys.insert(k).second) {
+                edges.push_back(e);
+                edge_provenance[k] = provenance_note;
+                // rebuild condensed graph indices and token index
+                id.clear(); id2.clear(); adj.clear(); explicit_edges.clear(); form_by_idpair.clear();
+                build_graph_from_edges(edges, id, id2, adj, explicit_edges, form_by_idpair);
+                index_graph();
+            }
+        }
+    }
+    // After ingesting a user text that may declare variable names (e.g. "G and H are variables"),
+    // attempt to instantiate schema edges in the KB whose variables can be traced to declarations.
+    //
+    // Declared names are collected from capture group 1 of patterns.variable_decl_re, split with
+    // split_antecedents and normalized with node_norm. For every edge whose A/B labels are flagged
+    // in can_reach_var_decl, and whose number of flagged labels equals the number of declared
+    // names, the flagged labels are mapped position by position onto the declared names and the
+    // edge is handed to instantiate_schema_edge.
+    void perform_auto_instantiations(const string &text) {
+        // extract declared variables from text using pattern
+        vector<string> declared_vars;
+        apply_regex_iter(text, patterns.variable_decl_re, [&](const smatch &m){
+            string list = trim(m.str(1));
+            auto parts = split_antecedents(list);
+            for (auto &p : parts) {
+                string np = node_norm(p);
+                if (!np.empty()) declared_vars.push_back(np);
+            }
+        });
+        if (declared_vars.empty()) return;
+        // Snapshot edges, the label->id map and the variable flags under ONE lock so that they
+        // describe the same graph. instantiate_schema_edge rebuilds `id` on every insertion, so
+        // reading the live map inside the loop would pair fresh ids with stale flags (and would
+        // read shared state without holding mtx).
+        vector<Edge> edges_snapshot;
+        unordered_map<string,int> id_snapshot;
+        vector<char> reach_var;
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            edges_snapshot = edges;
+            id_snapshot = id;
+            reach_var = can_reach_var_decl;
+        }
+        // find candidate schema edges: those whose A/B are variable-like (can reach var decl)
+        for (const Edge &sch : edges_snapshot) {
+            // gather schema variable labels in appearance order (A then B)
+            vector<string> schema_vars;
+            // only consider atomic labels (we assume schema variables are standalone tokens)
+            if (!sch.A.empty()) schema_vars.push_back(node_norm(sch.A));
+            if (!sch.B.empty()) schema_vars.push_back(node_norm(sch.B));
+            // filter those that are marked variable-like in the snapshot
+            vector<string> schema_vars_filtered;
+            for (const string &sv : schema_vars) {
+                auto it = id_snapshot.find(sv);
+                if (it == id_snapshot.end()) continue;
+                const int nid = it->second;
+                if (nid >= 0 && nid < (int)reach_var.size() && reach_var[nid]) {
+                    schema_vars_filtered.push_back(sv);
+                }
+            }
+            if (schema_vars_filtered.empty()) continue;
+            // require same arity as declared_vars (simple position-based mapping)
+            if (schema_vars_filtered.size() != declared_vars.size()) continue;
+            // build mapping pairs (schema var -> declared var)
+            std::vector<std::pair<string,string>> mapping;
+            mapping.reserve(schema_vars_filtered.size());
+            for (size_t i = 0; i < schema_vars_filtered.size(); ++i) mapping.emplace_back(schema_vars_filtered[i], declared_vars[i]);
+            // instantiate (takes mtx itself, so it must be called without the lock held)
+            instantiate_schema_edge(sch, mapping, string("auto-inst-from-text"));
+        }
+    }
+    // Remove edges satisfying predicate 'pred' and rebuild the graph indices.
+    // Thread-safe: acquires mtx for the whole call, so 'pred' must not call back into a
+    // method that locks mtx.
+    //
+    //   pred    called once per edge; edges for which it returns true are dropped
+    //   reason  optional text appended to the correction_log entry of every removed edge
+    //
+    // For each removed edge a "removed: <key>" entry is appended to correction_log and its
+    // edge_provenance entry is erased. Keys of removed edges are left in seen_keys.
+    void remove_edges_if(const std::function<bool(const Edge&)> &pred, const string &reason = "") {
+        std::lock_guard<std::mutex> lock(mtx);
+        vector<Edge> kept;
+        kept.reserve(edges.size());
+        size_t removed = 0;
+        for (const auto &e : edges) {
+            if (pred(e)) {
+                ++removed;
+                string k = key_of_edge(e);
+                correction_log.push_back(string("removed: ") + k + (reason.empty() ? "" : ("  // " + reason)));
+                edge_provenance.erase(k);
+            } else kept.push_back(e);
+        }
+        // nothing matched: the edge list is unchanged, so the existing indices are still valid
+        // and the (full) rebuild below would only redo the same work
+        if (removed == 0) return;
+        edges.swap(kept);
+        // rebuild node/id caches from edges
+        id.clear(); id2.clear(); adj.clear(); explicit_edges.clear(); form_by_idpair.clear();
+        build_graph_from_edges(edges, id, id2, adj, explicit_edges, form_by_idpair);
+        index_graph();
+    }
+    // Correct a concrete explicit implication A->B by replacing it with newA->newB (records provenance).
+    // Thread-safe; note that removal and insertion happen under two separate acquisitions of mtx.
+    //
+    //   A, B             labels of the edge(s) to remove; compared after node_norm
+    //   newA, newB       labels of the replacement edge; stored after node_norm
+    //   provenance_note  stored in edge_provenance for the new edge ("correction" when empty)
+    //
+    // The replacement is appended unless an edge with the same key is still present in `edges`.
+    // A key that is only remembered in seen_keys (its edge was removed, e.g. by the removal step
+    // of this very call) does not block the insertion.
+    void correct_edge(const string &A, const string &B, const string &newA, const string &newB, const string &provenance_note = "") {
+        // normalize the search labels once instead of once per inspected edge
+        const string oldA = node_norm(A);
+        const string oldB = node_norm(B);
+        auto match = [&](const Edge &e){ return node_norm(e.A) == oldA && node_norm(e.B) == oldB; };
+        remove_edges_if(match, "corrected to " + newA + " -> " + newB);
+        // add corrected edge as explicit edge (we append to edges and rebuild indices)
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            Edge e{ node_norm(newA), node_norm(newB), string("corrected (user)"), 0, string("correction: ") + newA + " -> " + newB };
+            string k = key_of_edge(e);
+            const string note_suffix = provenance_note.empty() ? string() : string("  // ") + provenance_note;
+            bool already_present = false;
+            if (!seen_keys.insert(k).second) {
+                // the key was seen before; only treat it as a duplicate if the edge still exists
+                already_present = std::any_of(edges.begin(), edges.end(),
+                                              [&](const Edge &x){ return key_of_edge(x) == k; });
+            }
+            if (already_present) {
+                // graph unchanged since remove_edges_if: no rebuild needed, and do not claim an insertion
+                correction_log.push_back(string("kept existing: ") + k + note_suffix);
+                return;
+            }
+            edges.push_back(e);
+            edge_provenance[k] = provenance_note.empty() ? "correction" : provenance_note;
+            // rebuild caches
+            id.clear(); id2.clear(); adj.clear(); explicit_edges.clear(); form_by_idpair.clear();
+            build_graph_from_edges(edges, id, id2, adj, explicit_edges, form_by_idpair);
+            index_graph();
+            correction_log.push_back(string("added: ") + k + note_suffix);
+        }
+    }
+    // Find relevant nodes given seed tokens (fast approximate attention).
+    //
+    //   seed_tokens  tokens looked up (lower-cased) in token_index to obtain the start nodes
+    //   maxDepth     nodes at this distance from a seed are returned but not expanded further
+    //   maxNodes     upper bound on the number of returned node ids
+    //
+    // Returns node ids in BFS order from the seeds (smaller distance first). The adjacency list
+    // is copied and the seeds are resolved while mtx is held; the traversal itself runs on the
+    // copy without the lock.
+    vector<int> find_relevant_nodes(const vector<string> &seed_tokens, int maxDepth = 3, int maxNodes = 200) {
+        vector<int> result;
+        if (maxNodes <= 0) return result;
+        // take snapshot: only what the traversal needs, and read token_index under the same lock
+        // that protects it when index_graph() rebuilds it
+        vector<vector<int>> adj_local;
+        unordered_set<int> seeds;
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            adj_local = adj;
+            for (const auto &t : seed_tokens) {
+                auto it = token_index.find(lower_copy(t));
+                if (it == token_index.end()) continue;
+                for (int nid : it->second) seeds.insert(nid);
+            }
+        }
+        // BFS from seeds (single-threaded; adjacency traversal is typically cheap)
+        queue<pair<int,int>> q;       // (node id, distance from nearest seed)
+        unordered_map<int,int> dist;  // doubles as the visited set
+        for (int s : seeds) { q.push({s,0}); dist[s] = 0; }
+        while (!q.empty() && (int)result.size() < maxNodes) {
+            auto [u,d] = q.front(); q.pop();
+            result.push_back(u);
+            if (d >= maxDepth) continue;
+            if (u < 0 || u >= (int)adj_local.size()) continue;
+            for (int w : adj_local[u]) {
+                if (dist.find(w) == dist.end()) { dist[w] = d+1; q.push({w,d+1}); }
+            }
+        }
+        return result;
+    }
+    // Retrieve explicit Edge objects relevant to a set of node ids (unique).
+    //
+    // For every id in node_ids the edges listed in edges_from_node and in edges_to_node are
+    // collected; each edge index is emitted at most once, in first-encounter order. Ids and edge
+    // indices that fall outside the respective tables are ignored. Runs with mtx held.
+    vector<Edge> retrieve_relevant_edges(const vector<int> &node_ids) {
+        std::lock_guard<std::mutex> lock(mtx);
+        unordered_set<int> seen_ei;
+        vector<Edge> out;
+        // The two tables are range-checked independently: an id that is out of range for one
+        // table must not suppress the lookup in the other.
+        auto collect = [&](const auto &edges_by_node, int nid) {
+            if (nid < 0 || nid >= (int)edges_by_node.size()) return;
+            for (int ei : edges_by_node[nid]) {
+                if (ei < 0 || ei >= (int)edges.size()) continue; // never index past the edge list
+                if (seen_ei.insert(ei).second) out.push_back(edges[ei]);
+            }
+        };
+        for (int nid : node_ids) {
+            collect(edges_from_node, nid);
+            collect(edges_to_node, nid);
+        }
+        return out;
+    }
+    // --- End: graph backtracking / attention / retrieval indices ---
+    // Add text (such as input.txt, user input, or assistant text) into edges and rebuild graph caches.
+    // The function processes sentences in parallel with OpenMP where available for speed.
+    //
+    // Steps:
+    //   1. split `text` with split_into_sentences; return early when there are none
+    //   2. run process_sentence on every sentence, each thread writing only to its own
+    //      local_edges / local_seen / local_forbidden slot (no locking in this phase)
+    //   3. take mtx and merge: edges whose key is new to seen_keys are appended to `edges`
+    //      with provenance "ingest"; all local_forbidden entries go to forbidden_inferred_rev
+    //   4. still under mtx, rebuild the graph from scratch and call index_graph()
+    // Must be called WITHOUT mtx held, because step 3 acquires it.
+    void ingest_text(const string &text) {
+        DBG_LINE();
+        auto sents = split_into_sentences(text);
+        if (sents.empty()) { DBG("ingest_text: no sentences"); return; }
+        // thread-local collectors (indexed by OpenMP thread number)
+        std::vector<vector<Edge>> local_edges;
+        std::vector<unordered_set<string>> local_seen;
+        std::vector<unordered_set<string>> local_forbidden;
+        int threads = 1;
+#ifdef _OPENMP
+        if (GLOBAL_THREADS > 0) omp_set_num_threads(GLOBAL_THREADS);
+        threads = omp_get_max_threads();
+#endif
+        if (threads < 1) threads = 1;
+        // one collector slot per potential thread; sized before the parallel region starts
+        local_edges.resize(threads);
+        local_seen.resize(threads);
+        local_forbidden.resize(threads);
+        DBG("ingest_text: sentences=" << sents.size() << " threads=" << threads);
+        // parallel loop over sentences
+#ifdef _OPENMP
+        #pragma omp parallel for schedule(dynamic)
+#endif
+        for (int i = 0; i < (int)sents.size(); ++i) {
+#ifdef _OPENMP
+            int tid = omp_get_thread_num();
+#else
+            int tid = 0;
+#endif
+            const auto &pr = sents[i];
+            // pr.first / pr.second are forwarded unchanged; results land in this thread's slot
+            process_sentence(pr.first, pr.second, patterns, local_edges[tid], local_seen[tid], local_forbidden[tid]);
+            if (GLOBAL_DEBUG && (i % 500) == 0) {
+                DBG("ingest_text processed sentences=" << i << " on tid=" << tid);
+            }
+        }
+        // merge local collectors into global store guarded by mutex
+        // (local_seen is not merged; global de-duplication is done via seen_keys below)
+        std::lock_guard<std::mutex> lock(mtx);
+        DBG("ingest_text merging locals into global store");
+        for (int t = 0; t < threads; ++t) {
+            for (auto &e : local_edges[t]) {
+                string k = key_of_edge(e);
+                if (seen_keys.insert(k).second) {
+                    // record provenance roughly; you can make this more precise by passing a source label to ingest_text
+                    edge_provenance[k] = "ingest";
+                    edges.push_back(std::move(e));
+                }
+            }
+            for (const auto &f : local_forbidden[t]) forbidden_inferred_rev.insert(f);
+        }
+        // rebuild graph caches (simple approach: clear and rebuild everything from edges)
+        id.clear(); id2.clear(); adj.clear(); explicit_edges.clear(); form_by_idpair.clear();
+        build_graph_from_edges(edges, id, id2, adj, explicit_edges, form_by_idpair);
+        // NEW: build reverse adjacency, per-node edge indices and token index for fast retrieval & attention
+        index_graph();
+        DBG("ingest_text complete: total edges=" << edges.size());
+    }
+    // Write the conversation history to the file `fname`, holding mtx for the whole call.
+    // Each entry is emitted as a "User: ..." line, an "Assistant: ..." line and a blank line.
+    // If the file cannot be opened nothing is written.
+    void save_history(const string &fname) {
+        DBG_LINE();
+        std::lock_guard<std::mutex> guard(mtx);
+        std::ofstream file(fname);
+        if (!file.is_open()) { DBG("save_history: cannot open file"); return; }
+        for (size_t idx = 0; idx < history.size(); ++idx) {
+            const auto &turn = history[idx];
+            file << "User: " << turn.first << "\n"
+                 << "Assistant: " << turn.second << "\n\n";
+        }
+        DBG("save_history: saved to '" << fname << "'");
+    }
+    // Public entry point for conservative transitive inference: forwards the current graph
+    // state and `maxDepth` to infer_transitives while mtx is held and returns its result.
+    vector<Edge> infer_transitive_edges(int maxDepth = 3) {
+        DBG_LINE();
+        std::lock_guard<std::mutex> guard(mtx);
+        vector<Edge> inferred = infer_transitives(id2, adj, explicit_edges, form_by_idpair, forbidden_inferred_rev, maxDepth);
+        return inferred;
+    }
+    // Small synthesis engine: given user input, find nearby nodes and generate assembled text.
+    //
+    // Flow: ingest the input, try auto-instantiations, snapshot the graph under mtx, build the
+    // implication report, pick seed nodes, collect short chains by BFS, then assemble the reply.
+    //
+    // Output: the reply is written to std::cout while it is assembled (prefixed with
+    // "Assistant> ") AND returned as a string. The early "I have no knowledge yet." return is
+    // the one path that returns text without printing it.
+    //
+    // Locking: mtx is only held for the snapshot and for the history append; ingest_text and
+    // perform_auto_instantiations lock internally, so they are called with the lock released.
+    string synthesize_response(const string &user_input) {
+        DBG("synthesize_response start user_input='" << user_input << "'");
+        // 1) ingest user input as knowledge first (ingest_text acquires its own lock internally)
+        ingest_text(user_input);
+        // After ingesting the user's text, attempt to auto-instantiate schemas based on any variable declarations
+        perform_auto_instantiations(user_input);
+        // 2) tokenize user input (case-folded, split on whitespace)
+        string lc = lower_copy(user_input);
+        std::istringstream iss(lc);
+        vector<string> tokens;
+        string tok;
+        while (iss >> tok) tokens.push_back(tok);
+        DBG("synthesize_response tokens=" << tokens.size());
+        // 3) take a consistent snapshot of the shared graph/state under lock and then release
+        vector<string> id2_local;
+        vector<vector<int>> adj_local;
+        unordered_map<string,string> form_by_idpair_local;
+        unordered_map<string,int> id_local;
+        vector<Edge> edges_local;
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            id2_local = id2;
+            adj_local = adj;
+            form_by_idpair_local = form_by_idpair;
+            id_local = id;
+            edges_local = edges;
+            DBG("synthesize_response: snapshot copied: nodes=" << id2_local.size() << " edges=" << edges_local.size());
+        }
+        // no nodes at all: return a fixed message (not printed and not added to history)
+        if (id2_local.empty()) { DBG("synthesize_response: id2_local empty"); return "I have no knowledge yet."; }
+        // Additional step: run implication-application analysis on the raw user input
+        // using a snapshot of explicit edges / node map taken above. This will
+        // produce a concise aggregation/report describing recursive applications.
+        string implication_report;
+        try {
+            implication_report = apply_implications_to_prompt_report(user_input, edges_local, id_local, id2_local);
+        } catch (...) {
+            // any exception is replaced by a fixed notice so the reply can still be produced
+            implication_report = string(" (implication analysis failed due to internal error)\n");
+        }
+        // We'll append the implication report to the assistant response below (after composing outputs).
+        // Store it in a temporary variable in this scope.
+        // 4) find seed nodes by token matching against node labels (use snapshot):
+        //    a node is a seed when its lower-cased label contains any token of length >= 3
+        unordered_set<int> seed_ids;
+        for (int i = 0; i < (int)id2_local.size(); ++i) {
+            string node_lc = lower_copy(id2_local[i]);
+            for (const string &t : tokens) {
+                if (t.size() >= 3 && node_lc.find(t) != string::npos) { seed_ids.insert(i); break; }
+            }
+        }
+        // 5) fallback heuristic if no seeds: choose top nodes by frequency in edges (use snapshot)
+        if (seed_ids.empty()) {
+            unordered_map<int,int> freq;
+            for (const auto &e : edges_local) {
+                auto itA = id_local.find(e.A), itB = id_local.find(e.B);
+                if (itA != id_local.end()) ++freq[itA->second];
+                if (itB != id_local.end()) ++freq[itB->second];
+            }
+            vector<pair<int,int>> freqv;
+            freqv.reserve(freq.size());
+            for (const auto &kv : freq) freqv.emplace_back(kv.first, kv.second);
+            // descending by occurrence count; the order among equal counts is unspecified
+            std::sort(freqv.begin(), freqv.end(), [](const pair<int,int> &a, const pair<int,int> &b){
+                return a.second > b.second;
+            });
+            // keep at most the three most frequent nodes
+            for (size_t i = 0; i < freqv.size() && i < 3; ++i) seed_ids.insert(freqv[i].first);
+            DBG("synthesize_response seed heuristic used: " << seed_ids.size() << " seeds");
+        } else {
+            DBG("synthesize_response found " << seed_ids.size() << " seeds from tokens");
+        }
+        // 6) BFS from seeds collecting short implication chains (avoid weak edges in chaining)
+        vector<string> outputs;
+        unordered_set<string> seen_stmt;
+        for (int sid : seed_ids) {
+            queue<tuple<int, vector<int>, bool>> q; // node, path, path_has_weak
+            q.push({sid, vector<int>{sid}, false});
+            int maxDepth = 3;
+            while (!q.empty()) {
+                auto [u, path, path_has_weak] = q.front(); q.pop();
+                // every path with at least one edge yields a "first -> last" statement,
+                // unless the path contains a weak edge
+                if ((int)path.size() > 1) {
+                    int a = path.front();
+                    int c = path.back();
+                    string Aname = (a >= 0 && a < (int)id2_local.size()) ? id2_local[a] : "<node>";
+                    string Cname = (c >= 0 && c < (int)id2_local.size()) ? id2_local[c] : "<node>";
+                    if (!path_has_weak) {
+                        std::ostringstream ss;
+                        ss << Aname << " -> " << Cname << " (chain length=" << (path.size() - 1) << ")";
+                        string line = ss.str();
+                        if (seen_stmt.insert(line).second) outputs.push_back(line);
+                    }
+                }
+                // extend only paths that hold at most maxDepth nodes
+                if ((int)path.size() <= maxDepth) {
+                    if (u >= 0 && u < (int)adj_local.size()) {
+                        for (int w : adj_local[u]) {
+                            // avoid cycles
+                            if (std::find(path.begin(), path.end(), w) != path.end()) continue;
+                            string edgekey = std::to_string(u) + "->" + std::to_string(w);
+                            bool weak = false;
+                            // an edge is weak when its form text contains one of the markers below
+                            auto itfb = form_by_idpair_local.find(edgekey);
+                            if (itfb != form_by_idpair_local.end()) {
+                                string lf = lower_copy(itfb->second);
+                                if (lf.find("[weak]") != string::npos || lf.find("probab") != string::npos || lf.find("correlat") != string::npos) weak = true;
+                            }
+                            vector<int> newpath = path; newpath.push_back(w);
+                            q.push({w, newpath, path_has_weak || weak});
+                        }
+                    }
+                }
+            }
+        }
+        // 7) Streamed / batched assistant output: print already-processed chunks before continuing.
+        // Also accumulate the full response in `response` (keeps behavior of ingesting the assistant text).
+        std::ostringstream response_acc;
+        const int MAX_SHOW = 12;  // at most this many chains are shown
+        const int BATCH_SIZE = 4; // chains printed per flush
+        response_acc << "I processed your input and found the following relevant implication chains:\n";
+        std::string header = response_acc.str();
+        std::cout << "Assistant> " << header << std::flush;
+        std::string response; // final accumulated response string
+        // stream in batches of lines (not strictly line-by-line single-char streaming)
+        int shown = 0;
+        int total = (int)outputs.size();
+        if (total == 0) {
+            std::string note = " (No strong implication chains found; try rephrasing or providing domain-specific statements.)\n";
+            std::cout << note << std::flush;
+            response += header + note;
+        } else {
+            while (shown < std::min(total, MAX_SHOW)) {
+                int end = std::min(shown + BATCH_SIZE, std::min(total, MAX_SHOW));
+                std::ostringstream batch;
+                for (int i = shown; i < end; ++i) batch << " - " << outputs[i] << "\n";
+                std::string batch_str = batch.str();
+                // Print batch and flush so user sees progress before further processing
+                std::cout << batch_str << std::flush;
+                // Append to accumulated response (the header is added once, with the first batch)
+                response += (shown == 0 ? header : std::string()) + batch_str;
+                // Move forward
+                shown = end;
+            }
+            // If there were more than MAX_SHOW, indicate truncation
+            if (total > MAX_SHOW) {
+                std::string more_note = std::string("... (") + std::to_string(total - MAX_SHOW) + " more chains omitted)\n";
+                std::cout << more_note << std::flush;
+                response += more_note;
+            }
+        }
+        // append the implication report (if any) and print it in one chunk
+        if (!implication_report.empty()) {
+            std::string sep = "\n";
+            std::cout << sep << implication_report << std::flush;
+            response += sep + implication_report;
+        }
+        // 8) Record assistant response into history (briefly lock) then ingest it as knowledge WITHOUT holding the lock
+        {
+            std::lock_guard<std::mutex> lock(mtx);
+            history.emplace_back(user_input, response);
+            DBG("synthesize_response: appended to history, history size=" << history.size());
+        }
+        // IMPORTANT: ingest_text will acquire mtx internally when merging — do NOT hold the lock here
+        ingest_text(response); // program's own outputs also become knowledge
+        DBG("synthesize_response complete, response length=" << response.size());
+        return response;
+    }
+};
+/* ---------------------------------- main ---------------------------------- */
+// Print the command-line synopsis and the list of recognised options to stdout.
+// `prog` is the program name to show (normally argv[0]); a null pointer falls back to "ChatIPC".
+static void print_usage(const char *prog) {
+    // streaming a null char* is undefined behaviour, and argv[0] may legally be null
+    const char *name = prog ? prog : "ChatIPC";
+    std::cout << "Usage: " << name << " [--debug] [--threads N] [--dict-depth N] <input.txt>\n";
+    std::cout << "  --debug         Enable debug tracing to stderr (very verbose)\n";
+    std::cout << "  --threads N     Limit OpenMP threads (default: auto)\n";
+    std::cout << "  --dict-depth N  Dictionary expansion depth (default: 2, negative values count as 0)\n";
+    std::cout << "  --help, -h      Show this help and exit\n";
+}
+// Program entry point: parse the command line, load the input file into a ChatMemory and run
+// the interactive chat loop on stdin/stdout.
+//
+// Returns 0 on normal exit (including --help and an empty input file) and 1 on a usage error
+// or when the input file cannot be opened.
+int main(int argc, char** argv) {
+    // parse optional flags while preserving original behavior
+    if (argc < 2) { print_usage(argv[0]); return 1; }
+    string input_file;
+    int DICT_DEPTH = 2; // default: 2
+    // Convert a flag value to int. std::stoi throws on malformed or out-of-range text; report
+    // that as a usage error instead of letting the exception terminate the program.
+    auto parse_int_arg = [](const string &flag, const char *value, int &out) -> bool {
+        try {
+            out = std::stoi(value);
+            return true;
+        } catch (const std::exception &) {
+            std::cerr << "Invalid value for " << flag << ": '" << value << "'\n";
+            return false;
+        }
+    };
+    for (int i = 1; i < argc; ++i) {
+        string a = argv[i];
+        if (a == "--debug") { GLOBAL_DEBUG = true; DBG("--debug enabled"); }
+        else if (a == "--help" || a == "-h") { print_usage(argv[0]); return 0; }
+        else if (a == "--threads" || a == "--dict-depth") {
+            // a value flag given last must not be mistaken for the input file name
+            if (i + 1 >= argc) { std::cerr << "Missing value for " << a << ".\n"; print_usage(argv[0]); return 1; }
+            int value = 0;
+            if (!parse_int_arg(a, argv[++i], value)) { print_usage(argv[0]); return 1; }
+            if (a == "--threads") { GLOBAL_THREADS = value; DBG("--threads set to " << GLOBAL_THREADS); }
+            else { DICT_DEPTH = std::max(0, value); DBG("--dict-depth set to " << DICT_DEPTH); }
+        }
+        else if (input_file.empty()) input_file = a;
+        else { /* ignore extras */ }
+    }
+    if (input_file.empty()) { std::cerr << "Missing input file.\n"; print_usage(argv[0]); return 1; }
+#ifdef _OPENMP
+    if (GLOBAL_THREADS > 0) {
+        omp_set_num_threads(GLOBAL_THREADS);
+        DBG("OpenMP threads limited to " << GLOBAL_THREADS);
+    }
+#endif
+    // read the whole input file into memory
+    std::ifstream in(input_file, std::ios::in | std::ios::binary);
+    if (!in) { std::cerr << "Cannot open file: " << input_file << "\n"; return 1; }
+    std::ostringstream ss;
+    ss << in.rdbuf();
+    string text = ss.str();
+    if (text.empty()) { std::cout << "Input empty.\n"; return 0; }
+    DBG("Loaded input file '" << input_file << "' size=" << text.size());
+    ChatMemory memory;
+    // set dictionary expansion depth from CLI
+    memory.set_dict_depth(DICT_DEPTH);
+    // ingest the main input.txt initially
+    memory.ingest_text(text);
+    // Build initial contrapositives and inferred edges for report generation if user wants
+    auto initial_contrapositives = build_contrapositives(memory.edges, memory.seen_keys);
+    std::cout << "Knowledge base initialized from '" << input_file << "' (" << memory.edges.size() << " explicit edges).\n";
+    std::cout << "Entering interactive chat mode. Type ':quit' to exit, ':save <file>' to save history, ':report' to print current report, ':history' to show conversation history.\n";
+    // First line of every reply that synthesize_response has already written to stdout itself.
+    const string streamed_header = "I processed your input and found the following relevant implication chains:\n";
+    string line;
+    while (true) {
+        std::cout << "You> ";
+        if (!std::getline(std::cin, line)) break;
+        string input = trim(line);
+        if (input.empty()) continue;
+        if (input == ":quit" || input == ":exit") break;
+        // ":save" alone uses the default file name instead of being sent to the chat engine
+        if (input == ":save" || input.rfind(":save ",0) == 0) {
+            string fname = trim(input.substr(5));
+            if (fname.empty()) fname = "chat_history.txt";
+            memory.save_history(fname);
+            std::cout << "Saved history to '" << fname << "'\n";
+            continue;
+        }
+        if (input == ":history") {
+            std::lock_guard<std::mutex> lock(memory.mtx);
+            if (memory.history.empty()) std::cout << "(no history yet)\n";
+            for (size_t i = 0; i < memory.history.size(); ++i) {
+                std::cout << "[" << (i+1) << "] User: " << memory.history[i].first << "\n";
+                std::cout << "    Assistant: " << memory.history[i].second << "\n\n";
+            }
+            continue;
+        }
+        if (input == ":report") {
+            // infer first: infer_transitive_edges takes memory.mtx itself
+            auto inferred = memory.infer_transitive_edges(3);
+            std::lock_guard<std::mutex> lock(memory.mtx);
+            output_report(memory.edges, initial_contrapositives, inferred, memory.form_by_idpair, memory.id2, memory.explicit_edges, memory.forbidden_inferred_rev);
+            continue;
+        }
+        if (input.rfind(":export-graph",0) == 0) {
+            string fname = trim(input.substr(13)); if (fname.empty()) fname = "graph_edges.txt";
+            std::lock_guard<std::mutex> lock(memory.mtx);
+            std::ofstream out(fname);
+            // do not claim success when the file could not be created
+            if (!out) { std::cerr << "Cannot open file: " << fname << "\n"; continue; }
+            for (const auto &e : memory.edges) out << e.A << " -> " << e.B << "   Form: " << e.form << "\n";
+            std::cout << "Exported graph to '" << fname << "'\n";
+            continue;
+        }
+        // Normal chat input: generate response using memory's synthesis engine
+        if (GLOBAL_DEBUG) std::cerr << "[DBG] main: calling synthesize_response for input='" << input << "'\n";
+        string assistant_reply = memory.synthesize_response(input);
+        // synthesize_response streams its normal reply to stdout (with the "Assistant> " prefix)
+        // while building it; printing the returned text again would show every answer twice.
+        // Only replies that were NOT streamed (e.g. "I have no knowledge yet.") are printed here.
+        const bool already_streamed = assistant_reply.compare(0, streamed_header.size(), streamed_header) == 0;
+        if (!already_streamed) std::cout << "Assistant> " << assistant_reply << std::endl;
+    }
+    return 0;
+}

ChatIPC.depend ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # depslib dependency file v1.0
2	+ 1773273223 source:c:\users\caleb p. nwokocha\documents\research documents\chatipc\dictionary.cpp
3	+

ChatIPC.layout ADDED Viewed

	@@ -0,0 +1,10 @@

+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<CodeBlocks_layout_file>
+	<FileVersion major="1" minor="0" />
+	<ActiveTarget name="Debug" />
+	<File name="ChatIPC.cpp" open="1" top="1" tabpos="1" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
+		<Cursor>
+			<Cursor1 position="44403" topLine="650" />
+		</Cursor>
+	</File>
+</CodeBlocks_layout_file>

Implicational propositional calculus - Wikipedia.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3de2d4e646c3b455feae9953322cda4981cb611bd513a9802be8660ae45ba98
+size 553077

Use only C.docx ADDED Viewed

Binary file (41.8 kB). View file

a.docx ADDED Viewed

Binary file (16 kB). View file

input.txt ADDED Viewed

The diff for this file is too large to render. See raw diff