File size: 14,688 Bytes
891669b
c5c90b5
 
 
 
 
 
 
 
 
fd30803
 
 
 
 
 
 
891669b
 
 
 
6eea076
 
 
 
486b786
 
 
 
891669b
 
 
 
486b786
891669b
 
486b786
891669b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd30803
 
891669b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5c90b5
 
 
 
891669b
 
adbf39e
 
 
891669b
 
 
 
 
 
 
 
 
 
 
 
adbf39e
 
 
891669b
 
 
486b786
 
 
 
adbf39e
 
 
 
 
 
 
 
 
 
 
 
486b786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adbf39e
 
486b786
 
 
 
adbf39e
486b786
 
 
 
adbf39e
486b786
 
 
 
 
adbf39e
486b786
 
 
 
adbf39e
 
486b786
adbf39e
486b786
 
 
adbf39e
486b786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
891669b
486b786
 
 
 
 
891669b
486b786
 
891669b
 
 
 
486b786
 
891669b
 
486b786
 
 
 
 
 
 
 
 
 
 
 
891669b
 
 
 
 
486b786
891669b
486b786
891669b
 
6eea076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# ---- Changelog ----
# [2026-04-16] Claude (Sonnet 4.6) — Tonic enabled (heuristic mode)
# What: tonic.enabled False→True in WORKER_SNN_CONFIG.
# Why: Gradio process is long-lived — substrate was dormant between spec
#      executions, nodes never warmed up, zeros everywhere. TonicEngine heuristic
#      mode (no transformer weights on HF) runs a background loop at 2s/0.5s
#      cadence: thread continuity + attractor pull + prediction tension +
#      exploration. Keeps substrate alive continuously.
# How: tonic_thread.py + tonic_engine.py vendored; openclaw_hook.py wired.
#      _concurrent_lock on graph serializes on_message vs Tonic engine.
# [2026-04-15] Claude (Sonnet 4.6) — v0.4.1 homeostasis audit values
# What: scaling_interval 100→25, threshold_ceiling 5.0 added, tonic disabled
# Why: scaling_interval=100 with ephemeral subprocess calls means homeostatic
#      scaling never fires — same root cause as CC hook silent-node bug.
#      threshold_ceiling prevents runaway threshold growth. Tonic disabled:
#      workers are ephemeral, no persistent process to accumulate attractor state.
# How: Config changes only, no structural changes.
# [2026-03-29] Forge (TQB) — Worker NeuroGraph configuration and lifecycle
# What: Dedicated NG instance for the Faux_Clawdbot worker with code-judgment-optimized SNN params
# Why: Worker needs its own isolated substrate tuned for code pattern learning, not conversation
# How: Wraps NeuroGraphMemory with worker-specific config, separate workspace, three-factor learning
# [2026-04-12] Codemine (BLK-CM-DUALPASS-001) — Dual-pass outcome recording
# What: WorkerEcosystem proxy + record_tool_outcome wired into spec_executor
# Why: Success/failure outcome signals never reached substrate — three_factor_enabled was wired but never fired
# How: WorkerEcosystem.record_outcome calls on_message (STDP traces) then inject_reward (factor 3)
# [2026-03-30] Josh + Claude — Dual-pass ingestion via ng_embed
# What: Tool results ingested with forest (gestalt) + trees (concepts) dual-pass methodology
# Why: Multi-resolution semantic search. Single-pass was operating with one eye closed.
# How: ng_embed.NGEmbed for embedding, OpenRouter for concept extraction, both passes through on_message
# -------------------

from __future__ import annotations

import json
import logging
import os
import requests
from pathlib import Path
from typing import Any, Dict, List, Optional

from openclaw_hook import NeuroGraphMemory

logger = logging.getLogger("worker_ng")

# Worker-specific SNN config — tuned for code-judgment learning.
# Passed as ``config=`` to NeuroGraphMemory.get_instance() in get_worker_ng().
WORKER_SNN_CONFIG = {
    # Higher learning rate — code patterns are more structured than conversation
    "learning_rate": 0.03,
    # Shorter causal windows — code dependencies are tighter
    "tau_plus": 10.0,
    "tau_minus": 10.0,
    # Stronger LTP/LTD — code patterns should be learned faster
    "A_plus": 1.2,
    "A_minus": 1.4,
    # Standard decay
    "decay_rate": 0.95,
    "default_threshold": 1.0,
    "refractory_period": 2,
    "max_weight": 5.0,
    "target_firing_rate": 0.05,
    "scaling_interval": 25,        # v0.4.1: lowered from 100 — homeostatic scaling fires more often
    "threshold_ceiling": 5.0,      # v0.4.1: prevents runaway threshold growth
    "weight_threshold": 0.01,
    "grace_period": 500,
    "inactivity_threshold": 1000,
    "co_activation_window": 5,
    "initial_sprouting_weight": 0.1,
    # Predictive coding
    "prediction_threshold": 3.0,
    "prediction_pre_charge_factor": 0.3,
    "prediction_window": 10,
    "prediction_chain_decay": 0.7,
    "prediction_max_chain_depth": 3,
    "prediction_confirm_bonus": 0.01,
    "prediction_error_penalty": 0.02,
    "prediction_max_active": 1000,
    "surprise_sprouting_weight": 0.1,
    # THREE-FACTOR LEARNING ENABLED — worker should learn from delayed feedback
    "three_factor_enabled": True,
    # Hypergraph
    "he_pattern_completion_strength": 0.3,
    "he_member_weight_lr": 0.05,
    "he_threshold_lr": 0.01,
    "he_discovery_window": 10,
    "he_discovery_min_co_fires": 5,
    "he_discovery_min_nodes": 3,
    "he_consolidation_overlap": 0.8,
    "he_experience_threshold": 100,
    # Tonic enabled — Gradio process is long-lived (same reasoning as CC daemon).
    # No transformer weights on HF Spaces → TonicEngine runs heuristic mode
    # automatically. Background loop at 2s idle / 0.5s active.
    "tonic": {"enabled": True},
}

# Workspace location: a data/neurograph_worker directory next to this module,
# unless the NEUROGRAPH_WORKSPACE_DIR environment variable overrides it
# (HF Spaces sets NEUROGRAPH_WORKSPACE_DIR=/data/neurograph_worker).
_DEFAULT_WORKSPACE = str(
    Path(__file__).resolve().parent.joinpath("data", "neurograph_worker")
)
WORKER_NG_WORKSPACE = os.getenv(
    "NEUROGRAPH_WORKSPACE_DIR",
    _DEFAULT_WORKSPACE,
)


def get_worker_ng() -> NeuroGraphMemory:
    """Return the worker's dedicated NeuroGraph instance.

    The instance is obtained from ``NeuroGraphMemory.get_instance`` using the
    worker workspace (``WORKER_NG_WORKSPACE``) and the worker SNN settings
    (``WORKER_SNN_CONFIG``). Before returning, its ``auto_save_interval``
    attribute is set to 100 on every call.
    """
    worker_ng = NeuroGraphMemory.get_instance(
        config=WORKER_SNN_CONFIG,
        workspace_dir=WORKER_NG_WORKSPACE,
    )
    # Author's rationale: dual-pass ingestion issues dozens of on_message calls
    # per tool result (1 forest + N concept trees); the earlier interval of 5
    # was causing constant disk I/O, so saves happen every 100 messages.
    worker_ng.auto_save_interval = 100
    return worker_ng


# ---------------------------------------------------------------------------
# Concept extraction config — OpenRouter for dual-pass Pass 2
# ---------------------------------------------------------------------------

def _get_extraction_config():
    """Build the concept-extraction settings, reading env vars at call time.

    The environment is consulted on every call (not at import), so values
    loaded later (e.g. by dotenv) are picked up. ``EXTRACTION_ENDPOINT``,
    ``OPENROUTER_API_KEY`` and ``EXTRACTION_MODEL`` are overridable; the
    remaining limits are fixed constants.
    """
    endpoint = os.getenv(
        "EXTRACTION_ENDPOINT", "https://openrouter.ai/api/v1/chat/completions"
    )
    api_key = os.getenv("OPENROUTER_API_KEY", "")
    model = os.getenv("EXTRACTION_MODEL", "google/gemini-2.0-flash-001")
    return dict(
        endpoint=endpoint,
        api_key=api_key,
        model=model,
        max_content=8000,
        max_concepts=100,
        timeout=30,
        temperature=0.2,
        max_tokens=2000,
    )

# Prompt template for the concept-extraction request. ``{content}`` is the only
# placeholder; _extract_concepts() fills it via str.format(), so any literal
# curly braces added to this template must be doubled.
_EXTRACTION_PROMPT = """Extract the key concepts, terms, and specific references from this text. Return them as a JSON array of short strings, each one a distinct concept or term mentioned in the text.

Focus on:
- Specific technical terms
- Named entities (people, tools, systems)
- Domain-specific concepts
- Action descriptions
- Relationships between things

Return ONLY a JSON array of strings. No explanation. Example: ["concept one", "concept two", "specific term"]

Text:
{content}"""


def _extract_concepts(text: str) -> List[str]:
    """Extract concept strings from *text* via an LLM chat-completion call.

    This is Pass 2 of dual-pass ingestion. Settings (endpoint, API key, model,
    limits) are read from ``_get_extraction_config()`` at call time. The text
    is truncated to ``max_content`` characters, substituted into
    ``_EXTRACTION_PROMPT`` and POSTed to the configured endpoint; the reply is
    parsed by ``_parse_concepts`` and capped at ``max_concepts`` entries.

    Returns:
        The extracted concepts, or an empty list when no API key is
        configured, when *text* is blank, or when the request or parsing
        fails. Failures are logged at DEBUG level and never raised.
    """
    cfg = _get_extraction_config()
    api_key = cfg["api_key"]
    if not api_key:
        logger.debug("No OPENROUTER_API_KEY — skipping concept extraction (single-pass only)")
        return []

    # Nothing to extract from blank input; avoid a pointless network round-trip.
    if not text or not text.strip():
        return []

    content = text[:cfg["max_content"]]
    prompt = _EXTRACTION_PROMPT.format(content=content)

    try:
        resp = requests.post(
            cfg["endpoint"],
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": cfg["model"],
                "messages": [
                    {"role": "system", "content": "You extract concepts from text. Return only a JSON array of strings."},
                    {"role": "user", "content": prompt},
                ],
                "temperature": cfg["temperature"],
                "max_tokens": cfg["max_tokens"],
            },
            timeout=cfg["timeout"],
        )
        resp.raise_for_status()
        response_text = resp.json()["choices"][0]["message"]["content"].strip()
        return _parse_concepts(response_text)[:cfg["max_concepts"]]
    except Exception as exc:
        # Deliberate best-effort boundary: extraction must never break ingestion.
        logger.debug("Concept extraction failed (non-fatal): %s", exc)
        return []


def _parse_concepts(text: str) -> List[str]:
    """Parse a list of concept strings out of an LLM reply.

    Handling, in order:

    1. Markdown code-fence lines (starting with three backticks) are dropped.
       If that leaves nothing (a single-line fenced reply), the raw text is
       kept so the bracket fallback below can still recover the array.
    2. The text is parsed as JSON. A top-level list is used directly.
    3. Otherwise (invalid JSON, or valid JSON that is not a list such as
       ``{"concepts": [...]}``), the span from the first ``[`` to the last
       ``]`` is parsed instead.

    Each entry is converted with ``str()`` and stripped; ``null`` entries and
    entries that are empty after stripping are discarded.

    Returns:
        The cleaned concept strings, or an empty list when no JSON array can
        be recovered.
    """
    text = text.strip()
    if text.startswith("```"):
        kept = [ln for ln in text.split("\n") if not ln.strip().startswith("```")]
        unfenced = "\n".join(kept).strip()
        # Keep the raw reply when the fence filter removed everything.
        text = unfenced or text

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None

    if not isinstance(parsed, list):
        # Fall back to the outermost bracketed span embedded in the reply.
        start = text.find("[")
        end = text.rfind("]") + 1
        if start < 0 or end <= start:
            return []
        try:
            parsed = json.loads(text[start:end])
        except json.JSONDecodeError:
            return []
        if not isinstance(parsed, list):
            return []

    cleaned = (str(item).strip() for item in parsed if item is not None)
    return [concept for concept in cleaned if concept]


# ---------------------------------------------------------------------------
# Dual-pass ingestion
# ---------------------------------------------------------------------------

def ingest_tool_result(ng: NeuroGraphMemory, tool_name: str, args: dict, result: str):
    """Dual-pass ingest a tool execution into the worker's substrate.

    Pass 1 (Forest): the whole tool experience (tool name, input, truncated
    result) is sent to ``ng.on_message`` as one message.
    Pass 2 (Trees): concepts extracted by ``_extract_concepts`` are each sent
    to ``ng.on_message`` as their own ``"concept: ..."`` message.

    Raw experience in for both passes. No classification (Law 7).
    Falls back to single-pass (forest only) whenever extraction yields no
    concepts: missing API key, request failure, or an empty reply.

    Args:
        ng: Substrate receiving the messages.
        tool_name: Name of the executed tool.
        args: Tool arguments, rendered with their default string form.
        result: Tool output; only the first 2000 characters are ingested.
    """
    # Truncate large results to avoid flooding the substrate. Slicing is a
    # no-op on strings that are already short enough.
    result_preview = result[:2000]
    raw_experience = f"Tool: {tool_name}\nInput: {args}\nResult: {result_preview}"

    # Pass 1: Forest — the gestalt experience
    ng.on_message(raw_experience)

    # Pass 2: Trees — concept extraction via OpenRouter
    concepts = _extract_concepts(raw_experience)
    if not concepts:
        # An empty list does not imply a missing API key: the request may have
        # failed or the model may have returned nothing usable.
        logger.info("  Single-pass (no concepts extracted) for %s", tool_name)
        return

    for concept in concepts:
        # Each concept enters the substrate as its own raw experience. The
        # intent is that it links to the forest through temporal
        # co-activation, since the messages arrive close together in time.
        ng.on_message(f"concept: {concept}")
    logger.info("  Dual-pass: forest + %d trees for %s", len(concepts), tool_name)

def recall_context(ng: NeuroGraphMemory, tool_name: str, context: str, k: int = 3) -> list:
    """Recall relevant past experience before a tool call.

    Builds the query ``"{tool_name} {context}"`` and forwards it to
    ``ng.recall`` with a fixed similarity threshold of 0.4.

    Args:
        ng: Substrate to query.
        tool_name: Name of the tool about to run.
        context: Free-text description of the current situation.
        k: Maximum number of results requested from ``ng.recall``.

    Returns:
        Whatever ``ng.recall`` returns for the query.
    """
    # No ng_embed import here: the function never used it, and importing it
    # made recall fail outright whenever that optional module was unavailable.
    query = f"{tool_name} {context}"
    return ng.recall(query, k=k, threshold=0.4)


# ---------------------------------------------------------------------------
# Dual-pass outcome recording
# ---------------------------------------------------------------------------

class WorkerEcosystem:
    """Adapter that gives a NeuroGraphMemory a ``record_outcome`` method.

    ``record_tool_outcome`` passes an instance of this class as the
    ``ecosystem`` argument of ``NGEmbed.dual_record_outcome``. Per the original
    author's notes, the vendored NeuroGraphMemory wraps a Graph that has no
    ``record_outcome()`` of its own, so this proxy supplies one on top of
    ``on_message`` and ``graph.inject_reward``.

    Law 7: raw experience in — no classification before substrate ingestion.
    The outcome-labelled message is itself the content that gets ingested.
    """

    def __init__(self, ng: NeuroGraphMemory):
        # Wrapped substrate; must offer on_message() and graph.inject_reward().
        self._ng = ng

    def record_outcome(
        self,
        embedding,
        target_id: str,
        success: bool,
        strength: float = 1.0,
        metadata: Optional[Dict] = None,
    ) -> Dict:
        """Ingest an outcome message, then inject the matching reward.

        Step 1 sends ``"outcome: <target_id> [concept:<c>] success|failure"``
        to ``ng.on_message`` (described by the author as factors 1+2, STDP
        building eligibility traces). Step 2 calls ``ng.graph.inject_reward``
        (factor 3) with ``strength`` on success or ``-strength * 0.5`` on
        failure. The author's notes give the resulting update as
        Δw = eligibility_trace × reward × learning_rate; that rule lives in
        the graph implementation and is not visible from this class.

        Args:
            embedding: Accepted for interface compatibility; not used here.
            target_id: Identifier of the thing whose outcome is recorded.
            success: Whether the outcome was a success.
            strength: Reward magnitude.
            metadata: Optional dict; a truthy ``"_concept"`` entry is added
                to the ingested message.

        Returns:
            A dict with ``target_id``, ``success``, ``reward`` and
            ``ingested`` (always True).
        """
        verdict = "success" if success else "failure"
        concept = metadata.get("_concept") if metadata else None
        if concept:
            # Tree-level calls carry the concept for richer semantic signal.
            message = f"outcome: {target_id} concept:{concept} {verdict}"
        else:
            message = f"outcome: {target_id} {verdict}"

        # Ingest first so the reward that follows applies to this experience.
        self._ng.on_message(message)

        # Failures are penalised at half strength (author's rationale: avoids
        # catastrophic forgetting).
        if success:
            reward = strength
        else:
            reward = -strength * 0.5
        self._ng.graph.inject_reward(reward)

        return {
            "target_id": target_id,
            "success": success,
            "reward": reward,
            "ingested": True,
        }


def record_tool_outcome(
    ng: NeuroGraphMemory,
    tool_name: str,
    target_id: str,
    success: bool,
    strength: float = 1.0,
    context: str = "",
) -> None:
    """Record a tool execution outcome in the worker substrate.

    Builds the text ``"tool:<tool_name> step:<target_id> PASS|FAIL"`` (plus
    the first 200 characters of *context*, when given), embeds it with
    ``ng_embed.embed`` and hands it to ``NGEmbed.dual_record_outcome`` with a
    ``WorkerEcosystem`` wrapping *ng* as the ecosystem.

    Never raises: any exception (including a missing ``ng_embed`` module) is
    logged as a warning so the caller's flow is not interrupted.
    """
    try:
        # Imported lazily so an unavailable embedding module is handled by the
        # same non-fatal path as any other failure.
        from ng_embed import NGEmbed, embed

        verdict = "PASS" if success else "FAIL"
        pieces = [f"tool:{tool_name} step:{target_id} {verdict}"]
        if context:
            pieces.append(f" context:{context[:200]}")
        content = "".join(pieces)

        embedding = embed(content)
        eco = WorkerEcosystem(ng)
        recorder = NGEmbed.get_instance()
        recorder.dual_record_outcome(
            ecosystem=eco,
            content=content,
            embedding=embedding,
            target_id=target_id,
            success=success,
            strength=strength,
        )
        logger.info("  Outcome recorded: %s %s", target_id, verdict)
    except Exception as exc:
        logger.warning("  Outcome recording failed (non-fatal): %s", exc)