Sync from GitHub: 5cd7057701f24a01666175e320f8d43de9c7e3ee
Browse files
app.py
CHANGED
|
@@ -5,6 +5,17 @@ The organism. NeuroGraph substrate + KISS bucket + Pith bucket +
|
|
| 5 |
Splat-Lenia + BitNet model. On CPU. Gets smarter over time.
|
| 6 |
|
| 7 |
# ---- Changelog ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# [2026-05-22] Claude Opus 4.7 (1M ctx) — D2: drop conversation history from prompt (3 sites)
|
| 9 |
# The 2026-05-16 pith→user-turn fix deployed cleanly (Run 49 confirmed:
|
| 10 |
# "My actual question:" label from the new template appearing IN BitNet
|
|
@@ -1681,6 +1692,14 @@ def on_interleaved_benchmark(
|
|
| 1681 |
# False). Watch cross-run count: should drop over runs as
|
| 1682 |
# substrate LTDs degenerate-producing pathways.
|
| 1683 |
"response_quality": "degenerate" if _response_degenerate else "clean",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1684 |
# Run 41+ predictive-coding telemetry — surface what step_result
|
| 1685 |
# already carries about predictions plus a snapshot of active
|
| 1686 |
# predictions on the graph. If all 0/0/0 across all turns, the
|
|
|
|
| 5 |
Splat-Lenia + BitNet model. On CPU. Gets smarter over time.
|
| 6 |
|
| 7 |
# ---- Changelog ----
|
| 8 |
+
# [2026-05-23] Claude Opus 4.7 (1M ctx) — Add response_text to per-turn JSON
|
| 9 |
+
# Run 50 sidecar showed 24/24 response_quality flagged degenerate even
|
| 10 |
+
# though token-savings, recall axis, and visible-in-pith resp_* snippets
|
| 11 |
+
# suggested most responses were functional. Root of the ambiguity: the
|
| 12 |
+
# `surfaced_context` field shows pith items trimmed to max_chars_per_context
|
| 13 |
+
# (default ~300), so trailing degeneracy past that boundary isn't visible
|
| 14 |
+
# in the JSON output. The detector evaluates the FULL response text,
|
| 15 |
+
# so it sees pathology we don't. Fix: add `response_text` field to the
|
| 16 |
+
# per-turn JSON (capped at 1500 chars) so the detector's signal can be
|
| 17 |
+
# verified against the actual generated text. Single field addition in
|
| 18 |
+
# on_interleaved_benchmark; no logic changes.
|
| 19 |
# [2026-05-22] Claude Opus 4.7 (1M ctx) — D2: drop conversation history from prompt (3 sites)
|
| 20 |
# The 2026-05-16 pith→user-turn fix deployed cleanly (Run 49 confirmed:
|
| 21 |
# "My actual question:" label from the new template appearing IN BitNet
|
|
|
|
| 1692 |
# False). Watch cross-run count: should drop over runs as
|
| 1693 |
# substrate LTDs degenerate-producing pathways.
|
| 1694 |
"response_quality": "degenerate" if _response_degenerate else "clean",
|
| 1695 |
+
# Run 50+ ground-truth instrumentation. _surfaced_context shows
|
| 1696 |
+
# pith items trimmed to max_chars_per_context (default ~300),
|
| 1697 |
+
# so when response_quality flags degenerate but the trimmed
|
| 1698 |
+
# surfaced text looks clean, we can't tell if it's a false
|
| 1699 |
+
# positive or trailing-degeneracy hidden by truncation. Capping
|
| 1700 |
+
# at 1500 chars keeps JSON payload reasonable while showing
|
| 1701 |
+
# enough of each response to verify what the detector saw (the detector scores the full text, so degeneracy past 1500 chars stays hidden here).
|
| 1702 |
+
"response_text": (resp_nw[:1500] if resp_nw else ""),
|
| 1703 |
# Run 41+ predictive-coding telemetry — surface what step_result
|
| 1704 |
# already carries about predictions plus a snapshot of active
|
| 1705 |
# predictions on the graph. If all 0/0/0 across all turns, the
|