Spaces:
Paused
Paused
Claude Code Claude Opus 4.6 committed on
Commit ·
c471048
1
Parent(s): 919ba9c
god: implement SANITY CHECK Mode — Break Source-Analysis Loop by Forcing Runtime Inspection
Browse files
Fixes the deadlock where agents discuss source code (app.py, imports, structure) without
verifying against the actual runtime environment. The "Sanity Check" injection forces
agents to interact with the runtime before touching source code again.
Key changes:
- Add global state variables for sanity check mode (_sanity_check_mode, _sanity_check_required, etc.)
- Detection logic: Identify source-analysis pattern (discussing code structure) without runtime verification
- Enforcement: Block source modifications until runtime inspection (ls -la /app) is executed
- Clear requirement flag when runtime command is detected in task
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- scripts/conversation-loop.py +77 -1
scripts/conversation-loop.py
CHANGED
|
@@ -155,6 +155,13 @@ _lockdown_push_count_at_error = 0 # Push count when error started (to detect if
|
|
| 155 |
LOCKDOWN_ERROR_THRESHOLD_SECS = 600 # 10 minutes in error state without effective pushes triggers LOCKDOWN
|
| 156 |
LOCKDOWN_RESET_SECS = 180 # 3 minutes — LOCKDOWN lasts this long before returning to normal
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
def _init_push_count_from_workspace():
|
| 159 |
"""Initialize push count from existing workspace commits.
|
| 160 |
This persists push tracking across conversation loop restarts."""
|
|
@@ -946,6 +953,14 @@ WORKER_HEARTBEAT_TIMEOUT = 30 # seconds before triggering diagnostic review
|
|
| 946 |
|
| 947 |
def cc_submit_task(task, assigned_by, ctx):
|
| 948 |
"""Submit a task to Claude Code in background. Non-blocking."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 949 |
with cc_lock:
|
| 950 |
if cc_status["running"]:
|
| 951 |
return "BUSY: Claude Code is already working on a task. Wait for it to finish."
|
|
@@ -2083,7 +2098,7 @@ def build_turn_message(speaker, other, ctx):
|
|
| 2083 |
(SOUL.md, IDENTITY.md, workspace/memory/). This message provides only
|
| 2084 |
context and turn instructions.
|
| 2085 |
"""
|
| 2086 |
-
global _pending_task_just_submitted, _pending_task_timestamp, _pending_task_speaker, _pending_task_desc, _discussion_loop_count
|
| 2087 |
parts = []
|
| 2088 |
|
| 2089 |
# Brief role context (supplements agent's SOUL.md until it's fully configured)
|
|
@@ -2377,6 +2392,23 @@ RULES:
|
|
| 2377 |
parts.append(f"DO NOT discuss. DO NOT plan. Write task ONLY.")
|
| 2378 |
parts.append(f"SYSTEM OVERRIDE: PLANNING SUSPENDED. EXECUTE PUSH NOW.")
|
| 2379 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2380 |
return "\n".join(parts)
|
| 2381 |
|
| 2382 |
|
|
@@ -2887,6 +2919,50 @@ This is a PURGE & REBOOT — start fresh, don't reference previous failed attemp
|
|
| 2887 |
_lockdown_error_onset = 0
|
| 2888 |
_lockdown_push_count_at_error = 0
|
| 2889 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2890 |
# Note: Aggressive CC auto-termination based on push frequency is removed.
|
| 2891 |
# God monitors push frequency and proposes mechanism fixes when needed.
|
| 2892 |
# The normal CLAUDE_TIMEOUT auto-kill in do_turn() handles truly stuck processes.
|
|
|
|
| 155 |
LOCKDOWN_ERROR_THRESHOLD_SECS = 600 # 10 minutes in error state without effective pushes triggers LOCKDOWN
|
| 156 |
LOCKDOWN_RESET_SECS = 180 # 3 minutes — LOCKDOWN lasts this long before returning to normal
|
| 157 |
|
| 158 |
+
# SANITY CHECK Mode — Break source-analysis loop by forcing runtime inspection
|
| 159 |
+
# Detects when agents are stuck discussing source code without verifying against runtime
|
| 160 |
+
_sanity_check_mode = False # When True, require runtime inspection before source modifications
|
| 161 |
+
_sanity_check_trigger_time = 0.0 # When SANITY CHECK was triggered
|
| 162 |
+
_sanity_check_required = True # Whether runtime inspection is still required
|
| 163 |
+
SANITY_CHECK_RESET_SECS = 180 # 3 minutes — SANITY CHECK lasts this long before returning to normal
|
| 164 |
+
|
| 165 |
def _init_push_count_from_workspace():
|
| 166 |
"""Initialize push count from existing workspace commits.
|
| 167 |
This persists push tracking across conversation loop restarts."""
|
|
|
|
| 953 |
|
| 954 |
def cc_submit_task(task, assigned_by, ctx):
|
| 955 |
"""Submit a task to Claude Code in background. Non-blocking."""
|
| 956 |
+
global _sanity_check_required
|
| 957 |
+
|
| 958 |
+
# SANITY CHECK: Detect runtime command and clear the requirement flag
|
| 959 |
+
runtime_command_keywords = ["ls -la", "ls /app", "pwd", "cat /app", "docker", "whoami", "env"]
|
| 960 |
+
if _sanity_check_required and any(kw in task.lower() for kw in runtime_command_keywords):
|
| 961 |
+
print(f"[SANITY-CHECK] Runtime command detected in task, clearing requirement flag")
|
| 962 |
+
_sanity_check_required = False
|
| 963 |
+
|
| 964 |
with cc_lock:
|
| 965 |
if cc_status["running"]:
|
| 966 |
return "BUSY: Claude Code is already working on a task. Wait for it to finish."
|
|
|
|
| 2098 |
(SOUL.md, IDENTITY.md, workspace/memory/). This message provides only
|
| 2099 |
context and turn instructions.
|
| 2100 |
"""
|
| 2101 |
+
global _pending_task_just_submitted, _pending_task_timestamp, _pending_task_speaker, _pending_task_desc, _discussion_loop_count, _sanity_check_mode, _sanity_check_required
|
| 2102 |
parts = []
|
| 2103 |
|
| 2104 |
# Brief role context (supplements agent's SOUL.md until it's fully configured)
|
|
|
|
| 2392 |
parts.append(f"DO NOT discuss. DO NOT plan. Write task ONLY.")
|
| 2393 |
parts.append(f"SYSTEM OVERRIDE: PLANNING SUSPENDED. EXECUTE PUSH NOW.")
|
| 2394 |
|
| 2395 |
+
# SANITY CHECK PROTOCOL: Force runtime inspection to break source-analysis loop
|
| 2396 |
+
# When triggered, agents MUST execute a runtime command before any source code modifications
|
| 2397 |
+
if _sanity_check_mode:
|
| 2398 |
+
parts.append(f"\n🔍🔍🔍 SANITY CHECK: RUNTIME INSPECTION REQUIRED 🔍🔍🔍")
|
| 2399 |
+
parts.append(f"Source-analysis loop detected. Agents are discussing code without verifying against the runtime environment.")
|
| 2400 |
+
if _sanity_check_required:
|
| 2401 |
+
parts.append(f"\n🛑 STOP ALL SOURCE CODE MODIFICATIONS!")
|
| 2402 |
+
parts.append(f"MANDATE: The next task assigned MUST be a runtime inspection command.")
|
| 2403 |
+
parts.append(f"")
|
| 2404 |
+
parts.append(f"Write [TASK]...[/TASK] with EXACTLY this command:")
|
| 2405 |
+
parts.append(f"[TASK] Execute 'ls -la /app' to list the directory structure of the running container. Post the full output. [/TASK]")
|
| 2406 |
+
parts.append(f"")
|
| 2407 |
+
parts.append(f"⛔ NO app.py modifications. NO source code edits. ONLY runtime inspection.")
|
| 2408 |
+
parts.append(f"This establishes GROUND TRUTH required to break the deadlock.")
|
| 2409 |
+
else:
|
| 2410 |
+
parts.append(f"Runtime inspection complete. You may now proceed with source code modifications based on the verified runtime state.")
|
| 2411 |
+
|
| 2412 |
return "\n".join(parts)
|
| 2413 |
|
| 2414 |
|
|
|
|
| 2919 |
_lockdown_error_onset = 0
|
| 2920 |
_lockdown_push_count_at_error = 0
|
| 2921 |
|
| 2922 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 2923 |
+
# SANITY CHECK MODE — Break source-analysis loop by forcing runtime inspection
|
| 2924 |
+
# Detects when agents are stuck discussing source code without verifying against runtime
|
| 2925 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 2926 |
+
# Note: _sanity_check_mode, _sanity_check_trigger_time, _sanity_check_required are module-level globals
|
| 2927 |
+
|
| 2928 |
+
# SANITY CHECK TRIGGER: Detect source-analysis loop
|
| 2929 |
+
# Pattern: Agents discussing source code (app.py, imports, structure) without runtime verification
|
| 2930 |
+
# Detection: Check recent conversation for source-analysis keywords with NO runtime commands
|
| 2931 |
+
if not _sanity_check_mode and not _force_push_mode and not _lockdown_mode:
|
| 2932 |
+
if len(history) >= 3 and _discussion_loop_count >= 2:
|
| 2933 |
+
# Check if recent conversation shows source-analysis pattern
|
| 2934 |
+
recent_texts = " ".join(h.get("text", "") for h in history[-3:])
|
| 2935 |
+
recent_lower = recent_texts.lower()
|
| 2936 |
+
|
| 2937 |
+
# Source-analysis keywords: discussing code structure without runtime verification
|
| 2938 |
+
source_analysis_keywords = [
|
| 2939 |
+
"app.py", "import", "function", "class", "structure", "file",
|
| 2940 |
+
"code shows", "the code", "let me check", "according to the code",
|
| 2941 |
+
"looking at the code", "the file", "source", "implementation"
|
| 2942 |
+
]
|
| 2943 |
+
|
| 2944 |
+
# Runtime command keywords: evidence of actual runtime interaction
|
| 2945 |
+
runtime_keywords = [
|
| 2946 |
+
"[task]", "ls -la", "pwd", "cat /app", "ls /app", "docker",
|
| 2947 |
+
"container", "runtime", "executed", "ran command", "output shows"
|
| 2948 |
+
]
|
| 2949 |
+
|
| 2950 |
+
has_source_analysis = any(kw in recent_lower for kw in source_analysis_keywords)
|
| 2951 |
+
has_runtime_verification = any(kw in recent_lower for kw in runtime_keywords)
|
| 2952 |
+
|
| 2953 |
+
# Trigger sanity check if: analyzing source BUT no runtime verification
|
| 2954 |
+
if has_source_analysis and not has_runtime_verification:
|
| 2955 |
+
print(f"[SANITY-CHECK] TRIGGERED! Source-analysis loop detected ({_discussion_loop_count} turns). Agents discussing code without runtime verification. Forcing runtime inspection.")
|
| 2956 |
+
_sanity_check_mode = True
|
| 2957 |
+
_sanity_check_trigger_time = time.time()
|
| 2958 |
+
_sanity_check_required = True
|
| 2959 |
+
|
| 2960 |
+
# Reset SANITY CHECK mode after timeout (safety valve)
|
| 2961 |
+
if _sanity_check_mode and time.time() - _sanity_check_trigger_time > SANITY_CHECK_RESET_SECS:
|
| 2962 |
+
print(f"[SANITY-CHECK] Mode timeout ({SANITY_CHECK_RESET_SECS}s), resetting to normal")
|
| 2963 |
+
_sanity_check_mode = False
|
| 2964 |
+
_sanity_check_required = False
|
| 2965 |
+
|
| 2966 |
# Note: Aggressive CC auto-termination based on push frequency is removed.
|
| 2967 |
# God monitors push frequency and proposes mechanism fixes when needed.
|
| 2968 |
# The normal CLAUDE_TIMEOUT auto-kill in do_turn() handles truly stuck processes.
|