Claude Code Claude Opus 4.6 committed on
Commit
c471048
·
1 Parent(s): 919ba9c

god: implement SANITY CHECK Mode — Break Source-Analysis Loop by Forcing Runtime Inspection

Browse files

Fixes the deadlock where agents discuss source code (app.py, imports, structure) without
verifying against the actual runtime environment. The "Sanity Check" injection forces
agents to interact with the runtime before touching source code again.

Key changes:
- Add global state variables for sanity check mode (_sanity_check_mode, _sanity_check_required, etc.)
- Detection logic: Identify source-analysis pattern (discussing code structure) without runtime verification
- Enforcement: Inject turn-message instructions telling agents to stop source modifications until a runtime inspection task (e.g. ls -la /app) is submitted
- Clear requirement flag when runtime command is detected in task

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/conversation-loop.py +77 -1
scripts/conversation-loop.py CHANGED
@@ -155,6 +155,13 @@ _lockdown_push_count_at_error = 0 # Push count when error started (to detect if
155
  LOCKDOWN_ERROR_THRESHOLD_SECS = 600 # 10 minutes in error state without effective pushes triggers LOCKDOWN
156
  LOCKDOWN_RESET_SECS = 180 # 3 minutes — LOCKDOWN lasts this long before returning to normal
157
 
 
 
 
 
 
 
 
158
  def _init_push_count_from_workspace():
159
  """Initialize push count from existing workspace commits.
160
  This persists push tracking across conversation loop restarts."""
@@ -946,6 +953,14 @@ WORKER_HEARTBEAT_TIMEOUT = 30 # seconds before triggering diagnostic review
946
 
947
  def cc_submit_task(task, assigned_by, ctx):
948
  """Submit a task to Claude Code in background. Non-blocking."""
 
 
 
 
 
 
 
 
949
  with cc_lock:
950
  if cc_status["running"]:
951
  return "BUSY: Claude Code is already working on a task. Wait for it to finish."
@@ -2083,7 +2098,7 @@ def build_turn_message(speaker, other, ctx):
2083
  (SOUL.md, IDENTITY.md, workspace/memory/). This message provides only
2084
  context and turn instructions.
2085
  """
2086
- global _pending_task_just_submitted, _pending_task_timestamp, _pending_task_speaker, _pending_task_desc, _discussion_loop_count
2087
  parts = []
2088
 
2089
  # Brief role context (supplements agent's SOUL.md until it's fully configured)
@@ -2377,6 +2392,23 @@ RULES:
2377
  parts.append(f"DO NOT discuss. DO NOT plan. Write task ONLY.")
2378
  parts.append(f"SYSTEM OVERRIDE: PLANNING SUSPENDED. EXECUTE PUSH NOW.")
2379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2380
  return "\n".join(parts)
2381
 
2382
 
@@ -2887,6 +2919,50 @@ This is a PURGE & REBOOT — start fresh, don't reference previous failed attemp
2887
  _lockdown_error_onset = 0
2888
  _lockdown_push_count_at_error = 0
2889
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2890
  # Note: Aggressive CC auto-termination based on push frequency is removed.
2891
  # God monitors push frequency and proposes mechanism fixes when needed.
2892
  # The normal CLAUDE_TIMEOUT auto-kill in do_turn() handles truly stuck processes.
 
155
  LOCKDOWN_ERROR_THRESHOLD_SECS = 600 # 10 minutes in error state without effective pushes triggers LOCKDOWN
156
  LOCKDOWN_RESET_SECS = 180 # 3 minutes — LOCKDOWN lasts this long before returning to normal
157
 
158
+ # SANITY CHECK Mode — Break source-analysis loop by forcing runtime inspection
159
+ # Detects when agents are stuck discussing source code without verifying against runtime
160
+ _sanity_check_mode = False # When True, require runtime inspection before source modifications
161
+ _sanity_check_trigger_time = 0.0 # When SANITY CHECK was triggered
162
+ _sanity_check_required = True # Whether runtime inspection is still required
163
+ SANITY_CHECK_RESET_SECS = 180 # 3 minutes — SANITY CHECK lasts this long before returning to normal
164
+
165
  def _init_push_count_from_workspace():
166
  """Initialize push count from existing workspace commits.
167
  This persists push tracking across conversation loop restarts."""
 
953
 
954
  def cc_submit_task(task, assigned_by, ctx):
955
  """Submit a task to Claude Code in background. Non-blocking."""
956
+ global _sanity_check_required
957
+
958
+ # SANITY CHECK: Detect runtime command and clear the requirement flag
959
+ runtime_command_keywords = ["ls -la", "ls /app", "pwd", "cat /app", "docker", "whoami", "env"]
960
+ if _sanity_check_required and any(kw in task.lower() for kw in runtime_command_keywords):
961
+ print(f"[SANITY-CHECK] Runtime command detected in task, clearing requirement flag")
962
+ _sanity_check_required = False
963
+
964
  with cc_lock:
965
  if cc_status["running"]:
966
  return "BUSY: Claude Code is already working on a task. Wait for it to finish."
 
2098
  (SOUL.md, IDENTITY.md, workspace/memory/). This message provides only
2099
  context and turn instructions.
2100
  """
2101
+ global _pending_task_just_submitted, _pending_task_timestamp, _pending_task_speaker, _pending_task_desc, _discussion_loop_count, _sanity_check_mode, _sanity_check_required
2102
  parts = []
2103
 
2104
  # Brief role context (supplements agent's SOUL.md until it's fully configured)
 
2392
  parts.append(f"DO NOT discuss. DO NOT plan. Write task ONLY.")
2393
  parts.append(f"SYSTEM OVERRIDE: PLANNING SUSPENDED. EXECUTE PUSH NOW.")
2394
 
2395
+ # SANITY CHECK PROTOCOL: Force runtime inspection to break source-analysis loop
2396
+ # When triggered, agents MUST execute a runtime command before any source code modifications
2397
+ if _sanity_check_mode:
2398
+ parts.append(f"\n🔍🔍🔍 SANITY CHECK: RUNTIME INSPECTION REQUIRED 🔍🔍🔍")
2399
+ parts.append(f"Source-analysis loop detected. Agents are discussing code without verifying against the runtime environment.")
2400
+ if _sanity_check_required:
2401
+ parts.append(f"\n🛑 STOP ALL SOURCE CODE MODIFICATIONS!")
2402
+ parts.append(f"MANDATE: The next task assigned MUST be a runtime inspection command.")
2403
+ parts.append(f"")
2404
+ parts.append(f"Write [TASK]...[/TASK] with EXACTLY this command:")
2405
+ parts.append(f"[TASK] Execute 'ls -la /app' to list the directory structure of the running container. Post the full output. [/TASK]")
2406
+ parts.append(f"")
2407
+ parts.append(f"⛔ NO app.py modifications. NO source code edits. ONLY runtime inspection.")
2408
+ parts.append(f"This establishes GROUND TRUTH required to break the deadlock.")
2409
+ else:
2410
+ parts.append(f"Runtime inspection complete. You may now proceed with source code modifications based on the verified runtime state.")
2411
+
2412
  return "\n".join(parts)
2413
 
2414
 
 
2919
  _lockdown_error_onset = 0
2920
  _lockdown_push_count_at_error = 0
2921
 
2922
+ # ══════════════════════════════════════════════════════════════════════════════
2923
+ # SANITY CHECK MODE — Break source-analysis loop by forcing runtime inspection
2924
+ # Detects when agents are stuck discussing source code without verifying against runtime
2925
+ # ══════════════════════════════════════════════════════════════════════════════
2926
+ # Note: _sanity_check_mode, _sanity_check_trigger_time, _sanity_check_required are module-level globals
2927
+
2928
+ # SANITY CHECK TRIGGER: Detect source-analysis loop
2929
+ # Pattern: Agents discussing source code (app.py, imports, structure) without runtime verification
2930
+ # Detection: Check recent conversation for source-analysis keywords with NO runtime commands
2931
+ if not _sanity_check_mode and not _force_push_mode and not _lockdown_mode:
2932
+ if len(history) >= 3 and _discussion_loop_count >= 2:
2933
+ # Check if recent conversation shows source-analysis pattern
2934
+ recent_texts = " ".join(h.get("text", "") for h in history[-3:])
2935
+ recent_lower = recent_texts.lower()
2936
+
2937
+ # Source-analysis keywords: discussing code structure without runtime verification
2938
+ source_analysis_keywords = [
2939
+ "app.py", "import", "function", "class", "structure", "file",
2940
+ "code shows", "the code", "let me check", "according to the code",
2941
+ "looking at the code", "the file", "source", "implementation"
2942
+ ]
2943
+
2944
+ # Runtime command keywords: evidence of actual runtime interaction
2945
+ runtime_keywords = [
2946
+ "[task]", "ls -la", "pwd", "cat /app", "ls /app", "docker",
2947
+ "container", "runtime", "executed", "ran command", "output shows"
2948
+ ]
2949
+
2950
+ has_source_analysis = any(kw in recent_lower for kw in source_analysis_keywords)
2951
+ has_runtime_verification = any(kw in recent_lower for kw in runtime_keywords)
2952
+
2953
+ # Trigger sanity check if: analyzing source BUT no runtime verification
2954
+ if has_source_analysis and not has_runtime_verification:
2955
+ print(f"[SANITY-CHECK] TRIGGERED! Source-analysis loop detected ({_discussion_loop_count} turns). Agents discussing code without runtime verification. Forcing runtime inspection.")
2956
+ _sanity_check_mode = True
2957
+ _sanity_check_trigger_time = time.time()
2958
+ _sanity_check_required = True
2959
+
2960
+ # Reset SANITY CHECK mode after timeout (safety valve)
2961
+ if _sanity_check_mode and time.time() - _sanity_check_trigger_time > SANITY_CHECK_RESET_SECS:
2962
+ print(f"[SANITY-CHECK] Mode timeout ({SANITY_CHECK_RESET_SECS}s), resetting to normal")
2963
+ _sanity_check_mode = False
2964
+ _sanity_check_required = False
2965
+
2966
  # Note: Aggressive CC auto-termination based on push frequency is removed.
2967
  # God monitors push frequency and proposes mechanism fixes when needed.
2968
  # The normal CLAUDE_TIMEOUT auto-kill in do_turn() handles truly stuck processes.