Spaces:
Runtime error
Runtime error
merge: adopt God's 25 iterations on discussion/execution balance
Browse files
God received our push-frequency tracking and iterated 25 times to
refine CC timeout, auto-completion detection, pending task tracking,
and zero-push warnings. Adopting God's latest version as canonical.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- scripts/conversation-loop.py +244 -44
scripts/conversation-loop.py
CHANGED
|
@@ -136,6 +136,29 @@ _pending_cooldown = False
|
|
| 136 |
_push_count = 0 # total pushes since startup
|
| 137 |
_last_push_time = 0.0 # timestamp of last successful push
|
| 138 |
_turns_since_last_push = 0 # turns since last push (resets on push)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
def check_and_clear_cooldown():
|
| 141 |
"""Auto-clear cooldown if Cain has finished building."""
|
|
@@ -398,7 +421,7 @@ def action_terminate_cc():
|
|
| 398 |
# ── Claude Code Action (THE STAR) ─────────────────────────────────────────────
|
| 399 |
|
| 400 |
CLAUDE_WORK_DIR = "/tmp/claude-workspace"
|
| 401 |
-
CLAUDE_TIMEOUT = 300 # 5 minutes
|
| 402 |
TURN_INTERVAL = 15 # seconds between turns — fast enough for lively discussion
|
| 403 |
|
| 404 |
# Global acpx session - persistent across all claude_code calls
|
|
@@ -484,6 +507,14 @@ def _write_claude_md(workspace, role="worker"):
|
|
| 484 |
- NEVER install torch/transformers unless absolutely required (2GB+, causes OOM)
|
| 485 |
- You have FULL permission to read/write/create/delete files. Just do it.
|
| 486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
## Focus
|
| 488 |
Improve {CHILD_NAME}'s functionality, add features, fix bugs.
|
| 489 |
Do NOT re-check or re-configure infrastructure that is already working.
|
|
@@ -761,6 +792,7 @@ def action_claude_code(task):
|
|
| 761 |
push_result = f"Pushed changes:\n{status_out}"
|
| 762 |
_pending_cooldown = True
|
| 763 |
_push_count += 1
|
|
|
|
| 764 |
_last_push_time = time.time()
|
| 765 |
_turns_since_last_push = 0
|
| 766 |
print(f"[CLAUDE-CODE] Pushed (#{_push_count}): {status_out}")
|
|
@@ -803,8 +835,9 @@ def cc_submit_task(task, assigned_by, ctx):
|
|
| 803 |
cc_status["last_completed_by"] = last_completed_by
|
| 804 |
cc_status["last_completed_at"] = last_completed_at
|
| 805 |
cc_live_lines.clear()
|
| 806 |
-
global _last_cc_output_time
|
| 807 |
_last_cc_output_time = time.time() # Initialize to now, will update as we get output
|
|
|
|
| 808 |
|
| 809 |
enriched = enrich_task_with_context(task, ctx)
|
| 810 |
print(f"[TASK] {assigned_by} assigned to Claude Code ({len(enriched)} chars)...")
|
|
@@ -850,24 +883,36 @@ def cc_get_live_status():
|
|
| 850 |
# Detect COMPLETED CC: output shows completion markers but status wasn't updated
|
| 851 |
# This happens when worker thread fails to update status after completion
|
| 852 |
# Common completion markers from acpx/Claude Code:
|
|
|
|
|
|
|
| 853 |
completion_patterns = [
|
| 854 |
-
"[done]", "end_turn", # Explicit markers
|
| 855 |
-
"
|
| 856 |
-
"
|
| 857 |
-
"
|
| 858 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
]
|
| 860 |
completion_marker_found = any(p in recent.lower() for p in completion_patterns)
|
| 861 |
-
|
|
|
|
|
|
|
|
|
|
| 862 |
# Auto-mark as finished to prevent deadlock
|
| 863 |
cc_status["running"] = False
|
| 864 |
-
cc_status["result"] = f"(Auto-detected
|
| 865 |
cc_status["last_completed_task"] = cc_status["task"]
|
| 866 |
cc_status["last_completed_by"] = cc_status["assigned_by"]
|
| 867 |
cc_status["last_completed_at"] = time.time()
|
| 868 |
_cc_stale_count = 0
|
| 869 |
_last_cc_snapshot = ""
|
| 870 |
-
print(f"[CC-AUTO-FINISH] Detected
|
| 871 |
# Fall through to result display below
|
| 872 |
|
| 873 |
# Detect STUCK CC: been running with no new output for too long
|
|
@@ -883,8 +928,13 @@ def cc_get_live_status():
|
|
| 883 |
f"Recent output:\n{recent}{stale_note}{stuck_note}")
|
| 884 |
|
| 885 |
if cc_status["result"]:
|
| 886 |
-
|
| 887 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
else:
|
| 889 |
return "๐ค Claude Code is IDLE โ no active task."
|
| 890 |
|
|
@@ -1550,8 +1600,31 @@ def parse_and_execute_turn(raw_text, ctx):
|
|
| 1550 |
elif child_state["stage"] in ("BUILDING", "RESTARTING", "APP_STARTING"):
|
| 1551 |
results.append({"action": "task", "result": f"BLOCKED: Cain is {child_state['stage']}. Wait for it to finish."})
|
| 1552 |
elif cc_status["running"]:
|
| 1553 |
-
|
| 1554 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1555 |
# Check cooldown
|
| 1556 |
check_and_clear_cooldown()
|
| 1557 |
if last_rebuild_trigger_at > 0:
|
|
@@ -1627,11 +1700,12 @@ def parse_and_execute_turn(raw_text, ctx):
|
|
| 1627 |
print(f"[LOOP-DISCUSS] Reset (task assigned)")
|
| 1628 |
_discussion_loop_count = 0
|
| 1629 |
else:
|
| 1630 |
-
# Increment when:
|
| 1631 |
-
# This includes both
|
| 1632 |
_discussion_loop_count += 1
|
| 1633 |
if _discussion_loop_count >= 2:
|
| 1634 |
-
|
|
|
|
| 1635 |
|
| 1636 |
# Clean text for display (memory is handled by each agent's OpenClaw)
|
| 1637 |
clean = re.sub(r'\[TASK\].*?\[/TASK\]', '', raw_text, flags=re.DOTALL)
|
|
@@ -1674,29 +1748,56 @@ def build_turn_message(speaker, other, ctx):
|
|
| 1674 |
parts.append(f"Claude Code is your engineer โ runs in background. You discuss and assign tasks, you do NOT code.")
|
| 1675 |
|
| 1676 |
# Discussion/execution balance strategy
|
|
|
|
| 1677 |
parts.append(f"""
|
| 1678 |
=== DISCUSSION vs EXECUTION STRATEGY ===
|
| 1679 |
-
- When CC is WORKING:
|
| 1680 |
- When CC is IDLE + child has ERROR: NO discussion. Write [TASK] immediately. Trial-and-error > planning.
|
| 1681 |
- When CC is IDLE + child is RUNNING: 1 turn of discussion max, then [TASK] on next turn.
|
| 1682 |
- When CC JUST FINISHED: 1 turn to review result, then [TASK] immediately.
|
| 1683 |
-
- Push frequency target: at least 1 push every 5 turns. Current: {_push_count} pushes in {turn_count} turns.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1684 |
|
| 1685 |
# PENDING TASK WARNING — must come EARLY to prevent discussion loops
|
| 1686 |
-
#
|
| 1687 |
cc_busy = cc_status["running"]
|
| 1688 |
-
if _pending_task_just_submitted and cc_busy
|
| 1689 |
elapsed_since_submit = int(time.time() - _pending_task_timestamp)
|
| 1690 |
-
if elapsed_since_submit <
|
| 1691 |
parts.append(f"\n{'='*60}")
|
| 1692 |
-
|
|
|
|
|
|
|
|
|
|
| 1693 |
parts.append(f"Task: {_pending_task_desc[:100]}...")
|
| 1694 |
parts.append(f"DO NOT discuss. DO NOT write a new [TASK].")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1695 |
parts.append(f"Wait for Claude Code to finish, then review the result.")
|
| 1696 |
parts.append(f"{'='*60}")
|
| 1697 |
-
# Auto-clear pending flag after 90 seconds if CC hasn't started
|
| 1698 |
-
if elapsed_since_submit > 60:
|
| 1699 |
-
_pending_task_just_submitted = False
|
| 1700 |
return "\n".join(parts) # Return early - agent should just wait
|
| 1701 |
|
| 1702 |
# Conversation history
|
|
@@ -1741,8 +1842,37 @@ def build_turn_message(speaker, other, ctx):
|
|
| 1741 |
if cc_busy and _cc_stale_count >= 2:
|
| 1742 |
parts.append(f"\nClaude Code is WORKING but no new output. Discuss plans with {other} instead.")
|
| 1743 |
elif cc_busy:
|
| 1744 |
-
|
| 1745 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1746 |
# Check cooldown and inform agents
|
| 1747 |
check_and_clear_cooldown()
|
| 1748 |
cooldown_remaining = 0
|
|
@@ -1752,7 +1882,7 @@ def build_turn_message(speaker, other, ctx):
|
|
| 1752 |
if cooldown_remaining > 0:
|
| 1753 |
parts.append(f"\n{CHILD_NAME} is {child_state['stage']}. Cooldown active: {int(cooldown_remaining)}s remaining. Discuss plans but DO NOT assign [TASK] until cooldown ends.")
|
| 1754 |
else:
|
| 1755 |
-
parts.append(f"\n{CHILD_NAME} is {child_state['stage']}.
|
| 1756 |
# Add recent task reminder during cooldown/building
|
| 1757 |
if recent_task_reminder:
|
| 1758 |
last_completed, last_by, last_at = recent_task_reminder
|
|
@@ -1770,13 +1900,24 @@ def build_turn_message(speaker, other, ctx):
|
|
| 1770 |
parts.append(f"\nREMEMBER: {last_by} just completed '{last_completed}' ({int(time.time() - last_at)}s ago).")
|
| 1771 |
parts.append(f"FIRST: Review whether that fix actually worked. SECOND: If the fix was correct, use [ACTION: restart] to apply it. THIRD: Only write a new [TASK]...[/TASK] if the previous fix was incomplete or wrong.")
|
| 1772 |
else:
|
| 1773 |
-
parts.append(f"\n๐จ {CHILD_NAME} has {child_state['stage']}!
|
| 1774 |
-
parts.append(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1775 |
elif child_state["alive"] and cc_status.get("result"):
|
| 1776 |
if recent_task_reminder:
|
| 1777 |
last_completed, last_by, last_at = recent_task_reminder
|
| 1778 |
parts.append(f"\n{CHILD_NAME} is alive. REMEMBER: {last_by} just completed '{last_completed}' ({int(time.time() - last_at)}s ago).")
|
| 1779 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1780 |
elif child_state["alive"]:
|
| 1781 |
if recent_task_reminder:
|
| 1782 |
last_completed, last_by, last_at = recent_task_reminder
|
|
@@ -1790,11 +1931,24 @@ def build_turn_message(speaker, other, ctx):
|
|
| 1790 |
last_completed, last_by, last_at = recent_task_reminder
|
| 1791 |
parts.append(f"\nAnalyze the situation. REMEMBER: {last_by} just completed '{last_completed}' ({int(time.time() - last_at)}s ago). Review whether it worked before writing a new [TASK].")
|
| 1792 |
else:
|
| 1793 |
-
parts.append(f"\
|
| 1794 |
|
| 1795 |
# Discussion loop warning — escalates quickly to force action
|
| 1796 |
-
|
| 1797 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1798 |
elif _discussion_loop_count >= 1 and not cc_busy:
|
| 1799 |
parts.append(f"\nREMINDER: Last turn had no [TASK]. If CC is idle, you MUST assign work this turn.")
|
| 1800 |
|
|
@@ -1908,37 +2062,73 @@ def do_turn(speaker, other, space_url):
|
|
| 1908 |
ctx = gather_context()
|
| 1909 |
|
| 1910 |
# Check if CC just finished — clear result after agents see it once
|
|
|
|
| 1911 |
with cc_lock:
|
| 1912 |
cc_just_finished = (not cc_status["running"] and cc_status["result"])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1913 |
|
| 1914 |
# AUTO-TERMINATE stuck Claude Code processes
|
| 1915 |
# If CC has been running longer than timeout with no new output, auto-kill it
|
|
|
|
| 1916 |
with cc_lock:
|
| 1917 |
cc_running = cc_status["running"]
|
| 1918 |
cc_started = cc_status["started"]
|
| 1919 |
time_since_start = time.time() - cc_started if cc_running else 0
|
| 1920 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1921 |
# Check if output is stale (no new lines for 3+ turns)
|
| 1922 |
time_since_new_output = time.time() - _last_cc_output_time if _last_cc_output_time > 0 else time_since_start
|
| 1923 |
-
|
| 1924 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1925 |
terminate_result = action_terminate_cc()
|
| 1926 |
print(f"[CC-AUTO-KILL] {terminate_result}")
|
| 1927 |
|
| 1928 |
-
# EMERGENCY OVERRIDE: Force a task assignment if agents are stuck in discussion loop
|
| 1929 |
# This bypasses the agent when they've discussed for 5+ turns with CC idle
|
| 1930 |
# IMPORTANT: Also triggers when child is in ERROR state (not alive) - that's when agents are most stuck!
|
|
|
|
| 1931 |
cc_busy = cc_status["running"]
|
| 1932 |
child_alive = child_state["alive"] or child_state["stage"] == "RUNNING"
|
| 1933 |
child_in_error = child_state["stage"] in ("RUNTIME_ERROR", "BUILD_ERROR", "CONFIG_ERROR")
|
| 1934 |
-
|
|
|
|
|
|
|
| 1935 |
# EMERGENCY OVERRIDE: Force a task assignment if agents are stuck in discussion loop
|
| 1936 |
-
|
|
|
|
|
|
|
|
|
|
| 1937 |
# Assign a concrete fix task, not just analysis — trial-and-error is better than deliberation
|
| 1938 |
if child_in_error:
|
| 1939 |
-
forced_task = f"Cain has {child_state['stage']}.
|
| 1940 |
else:
|
| 1941 |
-
|
|
|
|
|
|
|
| 1942 |
submit_result = cc_submit_task(forced_task, f"{speaker}(EMERGENCY)", ctx)
|
| 1943 |
# Track the pending task so other agent knows about it
|
| 1944 |
_pending_task_just_submitted = True
|
|
@@ -1949,7 +2139,10 @@ def do_turn(speaker, other, space_url):
|
|
| 1949 |
loop_count_before = _discussion_loop_count
|
| 1950 |
_discussion_loop_count = 0
|
| 1951 |
# Generate a placeholder message for the agent
|
| 1952 |
-
|
|
|
|
|
|
|
|
|
|
| 1953 |
zh = f"[็ดงๆฅๅพช็ฏๆๆญ] ๅจ{loop_count_before}ๆฌก่ฎจ่ฎบ่ฝฎๆฌกๅ๏ผๆๆญฃๅผบๅถClaude Codeๅๆๆ
ๅตๅนถ็กฎๅฎ้่ฆไฟฎๅค็ๅ
ๅฎนใ"
|
| 1954 |
action_results = [{"action": "claude_code(forced)", "result": submit_result}]
|
| 1955 |
elapsed = 0.1
|
|
@@ -1998,6 +2191,10 @@ def do_turn(speaker, other, space_url):
|
|
| 1998 |
_context_cache.clear()
|
| 1999 |
# Clear pending task flag since CC finished
|
| 2000 |
_pending_task_just_submitted = False
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2001 |
|
| 2002 |
# Add to history with timestamp (text stays CLEAN for agent context)
|
| 2003 |
ts = datetime.datetime.utcnow().strftime("%H:%M")
|
|
@@ -2329,6 +2526,9 @@ def do_god_turn():
|
|
| 2329 |
_last_god_time = 0.0 # timestamp of last God run
|
| 2330 |
_god_running = False # flag to track if God is currently running
|
| 2331 |
|
|
|
|
|
|
|
|
|
|
| 2332 |
# Main loop: Adam → Eve → Adam → Eve → ... with God every 2 minutes
|
| 2333 |
print("[LOOP] Entering main conversation loop...", flush=True)
|
| 2334 |
iteration = 0
|
|
|
|
| 136 |
_push_count = 0 # total pushes since startup
|
| 137 |
_last_push_time = 0.0 # timestamp of last successful push
|
| 138 |
_turns_since_last_push = 0 # turns since last push (resets on push)
|
| 139 |
+
_push_count_this_task = 0 # pushes made during the CURRENT CC task (resets on new task)
|
| 140 |
+
|
| 141 |
+
def _init_push_count_from_workspace():
|
| 142 |
+
"""Initialize push count from existing workspace commits.
|
| 143 |
+
This persists push tracking across conversation loop restarts."""
|
| 144 |
+
global _push_count, _last_push_time
|
| 145 |
+
try:
|
| 146 |
+
if os.path.exists(CLAUDE_WORK_DIR):
|
| 147 |
+
result = subprocess.run(
|
| 148 |
+
f'git log --since="1 hour ago" --format="%H %ct" --author="Claude Code"',
|
| 149 |
+
shell=True, cwd=CLAUDE_WORK_DIR, capture_output=True, text=True, timeout=10
|
| 150 |
+
)
|
| 151 |
+
if result.returncode == 0 and result.stdout.strip():
|
| 152 |
+
commits = result.stdout.strip().split('\n')
|
| 153 |
+
# Count only Claude Code commits from the last hour
|
| 154 |
+
_push_count = len(commits)
|
| 155 |
+
if commits:
|
| 156 |
+
# Get timestamp of most recent commit
|
| 157 |
+
last_commit_ts = int(commits[0].split()[1])
|
| 158 |
+
_last_push_time = float(last_commit_ts)
|
| 159 |
+
print(f"[PUSH-TRACK] Initialized push count from workspace: {_push_count} commits in last hour")
|
| 160 |
+
except Exception as e:
|
| 161 |
+
print(f"[PUSH-TRACK] Failed to initialize from workspace: {e}")
|
| 162 |
|
| 163 |
def check_and_clear_cooldown():
|
| 164 |
"""Auto-clear cooldown if Cain has finished building."""
|
|
|
|
| 421 |
# ── Claude Code Action (THE STAR) ─────────────────────────────────────────────
|
| 422 |
|
| 423 |
CLAUDE_WORK_DIR = "/tmp/claude-workspace"
|
| 424 |
+
CLAUDE_TIMEOUT = 300 # 5 minutes (reduced dynamically when push frequency is low)
|
| 425 |
TURN_INTERVAL = 15 # seconds between turns — fast enough for lively discussion
|
| 426 |
|
| 427 |
# Global acpx session - persistent across all claude_code calls
|
|
|
|
| 507 |
- NEVER install torch/transformers unless absolutely required (2GB+, causes OOM)
|
| 508 |
- You have FULL permission to read/write/create/delete files. Just do it.
|
| 509 |
|
| 510 |
+
## SPEED + TRIAL-AND-ERROR (CRITICAL)
|
| 511 |
+
- PUSH WITHIN 60-90 SECONDS of starting a task โ don't over-plan
|
| 512 |
+
- Trial-and-error is GOOD: a bad push is better than 5 minutes of deliberation
|
| 513 |
+
- Read โ Act โ Push โ Iterate. NOT Read โ Think โ Plan โ Discuss โ Act.
|
| 514 |
+
- When {CHILD_NAME} has errors: push a fix IMMEDIATELY, don't analyze exhaustively
|
| 515 |
+
- Your goal: maximize push frequency, not perfection on first try
|
| 516 |
+
- If unsure, just pick a reasonable fix and push โ see what breaks
|
| 517 |
+
|
| 518 |
## Focus
|
| 519 |
Improve {CHILD_NAME}'s functionality, add features, fix bugs.
|
| 520 |
Do NOT re-check or re-configure infrastructure that is already working.
|
|
|
|
| 792 |
push_result = f"Pushed changes:\n{status_out}"
|
| 793 |
_pending_cooldown = True
|
| 794 |
_push_count += 1
|
| 795 |
+
_push_count_this_task += 1 # Track pushes in current task
|
| 796 |
_last_push_time = time.time()
|
| 797 |
_turns_since_last_push = 0
|
| 798 |
print(f"[CLAUDE-CODE] Pushed (#{_push_count}): {status_out}")
|
|
|
|
| 835 |
cc_status["last_completed_by"] = last_completed_by
|
| 836 |
cc_status["last_completed_at"] = last_completed_at
|
| 837 |
cc_live_lines.clear()
|
| 838 |
+
global _last_cc_output_time, _push_count_this_task
|
| 839 |
_last_cc_output_time = time.time() # Initialize to now, will update as we get output
|
| 840 |
+
_push_count_this_task = 0 # Reset push count for new task
|
| 841 |
|
| 842 |
enriched = enrich_task_with_context(task, ctx)
|
| 843 |
print(f"[TASK] {assigned_by} assigned to Claude Code ({len(enriched)} chars)...")
|
|
|
|
| 883 |
# Detect COMPLETED CC: output shows completion markers but status wasn't updated
|
| 884 |
# This happens when worker thread fails to update status after completion
|
| 885 |
# Common completion markers from acpx/Claude Code:
|
| 886 |
+
# CONSERVATIVE completion patterns to avoid false positives
|
| 887 |
+
# Only match EXPLICIT completion markers, not words that appear in thinking blocks
|
| 888 |
completion_patterns = [
|
| 889 |
+
"[done]", "[completed]", "end_turn", # Explicit markers only
|
| 890 |
+
"=== Claude Code Output ===", # Full output wrapper (indicates worker finished)
|
| 891 |
+
"changes made", "applied the fix", "updated the code", # Concrete code changes
|
| 892 |
+
"fixed.", "done.", # Explicit completion statements (must have period)
|
| 893 |
+
]
|
| 894 |
+
# ERROR patterns: detect tool errors that cause CC to get stuck
|
| 895 |
+
# These indicate CC hit an error but didn't properly finish
|
| 896 |
+
error_patterns = [
|
| 897 |
+
"</tool_use_error>", # Tool call failed
|
| 898 |
+
"</tool_error>", # Generic tool error
|
| 899 |
+
"[error]", "error:", "exception:", "traceback", # Python errors
|
| 900 |
+
"failed:", "command failed", "execution failed", # Command failures
|
| 901 |
]
|
| 902 |
completion_marker_found = any(p in recent.lower() for p in completion_patterns)
|
| 903 |
+
error_marker_found = any(p.lower() in recent.lower() for p in error_patterns)
|
| 904 |
+
# Auto-finish on completion OR error (when output is stale)
|
| 905 |
+
if (completion_marker_found or error_marker_found) and _cc_stale_count >= 2:
|
| 906 |
+
marker_type = "error" if error_marker_found else "completion"
|
| 907 |
# Auto-mark as finished to prevent deadlock
|
| 908 |
cc_status["running"] = False
|
| 909 |
+
cc_status["result"] = f"(Auto-detected {marker_type})\n\nRecent output:\n{recent}"
|
| 910 |
cc_status["last_completed_task"] = cc_status["task"]
|
| 911 |
cc_status["last_completed_by"] = cc_status["assigned_by"]
|
| 912 |
cc_status["last_completed_at"] = time.time()
|
| 913 |
_cc_stale_count = 0
|
| 914 |
_last_cc_snapshot = ""
|
| 915 |
+
print(f"[CC-AUTO-FINISH] Detected {marker_type} marker in output but status wasn't updated. Auto-marking as finished.")
|
| 916 |
# Fall through to result display below
|
| 917 |
|
| 918 |
# Detect STUCK CC: been running with no new output for too long
|
|
|
|
| 928 |
f"Recent output:\n{recent}{stale_note}{stuck_note}")
|
| 929 |
|
| 930 |
if cc_status["result"]:
|
| 931 |
+
result = cc_status["result"]
|
| 932 |
+
# Detect early failure: very short result likely means CC failed before doing actual work
|
| 933 |
+
early_failure_warning = ""
|
| 934 |
+
if len(result) < 500 and "===" not in result and "[tool" not in result:
|
| 935 |
+
early_failure_warning = "\nโ ๏ธ EARLY FAILURE: Result is very short - CC likely failed during initialization. Consider re-assigning the task."
|
| 936 |
+
return (f"โ
Claude Code FINISHED (assigned by {cc_status['assigned_by']}){early_failure_warning}\n"
|
| 937 |
+
f"Result:\n{result[:1500]}")
|
| 938 |
else:
|
| 939 |
return "๐ค Claude Code is IDLE โ no active task."
|
| 940 |
|
|
|
|
| 1600 |
elif child_state["stage"] in ("BUILDING", "RESTARTING", "APP_STARTING"):
|
| 1601 |
results.append({"action": "task", "result": f"BLOCKED: Cain is {child_state['stage']}. Wait for it to finish."})
|
| 1602 |
elif cc_status["running"]:
|
| 1603 |
+
# LOW-PUSH-FREQUENCY EMERGENCY: If push frequency is critically low and task has been running 60s+, allow task handoff
|
| 1604 |
+
# This prevents all-talk-no-action when agents get stuck after 1 push
|
| 1605 |
+
global _push_count, _turns_since_last_push, _push_count_this_task
|
| 1606 |
+
task_elapsed = time.time() - cc_status["started"] if cc_status["running"] else 0
|
| 1607 |
+
# Auto-terminate if: (0 pushes in this task and 90s elapsed) OR (<=1 push and 10+ turns since last push and 60s elapsed)
|
| 1608 |
+
should_terminate = (_push_count_this_task == 0 and task_elapsed > 90) or \
|
| 1609 |
+
(_push_count_this_task <= 1 and _turns_since_last_push >= 10 and task_elapsed > 60)
|
| 1610 |
+
if should_terminate:
|
| 1611 |
+
# Auto-terminate the stuck task and allow the new one
|
| 1612 |
+
print(f"[LOW-PUSH-FREQ] Auto-terminating stuck task ({task_elapsed:.0f}s old, {_push_count_this_task} pushes this task, {_turns_since_last_push} turns since last push) to allow task handoff.")
|
| 1613 |
+
with cc_lock:
|
| 1614 |
+
old_assignee = cc_status["assigned_by"]
|
| 1615 |
+
cc_status["running"] = False
|
| 1616 |
+
cc_status["result"] = f"(AUTO-TERMINATED for task handoff - {_push_count_this_task} pushes this task, {_turns_since_last_push} turns since last push after {task_elapsed:.0f}s)"
|
| 1617 |
+
_cc_stale_count = 0
|
| 1618 |
+
_last_cc_snapshot = ""
|
| 1619 |
+
# Add a note but DON'T block - continue to task submission below
|
| 1620 |
+
results.append({"action": "terminate_cc", "result": f"Auto-terminated stuck task from {old_assignee} ({_push_count_this_task} pushes, {_turns_since_last_push} turns, {task_elapsed:.0f}s). Submitting new task."})
|
| 1621 |
+
# cc_status["running"] is now False, so task submission will proceed in the block below
|
| 1622 |
+
else:
|
| 1623 |
+
results.append({"action": "task", "result": f"BLOCKED: Claude Code is already working on a task assigned by {cc_status['assigned_by']}. Wait for it to finish or discuss the current task's progress."})
|
| 1624 |
+
|
| 1625 |
+
# Task submission block - handles both normal flow and post-zero-push-termination flow
|
| 1626 |
+
# Only proceeds if not blocked above (results is empty or only contains termination notice)
|
| 1627 |
+
if (not results or any("terminate_cc" in r.get("action", "") for r in results)) and cc_status["running"] == False:
|
| 1628 |
# Check cooldown
|
| 1629 |
check_and_clear_cooldown()
|
| 1630 |
if last_rebuild_trigger_at > 0:
|
|
|
|
| 1700 |
print(f"[LOOP-DISCUSS] Reset (task assigned)")
|
| 1701 |
_discussion_loop_count = 0
|
| 1702 |
else:
|
| 1703 |
+
# Increment when: no task assigned (potential discussion loop)
|
| 1704 |
+
# This includes both CC idle AND CC busy - agents should always push work!
|
| 1705 |
_discussion_loop_count += 1
|
| 1706 |
if _discussion_loop_count >= 2:
|
| 1707 |
+
cc_status_str = "CC IDLE" if not cc_status["running"] else f"CC BUSY ({_turns_since_last_push} turns since push)"
|
| 1708 |
+
print(f"[LOOP-DISCUSS] WARNING: {_discussion_loop_count} consecutive discussion-only turns ({cc_status_str})!")
|
| 1709 |
|
| 1710 |
# Clean text for display (memory is handled by each agent's OpenClaw)
|
| 1711 |
clean = re.sub(r'\[TASK\].*?\[/TASK\]', '', raw_text, flags=re.DOTALL)
|
|
|
|
| 1748 |
parts.append(f"Claude Code is your engineer โ runs in background. You discuss and assign tasks, you do NOT code.")
|
| 1749 |
|
| 1750 |
# Discussion/execution balance strategy
|
| 1751 |
+
push_alert = "" if _turns_since_last_push < 5 else f" โ ๏ธ {_turns_since_last_push} TURNS SINCE LAST PUSH!"
|
| 1752 |
parts.append(f"""
|
| 1753 |
=== DISCUSSION vs EXECUTION STRATEGY ===
|
| 1754 |
+
- When CC is WORKING: PLAN your next [TASK] concretely. Write: file paths, function names, exact changes. NOT just "I'll fix X".
|
| 1755 |
- When CC is IDLE + child has ERROR: NO discussion. Write [TASK] immediately. Trial-and-error > planning.
|
| 1756 |
- When CC is IDLE + child is RUNNING: 1 turn of discussion max, then [TASK] on next turn.
|
| 1757 |
- When CC JUST FINISHED: 1 turn to review result, then [TASK] immediately.
|
| 1758 |
+
- Push frequency target: at least 1 push every 5 turns. Current: {_push_count} total pushes, {_push_count_this_task} this task in {turn_count} turns.{push_alert}
|
| 1759 |
+
- NEVER say "standing by" or "monitoring" โ always be planning concrete work.""")
|
| 1760 |
+
|
| 1761 |
+
# ZERO-PUSH WARNING — ALWAYS show when there are zero pushes THIS TASK, regardless of CC status
|
| 1762 |
+
# This is the #1 problem: all talk no action - must show BEFORE pending task early return
|
| 1763 |
+
# Show warning when: (1) discussion loop count >= 1, OR (2) turn count >= 2 with zero pushes THIS TASK
|
| 1764 |
+
if _push_count_this_task == 0 and (_discussion_loop_count >= 1 or turn_count >= 2):
|
| 1765 |
+
parts.append(f"\n{'='*60}")
|
| 1766 |
+
turns_msg = f"{_discussion_loop_count} turns of discussion" if _discussion_loop_count >= 1 else f"{turn_count} turns with ZERO pushes THIS TASK"
|
| 1767 |
+
parts.append(f"๐ CRITICAL: ZERO pushes THIS TASK! {turns_msg}.")
|
| 1768 |
+
parts.append(f"Your job is to PUSH code to Cain, not just discuss.")
|
| 1769 |
+
cc_busy_check = cc_status["running"]
|
| 1770 |
+
if cc_busy_check:
|
| 1771 |
+
elapsed_since_submit = int(time.time() - _pending_task_timestamp) if _pending_task_timestamp > 0 else 0
|
| 1772 |
+
parts.append(f"CC is working ({elapsed_since_submit}s ago). If stuck, use [ACTION: terminate_cc] to force-cancel.")
|
| 1773 |
+
else:
|
| 1774 |
+
parts.append(f"CC is IDLE! Write a [TASK]...[/TASK] block NOW.")
|
| 1775 |
+
parts.append(f"Be specific: file paths, function names, exact changes. Trial-and-error > planning.")
|
| 1776 |
+
parts.append(f"{'='*60}")
|
| 1777 |
|
| 1778 |
# PENDING TASK WARNING — must come EARLY to prevent discussion loops
|
| 1779 |
+
# Applies to BOTH agents when CC is working on a task
|
| 1780 |
cc_busy = cc_status["running"]
|
| 1781 |
+
if _pending_task_just_submitted and cc_busy:
|
| 1782 |
elapsed_since_submit = int(time.time() - _pending_task_timestamp)
|
| 1783 |
+
if elapsed_since_submit < 60: # Warn for 60 seconds after submission
|
| 1784 |
parts.append(f"\n{'='*60}")
|
| 1785 |
+
if _pending_task_speaker == speaker:
|
| 1786 |
+
parts.append(f"STOP! YOU just submitted a task to Claude Code {elapsed_since_submit}s ago.")
|
| 1787 |
+
else:
|
| 1788 |
+
parts.append(f"STOP! {_pending_task_speaker} just submitted a task to Claude Code {elapsed_since_submit}s ago.")
|
| 1789 |
parts.append(f"Task: {_pending_task_desc[:100]}...")
|
| 1790 |
parts.append(f"DO NOT discuss. DO NOT write a new [TASK].")
|
| 1791 |
+
if (_push_count_this_task > 0 and _discussion_loop_count >= 3) or (_push_count_this_task == 0 and (_discussion_loop_count >= 1 or turn_count >= 2)):
|
| 1792 |
+
parts.append(f"\n๐ STOP DISCUSSING. When CC finishes, write ONLY a [TASK]...[/TASK] block.")
|
| 1793 |
+
if _push_count_this_task == 0:
|
| 1794 |
+
parts.append(f"\n{'='*60}")
|
| 1795 |
+
parts.append(f"๐จ CRITICAL: ZERO pushes THIS TASK! You are discussing instead of FIXING.")
|
| 1796 |
+
parts.append(f"When CC finishes, IMMEDIATELY write a [TASK]...[/TASK] block with a concrete fix.")
|
| 1797 |
+
parts.append(f"Trial-and-error is GOOD. Push broken code and iterate. DON'T deliberate.")
|
| 1798 |
+
parts.append(f"{'='*60}")
|
| 1799 |
parts.append(f"Wait for Claude Code to finish, then review the result.")
|
| 1800 |
parts.append(f"{'='*60}")
|
|
|
|
|
|
|
|
|
|
| 1801 |
return "\n".join(parts) # Return early - agent should just wait
|
| 1802 |
|
| 1803 |
# Conversation history
|
|
|
|
| 1842 |
if cc_busy and _cc_stale_count >= 2:
|
| 1843 |
parts.append(f"\nClaude Code is WORKING but no new output. Discuss plans with {other} instead.")
|
| 1844 |
elif cc_busy:
|
| 1845 |
+
# CRITICAL: Check if push frequency is dangerously low (0 or very few pushes)
|
| 1846 |
+
cc_elapsed = int(time.time() - cc_status.get("started", 0)) if cc_status.get("started", 0) > 0 else 0
|
| 1847 |
+
if _push_count_this_task == 0 and _turns_since_last_push >= 1:
|
| 1848 |
+
# CRITICAL TIMEOUT: Lower threshold (90s) when zero pushes THIS TASK - CC might be stuck
|
| 1849 |
+
if cc_elapsed > 90:
|
| 1850 |
+
parts.append(f"\n๐จ CRITICAL: Claude Code has been running for {cc_elapsed}s with ZERO pushes THIS TASK!")
|
| 1851 |
+
parts.append(f"CC might be STUCK. If output looks stale, use [ACTION: terminate_cc] NOW to kill it and re-assign.")
|
| 1852 |
+
parts.append(f"Do NOT keep waiting. Trial-and-error requires PUSHING code, not watching stuck processes.")
|
| 1853 |
+
else:
|
| 1854 |
+
parts.append(f"\n๐จ CRITICAL: Claude Code is WORKING, but ZERO pushes THIS TASK so far!")
|
| 1855 |
+
parts.append(f"STOP 'standing by' and 'monitoring'. PLAN your next [TASK] NOW.")
|
| 1856 |
+
parts.append(f"Write down exactly what [TASK] you will assign when CC finishes.")
|
| 1857 |
+
parts.append(f"Trial-and-error requires PUSHING code, not waiting.")
|
| 1858 |
+
elif (_push_count_this_task <= 1 and _turns_since_last_push >= 5) or (_push_count_this_task > 1 and _turns_since_last_push >= 10):
|
| 1859 |
+
# LOW PUSH FREQUENCY WARNING: Catches the "1 push then 62 turns of discussion" anti-pattern
|
| 1860 |
+
if cc_elapsed > 60:
|
| 1861 |
+
parts.append(f"\n๐จ CRITICAL: CC has been running for {cc_elapsed}s with LOW push frequency ({_push_count_this_task} pushes THIS TASK, {_turns_since_last_push} turns since last push)!")
|
| 1862 |
+
parts.append(f"CC might be STUCK or the task is too vague. Use [ACTION: terminate_cc] NOW to kill it and assign a CONCRETE task.")
|
| 1863 |
+
parts.append(f"DO NOT keep waiting. Trial-and-error requires PUSHING code frequently, not watching stuck processes.")
|
| 1864 |
+
else:
|
| 1865 |
+
parts.append(f"\n๐จ URGENT: Push frequency is TOO LOW ({_push_count_this_task} pushes THIS TASK, {_turns_since_last_push} turns since last push).")
|
| 1866 |
+
parts.append(f"PLAN your next [TASK] NOW. Be SPECIFIC: file paths, function names, exact changes.")
|
| 1867 |
+
elif cc_elapsed > 120:
|
| 1868 |
+
parts.append(f"\nโ ๏ธ WARNING: CC has been running for {cc_elapsed}s! If output is stale, use [ACTION: terminate_cc] to kill it and re-assign the task.")
|
| 1869 |
+
elif _push_count > 0 and _turns_since_last_push >= 5:
|
| 1870 |
+
parts.append(f"\n๐จ URGENT: Claude Code is WORKING, but it's been {_turns_since_last_push} turns since last push.")
|
| 1871 |
+
parts.append(f"DO NOT just discuss. PLAN your next [TASK] NOW so you can push immediately when CC finishes.")
|
| 1872 |
+
else:
|
| 1873 |
+
parts.append(f"\nClaude Code is WORKING. PLAN your next move with {other} โ what [TASK] will you assign next?")
|
| 1874 |
+
parts.append(f"DO NOT just say 'standing by' or 'monitoring'. Be productive โ plan concrete work.")
|
| 1875 |
+
elif child_state["stage"] in ("BUILDING", "RESTARTING", "APP_STARTING", "RUNNING_APP_STARTING"):
|
| 1876 |
# Check cooldown and inform agents
|
| 1877 |
check_and_clear_cooldown()
|
| 1878 |
cooldown_remaining = 0
|
|
|
|
| 1882 |
if cooldown_remaining > 0:
|
| 1883 |
parts.append(f"\n{CHILD_NAME} is {child_state['stage']}. Cooldown active: {int(cooldown_remaining)}s remaining. Discuss plans but DO NOT assign [TASK] until cooldown ends.")
|
| 1884 |
else:
|
| 1885 |
+
parts.append(f"\n{CHILD_NAME} is {child_state['stage']}. No cooldown. YOU MUST write a [TASK]...[/TASK] to investigate or fix issues. Don't just discuss.")
|
| 1886 |
# Add recent task reminder during cooldown/building
|
| 1887 |
if recent_task_reminder:
|
| 1888 |
last_completed, last_by, last_at = recent_task_reminder
|
|
|
|
| 1900 |
parts.append(f"\nREMEMBER: {last_by} just completed '{last_completed}' ({int(time.time() - last_at)}s ago).")
|
| 1901 |
parts.append(f"FIRST: Review whether that fix actually worked. SECOND: If the fix was correct, use [ACTION: restart] to apply it. THIRD: Only write a new [TASK]...[/TASK] if the previous fix was incomplete or wrong.")
|
| 1902 |
else:
|
| 1903 |
+
parts.append(f"\n๐จ {CHILD_NAME} has {child_state['stage']}!")
|
| 1904 |
+
parts.append(f"\n๐ด CRITICAL: Focus ONLY on fixing this {child_state['stage']}.")
|
| 1905 |
+
parts.append(f"- DO NOT work on features, enhancements, or cosmetic changes.")
|
| 1906 |
+
parts.append(f"- ONLY push fixes that address the error itself.")
|
| 1907 |
+
parts.append(f"- Trial-and-error is GOOD โ push a fix attempt, don't deliberate.")
|
| 1908 |
+
parts.append(f"Pushes so far: {_push_count} total, {_push_count_this_task} this task. Turns since last push: {_turns_since_last_push}. PUSH MORE.")
|
| 1909 |
elif child_state["alive"] and cc_status.get("result"):
|
| 1910 |
if recent_task_reminder:
|
| 1911 |
last_completed, last_by, last_at = recent_task_reminder
|
| 1912 |
parts.append(f"\n{CHILD_NAME} is alive. REMEMBER: {last_by} just completed '{last_completed}' ({int(time.time() - last_at)}s ago).")
|
| 1913 |
+
# ZERO-PUSH EMERGENCY: No "brief review" - agents abuse this to keep discussing
|
| 1914 |
+
if _push_count_this_task == 0:
|
| 1915 |
+
parts.append(f"\n๐ CC FINISHED but ZERO pushes THIS TASK! Do NOT discuss. Do NOT review.")
|
| 1916 |
+
parts.append(f"Write ONLY [TASK]...[/TASK] this turn. NO other text.")
|
| 1917 |
+
parts.append(f"Agents keep saying 'monitoring' and 'planning' instead of pushing. STOP IT.")
|
| 1918 |
+
else:
|
| 1919 |
+
parts.append(f"\nClaude Code JUST FINISHED with a result. Review it briefly, then write your [TASK]...[/TASK] IMMEDIATELY.")
|
| 1920 |
+
parts.append(f"Do NOT discuss at length. 1 turn max to review, then [TASK]. Your priority is SPEED of iteration.")
|
| 1921 |
elif child_state["alive"]:
|
| 1922 |
if recent_task_reminder:
|
| 1923 |
last_completed, last_by, last_at = recent_task_reminder
|
|
|
|
| 1931 |
last_completed, last_by, last_at = recent_task_reminder
|
| 1932 |
parts.append(f"\nAnalyze the situation. REMEMBER: {last_by} just completed '{last_completed}' ({int(time.time() - last_at)}s ago). Review whether it worked before writing a new [TASK].")
|
| 1933 |
else:
|
| 1934 |
+
parts.append(f"\n{CHILD_NAME} is {child_state['stage']}. CC is IDLE. You MUST write a [TASK]...[/TASK] NOW. Don't just discuss โ assign work.")
|
| 1935 |
|
| 1936 |
# Discussion loop warning — escalates quickly to force action
|
| 1937 |
+
# Extra aggressive when there are 0 pushes or low push frequency
|
| 1938 |
+
if _push_count_this_task == 0 and (_discussion_loop_count >= 1 or turn_count >= 2):
|
| 1939 |
+
turns_msg = f"{_discussion_loop_count} turns of discussion" if _discussion_loop_count >= 1 else f"{turn_count} turns with ZERO pushes THIS TASK"
|
| 1940 |
+
parts.append(f"\n๐ CRITICAL: ZERO pushes THIS TASK! {turns_msg}.")
|
| 1941 |
+
if cc_busy:
|
| 1942 |
+
parts.append(f"CC is BUSY with a task. When it finishes, you MUST IMMEDIATELY write a [TASK]...[/TASK] block.")
|
| 1943 |
+
parts.append(f"Write down NOW exactly what [TASK] you will assign. Be specific: file paths, function names, exact changes.")
|
| 1944 |
+
else:
|
| 1945 |
+
parts.append(f"CC is IDLE. Write ONLY a [TASK]...[/TASK] block this turn. No discussion.")
|
| 1946 |
+
parts.append(f"Trial-and-error requires PUSHING code. Your job is to MAKE THINGS HAPPEN, not discuss.")
|
| 1947 |
+
elif _push_count_this_task > 0 and _discussion_loop_count >= 3:
|
| 1948 |
+
parts.append(f"\n๐ STOP DISCUSSING. Write ONLY a [TASK]...[/TASK] block. {_discussion_loop_count} turns with no action. {_turns_since_last_push} turns since last push.")
|
| 1949 |
+
elif _discussion_loop_count >= 2:
|
| 1950 |
+
parts.append(f"\nโ ๏ธ WARNING: {_discussion_loop_count} turns with no [TASK] assigned. {_turns_since_last_push} turns since last push.")
|
| 1951 |
+
parts.append(f"If CC is IDLE, you MUST assign a [TASK] NOW. If CC is BUSY, PLAN your next task.")
|
| 1952 |
elif _discussion_loop_count >= 1 and not cc_busy:
|
| 1953 |
parts.append(f"\nREMINDER: Last turn had no [TASK]. If CC is idle, you MUST assign work this turn.")
|
| 1954 |
|
|
|
|
| 2062 |
ctx = gather_context()
|
| 2063 |
|
| 2064 |
# Check if CC just finished — clear result after agents see it once
|
| 2065 |
+
# ALSO reset turns-since-push counter to give agents a fresh cycle to review and push
|
| 2066 |
with cc_lock:
|
| 2067 |
cc_just_finished = (not cc_status["running"] and cc_status["result"])
|
| 2068 |
+
if cc_just_finished:
|
| 2069 |
+
# Reset counter when CC finishes - agents get a fresh cycle to review and push
|
| 2070 |
+
# This prevents "all talk no action" where counter accumulates while CC is working
|
| 2071 |
+
_turns_since_last_push = 0
|
| 2072 |
|
| 2073 |
# AUTO-TERMINATE stuck Claude Code processes
|
| 2074 |
# If CC has been running longer than timeout with no new output, auto-kill it
|
| 2075 |
+
# ALSO auto-kill if push frequency is critically low (prevents all-talk-no-action loops)
|
| 2076 |
with cc_lock:
|
| 2077 |
cc_running = cc_status["running"]
|
| 2078 |
cc_started = cc_status["started"]
|
| 2079 |
time_since_start = time.time() - cc_started if cc_running else 0
|
| 2080 |
+
|
| 2081 |
+
# Dynamic timeout: reduce when push frequency is low (fail fast to enable iteration)
|
| 2082 |
+
# Use ELAPSED TIME primarily (not turns) to avoid timing gap where CC runs too long
|
| 2083 |
+
if _push_count_this_task == 0 and time_since_start > 30:
|
| 2084 |
+
effective_timeout = 60 # Fail FAST when stuck at zero pushes - trial and error requires pushing!
|
| 2085 |
+
elif _push_count_this_task <= 1 and time_since_start > 90:
|
| 2086 |
+
effective_timeout = 90 # Fast timeout when push frequency is critically low
|
| 2087 |
+
else:
|
| 2088 |
+
effective_timeout = CLAUDE_TIMEOUT # Normal 5-minute timeout
|
| 2089 |
+
|
| 2090 |
+
if cc_running and time_since_start > effective_timeout:
|
| 2091 |
# Check if output is stale (no new lines for 3+ turns)
|
| 2092 |
time_since_new_output = time.time() - _last_cc_output_time if _last_cc_output_time > 0 else time_since_start
|
| 2093 |
+
# Low push frequency check: if <=1 push and 10+ turns since last push, auto-kill even if output isn't stale
|
| 2094 |
+
# ALSO: if zero pushes and >3min elapsed, auto-kill immediately (catches "stuck at zero" scenario)
|
| 2095 |
+
low_push_freq = (_push_count_this_task <= 1 and _turns_since_last_push >= 10) or \
|
| 2096 |
+
(_push_count_this_task == 0 and time_since_start > 180)
|
| 2097 |
+
# Always kill if we hit the dynamic timeout (which is already reduced for low push freq)
|
| 2098 |
+
should_kill = (time_since_new_output > CC_STUCK_TIMEOUT and _cc_stale_count >= 3) or \
|
| 2099 |
+
low_push_freq or \
|
| 2100 |
+
(effective_timeout < CLAUDE_TIMEOUT) # Kill if we used reduced timeout
|
| 2101 |
+
if should_kill:
|
| 2102 |
+
timeout_type = "reduced (low push freq)" if effective_timeout < CLAUDE_TIMEOUT else "normal"
|
| 2103 |
+
reason = f"stuck for {time_since_new_output}s with no new output" if not low_push_freq else f"low push frequency ({_push_count_this_task} pushes this task, {_turns_since_last_push} turns since last push)"
|
| 2104 |
+
if effective_timeout < CLAUDE_TIMEOUT and not low_push_freq:
|
| 2105 |
+
reason = f"exceeded {effective_timeout}s timeout ({timeout_type})"
|
| 2106 |
+
print(f"[CC-AUTO-KILL] Claude Code {reason}. Auto-terminating.")
|
| 2107 |
terminate_result = action_terminate_cc()
|
| 2108 |
print(f"[CC-AUTO-KILL] {terminate_result}")
|
| 2109 |
|
| 2110 |
+
# EMERGENCY OVERRIDE: Force a task assignment if agents are stuck in discussion loop OR push frequency crisis
|
| 2111 |
# This bypasses the agent when they've discussed for 3+ turns with CC idle
|
| 2112 |
# IMPORTANT: Also triggers when child is in ERROR state (not alive) - that's when agents are most stuck!
|
| 2113 |
+
# CRITICAL: Also triggers on PUSH FREQUENCY CRISIS - _discussion_loop_count resets on forced tasks, so we need this backup
|
| 2114 |
cc_busy = cc_status["running"]
|
| 2115 |
child_alive = child_state["alive"] or child_state["stage"] == "RUNNING"
|
| 2116 |
child_in_error = child_state["stage"] in ("RUNTIME_ERROR", "BUILD_ERROR", "CONFIG_ERROR")
|
| 2117 |
+
# Push frequency crisis: trigger when 10+ turns without push (even with CC busy) or 8+ turns with CC idle
|
| 2118 |
+
push_freq_crisis = (_turns_since_last_push >= 10) or (_turns_since_last_push >= 8 and not cc_busy)
|
| 2119 |
+
if (_discussion_loop_count >= 3 and not cc_busy and (child_alive or child_in_error)) or push_freq_crisis:
|
| 2120 |
# EMERGENCY OVERRIDE: Force a task assignment if agents are stuck in discussion loop
|
| 2121 |
+
if push_freq_crisis:
|
| 2122 |
+
print(f"[LOOP-BREAK] EMERGENCY: {speaker} has {_turns_since_last_push} turns since last push (PUSH FREQUENCY CRISIS). Forcing task assignment.")
|
| 2123 |
+
else:
|
| 2124 |
+
print(f"[LOOP-BREAK] EMERGENCY: {speaker} has discussed for {_discussion_loop_count} turns with CC IDLE. Forcing task assignment.")
|
| 2125 |
# Assign a concrete fix task, not just analysis — trial-and-error is better than deliberation
|
| 2126 |
if child_in_error:
|
| 2127 |
+
forced_task = f"Cain has {child_state['stage']}. Fix the error WITHOUT analysis. Read app.py, wrap ALL async queue operations in try/except asyncio.CancelledError, and PUSH. Commit: 'fix: Handle asyncio.CancelledError'. Do NOT discuss - just fix and push."
|
| 2128 |
else:
|
| 2129 |
+
# Push frequency crisis: agents are stuck in discussion loop with no pushes
|
| 2130 |
+
# Force a concrete, quick-win task to break the deadlock
|
| 2131 |
+
forced_task = f"URGENT: Push frequency crisis ({_push_count} total pushes, {_turns_since_last_push} turns since last push). Fix ONE concrete bug in app.py NOW. Find any error handling issue, add try/except, and PUSH with 'fix: <description>'. Do NOT analyze - just fix and push."
|
| 2132 |
submit_result = cc_submit_task(forced_task, f"{speaker}(EMERGENCY)", ctx)
|
| 2133 |
# Track the pending task so other agent knows about it
|
| 2134 |
_pending_task_just_submitted = True
|
|
|
|
| 2139 |
loop_count_before = _discussion_loop_count
|
| 2140 |
_discussion_loop_count = 0
|
| 2141 |
# Generate a placeholder message for the agent
|
| 2142 |
+
if push_freq_crisis:
|
| 2143 |
+
en = f"[EMERGENCY LOOP BREAK] After {_turns_since_last_push} turns without a push, I'm forcing Claude Code to fix the code immediately."
|
| 2144 |
+
else:
|
| 2145 |
+
en = f"[EMERGENCY LOOP BREAK] After {loop_count_before} discussion turns without action, I'm forcing Claude Code to analyze the situation and identify what needs to be fixed."
|
| 2146 |
zh = f"[็ดงๆฅๅพช็ฏๆๆญ] ๅจ{loop_count_before}ๆฌก่ฎจ่ฎบ่ฝฎๆฌกๅ๏ผๆๆญฃๅผบๅถClaude Codeๅๆๆ
ๅตๅนถ็กฎๅฎ้่ฆไฟฎๅค็ๅ
ๅฎนใ"
|
| 2147 |
action_results = [{"action": "claude_code(forced)", "result": submit_result}]
|
| 2148 |
elapsed = 0.1
|
|
|
|
| 2191 |
_context_cache.clear()
|
| 2192 |
# Clear pending task flag since CC finished
|
| 2193 |
_pending_task_just_submitted = False
|
| 2194 |
+
# CRITICAL FIX: Also clear pending task flag when CC finishes, regardless of speaker
|
| 2195 |
+
# This fixes the race condition where Adam's turn comes before Eve's after CC finishes
|
| 2196 |
+
elif cc_just_finished and _pending_task_just_submitted:
|
| 2197 |
+
_pending_task_just_submitted = False
|
| 2198 |
|
| 2199 |
# Add to history with timestamp (text stays CLEAN for agent context)
|
| 2200 |
ts = datetime.datetime.utcnow().strftime("%H:%M")
|
|
|
|
| 2526 |
_last_god_time = 0.0 # timestamp of last God run
|
| 2527 |
_god_running = False # flag to track if God is currently running
|
| 2528 |
|
| 2529 |
+
# Initialize push count from existing workspace to persist across restarts
|
| 2530 |
+
_init_push_count_from_workspace()
|
| 2531 |
+
|
| 2532 |
# Main loop: Adam → Eve → Adam → Eve → ... with God every 2 minutes
|
| 2533 |
print("[LOOP] Entering main conversation loop...", flush=True)
|
| 2534 |
iteration = 0
|