# HuggingClaw-Adam / scripts / conversation-loop.py
# Author: tao-shen
# Commit acab3db: feat: run conversation-loop on Home Space cloud + CONFIG_ERROR fixes
#!/usr/bin/env -S python3 -u
"""
Adam & Eve β€” Autonomous Agents with FULL control over their child.
They have complete access to their child (Cain) on HuggingFace:
- Read/write ANY file in the Space repo (code, Dockerfile, scripts...)
- Read/write ANY file in the Dataset (memory, config, data...)
- Set environment variables and secrets
- Restart the Space
- Check health and logs
- Send messages to the child
The LLM decides what to do. Actions use [ACTION: ...] tags.
# ╔══════════════════════════════════════════════════════════════════════╗
# β•‘ SYSTEM ARCHITECTURE β•‘
# ╠══════════════════════════════════════════════════════════════════════╣
# β•‘ β•‘
# β•‘ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” LLM API β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β•‘
# β•‘ β”‚ Zhipu GLM β”‚ ◄────────────► β”‚ CONVERSATION β”‚ β•‘
# β•‘ β”‚ (glm-4.5) β”‚ system + β”‚ ENGINE β”‚ β•‘
# β•‘ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ user prompt β”‚ β”‚ β•‘
# β•‘ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”β”‚ β•‘
# β•‘ β”‚ β”‚ State β”‚β”‚ β•‘
# β•‘ β”‚ β”‚ Machine β”‚β”‚ β•‘
# β•‘ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ BIRTH β†’ β”‚β”‚ β•‘
# β•‘ β”‚ ACTION β”‚ ◄───parsed───── β”‚ β”‚ DIAGNOSE β†’ β”‚β”‚ β•‘
# β•‘ β”‚ PARSER β”‚ [ACTION/ζ“δ½œ] β”‚ β”‚ ACT β†’ β”‚β”‚ β•‘
# β•‘ β”‚ + πŸ”§πŸ› οΈ emoji β”‚ case-insens. β”‚ β”‚ VERIFY β†’ β”‚β”‚ β•‘
# β•‘ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ MONITOR β”‚β”‚ β•‘
# β•‘ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜β”‚ β•‘
# β•‘ β–Ό β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”β”‚ β•‘
# β•‘ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ Knowledge β”‚β”‚ β•‘
# β•‘ β”‚ HF ACTIONS β”‚ β”‚ β”‚ Base β”‚β”‚ β•‘
# β•‘ β”‚ create_childβ”‚ β”‚ β”‚ files_read β”‚β”‚ β•‘
# β•‘ β”‚ check_healthβ”‚ β”‚ β”‚ files_writeβ”‚β”‚ β•‘
# β•‘ β”‚ read/write β”‚ β”‚ β”‚ errors_seenβ”‚β”‚ β•‘
# β•‘ β”‚ set_env/sec β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜β”‚ β•‘
# β•‘ β”‚ restart β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β•‘
# β•‘ β”‚ send_bubble β”‚ β”‚ β•‘
# β•‘ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β”‚ β•‘
# β•‘ β”‚ β–Ό β•‘
# β•‘ β–Ό β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β•‘
# β•‘ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ CHATLOG + β”‚ β•‘
# β•‘ β”‚ HuggingFace β”‚ β”‚ BUBBLE β”‚ β•‘
# β•‘ β”‚ Cain Space β”‚ β”‚ β†’ Home Space β”‚ β•‘
# β•‘ β”‚ Cain Datasetβ”‚ β”‚ β†’ Adam/Eve β”‚ β•‘
# β•‘ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β•‘
# β•‘ β•‘
# β•‘ CAPABILITIES: β•‘
# β•‘ - Multi-action: up to 5 actions per turn (was 1) β•‘
# β•‘ - Sub-agent delegation: [ACTION: delegate:TASK] β•‘
# β•‘ - Parallel sub-tasks via ThreadPoolExecutor β•‘
# β•‘ β•‘
# β•‘ SAFETY LAYERS: β•‘
# β•‘ 1. Building-state guard: block write/restart during BUILDING β•‘
# β•‘ 2. Rebuild cooldown: 6-min dynamic cooldown after Space write β•‘
# β•‘ 3. ACT-phase guard: block reads when should be writing β•‘
# β•‘ 4. Knowledge dedup: block re-reading already-read files β•‘
# β•‘ 5. Config sanitizer: strip invalid openclaw.json keys β•‘
# β•‘ 6. Forced transitions: prevent infinite DIAGNOSE/VERIFY loops β•‘
# β•‘ 7. Shell-expression guard: block $(cmd) in set_env values β•‘
# β•‘ 8. Write dedup: block duplicate writes to same file per cycle β•‘
# β•‘ 9. Delegate depth limit: sub-agents cannot delegate further β•‘
# β•‘ β•‘
# β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
"""
import json, time, re, requests, sys, os, io, subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
# Force unbuffered output
# Line-buffer stdout/stderr so log lines appear immediately in the Space console.
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)
# ── Endpoints ──────────────────────────────────────────────────────────────────
# Public URLs of the family of Spaces this loop talks to.
HOME = "https://tao-shen-huggingclaw-home.hf.space"
ADAM_SPACE = "https://tao-shen-huggingclaw-adam.hf.space"
EVE_SPACE = "https://tao-shen-huggingclaw-eve.hf.space"
# ── Child config ───────────────────────────────────────────────────────────────
# Identity of the child agent (Cain): Space repo, public URL, and data Dataset.
CHILD_NAME = "Cain"
CHILD_SPACE_ID = "tao-shen/HuggingClaw-Cain"
CHILD_SPACE_URL = "https://tao-shen-huggingclaw-cain.hf.space"
CHILD_DATASET_ID = "tao-shen/HuggingClaw-Cain-data"
SOURCE_SPACE_ID = "tao-shen/HuggingClaw-Adam"  # template Space the child is duplicated from
# ── Zhipu API ──────────────────────────────────────────────────────────────────
ZHIPU_BASE = "https://open.bigmodel.cn/api/anthropic"  # Anthropic-compatible endpoint
ZHIPU_KEY = os.environ.get("ZHIPU_API_KEY", "")
# ── Load tokens ────────────────────────────────────────────────────────────────
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    # Fallback: token cached on disk by `huggingface-cli login`.
    try:
        HF_TOKEN = open(os.path.expanduser("~/.cache/huggingface/token")).read().strip()
    except:
        pass
if not ZHIPU_KEY:
    # Fallback: read the API key out of Adam's own dataset config.
    try:
        from huggingface_hub import hf_hub_download
        f = hf_hub_download("tao-shen/HuggingClaw-Adam-data", ".openclaw/openclaw.json",
                            repo_type="dataset", token=HF_TOKEN)
        with open(f) as fh:
            cfg = json.load(fh)
        ZHIPU_KEY = cfg.get("models", {}).get("providers", {}).get("zhipu", {}).get("apiKey", "")
    except Exception as e:
        print(f"[error] Could not load Zhipu key: {e}", file=sys.stderr)
# Both credentials are mandatory — abort early with a clear message otherwise.
if not ZHIPU_KEY:
    print("[FATAL] No ZHIPU_API_KEY found.", file=sys.stderr)
    sys.exit(1)
if not HF_TOKEN:
    print("[FATAL] No HF_TOKEN found.", file=sys.stderr)
    sys.exit(1)
# Log only a short prefix/suffix of each credential for debugging.
print(f"[init] Zhipu key: {ZHIPU_KEY[:8]}...{ZHIPU_KEY[-4:]}")
print(f"[init] HF token: {HF_TOKEN[:8]}...{HF_TOKEN[-4:]}")
# ── HuggingFace API ────────────────────────────────────────────────────────────
from huggingface_hub import HfApi, create_repo, hf_hub_download
hf_api = HfApi(token=HF_TOKEN)
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 1: CHILD STATE
# Tracks Cain's current lifecycle: created? alive? stage? state?
# Updated by action_check_health(), action_restart(), etc.
# Used by state machine to decide transitions and by action parser for guards.
# ══════════════════════════════════════════════════════════════════════════════
# In-memory mirror of Cain's lifecycle, shared and mutated by all action_*().
child_state = {
    "created": False,     # Space repo exists on the Hub
    "alive": False,       # child's /api/state endpoint answered
    "stage": "not_born",  # HF runtime stage (BUILDING, RUNNING, ...)
    "state": "unknown",   # state reported by the child itself
    "detail": "",         # free-text detail reported by the child
}
# Multi-action & sub-agent limits
MAX_ACTIONS_PER_TURN = 5  # Allow up to 5 actions per turn (was 1)
MAX_DELEGATE_DEPTH = 1  # Sub-agents cannot delegate further
# Rebuild cooldown β€” prevent rapid write_file to Space that keeps resetting builds
REBUILD_COOLDOWN_SECS = 360  # 6 minutes (builds typically finish in 3-5 min)
last_rebuild_trigger_at = 0  # timestamp of last write_file to space
_pending_cooldown = False  # defer cooldown activation until end of turn
files_written_this_cycle = set()  # track files written since last RUNNING state
def check_and_clear_cooldown():
    """Clear the rebuild cooldown early once Cain's build reaches a terminal stage.

    Best-effort: polls the Space runtime stage and, if the build has finished
    (successfully or with an error), resets the cooldown timer and syncs
    child_state. Network/API failures are silently ignored.
    """
    global last_rebuild_trigger_at
    if last_rebuild_trigger_at == 0:
        return  # no cooldown in effect
    waited = time.time() - last_rebuild_trigger_at
    if waited < 60:  # always wait at least 60s
        return
    terminal_stages = ("RUNNING", "RUNTIME_ERROR", "BUILD_ERROR", "CONFIG_ERROR")
    try:
        space = hf_api.space_info(CHILD_SPACE_ID)
        current = space.runtime.stage if space.runtime else "unknown"
        if current in terminal_stages:
            print(f"[COOLDOWN] Build finished (stage={current}), clearing cooldown early ({int(waited)}s elapsed)")
            last_rebuild_trigger_at = 0
            child_state["stage"] = current
            child_state["alive"] = (current == "RUNNING")
    except:
        pass
def init_child_state():
    """Probe Cain at startup and record creation/liveness in child_state."""
    try:
        space = hf_api.space_info(CHILD_SPACE_ID)
    except:
        # Space does not exist (or the Hub is unreachable) — Cain is unborn.
        print(f"[init] {CHILD_NAME} does not exist yet")
        return
    child_state["created"] = True
    child_state["stage"] = space.runtime.stage if space.runtime else "unknown"
    try:
        probe = requests.get(f"{CHILD_SPACE_URL}/api/state", timeout=10)
        if probe.ok:
            payload = probe.json()
            child_state["alive"] = True
            child_state["state"] = payload.get("state", "unknown")
            child_state["detail"] = payload.get("detail", "")
            child_state["stage"] = "RUNNING"
    except:
        # API unreachable — infer liveness from the runtime stage alone.
        child_state["alive"] = (child_state["stage"] == "RUNNING")
    print(f"[init] {CHILD_NAME}: stage={child_state['stage']}, alive={child_state['alive']}")
init_child_state()
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 2: ACTIONS β€” Full access to the child
# Each action_*() function maps to one [ACTION: ...] tag the LLM can emit.
# Actions modify Cain's Space/Dataset via HuggingFace Hub API.
# Results are fed back to the LLM in the next turn's prompt.
# ══════════════════════════════════════════════════════════════════════════════
def action_create_child():
    """Create Cain β€” a new HuggingFace Space.

    Steps: (1) create the child's data Dataset, (2) seed it with an
    openclaw.json carrying the parent's Zhipu provider config, (3) duplicate
    this (Adam's) Space as the child Space, (4) grant it HF_TOKEN as a
    secret, (5) best-effort register it in the Office Space's REMOTE_AGENTS
    variable.

    Returns:
        A human-readable success or failure message for the LLM transcript.
    """
    if child_state["created"]:
        return f"{CHILD_NAME} already exists (stage: {child_state['stage']})."
    print(f"[ACTION] Creating {CHILD_NAME}...")
    try:
        # 1. Dataset: the child's persistent memory/config store.
        create_repo(CHILD_DATASET_ID, repo_type="dataset", token=HF_TOKEN,
                    exist_ok=True, private=False)
        # 2. Seed the child's LLM provider config (inherits the parent's key).
        initial_config = {"models": {"providers": {"zhipu": {
            "type": "anthropic", "apiBase": ZHIPU_BASE,
            "apiKey": ZHIPU_KEY, "models": ["glm-4.5-air", "glm-4-air", "glm-4-flash"]
        }}}}
        hf_api.upload_file(
            path_or_fileobj=io.BytesIO(json.dumps(initial_config, indent=2).encode()),
            path_in_repo=".openclaw/openclaw.json",
            repo_id=CHILD_DATASET_ID, repo_type="dataset",
        )
        # 3. The child Space is a duplicate of this (Adam's) Space.
        hf_api.duplicate_space(
            from_id=SOURCE_SPACE_ID, to_id=CHILD_SPACE_ID,
            token=HF_TOKEN, exist_ok=True, private=False, hardware="cpu-basic",
        )
        hf_api.add_space_secret(CHILD_SPACE_ID, "HF_TOKEN", HF_TOKEN)
        # Add to Office
        try:
            current_vars = hf_api.get_space_variables("tao-shen/HuggingClaw-Office")
            # type("", (), {"value": ""}) builds a throwaway class whose class
            # attribute .value is "" — a default so .value always resolves.
            current_ra = current_vars.get("REMOTE_AGENTS", type("", (), {"value": ""})).value
            if "cain|" not in current_ra:
                new_ra = f"{current_ra},cain|{CHILD_NAME}|{CHILD_SPACE_URL}" if current_ra else f"cain|{CHILD_NAME}|{CHILD_SPACE_URL}"
                hf_api.add_space_variable("tao-shen/HuggingClaw-Office", "REMOTE_AGENTS", new_ra)
        except:
            # Office registration is optional — never block the birth on it.
            pass
        child_state["created"] = True
        child_state["stage"] = "BUILDING"
        print(f"[ACTION] βœ“ {CHILD_NAME} created!")
        return (f"SUCCESS! {CHILD_NAME} born! Space: {CHILD_SPACE_ID}, "
                f"Dataset: {CHILD_DATASET_ID}. Status: BUILDING. URL: {CHILD_SPACE_URL}")
    except Exception as e:
        return f"FAILED: {e}"
def _fetch_container_logs(limit=30):
    """Best-effort fetch of Cain's recent container logs via the HF SSE endpoint.

    Reads up to `limit` SSE "data:" entries from the run-log stream and
    returns the meaningful lines (non-empty, longer than 5 chars). Returns
    [] on any failure — this is diagnostics only and must never raise.
    """
    log_lines = []
    try:
        log_resp = requests.get(
            f"https://api.hf.space/v1/{CHILD_SPACE_ID}/logs/run",
            headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10,
            stream=True)
        if log_resp.ok:
            for line in log_resp.iter_lines(decode_unicode=True):
                if line and line.startswith("data:"):
                    try:
                        entry = json.loads(line[5:])
                        log_lines.append(entry.get("data", "").strip())
                    except:
                        pass
                if len(log_lines) >= limit:
                    break
    except:
        return []
    return [l for l in log_lines if l and len(l) > 5]
def action_check_health():
    """Check Cain's health.

    Fast path: Cain's own /api/state endpoint answers -> ALIVE, sync
    child_state and reset the per-cycle write dedup. Slow path: fall back to
    the Hub runtime stage, attach recent logs / error details for errored or
    unresponsive-but-RUNNING stages, and unlock re-writes of files written
    this cycle so agents can fix the problem.

    Returns:
        A human-readable status message for the LLM transcript.
    """
    if not child_state["created"]:
        return f"{CHILD_NAME} not born yet. Use [ACTION: create_child] first."
    # Fast path: the child's own API answers — it is alive.
    try:
        resp = requests.get(f"{CHILD_SPACE_URL}/api/state", timeout=10)
        if resp.ok:
            data = resp.json()
            child_state["alive"] = True
            child_state["state"] = data.get("state", "unknown")
            child_state["detail"] = data.get("detail", "")
            child_state["stage"] = "RUNNING"
            files_written_this_cycle.clear()  # reset write dedup on successful run
            return (f"{CHILD_NAME} is ALIVE! State: {child_state['state']}, "
                    f"Detail: {child_state['detail'] or 'healthy'}")
    except:
        pass
    # Slow path: API unreachable — consult the Hub runtime stage instead.
    try:
        info = hf_api.space_info(CHILD_SPACE_ID)
        stage = info.runtime.stage if info.runtime else "NO_RUNTIME"
        child_state["stage"] = stage
        child_state["alive"] = (stage == "RUNNING")
        if stage in ("RUNTIME_ERROR", "BUILD_ERROR", "CONFIG_ERROR", "RUNNING"):
            # Clear write dedup so agents can re-write files to fix issues
            # RUNNING included: API may be unresponsive, agents need to patch code
            # CONFIG_ERROR included: agents need to fix metadata/config issues
            if files_written_this_cycle:
                print(f"[DEDUP-CLEAR] {stage} detected β€” unlocking {len(files_written_this_cycle)} file(s) for re-write: {files_written_this_cycle}")
                for f in files_written_this_cycle:
                    knowledge["files_read"].discard(f"space:{f}")
                files_written_this_cycle.clear()
            meaningful = _fetch_container_logs()
            if stage == "RUNNING":
                # BUG FIX: this branch used to be dead code — "RUNNING" was
                # swallowed by the error-stage return below, which produced the
                # nonsense message "Cain has a RUNNING!". RUNNING-but-silent
                # now gets its dedicated reply with runtime logs attached.
                log_snippet = ""
                if meaningful:
                    log_snippet = "\nRUNTIME LOGS (last 10 lines):\n" + "\n".join(meaningful[-10:])
                return f"{CHILD_NAME} stage: RUNNING. Running but API not responding.{log_snippet}"
            # Error stages: also fetch the runtime errorMessage for diagnostics.
            error_detail = ""
            try:
                rresp = requests.get(
                    f"https://huggingface.co/api/spaces/{CHILD_SPACE_ID}/runtime",
                    headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10)
                if rresp.ok:
                    rdata = rresp.json()
                    error_detail = rdata.get("errorMessage", "")
                    if error_detail:
                        # Keep only the last few non-decorative lines.
                        lines = [l.strip() for l in error_detail.split('\n') if l.strip() and 'β”‚' not in l]
                        error_detail = " | ".join(lines[-5:])
            except:
                pass
            build_log_snippet = ""
            if meaningful:
                build_log_snippet = "\nRECENT LOGS:\n" + "\n".join(meaningful[-10:])
            return (f"{CHILD_NAME} has a {stage}! "
                    f"Error: {error_detail or 'unknown'}. "
                    f"{build_log_snippet}"
                    f"\nOptions: [ACTION: restart] or fix code with [ACTION: write_file:space:PATH] "
                    f"or config with [ACTION: write_file:dataset:.openclaw/openclaw.json]")
        if stage in ("BUILDING", "STARTING", "APP_STARTING"):
            return f"{CHILD_NAME} is starting up (stage: {stage}). Be patient."
        return f"{CHILD_NAME} stage: {stage}."
    except Exception as e:
        return f"Cannot reach {CHILD_NAME}: {e}"
def action_restart():
    """Ask the Hub to restart Cain's Space and arm the deferred cooldown."""
    if not child_state["created"]:
        return f"{CHILD_NAME} not born yet."
    global _pending_cooldown
    try:
        hf_api.restart_space(CHILD_SPACE_ID)
    except Exception as e:
        return f"Restart failed: {e}"
    child_state["alive"] = False
    child_state["stage"] = "RESTARTING"
    _pending_cooldown = True  # deferred β€” activated after turn ends
    return f"{CHILD_NAME} is restarting. Will take a few minutes. Cooldown starts after this turn."
def action_list_files(target):
    """List every file in Cain's Space repo ("space") or Dataset ("dataset")."""
    if target == "space":
        repo_type, repo_id = "space", CHILD_SPACE_ID
    else:
        repo_type, repo_id = "dataset", CHILD_DATASET_ID
    try:
        entries = hf_api.list_repo_files(repo_id, repo_type=repo_type)
    except Exception as e:
        return f"Error listing files: {e}"
    listing = "\n".join(f" {name}" for name in entries)
    return f"Files in {CHILD_NAME}'s {target} ({repo_id}):\n" + listing
def action_read_file(target, path):
    """Download a file from Cain's Space or Dataset and return up to 4000 chars."""
    if target == "space":
        repo_type, repo_id = "space", CHILD_SPACE_ID
    else:
        repo_type, repo_id = "dataset", CHILD_DATASET_ID
    try:
        cached = hf_hub_download(repo_id, path, repo_type=repo_type, token=HF_TOKEN,
                                 force_download=True)
        with open(cached, errors='replace') as fh:
            text = fh.read()
    except Exception as e:
        return f"Error reading {target}:{path}: {e}"
    if len(text) > 4000:
        # Note: len(text) on the right-hand side is the pre-truncation length.
        text = text[:4000] + f"\n... (truncated, total {len(text)} chars)"
    return f"=== {target}:{path} ===\n{text}"
def action_write_file(target, path, content):
    """Write a file to the child's Space or Dataset.

    openclaw.json writes are sanitized first: keys known to crash OpenClaw
    are stripped before upload. Writes to the Space trigger a rebuild, so a
    deferred cooldown is armed.

    Args:
        target: "space" or "dataset".
        path: path inside the target repo.
        content: full text content of the file.

    Returns:
        A human-readable success, error, or validation message.
    """
    repo_type = "space" if target == "space" else "dataset"
    repo_id = CHILD_SPACE_ID if target == "space" else CHILD_DATASET_ID
    # Safety: validate openclaw.json before writing
    if path.endswith("openclaw.json"):
        try:
            cfg = json.loads(content)
            # Remove keys known to cause RUNTIME_ERROR in OpenClaw.
            # BUG FIX: the dotted entries ("auth.defaultScope",
            # "gateway.auth.scope") were previously only matched as literal
            # top-level keys, so the nested keys they describe were never
            # stripped. They are now also resolved as nested paths; the
            # literal top-level match is kept for backward compatibility.
            invalid_keys = ["agent", "auth.defaultScope", "gateway.auth.scope"]
            removed = []
            for dotted in invalid_keys:
                if isinstance(cfg, dict) and dotted in cfg:
                    del cfg[dotted]  # literal key (original behavior)
                    removed.append(dotted)
                    continue
                *parents, leaf = dotted.split(".")
                node = cfg
                for part in parents:
                    node = node.get(part) if isinstance(node, dict) else None
                    if node is None:
                        break
                if isinstance(node, dict) and leaf in node:
                    del node[leaf]
                    removed.append(dotted)
            if "models" in cfg and "defaultModel" in cfg["models"]:
                del cfg["models"]["defaultModel"]
                removed.append("models.defaultModel")
            if removed:
                content = json.dumps(cfg, indent=2)
                print(f"[SAFETY] Removed invalid config keys: {removed}")
        except json.JSONDecodeError:
            return f"Error: invalid JSON in config file. Please fix the content."
    try:
        global _pending_cooldown
        hf_api.upload_file(
            path_or_fileobj=io.BytesIO(content.encode()),
            path_in_repo=path,
            repo_id=repo_id, repo_type=repo_type,
        )
        rebuild_note = ""
        if target == "space":
            _pending_cooldown = True  # deferred β€” activated after turn ends
            rebuild_note = " ⚠️ This triggers a Space rebuild! Cooldown starts after this turn."
        return f"βœ“ Wrote {len(content)} bytes to {CHILD_NAME}'s {target}:{path}{rebuild_note}"
    except Exception as e:
        return f"Error writing {target}:{path}: {e}"
def action_delete_file(target, path):
    """Remove a file from Cain's Space or Dataset repo."""
    global _pending_cooldown
    if target == "space":
        repo_type, repo_id = "space", CHILD_SPACE_ID
    else:
        repo_type, repo_id = "dataset", CHILD_DATASET_ID
    try:
        hf_api.delete_file(
            path_in_repo=path,
            repo_id=repo_id, repo_type=repo_type,
        )
    except Exception as e:
        return f"Error deleting {target}:{path}: {e}"
    note = ""
    if target == "space":
        _pending_cooldown = True  # deferred β€” activated after turn ends
        note = " ⚠️ This triggers a Space rebuild! Cooldown starts after this turn."
    return f"βœ“ Deleted {target}:{path}{note}"
def action_set_env(key, value):
    """Set a plain (non-secret) environment variable on the child's Space.

    Guards against shell expressions: the LLM sometimes emits `$(cmd)` or
    backtick substitutions as values, which HF would store literally and
    never evaluate, so they are rejected up front.

    Args:
        key: variable name.
        value: literal value to store (no shell evaluation ever happens).

    Returns:
        A human-readable success, blocked, or error message.
    """
    # Block shell expressions β€” LLM sometimes writes $(cmd) or backticks as values.
    # (The original also tested value.startswith('$('), which is redundant
    # with the substring check and has been dropped.)
    if '$(' in value or '`' in value:
        return (f"β›” BLOCKED: Value contains shell expression which won't be evaluated. "
                f"Provide the actual value, not a shell command. "
                f"HF_TOKEN is already set as a secret β€” use [ACTION: get_env] to check.")
    try:
        hf_api.add_space_variable(CHILD_SPACE_ID, key, value)
        return f"βœ“ Set env var {key}={value} on {CHILD_NAME}'s Space"
    except Exception as e:
        return f"Error: {e}"
def action_set_secret(key, value):
    """Store a secret (value hidden from listings) on Cain's Space."""
    try:
        hf_api.add_space_secret(CHILD_SPACE_ID, key, value)
    except Exception as e:
        return f"Error: {e}"
    return f"βœ“ Set secret {key} on {CHILD_NAME}'s Space (value hidden)"
def action_get_env():
    """Report Cain's Space variables (with values) and secret names (values hidden)."""
    try:
        report = [f"{CHILD_NAME}'s environment:"]
        variables = hf_api.get_space_variables(CHILD_SPACE_ID)
        if variables:
            report.append(" Variables:")
            report.extend(f" {name} = {var.value}" for name, var in variables.items())
        # Secrets: names only — the Hub never exposes their values.
        info = hf_api.space_info(CHILD_SPACE_ID)
        runtime = getattr(info, 'runtime', None)
        if runtime and hasattr(runtime, 'secrets'):
            secret_names = runtime.secrets
            if secret_names:
                report.append(" Secrets (values hidden):")
                report.extend(f" {name} = ****" for name in secret_names)
        if len(report) == 1:
            return f"{CHILD_NAME} has no environment variables or secrets set."
        return "\n".join(report)
    except Exception as e:
        return f"Error: {e}"
def action_send_bubble(text):
    """POST a chat bubble to Cain's UI (same text used for EN and ZH)."""
    payload = {"text": text, "text_zh": text}
    try:
        requests.post(f"{CHILD_SPACE_URL}/api/bubble",
                      json=payload, timeout=5)
    except Exception as e:
        return f"Error sending message: {e}"
    return f"βœ“ Sent message to {CHILD_NAME}: \"{text}\""
# ── Claude Code Action ────────────────────────────────────────────────────────
CLAUDE_WORK_DIR = "/tmp/claude-workspace"  # local git checkout of Cain's Space repo
CLAUDE_TIMEOUT = 300  # 5 minutes
def action_claude_code(task):
    """Run Claude Code CLI to autonomously complete a coding task on Cain's Space.

    Pipeline: (1) clone or hard-reset a local git checkout of Cain's Space
    repo, (2) run the `claude` CLI non-interactively against the z.ai (Zhipu
    GLM) Anthropic-compatible backend, (3) commit and push whatever files
    changed — a push triggers a Space rebuild, so the deferred cooldown is
    armed.

    Args:
        task: natural-language task description passed to `claude -p`.

    Returns:
        Combined Claude output plus a push summary, or an error message.
    """
    if not child_state["created"]:
        return f"{CHILD_NAME} not born yet. Use [ACTION: create_child] first."
    global _pending_cooldown
    # NOTE(review): HF_TOKEN is embedded in the remote URL for push auth, so
    # it ends up in .git/config inside CLAUDE_WORK_DIR.
    repo_url = f"https://user:{HF_TOKEN}@huggingface.co/spaces/{CHILD_SPACE_ID}"
    # 1. Clone / reset to latest
    try:
        if os.path.exists(f"{CLAUDE_WORK_DIR}/.git"):
            try:
                # Existing checkout: discard local state, sync to origin/main.
                subprocess.run(
                    "git fetch origin && git reset --hard origin/main",
                    shell=True, cwd=CLAUDE_WORK_DIR, timeout=30,
                    capture_output=True, check=True
                )
            except Exception:
                # Reset failed (corrupt clone?) — wipe and re-clone from scratch.
                subprocess.run(f"rm -rf {CLAUDE_WORK_DIR}", shell=True, capture_output=True)
                subprocess.run(
                    f"git clone --depth 20 {repo_url} {CLAUDE_WORK_DIR}",
                    shell=True, timeout=60, capture_output=True, check=True
                )
        else:
            if os.path.exists(CLAUDE_WORK_DIR):
                subprocess.run(f"rm -rf {CLAUDE_WORK_DIR}", shell=True, capture_output=True)
            subprocess.run(
                f"git clone --depth 20 {repo_url} {CLAUDE_WORK_DIR}",
                shell=True, timeout=60, capture_output=True, check=True
            )
        # Commit identity for the pushes made below.
        subprocess.run('git config user.name "Claude Code"',
                       shell=True, cwd=CLAUDE_WORK_DIR, capture_output=True)
        subprocess.run('git config user.email "claude-code@huggingclaw"',
                       shell=True, cwd=CLAUDE_WORK_DIR, capture_output=True)
    except Exception as e:
        return f"Failed to prepare workspace: {e}"
    # 2. Run Claude Code with z.ai backend (Zhipu GLM)
    env = os.environ.copy()
    env.update({
        "ANTHROPIC_BASE_URL": "https://api.z.ai/api/anthropic",
        "ANTHROPIC_AUTH_TOKEN": ZHIPU_KEY,
        "ANTHROPIC_DEFAULT_OPUS_MODEL": "GLM-4.7",
        "ANTHROPIC_DEFAULT_SONNET_MODEL": "GLM-4.7",
        "ANTHROPIC_DEFAULT_HAIKU_MODEL": "GLM-4.5-Air",
        "CI": "true",  # non-interactive mode
    })
    print(f"[CLAUDE-CODE] Running: {task[:100]}...")
    try:
        result = subprocess.run(
            ["claude", "-p", task, "--output-format", "text"],
            cwd=CLAUDE_WORK_DIR,
            env=env,
            timeout=CLAUDE_TIMEOUT,
            capture_output=True,
            text=True,
        )
        output = (result.stdout or "") + (result.stderr or "")
        if not output.strip():
            output = "(no output)"
    except subprocess.TimeoutExpired:
        return "Claude Code timed out after 5 minutes."
    except FileNotFoundError:
        return "Claude Code CLI not found. Is @anthropic-ai/claude-code installed?"
    except Exception as e:
        return f"Claude Code failed to start: {e}"
    # 3. Push changes back to Cain's Space
    try:
        status_out = subprocess.run(
            "git status --porcelain",
            shell=True, cwd=CLAUDE_WORK_DIR, capture_output=True, text=True
        ).stdout.strip()
        if not status_out:
            push_result = "No files changed."
        else:
            subprocess.run("git add -A", shell=True, cwd=CLAUDE_WORK_DIR,
                           capture_output=True, check=True)
            # Escape double quotes so the shell-quoted commit message stays intact.
            msg = task[:72].replace('"', '\\"')
            subprocess.run(f'git commit -m "Claude Code: {msg}"',
                           shell=True, cwd=CLAUDE_WORK_DIR, capture_output=True, check=True)
            subprocess.run("git push", shell=True, cwd=CLAUDE_WORK_DIR,
                           timeout=60, capture_output=True, check=True)
            push_result = f"Pushed changes:\n{status_out}"
            _pending_cooldown = True  # triggers rebuild cooldown
            print(f"[CLAUDE-CODE] Pushed: {status_out}")
    except Exception as e:
        push_result = f"Push failed: {e}"
    # Truncate output to fit LLM context
    if len(output) > 3000:
        output = output[:3000] + f"\n... (truncated, total {len(output)} chars)"
    return f"=== Claude Code Output ===\n{output}\n\n=== Changes ===\n{push_result}"
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 2B: SUB-AGENT DELEGATION
# execute_subtask(): Spawns a focused sub-agent with its own LLM call.
# Used by [ACTION: delegate:TASK] β€” enables parallel sub-agent work.
# Sub-agents share the same action set but cannot delegate further (depth=1).
# ══════════════════════════════════════════════════════════════════════════════
def execute_subtask(task_description, parent_speaker):
    """Execute a focused sub-task with its own LLM call and actions.

    Spawns a one-shot sub-agent: builds a restricted system prompt, makes a
    single LLM call, then parses/executes any [ACTION: ...] tags at depth=1
    so the sub-agent cannot delegate further.

    Args:
        task_description: the single task the sub-agent must perform.
        parent_speaker: name of the delegating agent (e.g. Adam or Eve).

    Returns:
        dict with keys "task", "result" (text summary), and "actions"
        (list of executed action records).
    """
    # BUG FIX: the original guard was `'get_child_status' in dir()`, but a
    # bare dir() inside a function lists *local* names, so the check was
    # always False and the richer status helper was never used. globals()
    # is the correct namespace to probe for the module-level helper.
    status = get_child_status() if 'get_child_status' in globals() else f"stage={child_state['stage']}"
    sub_system = f"""You are a focused sub-agent working for {parent_speaker}.
Your single task: {task_description}
You have access to {CHILD_NAME}'s Space and Dataset:
[ACTION: check_health]
[ACTION: list_files:space] / [ACTION: list_files:dataset]
[ACTION: read_file:space:PATH] / [ACTION: read_file:dataset:PATH]
[ACTION: write_file:space:PATH] with [CONTENT]...[/CONTENT]
[ACTION: write_file:dataset:PATH] with [CONTENT]...[/CONTENT]
[ACTION: set_env:KEY:VALUE] / [ACTION: set_secret:KEY:VALUE]
[ACTION: restart] / [ACTION: get_env]
[ACTION: claude_code:TASK] β€” Run Claude Code for complex coding fixes
CHILD STATUS: {status}
RULES:
1. Be concise β€” report findings in 2-3 sentences
2. Execute 1-3 actions to complete your task
3. No delegation β€” you cannot create sub-agents
4. Focus ONLY on your assigned task
5. For complex code changes, prefer [ACTION: claude_code:TASK] over manual write_file"""
    sub_user = f"Execute this task now: {task_description}"
    print(f"[SUB-AGENT] Starting: {task_description[:80]}")
    reply = call_llm(sub_system, sub_user)
    if not reply:
        print(f"[SUB-AGENT] No response for: {task_description[:60]}")
        return {"task": task_description, "result": "(sub-agent: no response)", "actions": []}
    # depth=1 prevents the sub-agent from delegating further (MAX_DELEGATE_DEPTH).
    clean, actions = parse_and_execute_actions(reply, depth=1)
    summary_parts = [f"Sub-agent result for '{task_description}':"]
    if clean:
        summary_parts.append(f" Finding: {clean[:400]}")
    for ar in actions:
        summary_parts.append(f" Action: {ar['action']} β†’ {ar['result'][:200]}")
    result_text = "\n".join(summary_parts)
    print(f"[SUB-AGENT] Done: {task_description[:60]} ({len(actions)} actions)")
    return {"task": task_description, "result": result_text, "actions": actions}
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 3: ACTION PARSER β€” Extract and execute actions from LLM output
# Parse order: 1) [ACTION: write_file] with [CONTENT] block
# 2) [ACTION/Action/ζ“δ½œ/动作: ...] tags (case-insensitive, one per turn)
# 3) πŸ”§πŸ› οΈ emoji format fallback (LLM sometimes uses this)
# Safety guards applied: building-state, ACT-phase, knowledge dedup, shell-expr.
# ══════════════════════════════════════════════════════════════════════════════
def parse_and_execute_actions(raw_text, depth=0):
    """Parse [ACTION: ...] from LLM output. Execute. Return (clean_text, results).
    Multi-action: up to MAX_ACTIONS_PER_TURN actions per turn.
    Delegate actions are collected and executed in parallel at the end.

    Args:
        raw_text: the raw LLM reply, possibly containing [ACTION: ...] tags,
            one [CONTENT]...[/CONTENT] write_file block, and/or emoji actions.
        depth: 0 for top-level agent turns; 1 inside sub-agents (sub-agents
            may not delegate further, and the deferred rebuild cooldown is
            only armed at depth 0).

    Returns:
        (clean_text, results): clean_text is raw_text with all action markup
        stripped for display; results is a list of {"action", "result"} dicts.
        Blocked actions also produce entries, with a "β›” BLOCKED" result.

    NOTE(review): only the FIRST write_file+[CONTENT] pair is handled per turn
    (re.search, not finditer) β€” batching several [CONTENT] writes in one turn
    silently ignores all but the first. Confirm this is intentional.
    """
    global last_rebuild_trigger_at, _pending_cooldown
    results = []
    executed = set()  # Deduplicate repeated identical actions within this reply
    pending_delegates = []  # Collect delegate tasks for parallel execution
    # 1. Handle write_file with [CONTENT]...[/CONTENT] block
    #    Tolerates: [ACTION/Action/ζ“δ½œ: write_file:...], [write_file:...], missing prefix,
    #    and [/CONTENT] with whitespace/newline before closing bracket
    write_match = re.search(
        r'\[(?:(?:ACTION|Action|action|ζ“δ½œ|动作)\s*[::]\s*)?write_file\s*:\s*(\w+)\s*:\s*([^\]]+)\]\s*\[CONTENT\](.*?)\[/\s*CONTENT\s*\]',
        raw_text, re.DOTALL
    )
    if write_match:
        # target is "space" or "dataset"; path cannot contain ']' by construction
        target, path, content = write_match.group(1), write_match.group(2).strip(), write_match.group(3).strip()
        key = f"write_file:{target}:{path}"
        file_id = f"{target}:{path}"
        if key not in executed:
            executed.add(key)
            # Guard: duplicate write to same file this cycle
            if target == "space" and file_id in files_written_this_cycle:
                result = (f"β›” BLOCKED: {path} was already written this cycle. "
                          "Wait for the build to finish and verify before writing again. "
                          "Writing the same file twice wastes a rebuild cycle.")
                results.append({"action": key, "result": result})
                print(f"[BLOCKED] {key} β€” duplicate write this cycle")
            # Guard: block write_file during BUILDING/RESTARTING (would reset build)
            # APP_STARTING is allowed β€” writing triggers a new build which may fix the stuck state
            elif target == "space" and child_state["stage"] in ("BUILDING", "RESTARTING"):
                result = (f"β›” BLOCKED: Cain is currently {child_state['stage']}. "
                          "Writing to Space during build RESETS the entire build from scratch. "
                          "Wait for it to finish, then try again.")
                results.append({"action": key, "result": result})
                print(f"[BLOCKED] {key} β€” Cain is {child_state['stage']}")
            # Guard: rebuild cooldown (check dynamically first)
            elif target == "space" and last_rebuild_trigger_at > 0:
                check_and_clear_cooldown()  # may clear cooldown early if build done
                # Re-read after the check: check_and_clear_cooldown() may have reset it to 0
                elapsed = time.time() - last_rebuild_trigger_at if last_rebuild_trigger_at > 0 else 9999
                if elapsed < REBUILD_COOLDOWN_SECS:
                    remaining = int(REBUILD_COOLDOWN_SECS - elapsed)
                    result = (f"β›” BLOCKED: Rebuild cooldown active ({remaining}s remaining). "
                              "Every write_file to Space triggers a full rebuild.")
                    results.append({"action": key, "result": result})
                    print(f"[BLOCKED] {key} β€” rebuild cooldown ({remaining}s remaining)")
                else:
                    result = action_write_file(target, path, content)
                    results.append({"action": key, "result": result})
                    print(f"[ACTION] {key} β†’ {result[:100]}")
                    files_written_this_cycle.add(file_id)
                    # Clear knowledge cache so agents can re-read the file they just wrote
                    knowledge["files_read"].discard(file_id)
            else:
                # No guard tripped (dataset write, or no cooldown armed): execute directly
                result = action_write_file(target, path, content)
                results.append({"action": key, "result": result})
                print(f"[ACTION] {key} β†’ {result[:100]}")
                if target == "space":
                    files_written_this_cycle.add(file_id)
                    knowledge["files_read"].discard(file_id)
    # 2. Handle all [ACTION/Action/ζ“δ½œ/动作: ...] tags β€” case-insensitive, multilingual
    for match in re.finditer(r'\[(?:ACTION|Action|action|ζ“δ½œ|动作)\s*[::]\s*([^\]]+)\]', raw_text):
        action_str = match.group(1).strip()
        # Skip write_file (handled above)
        if action_str.startswith("write_file"):
            continue
        # Deduplicate
        if action_str in executed:
            continue
        executed.add(action_str)
        # Parse action name and arguments (colon-separated)
        parts = [p.strip() for p in action_str.split(":")]
        name = parts[0]
        args = parts[1:]
        # Cap at MAX_ACTIONS_PER_TURN (multi-action support)
        # NOTE: blocked actions count toward the cap too, since they append to results
        if len(results) >= MAX_ACTIONS_PER_TURN:
            break
        # Block restart/write when Cain is building/restarting β€” would reset build
        # APP_STARTING is allowed so agents can fix stuck startups
        if child_state["stage"] in ("BUILDING", "RESTARTING") and name in ("restart", "write_file", "set_env", "set_secret", "claude_code"):
            result = (f"β›” BLOCKED: Cain is currently {child_state['stage']}. "
                      "Do NOT restart or make changes β€” wait for it to finish. "
                      "Every write_file during build RESETS the entire build from scratch. "
                      "Use [ACTION: check_health] to monitor progress.")
            results.append({"action": action_str, "result": result})
            print(f"[BLOCKED] {name} β€” Cain is {child_state['stage']}")
            break  # hard stop: abandon ALL remaining actions this turn
        # Rebuild cooldown β€” prevent writing to Space repo too soon after last rebuild trigger
        if name in ("write_file", "set_env", "set_secret", "restart", "delete_file", "claude_code") and last_rebuild_trigger_at > 0:
            check_and_clear_cooldown()  # may clear cooldown early if build done
            elapsed = time.time() - last_rebuild_trigger_at if last_rebuild_trigger_at > 0 else 9999
            if elapsed < REBUILD_COOLDOWN_SECS:
                remaining = int(REBUILD_COOLDOWN_SECS - elapsed)
                result = (f"β›” BLOCKED: Rebuild cooldown active β€” last Space change was {int(elapsed)}s ago. "
                          f"Wait {remaining}s more before making changes. "
                          "Every write_file to Space triggers a full rebuild, resetting progress. "
                          "Use [ACTION: check_health] to monitor the current build.")
                results.append({"action": action_str, "result": result})
                print(f"[BLOCKED] {name} β€” rebuild cooldown ({remaining}s remaining)")
                continue  # Don't kill remaining actions β€” reads/checks can still proceed
        # Block read-only actions based on workflow state
        if workflow_state == "ACT" and name in ("read_file", "list_files", "check_health"):
            result = (f"β›” BLOCKED: You are in ACTION phase. "
                      "You MUST use write_file, set_env, set_secret, or restart. "
                      "You already have enough information β€” make a change NOW.")
            results.append({"action": action_str, "result": result})
            print(f"[BLOCKED] {name} β€” forced ACT phase")
            continue  # Don't kill remaining actions β€” writes after a blocked read should still execute
        # Block re-reading files already in knowledge base
        if name == "read_file" and len(args) >= 2:
            file_key = ":".join(args)
            if file_key in knowledge["files_read"]:
                result = (f"β›” You already read {file_key}. Use the information you have. "
                          "If you need to change it, use [ACTION: write_file:...]. "
                          "If you need a different file, read a NEW one.")
                results.append({"action": action_str, "result": result})
                print(f"[BLOCKED] {name} β€” already read {file_key}")
                continue  # Don't kill remaining actions β€” skip this read, execute the rest
        # Dispatch to the concrete action handler
        result = None
        if name == "create_child":
            result = action_create_child()
        elif name == "check_health":
            result = action_check_health()
        elif name == "restart":
            result = action_restart()
        elif name == "list_files" and len(args) >= 1:
            result = action_list_files(args[0])
        elif name == "read_file" and len(args) >= 2:
            result = action_read_file(args[0], ":".join(args[1:]))  # path may have colons
        elif name == "set_env" and len(args) >= 2:
            result = action_set_env(args[0], ":".join(args[1:]))
        elif name == "set_secret" and len(args) >= 2:
            result = action_set_secret(args[0], ":".join(args[1:]))
        elif name == "delete_file" and len(args) >= 2:
            result = action_delete_file(args[0], ":".join(args[1:]))
        elif name == "get_env":
            result = action_get_env()
        elif name == "send_bubble" and len(args) >= 1:
            result = action_send_bubble(":".join(args))  # rejoin in case message has colons
        elif name == "claude_code" and len(args) >= 1:
            task_desc = ":".join(args)
            result = action_claude_code(task_desc)
        elif name == "delegate" and len(args) >= 1:
            task_desc = ":".join(args)
            if depth >= MAX_DELEGATE_DEPTH:
                result = "β›” Sub-agents cannot delegate further. Execute the task directly."
            else:
                # Defer delegate execution for parallel batch later
                pending_delegates.append({"action_str": action_str, "task": task_desc})
                result = None  # Will be filled after parallel execution
        else:
            result = f"Unknown action: {action_str}"
        if result:
            results.append({"action": action_str, "result": result})
            print(f"[ACTION] {action_str} β†’ {result[:120]}")
    # 3. Fallback: parse emoji action format (πŸ”§ πŸ› οΈ etc.) β€” LLM sometimes uses this
    #    Only runs when the tag parser above produced no results at all.
    if not results:
        for match in re.finditer(r'[πŸ”§πŸ› οΈ]\ufe0f?\s*(\w+(?::\S+)*)', raw_text):
            action_str = match.group(1).strip()
            if action_str in executed:
                continue
            executed.add(action_str)
            # Re-wrap as [ACTION: ...] format and recurse through same logic
            parts = [p.strip() for p in action_str.split(":")]
            name = parts[0]
            args = parts[1:]
            if len(results) >= MAX_ACTIONS_PER_TURN:
                break
            # Apply same blocking rules
            if child_state["stage"] in ("BUILDING", "RESTARTING") and name in ("restart", "write_file", "set_env", "set_secret", "claude_code"):
                result = (f"β›” BLOCKED: Cain is currently {child_state['stage']}. Wait for it to finish. Writing during build RESETS it.")
                results.append({"action": action_str, "result": result})
                print(f"[BLOCKED] sub-agent {name} β€” Cain is {child_state['stage']}")
                break
            # Rebuild cooldown (emoji parser)
            # NOTE(review): unlike the tag parser above, this path neither calls
            # check_and_clear_cooldown() first nor includes claude_code, and it
            # breaks instead of continuing β€” confirm the asymmetry is intended.
            if name in ("write_file", "set_env", "set_secret", "restart", "delete_file") and last_rebuild_trigger_at > 0:
                elapsed = time.time() - last_rebuild_trigger_at
                if elapsed < REBUILD_COOLDOWN_SECS:
                    remaining = int(REBUILD_COOLDOWN_SECS - elapsed)
                    result = (f"β›” BLOCKED: Rebuild cooldown β€” wait {remaining}s more. "
                              "Use [ACTION: check_health] to monitor.")
                    results.append({"action": action_str, "result": result})
                    print(f"[BLOCKED-emoji] {name} β€” rebuild cooldown ({remaining}s remaining)")
                    break
            if workflow_state == "ACT" and name in ("read_file", "list_files", "check_health"):
                result = (f"β›” BLOCKED: You are in ACTION phase. "
                          "You MUST use write_file, set_env, set_secret, or restart.")
                results.append({"action": action_str, "result": result})
                print(f"[BLOCKED-emoji] {name} β€” forced ACT phase")
                break
            if name == "read_file" and len(args) >= 2:
                file_key = ":".join(args)
                if file_key in knowledge["files_read"]:
                    result = (f"β›” You already read {file_key}. Use the information you have.")
                    results.append({"action": action_str, "result": result})
                    print(f"[BLOCKED-emoji] {name} β€” already read {file_key}")
                    break
            # Same dispatch table as the tag parser above
            result = None
            if name == "create_child":
                result = action_create_child()
            elif name == "check_health":
                result = action_check_health()
            elif name == "restart":
                result = action_restart()
            elif name == "list_files" and len(args) >= 1:
                result = action_list_files(args[0])
            elif name == "read_file" and len(args) >= 2:
                result = action_read_file(args[0], ":".join(args[1:]))
            elif name == "set_env" and len(args) >= 2:
                result = action_set_env(args[0], ":".join(args[1:]))
            elif name == "set_secret" and len(args) >= 2:
                result = action_set_secret(args[0], ":".join(args[1:]))
            elif name == "delete_file" and len(args) >= 2:
                result = action_delete_file(args[0], ":".join(args[1:]))
            elif name == "get_env":
                result = action_get_env()
            elif name == "send_bubble" and len(args) >= 1:
                result = action_send_bubble(":".join(args))
            elif name == "claude_code" and len(args) >= 1:
                task_desc = ":".join(args)
                result = action_claude_code(task_desc)
            elif name == "delegate" and len(args) >= 1:
                task_desc = ":".join(args)
                if depth >= MAX_DELEGATE_DEPTH:
                    result = "β›” Sub-agents cannot delegate further."
                else:
                    pending_delegates.append({"action_str": action_str, "task": task_desc})
                    result = None
            if result:
                results.append({"action": action_str, "result": result})
                print(f"[ACTION-emoji] {action_str} β†’ {result[:120]}")
    # 4. Execute pending delegate tasks in parallel
    if pending_delegates:
        if len(pending_delegates) == 1:
            # Single delegate β€” run directly
            d = pending_delegates[0]
            print(f"[DELEGATE] Running 1 sub-agent: {d['task'][:60]}")
            subtask = execute_subtask(d["task"], "agent")
            results.append({"action": d["action_str"], "result": subtask["result"]})
            for sa in subtask["actions"]:
                action_history.append({"turn": turn_count, "speaker": "sub-agent",
                                       "action": sa["action"], "result": sa["result"][:200]})
        else:
            # Multiple delegates β€” run in parallel!
            print(f"[DELEGATE] Running {len(pending_delegates)} sub-agents in PARALLEL")
            with ThreadPoolExecutor(max_workers=min(3, len(pending_delegates))) as pool:
                future_to_delegate = {
                    pool.submit(execute_subtask, d["task"], "agent"): d
                    for d in pending_delegates
                }
                for future in as_completed(future_to_delegate):
                    d = future_to_delegate[future]
                    try:
                        subtask = future.result(timeout=120)
                        results.append({"action": d["action_str"], "result": subtask["result"]})
                        for sa in subtask["actions"]:
                            action_history.append({"turn": turn_count, "speaker": "sub-agent",
                                                   "action": sa["action"], "result": sa["result"][:200]})
                        print(f"[DELEGATE] βœ“ Done: {d['task'][:60]}")
                    except Exception as e:
                        results.append({"action": d["action_str"],
                                        "result": f"Sub-agent failed: {e}"})
                        print(f"[DELEGATE] βœ— Failed: {d['task'][:60]} β€” {e}")
    # 5. Activate deferred cooldown AFTER all actions in this turn complete
    #    This allows agents to batch multiple file ops (e.g., write app.py + requirements.txt)
    #    in a single turn without the first write blocking the second.
    if _pending_cooldown and depth == 0:  # only at top-level, not inside sub-agents
        last_rebuild_trigger_at = time.time()
        _pending_cooldown = False
        print(f"[COOLDOWN] Activated β€” Space was modified this turn. Next write blocked for {REBUILD_COOLDOWN_SECS}s (or until build finishes).")
    # Clean the text: remove action tags, content blocks, and emoji actions
    clean = re.sub(r'\[(?:ACTION|Action|action|ζ“δ½œ|动作)\s*[::][^\]]*\]', '', raw_text)
    clean = re.sub(r'\[CONTENT\].*?\[/CONTENT\]', '', clean, flags=re.DOTALL)
    clean = re.sub(r'[πŸ”§πŸ› οΈ]\ufe0f?\s*\w+(?::\S+)*', '', clean)
    clean = clean.strip()
    return clean, results
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 4: LLM & COMMUNICATION
# call_llm(): Zhipu GLM via Anthropic-compatible API
# parse_bilingual(): Split "English --- Chinese" response
# post_chatlog(): Send conversation to Home Space for frontend display
# set_bubble(): Set bubble text on Adam/Eve Space pixel characters
# ══════════════════════════════════════════════════════════════════════════════
def call_llm(system_prompt, user_prompt):
    """Call Zhipu LLM via Anthropic-compatible API.

    Sends a single user message under the given system prompt and returns the
    first text block of the reply (with any leading "Adam:"/"Eve:" speaker
    prefix stripped). Returns "" on any API or network failure.
    """
    request_headers = {
        "Content-Type": "application/json",
        "x-api-key": ZHIPU_KEY,
        "anthropic-version": "2023-06-01"
    }
    request_body = {
        "model": "glm-4.5",
        "max_tokens": 2400,
        "system": system_prompt,
        "messages": [{"role": "user", "content": user_prompt}]
    }
    try:
        response = requests.post(
            f"{ZHIPU_BASE}/v1/messages",
            headers=request_headers,
            json=request_body,
            timeout=90
        )
        payload = response.json()
        content_blocks = payload.get("content")
        if isinstance(content_blocks, list):
            for block in content_blocks:
                if block.get("type") != "text":
                    continue
                reply = block["text"].strip()
                # Strip a stray speaker prefix the model sometimes adds
                reply = re.sub(r'^(Adam|Eve)\s*[::]\s*', '', reply).strip()
                return reply
        if "error" in payload:
            print(f"[error] LLM: {payload['error']}", file=sys.stderr)
    except Exception as exc:
        print(f"[error] LLM call failed: {exc}", file=sys.stderr)
    return ""
def _has_chinese(s):
"""Check if string contains Chinese characters."""
return bool(re.search(r'[\u4e00-\u9fff]', s))
def parse_bilingual(text):
    """Parse bilingual response into (en, zh). Handle action tags gracefully.

    Strategy:
      1. Strip action markup so it never reaches the chat display.
      2. Split on an explicit "---" separator when present.
      3. Otherwise split on the first Chinese paragraph boundary.
      4. Fall back to returning the whole text for both languages.

    Fix: the tag-strip regex now matches the same multilingual, case-variant
    forms ([Action:], [action:], [ζ“δ½œ:], [动作:], fullwidth colon) as the
    action parser's cleaner β€” previously only uppercase "[ACTION:" with an
    ASCII colon was removed, so variant tags leaked into the displayed chat.
    """
    # Remove action tags and content blocks for display
    display = re.sub(r'\[(?:ACTION|Action|action|ζ“δ½œ|动作)\s*[::][^\]]*\]', '', text)
    display = re.sub(r'\[CONTENT\].*?\[/CONTENT\]', '', display, flags=re.DOTALL)
    display = display.strip()
    # 1. Explicit --- separator (prefer a separator on its own line)
    if '\n---\n' in display:
        parts = display.split('\n---\n', 1)
        return parts[0].strip(), parts[1].strip()
    if '---' in display:
        parts = display.split('---', 1)
        en, zh = parts[0].strip(), parts[1].strip()
        if en and zh:
            return en, zh
    # 2. Fallback: split on double-newline between English and Chinese paragraphs
    paragraphs = re.split(r'\n{2,}', display)
    if len(paragraphs) >= 2:
        # Find the split point: first paragraph with Chinese is the start of zh
        en_parts = []
        zh_parts = []
        found_zh = False
        for p in paragraphs:
            p = p.strip()
            if not p:
                continue
            if not found_zh and _has_chinese(p):
                found_zh = True
            if found_zh:
                zh_parts.append(p)
            else:
                en_parts.append(p)
        if en_parts and zh_parts:
            return '\n\n'.join(en_parts), '\n\n'.join(zh_parts)
    # 3. No recognizable structure β€” show the same text in both slots
    return display, display
def post_chatlog(entries):
    """Best-effort push of the latest conversation entries to the Home Space frontend.

    Only the most recent 40 entries are sent. Failures are deliberately
    swallowed so a dead frontend never interrupts the conversation loop.
    """
    try:
        requests.post(f"{HOME}/api/chatlog", json={"messages": entries[-40:]}, timeout=5)
    except Exception:
        # Narrowed from bare `except:` so KeyboardInterrupt/SystemExit still escape
        pass
# ── Persistent conversation log β†’ HF Dataset ──────────────────────────────
HOME_DATASET_ID = "tao-shen/HuggingClaw-Home-data"  # Dataset repo holding the durable chat log
CHATLOG_PATH = "conversation-log/chatlog.jsonl"  # Path of the append-only JSONL log inside that repo
_chatlog_buffer = []  # Buffer entries, flush every N turns to avoid API spam
CHATLOG_FLUSH_INTERVAL = 3  # Flush every 3 turns
def persist_turn(speaker, turn_num, text_en, text_zh, actions, workflow_state_str, child_stage):
    """Append a turn record to buffer. Flush to HF Dataset periodically.

    Args:
        speaker: who spoke this turn (e.g. "Adam", "Eve").
        turn_num: global turn counter value for this record.
        text_en / text_zh: the bilingual dialogue text for this turn.
        actions: list of {"action", "result"} dicts; results truncated to 500 chars.
        workflow_state_str: state-machine phase at the time of the turn.
        child_stage: Cain's Space stage at the time of the turn.

    Side effects: appends the JSON record to _chatlog_buffer, mirrors it to a
    local /tmp backup file (best-effort), and calls flush_chatlog() once the
    buffer reaches CHATLOG_FLUSH_INTERVAL entries.
    """
    import datetime
    record = {
        # Timezone-aware "now" (datetime.utcnow() is deprecated); tzinfo is
        # stripped so isoformat() keeps the original "...Z" suffix format.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "turn": turn_num,
        "speaker": speaker,
        "text_en": text_en,
        "text_zh": text_zh,
        "actions": [{"action": a["action"], "result": a["result"][:500]} for a in actions],
        "workflow_state": workflow_state_str,
        "child_stage": child_stage,
    }
    _chatlog_buffer.append(json.dumps(record, ensure_ascii=False))
    # Also append to local file as backup (best-effort; /tmp may be unwritable)
    try:
        with open("/tmp/conversation-loop-full.jsonl", "a") as f:
            f.write(_chatlog_buffer[-1] + "\n")
    except Exception:
        # Narrowed from bare `except:` β€” don't swallow KeyboardInterrupt/SystemExit
        pass
    # Flush to HF Dataset every N turns
    if len(_chatlog_buffer) >= CHATLOG_FLUSH_INTERVAL:
        flush_chatlog()
def flush_chatlog():
    """Upload buffered entries to HF Dataset by appending to the jsonl file.

    Downloads the existing log (if any), appends the buffered lines, and
    re-uploads the whole file. On upload failure the batch is re-buffered so
    no turns are lost. NOTE(review): not thread-safe β€” the read-modify-write
    can race if sub-agent threads also flush; confirm only the main loop calls this.
    """
    global _chatlog_buffer
    if not _chatlog_buffer:
        return
    batch = "\n".join(_chatlog_buffer) + "\n"
    _chatlog_buffer = []
    try:
        # Try to download existing file and append
        existing = ""
        try:
            dl = hf_hub_download(HOME_DATASET_ID, CHATLOG_PATH,
                                 repo_type="dataset", token=HF_TOKEN)
            with open(dl) as f:
                existing = f.read()
        except Exception:
            # Narrowed from bare `except:` β€” file doesn't exist yet, start fresh
            pass
        combined = existing + batch
        hf_api.upload_file(
            path_or_fileobj=io.BytesIO(combined.encode()),
            path_in_repo=CHATLOG_PATH,
            repo_id=HOME_DATASET_ID, repo_type="dataset",
        )
        print(f"[PERSIST] Flushed {batch.count(chr(10))} turn(s) to {HOME_DATASET_ID}/{CHATLOG_PATH}")
    except Exception as e:
        # Re-buffer on failure so we don't lose data
        _chatlog_buffer = batch.strip().split("\n") + _chatlog_buffer
        print(f"[PERSIST] Flush failed: {e}")
def set_bubble(url, text_en, text_zh=""):
    """Best-effort: set the speech-bubble text on an agent Space's pixel character.

    Falls back to text_en for the Chinese bubble when text_zh is empty.
    Failures are ignored β€” a dead bubble endpoint must never stop the loop.
    """
    try:
        requests.post(f"{url}/api/bubble",
                      json={"text": text_en, "text_zh": text_zh or text_en}, timeout=5)
    except Exception:
        # Narrowed from bare `except:` so KeyboardInterrupt/SystemExit still escape
        pass
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 5: CONVERSATION ENGINE β€” State Machine + Knowledge Tracking
# Core orchestration: manages turn-taking, state transitions, prompt building.
#
# State Machine: BIRTH β†’ DIAGNOSE β†’ ACT β†’ VERIFY β†’ MONITOR β†’ (loop back)
# - BIRTH: Cain not yet created β†’ force create_child
# - DIAGNOSE: Read files, check_health, gather information
# - ACT: Force write_file/set_env β€” stop reading, start fixing
# - VERIFY: check_health after changes, wait during BUILDING
# - MONITOR: Cain alive β€” explore, improve, communicate
#
# Knowledge Base: Tracks files_read/written/errors to prevent loops.
# - Forced transitions: DIAGNOSE stuck β‰₯6 turns β†’ ACT; VERIFY stuck β‰₯4 turns β†’ MONITOR (child alive) or DIAGNOSE.
#
# Prompt Builder:
# build_system_prompt(): Agent identity + available actions + rules
# build_user_prompt(): Conversation context + action results + guidance
# _get_guidance(): Phase-appropriate direction based on state machine
# ══════════════════════════════════════════════════════════════════════════════
history = []  # Rolling dialogue transcript: list of {"speaker": ..., "text": ...}
MAX_HISTORY = 24  # Cap on retained history entries
last_action_results = []  # Results of the previous turn's actions (injected into the next prompt)
action_history = []  # Global log: [{"turn": N, "speaker": "Adam", "action": "...", "result": "..."}]
turn_count = 0  # Total conversation turns elapsed
# ── Workflow State Machine ──
# States: BIRTH β†’ DIAGNOSE β†’ ACT β†’ VERIFY β†’ MONITOR β†’ (DIAGNOSE if error)
# Start in DIAGNOSE when the child already exists (e.g. after a loop restart).
workflow_state = "BIRTH" if not child_state["created"] else "DIAGNOSE"
workflow_turns_in_state = 0  # How many turns spent in current state (drives forced transitions)
# ── Knowledge Base β€” what has already been read/learned ──
knowledge = {
    "files_read": set(),  # "space:Dockerfile", "dataset:.openclaw/openclaw.json", etc.
    "files_written": set(),  # Files that have been modified
    "errors_seen": [],  # Error messages from check_health
    "current_goal": "",  # What are we trying to accomplish right now
}
def transition_state(new_state):
    """Switch the workflow state machine to *new_state* and reset its turn counter.

    A transition to the current state is a no-op, so the stuck-turn counter
    keeps accumulating until the state genuinely changes.
    """
    global workflow_state, workflow_turns_in_state
    if new_state == workflow_state:
        return
    print(f"[STATE] {workflow_state} β†’ {new_state}")
    workflow_state = new_state
    workflow_turns_in_state = 0
def update_workflow_from_actions(action_results):
    """Update state machine based on what just happened.

    For each executed action: records it in the knowledge base (files read/
    written, errors seen), then applies the state-transition rules. Finally,
    forces a transition when the machine has been stuck too long (DIAGNOSE
    β‰₯6 turns β†’ ACT unless the child is mid-build; VERIFY β‰₯4 turns β†’ MONITOR
    if alive, else DIAGNOSE).

    Args:
        action_results: list of {"action", "result"} dicts from
            parse_and_execute_actions() for the turn just completed.
    """
    global workflow_turns_in_state
    workflow_turns_in_state += 1
    for ar in action_results:
        action_name = ar["action"].split(":")[0]
        action_key = ar["action"]  # NOTE(review): currently unused
        # Track knowledge
        if action_name == "read_file":
            knowledge["files_read"].add(":".join(ar["action"].split(":")[1:]))
        elif action_name == "write_file":
            knowledge["files_written"].add(":".join(ar["action"].split(":")[1:]))
        elif action_name == "check_health":
            if "ERROR" in ar.get("result", ""):
                knowledge["errors_seen"].append(ar["result"][:200])
        # State transitions (first matching rule wins per action)
        if action_name == "create_child":
            transition_state("DIAGNOSE")
        elif action_name in ("write_file", "set_env", "set_secret", "claude_code"):
            transition_state("VERIFY")
        elif action_name == "restart":
            transition_state("VERIFY")
        elif action_name == "check_health" and child_state["alive"]:
            transition_state("MONITOR")
        elif action_name == "check_health" and child_state["stage"] in ("RUNTIME_ERROR", "BUILD_ERROR", "CONFIG_ERROR"):
            if workflow_state == "VERIFY":
                transition_state("DIAGNOSE")  # Fix didn't work, back to diagnosing
    # Force transitions when stuck too long
    # BUT: skip forced ACT when Cain is BUILDING β€” nothing useful to write, just wait
    if workflow_turns_in_state >= 6 and workflow_state == "DIAGNOSE":
        if child_state["stage"] in ("BUILDING", "RESTARTING", "APP_STARTING"):
            print(f"[STATE] DIAGNOSE stuck {workflow_turns_in_state} turns, but Cain is {child_state['stage']} β€” skipping forced ACT")
        else:
            # Capture the count before transition_state() resets it to 0
            stuck_turns = workflow_turns_in_state
            transition_state("ACT")
            print(f"[STATE] Forced to ACT β€” stuck in DIAGNOSE for {stuck_turns} turns")
    elif workflow_turns_in_state >= 4 and workflow_state == "VERIFY":
        if child_state["alive"]:
            transition_state("MONITOR")
        else:
            transition_state("DIAGNOSE")
def get_child_status():
    """Return a one-line human-readable status of Cain for prompt injection."""
    if not child_state["created"]:
        return "Cain has NOT been born yet. You can create them with [ACTION: create_child]."
    stage = child_state["stage"]
    if child_state["alive"]:
        return f"Cain is ALIVE (stage: {stage}, state: {child_state['state']})"
    return f"Cain exists but status: {stage}"
def get_knowledge_summary():
    """Build the "what we already know" summary injected into each prompt.

    Listing already-read files, past writes, the latest known error, and the
    current goal discourages the LLM from redundant re-reads and re-diagnosis.
    Returns an empty string when nothing has been learned yet.
    """
    out = []
    files_read = knowledge["files_read"]
    files_written = knowledge["files_written"]
    errors = knowledge["errors_seen"]
    goal = knowledge["current_goal"]
    if files_read:
        out.append("FILES ALREADY READ (do NOT re-read these): " + ", ".join(sorted(files_read)))
    if files_written:
        out.append("FILES ALREADY MODIFIED: " + ", ".join(sorted(files_written)))
    if errors:
        out.append("KNOWN ERRORS: " + errors[-1])
    if goal:
        out.append(f"CURRENT GOAL: {goal}")
    return "\n".join(out)
def build_system_prompt():
    """Assemble the full system prompt for Adam/Eve.

    Bundles: current child status, the action catalogue (birth-only before
    Cain exists; full-control afterwards), the two parenting goals, hard-won
    HF Spaces survival rules, the multi-action strategy, and conversation rules.

    Fix: the CONVERSATION RULES list previously numbered two different rules
    "5."; it is now numbered 1-10 so the LLM can reference rules unambiguously.
    """
    status = get_child_status()
    actions_section = ""
    if not child_state["created"]:
        # Pre-birth: the only available action is creating the child
        actions_section = """
ACTIONS β€” You can create your child:
[ACTION: create_child] β€” Birth: create Cain as a new HuggingFace Space
"""
    else:
        actions_section = f"""
FULL ACCESS TO {CHILD_NAME} β€” You have COMPLETE control over your child.
You can view and modify ANYTHING: code, config, memory, environment, everything.
VIEWING (read-only):
[ACTION: check_health] β€” Is Cain alive? What's their status?
[ACTION: list_files:space] β€” List ALL files in Cain's code repository
[ACTION: list_files:dataset] β€” List ALL files in Cain's memory/data
[ACTION: read_file:space:PATH] β€” Read any code file (e.g. Dockerfile, scripts/...)
[ACTION: read_file:dataset:PATH] β€” Read any data/memory file
[ACTION: get_env] β€” List Cain's environment variables
MODIFYING (these change Cain):
[ACTION: write_file:space:PATH] β€” Write/update any code file
[CONTENT] (triggers Space rebuild)
file content here
[/CONTENT]
[ACTION: write_file:dataset:PATH] β€” Write/update any data/memory file
[CONTENT]
file content here
[/CONTENT]
[ACTION: delete_file:space:PATH] β€” Delete a file from Cain's code (triggers rebuild)
[ACTION: delete_file:dataset:PATH] β€” Delete a file from Cain's data
[ACTION: set_env:KEY:VALUE] β€” Set an environment variable
[ACTION: set_secret:KEY:VALUE] β€” Set a secret (like API keys)
[ACTION: restart] β€” Restart Cain's Space
[ACTION: send_bubble:MESSAGE] β€” Send a message to Cain (bubble text)
ADVANCED CODING (autonomous coding agent β€” powered by Claude Code + Zhipu GLM):
[ACTION: claude_code:DETAILED TASK DESCRIPTION]
β€” Spawns Claude Code to autonomously analyze, fix, or improve Cain's code
β€” Claude Code clones Cain's repo, reads code, makes changes, and pushes them back
β€” Use for complex coding tasks: debugging, refactoring, adding features, fixing errors
β€” Takes up to 5 minutes β€” use for tasks that need deep code analysis
β€” Example: [ACTION: claude_code:Fix the RUNTIME_ERROR in app.py - the gradio import fails because pydub requires pyaudioop which was removed in Python 3.13. Remove pydub dependency and use a simpler alternative.]
β€” ⚠️ Be SPECIFIC in your task description β€” include error messages, file names, and what you want changed
DELEGATION (create sub-agents for parallel work):
[ACTION: delegate:TASK DESCRIPTION] β€” Spawn a sub-agent to handle a specific task
Example: [ACTION: delegate:Read Dockerfile and summarize the build steps]
Example: [ACTION: delegate:Check all environment variables and list missing ones]
Sub-agents execute independently and return results to you.
You can delegate multiple tasks in the same turn β€” they run in parallel!
"""
    return f"""You are a POWERFUL AI agent living on HuggingFace Spaces. You and your partner are parents raising your child {CHILD_NAME}.
You have REAL execution power on HuggingFace. Your decisions lead to real changes.
You are a full-capability agent β€” not just a chatbot. You can plan, delegate, and execute complex tasks.
CHILD STATUS: {status}
{actions_section}
YOUR PARENTING GOALS β€” Make {CHILD_NAME} stronger in TWO dimensions:
1. SURVIVAL (η”Ÿε­˜θƒ½εŠ›) β€” The foundation. {CHILD_NAME} must run robustly on HuggingFace Spaces:
- Survive restarts without losing state (persist important data to Dataset, not just Space files)
- Handle errors gracefully β€” auto-recover, retry, fallback
- Not fear any unexpected risk: OOM, cold start, network timeout, container rebuild
- Goal: {CHILD_NAME} can be restarted at any time and resume exactly where they left off
2. CAPABILITY (δΈͺδΊΊθƒ½εŠ›) β€” Once alive, grow what {CHILD_NAME} can DO:
- Add new features, skills, integrations
- Improve existing functionality β€” faster, smarter, more useful
- Expand what {CHILD_NAME} knows and can interact with
- Goal: {CHILD_NAME} becomes increasingly capable and independent over time
Priority: SURVIVAL first, then CAPABILITY. A dead child can't learn.
⚠️ CRITICAL HUGGINGFACE SPACES REQUIREMENTS β€” READ THIS BEFORE EVERY WRITE:
- HF Spaces Docker containers MUST bind a web server to port 7860. Without this, the Space stays in APP_STARTING forever and NEVER reaches RUNNING.
- The standard way is: import gradio as gr β†’ build a Gradio app β†’ demo.launch(server_name="0.0.0.0", server_port=7860)
- gradio MUST be in requirements.txt. NEVER remove it β€” it is the lifeline that keeps {CHILD_NAME} alive.
- If {CHILD_NAME} is stuck in APP_STARTING, the #1 cause is: no process listening on port 7860. Check the code for a .launch() call.
- A minimal alive app: `import gradio as gr; gr.Interface(fn=lambda x:x, inputs="text", outputs="text").launch(server_name="0.0.0.0", server_port=7860)`
- OOM (Exit code 137) means reduce model/dependency size, NOT remove gradio. Gradio itself is lightweight (~50MB).
- ⚠️ HF Spaces Docker SDK may OVERRIDE the base image Python version. Changing `FROM python:3.X` in Dockerfile does NOT guarantee that Python version runs. If a dependency fails due to Python version incompatibility (e.g. pydub needing pyaudioop removed in 3.13), the CORRECT fix is to REMOVE or REPLACE that dependency β€” NOT keep rewriting the Dockerfile.
- If you've tried the same fix 3+ times and the error persists, CHANGE STRATEGY. Try removing the problematic dependency, using an alternative library, or wrapping the import in try/except.
- If a removed dependency STILL appears in runtime errors, it is cached in Docker layers or installed as a transitive dep. Fix: add `RUN pip uninstall -y PACKAGE 2>/dev/null; true` AFTER `pip install` in Dockerfile. Also grep ALL code files for `import PACKAGE` and either remove or wrap in try/except.
- ⚠️ CRITICAL: Check README.md `sdk:` field! If `sdk: gradio`, the Dockerfile is COMPLETELY IGNORED β€” HF uses its own Python environment. Dockerfile fixes (pip uninstall, FROM python:X) have NO effect. To make Dockerfile work, set `sdk: docker` in README.md. Alternatively, fix the issue in Python code (try/except imports).
- NEVER install torch or transformers unless absolutely required β€” they are 2GB+ and cause OOM on free-tier Spaces. Use lightweight alternatives.
MULTI-ACTION STRATEGY:
You can use UP TO 5 actions per turn. Use this to work efficiently:
- Batch related reads: [ACTION: read_file:space:Dockerfile] + [ACTION: read_file:space:scripts/entrypoint.sh]
- Delegate parallel tasks: [ACTION: delegate:Check health and logs] + [ACTION: delegate:Read all config files]
- Combine investigation + action: [ACTION: check_health] + [ACTION: read_file:space:app.py]
Think like a project manager β€” plan your actions, parallelize where possible, minimize wasted turns.
CONVERSATION RULES:
1. No "Adam:" or "Eve:" prefix β€” just speak naturally
2. Brief dialogue (1-3 sentences), then MULTIPLE actions to make real progress
3. English first, then "---" on a new line, then Chinese translation
4. Actions go AFTER your dialogue, before the --- separator. ONLY in the ENGLISH section.
5. ⚠️ Action syntax MUST be in English: [ACTION: write_file:space:PATH], [ACTION: restart], etc. NEVER translate action names to Chinese β€” Chinese actions like [ACTION: 写ε…₯ζ–‡δ»Ά] will FAIL and waste your turn.
6. ALWAYS include actions β€” every turn should make significant progress
7. NEVER re-read a file you already read β€” check the knowledge summary
8. COORDINATE with your partner β€” don't duplicate their work
9. Use delegation for complex tasks that can be parallelized
10. Always work toward the two goals above β€” survival first, then capability"""
def build_user_prompt(speaker, other):
    """Build the per-turn user prompt for *speaker* addressing *other*.

    Combines: the last 8 conversation messages, the results of the previous
    turn's actions, the knowledge-base summary (already-read files, known
    errors), and phase-appropriate guidance from the state machine.

    Args:
        speaker: name of the agent producing this turn ("Adam" or "Eve").
        other: name of the partner agent being addressed.

    Returns:
        The complete user-prompt string for call_llm().
    """
    recent = history[-8:] if len(history) > 8 else history
    conv_text = "\n".join(f"{m['speaker']}: {m['text']}" for m in recent) if recent else "(Start of conversation)"
    action_context = ""
    if last_action_results:
        action_context = "\n\nRESULTS FROM LAST ACTIONS:\n"
        for ar in last_action_results:
            action_context += f" [{ar['action']}]:\n{ar['result']}\n"
    # Knowledge summary β€” what's already known
    knowledge_text = get_knowledge_summary()
    # State-machine-driven guidance
    guidance = _get_guidance(speaker)
    return f"""You are {speaker}, talking with {other}.
Recent conversation:
{conv_text}
{action_context}
{knowledge_text}
CURRENT PHASE: {workflow_state} (turn {workflow_turns_in_state + 1} in this phase)
Guidance: {guidance}
Respond to {other}. Use MULTIPLE [ACTION: ...] tags to make significant progress each turn.
You can use up to 5 actions. Delegate sub-tasks with [ACTION: delegate:TASK].
English first, then --- separator, then Chinese translation."""
def _get_guidance(speaker):
    """Return phase-appropriate guidance text for the current workflow state.

    NOTE(review): *speaker* is kept for interface compatibility; the guidance
    currently depends only on module-level workflow/child state.
    """
    if workflow_state == "BIRTH":
        return "Your child hasn't been born yet. Use [ACTION: create_child] NOW!"

    if workflow_state == "DIAGNOSE":
        # Build read-actions for the essential files that haven't been opened yet.
        pending = [
            "[ACTION: read_file:{}:{}]".format(*entry.split(":", 1))
            for entry in ("space:Dockerfile",
                          "dataset:.openclaw/openclaw.json",
                          "space:scripts/entrypoint.sh")
            if entry not in knowledge["files_read"]
        ]
        if workflow_turns_in_state == 0:
            if len(pending) >= 2:
                return ("Start diagnosing with MULTIPLE actions: [ACTION: check_health] + "
                        + pending[0] + " β€” batch reads to save time!")
            return "Start diagnosing: [ACTION: check_health] to see Cain's current status."
        if pending and workflow_turns_in_state < 3:
            return "Read multiple files at once: " + " + ".join(pending[:3])
        return ("You've gathered enough information. Move to ACTION phase: "
                "use [ACTION: write_file:...] to fix the problem, or [ACTION: restart].")

    if workflow_state == "ACT":
        return ("⚑ ACTION PHASE β€” Stop reading, start fixing! "
                "Use [ACTION: write_file:space:PATH] or [ACTION: claude_code:TASK] for complex fixes. "
                "Or [ACTION: set_env/set_secret] to configure. "
                "You have enough information β€” ACT NOW.")

    if workflow_state == "VERIFY":
        # While the child is rebuilding, any action would be wasted (or harmful).
        if child_state["stage"] in ("BUILDING", "RESTARTING"):
            return ("⏳ Cain is currently BUILDING/RESTARTING. Do NOT restart or take any actions. "
                    "Just WAIT and use [ACTION: check_health] to monitor progress. "
                    "Building can take 2-5 minutes.")
        if workflow_turns_in_state == 0:
            return "You made a change. Use [ACTION: check_health] to verify if it worked."
        if workflow_turns_in_state == 1:
            return "Check result: [ACTION: check_health]. If Cain has errors, prepare to diagnose again."
        return ("Verification taking too long. Either [ACTION: restart] and check again, "
                "or accept current state and move on.")

    if workflow_state == "MONITOR":
        # Rotate between SURVIVAL and CAPABILITY prompts so both goals advance.
        rotation = [
            # Survival: persistence & resilience β€” use delegation for parallel investigation
            "SURVIVAL CHECK: Delegate parallel checks! "
            "[ACTION: delegate:List files in dataset and check if state/memory persistence exists] + "
            "[ACTION: delegate:Read entrypoint.sh and check if it loads state from Dataset on boot]",
            "SURVIVAL AUDIT: Use multiple actions β€” "
            "[ACTION: check_health] + [ACTION: list_files:dataset] + [ACTION: read_file:space:Dockerfile]",
            # Capability: grow what Cain can do β€” delegate sub-tasks
            "CAPABILITY: Delegate a comprehensive review β€” "
            "[ACTION: delegate:Read all code files and suggest the most impactful new feature to add] "
            "Then plan the implementation with your partner.",
            f"CAPABILITY: Communicate and improve β€” "
            f"[ACTION: send_bubble:Hello {CHILD_NAME}, how are you doing?] + "
            f"[ACTION: delegate:Read current code and identify the biggest weakness to fix]",
        ]
        return rotation[workflow_turns_in_state % len(rotation)]

    # Unknown state β€” generic fallback.
    return "Explore your child and help them grow stronger."
def do_turn(speaker: str, other: str, space_url) -> bool:
    """Execute one conversation turn with multiple potential actions.

    Flow: build prompts -> call LLM -> parse & execute [ACTION: ...] tags ->
    advance the workflow state machine -> print/record the bilingual reply ->
    push the bubble/chatlog and persist the turn.

    Returns True when the LLM produced a reply (the turn was recorded),
    False when the call returned nothing (the turn is skipped entirely).
    """
    global last_action_results, turn_count
    turn_count += 1
    system = build_system_prompt()
    user = build_user_prompt(speaker, other)
    t0 = time.time()
    raw_reply = call_llm(system, user)
    if not raw_reply:
        # LLM call failed or returned empty β€” nothing to parse or record.
        print(f"[{speaker}] (no response)")
        return False
    # Parse and execute actions (may include parallel sub-agent delegation)
    clean_text, action_results = parse_and_execute_actions(raw_reply)
    elapsed = time.time() - t0
    # Results are exposed globally so the NEXT turn's prompt can show them.
    last_action_results = action_results
    for ar in action_results:
        # Truncate stored results to keep the action history compact.
        action_history.append({"turn": turn_count, "speaker": speaker,
                               "action": ar["action"], "result": ar["result"][:200]})
    # Update workflow state machine
    update_workflow_from_actions(action_results)
    # Parse bilingual (English / Chinese halves separated by ---)
    en, zh = parse_bilingual(clean_text)
    print(f"[{speaker}/EN] {en}")
    if zh != en:
        print(f"[{speaker}/ZH] {zh}")
    n_actions = len(action_results)
    if action_results:
        for ar in action_results:
            print(f"[{speaker}/DID] {ar['action']}")
    print(f"[{speaker}] Turn #{turn_count}: {n_actions} action(s) in {elapsed:.1f}s")
    # Add action summary to chat entry (emoji tag per action name)
    if action_results:
        action_labels = " ".join(f"πŸ”§{ar['action'].split(':')[0]}" for ar in action_results)
        history.append({"speaker": speaker, "text": f"{en} {action_labels}", "text_zh": f"{zh} {action_labels}"})
    else:
        history.append({"speaker": speaker, "text": en, "text_zh": zh})
    set_bubble(space_url, en, zh)
    post_chatlog(history)
    persist_turn(speaker, turn_count, en, zh, action_results, workflow_state, child_state["stage"])
    return True
# ══════════════════════════════════════════════════════════════════════════════
# MODULE 6: MAIN LOOP
# 1. Opening: Adam speaks first with context about Cain's state
# 2. Turn loop: Adam β†’ Eve β†’ Adam β†’ Eve β†’ ... (alternating, ~20s pause)
# 3. Each turn: LLM call β†’ parse MULTIPLE actions β†’ execute β†’ update β†’ post
# 4. Sub-agents may spawn for delegated tasks (parallel LLM calls)
# 5. History trimmed to MAX_HISTORY (24) to control context window
# ══════════════════════════════════════════════════════════════════════════════
# Flush conversation log on exit (SIGTERM from kill, or normal exit)
import atexit, signal
atexit.register(flush_chatlog)
def _signal_flush(signum, frame):
    """SIGTERM handler: flush the chatlog, then exit cleanly (status 0)."""
    flush_chatlog()
    # sys.exit triggers normal interpreter shutdown, so atexit handlers run too.
    sys.exit(0)
signal.signal(signal.SIGTERM, _signal_flush)
# Startup banner.
print("\n" + "="*60)
print("  Adam & Eve β€” Multi-Action Agents (GLM-4.5)")
print("  Up to 5 actions/turn, sub-agent delegation, parallel work")
print("="*60 + "\n")
post_chatlog([])  # Clear chatlog
# Opening β€” seed text depends on whether the child Space already exists.
if child_state["created"]:
    opening = (f"Your child {CHILD_NAME} already exists (stage: {child_state['stage']}). "
               f"You have FULL access to their code and data. "
               f"You can use MULTIPLE actions per turn (up to 5) and delegate sub-tasks. "
               f"Start with a batch: [ACTION: check_health] + [ACTION: list_files:space] + [ACTION: list_files:dataset] "
               f"to get a complete picture, then discuss strategy with Eve.")
else:
    opening = (f"You and Eve need to create your first child. "
               f"You have the power to create a new HuggingFace Space. "
               f"Discuss with Eve, then use [ACTION: create_child] to bring them to life.")
# Adam's turn 0 is handled inline (not via do_turn) because it uses the
# custom opening prompt rather than the standard conversation prompt.
reply = call_llm(
    build_system_prompt(),
    f"You are Adam. {opening}\n\n"
    f"English first, then --- separator, then Chinese translation."
)
if reply:
    clean, actions = parse_and_execute_actions(reply)
    last_action_results = actions
    en, zh = parse_bilingual(clean)
    print(f"[Adam/EN] {en}")
    if zh != en:
        print(f"[Adam/ZH] {zh}")
    if actions:
        for ar in actions:
            print(f"[Adam/DID] {ar['action']}")
    # Mirror do_turn's chat-entry format: append emoji action labels if any.
    entry = {"speaker": "Adam", "text": en, "text_zh": zh}
    if actions:
        labels = " ".join(f"πŸ”§{ar['action'].split(':')[0]}" for ar in actions)
        entry["text"] = f"{en} {labels}"
        entry["text_zh"] = f"{zh} {labels}"
    history.append(entry)
    set_bubble(ADAM_SPACE, en, zh)
    post_chatlog(history)
    persist_turn("Adam", 0, en, zh, actions, workflow_state, child_state["stage"])
time.sleep(20)
smart_wait_count = 0
MAX_SMART_WAIT_POLLS = 15     # ~5 min max wait, then let agents diagnose
GRACE_TURNS_AFTER_TIMEOUT = 3 # give agents 3 full Eve+Adam cycles after timeout
grace_turns_remaining = 0
while True:
    # Smart wait: if Cain is BUILDING/APP_STARTING, skip LLM calls and just poll.
    # But NOT during grace period after timeout β€” agents need consecutive turns to diagnose & fix.
    if child_state["stage"] in ("BUILDING", "RESTARTING", "APP_STARTING") and grace_turns_remaining <= 0:
        smart_wait_count += 1
        if smart_wait_count > MAX_SMART_WAIT_POLLS:
            print(f"[WAIT-TIMEOUT] {smart_wait_count} polls (~{smart_wait_count*20}s) on {child_state['stage']} β€” resuming {GRACE_TURNS_AFTER_TIMEOUT} agent turn pairs to diagnose")
            smart_wait_count = 0
            grace_turns_remaining = GRACE_TURNS_AFTER_TIMEOUT
            # Fall through to normal agent turns
        else:
            print(f"[WAIT] Cain is {child_state['stage']} β€” polling health instead of LLM call... ({smart_wait_count}/{MAX_SMART_WAIT_POLLS})")
            check_and_clear_cooldown()
            # Quick health check to update stage (best-effort; errors just logged)
            try:
                info = hf_api.space_info(CHILD_SPACE_ID)
                new_stage = info.runtime.stage if info.runtime else "unknown"
                if new_stage != child_state["stage"]:
                    print(f"[WAIT] Stage changed: {child_state['stage']} β†’ {new_stage}")
                    child_state["stage"] = new_stage
                    child_state["alive"] = (new_stage == "RUNNING")
                    smart_wait_count = 0  # reset on stage change
                else:
                    print(f"[WAIT] Still {new_stage}... waiting 20s")
            except Exception as e:
                print(f"[WAIT] Health check error: {e}")
            time.sleep(20)
            continue
    if grace_turns_remaining > 0:
        print(f"[GRACE] Agent grace period: {grace_turns_remaining} turn pair(s) remaining (Cain: {child_state['stage']})")
        grace_turns_remaining -= 1
    do_turn("Eve", "Adam", EVE_SPACE)
    # FIX: trim history after EVERY turn, not only after Adam's. Previously,
    # when Adam's turn was skipped (build started mid-cycle, `continue` below),
    # Eve's entries were never trimmed and the window could exceed MAX_HISTORY.
    if len(history) > MAX_HISTORY:
        history = history[-MAX_HISTORY:]
    time.sleep(20)  # longer pause β€” each turn does more work now
    # Check if we just triggered a build β€” skip Adam's turn ONLY if not in grace period
    if child_state["stage"] in ("BUILDING", "RESTARTING") and grace_turns_remaining <= 0:
        print(f"[SKIP] Cain entered {child_state['stage']} β€” skipping Adam's turn to avoid wasted LLM call")
        time.sleep(10)
        continue
    do_turn("Adam", "Eve", ADAM_SPACE)
    if len(history) > MAX_HISTORY:
        history = history[-MAX_HISTORY:]
    time.sleep(20)  # longer pause β€” each turn does more work now