#!/bin/bash
# WidgetTDC Agent Monitor - Runs every 15 minutes to ensure all agents are busy
# Locations of the monitor log and the two state files, all relative to the
# directory the script is started from.
MONITOR_LOG='.claude/logs/agent-monitor.log'
AGENT_STATE='.claude/agent-state.json'
CASCADE_STATE='.claude/agent-cascade-state.json'
# Ensure the directory that will hold the log file exists before logging.
mkdir -p "${MONITOR_LOG%/*}"
#######################################
# Write a timestamped message to stdout and append it to the monitor log.
# Globals:
#   MONITOR_LOG (read) - file the message is appended to
# Arguments:
#   $1 - message text
# Outputs:
#   "[YYYY-MM-DDTHH:MM:SS] <message>" on stdout and in MONITOR_LOG
#######################################
log() {
  local stamp
  stamp=$(date '+%Y-%m-%dT%H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$MONITOR_LOG"
}
#######################################
# Run one monitoring pass over the agent and cascade state files.
#
# Logs a summary of agent load and cascade progress, tries to restart the
# cascade when agents are idle while the cascade is RUNNING or INITIALIZED,
# and raises an alert when the cascade is BLOCKED or FAILED.
#
# Globals:
#   AGENT_STATE   (read) - path of the agent state JSON file
#   CASCADE_STATE (read) - path of the cascade state JSON file
#   MONITOR_LOG   (read) - file that receives the orchestrator's output
# Arguments:
#   None
# Outputs:
#   Status messages through log(); one empty line on stdout at the end.
# Returns:
#   1 when either state file is missing, 0 otherwise.
#######################################
check_agent_status() {
  # Keep every working variable local so a run does not leak state into
  # the calling shell.
  local idle_count loaded_count total_agents
  local cascade_status blocks_completed
  local restart_rc=0

  log "=== AGENT MONITOR CHECK STARTED ==="

  # Check if state files exist
  if [[ ! -f "$AGENT_STATE" ]]; then
    log "ERROR: Agent state file $AGENT_STATE not found"
    return 1
  fi
  if [[ ! -f "$CASCADE_STATE" ]]; then
    log "ERROR: Cascade state file $CASCADE_STATE not found"
    return 1
  fi

  # Parse agent state. jq's stderr is deliberately discarded: when jq fails
  # the counter falls back to "0".
  idle_count=$(jq -r '.runtime_agents.summary.idle_agents' "$AGENT_STATE" 2>/dev/null || echo "0")
  loaded_count=$(jq -r '.runtime_agents.summary.loaded_agents' "$AGENT_STATE" 2>/dev/null || echo "0")
  total_agents=$(jq -r '.runtime_agents.summary.total_agents' "$AGENT_STATE" 2>/dev/null || echo "0")

  # Parse cascade state. A missing, null or empty status is reported as
  # UNKNOWN, the same value used when jq itself fails.
  cascade_status=$(jq -r '.cascade_status // "UNKNOWN"' "$CASCADE_STATE" 2>/dev/null || echo "UNKNOWN")
  cascade_status=${cascade_status:-UNKNOWN}
  blocks_completed=$(jq -r '.blocks_completed | length' "$CASCADE_STATE" 2>/dev/null || echo "0")

  # The counters are compared arithmetically below. Bash evaluates a
  # non-numeric operand as an arithmetic expression (which can even run an
  # embedded command substitution), so anything that is not a plain
  # non-negative integer -- e.g. "null" for a missing key -- is reset to 0.
  [[ "$idle_count" =~ ^[0-9]+$ ]] || idle_count=0
  [[ "$loaded_count" =~ ^[0-9]+$ ]] || loaded_count=0
  [[ "$total_agents" =~ ^[0-9]+$ ]] || total_agents=0
  [[ "$blocks_completed" =~ ^[0-9]+$ ]] || blocks_completed=0

  log "Agent Status: $loaded_count loaded, $idle_count idle (out of $total_agents total)"
  log "Cascade Status: $cascade_status with $blocks_completed blocks completed"

  # Check if any agents are idle. The 10# prefix forces base 10 so a value
  # with a leading zero is not rejected as invalid octal.
  if (( 10#$idle_count > 0 )); then
    log "WARNING: $idle_count agents are idle - checking for available work..."
    # Try to restart cascade if agents are idle but cascade should be running
    if [[ "$cascade_status" == "RUNNING" || "$cascade_status" == "INITIALIZED" ]]; then
      log "Attempting to restart cascade execution..."
      # NOTE(review): the orchestrator path is relative, so this presumably
      # expects the script to be started from the project root -- confirm.
      python3 cascade-orchestrator.py 1 >> "$MONITOR_LOG" 2>&1 || restart_rc=$?
      if (( restart_rc != 0 )); then
        log "WARNING: cascade-orchestrator.py exited with status $restart_rc"
      fi
      log "Cascade restart attempted"
    fi
  else
    log "SUCCESS: All agents are busy (workload: $loaded_count/$total_agents)"
  fi

  # Check cascade health
  if [[ "$cascade_status" == "BLOCKED" || "$cascade_status" == "FAILED" ]]; then
    log "ALERT: Cascade is $cascade_status - requires intervention"
  elif [[ "$cascade_status" == "COMPLETE" ]]; then
    log "INFO: Cascade completed successfully"
  fi

  log "=== AGENT MONITOR CHECK COMPLETED ==="
  echo ""
}
# Main execution: run a single monitoring pass. The result of the check is
# not propagated -- the script always exits with status 0.
check_agent_status
exit 0