#!/bin/bash
# Check and analyze results.
#
# Usage: <script> [results_directory]
#   With no argument, the lexicographically last directory whose name matches
#   "*eval_service*" under examples/circle_packing/results is analyzed.
#   Exits 1 when no results directory can be found.
set -e

echo "=================================="
# NOTE(review): the leading "π" glyphs are kept verbatim; they look like
# emoji that were mis-decoded, and the original cannot be recovered from here.
echo "π Results Analysis"
echo "=================================="

# Pick the results directory: explicit argument, or search the default root.
if [[ $# -eq 0 ]]; then
  RESULTS_ROOT="examples/circle_packing/results"
  RESULTS_DIR=""
  # Only search when the root exists, so a missing tree produces the friendly
  # message below instead of a raw find(1) error.
  if [[ -d "$RESULTS_ROOT" ]]; then
    # Reverse sort + head keeps the lexicographically last match; presumably
    # run directory names embed a sortable timestamp — TODO confirm.
    RESULTS_DIR=$(find "$RESULTS_ROOT" -type d -name "*eval_service*" | sort -r | head -1)
  fi
  if [[ -z "$RESULTS_DIR" ]]; then
    # Failure diagnostics go to stderr so stdout only carries the report.
    echo "❌ No results found" >&2
    echo "" >&2
    echo "Usage: $0 [results_directory]" >&2
    exit 1
  fi
  echo "π Using most recent results: $RESULTS_DIR"
else
  RESULTS_DIR="$1"
fi
echo ""

# Check if results exist
if [[ ! -d "$RESULTS_DIR" ]]; then
  echo "❌ Directory not found: $RESULTS_DIR" >&2
  exit 1
fi
# Banner naming the results directory being analyzed.
# The previous `echo "=" * 80` was a Python idiom: in shell the unquoted `*`
# is a glob, so it printed "=", every file name in the current directory and
# "80". Build a real 80-column rule instead.
printf -v HRULE '%*s' 80 ''
HRULE=${HRULE// /=}
echo "$HRULE"
echo "π Results Directory: $RESULTS_DIR"
echo "$HRULE"
echo ""
# 1. Check best program
#######################################
# Print score, correctness flag and generation of the best program.
# Arguments: $1 - results directory
# Outputs:   summary on stdout; reads $1/best/results/metrics.json
# Returns:   0 (a missing or unreadable metrics file is reported, not fatal)
#######################################
report_best_program() {
  local metrics_file="$1/best/results/metrics.json"
  local score correct generation

  if [[ ! -f "$metrics_file" ]]; then
    echo "❌ No best program found"
    echo ""
    return 0
  fi

  # The status marker used to be a mis-decoded emoji containing a line break,
  # which split this message across two output lines.
  echo "✅ Best Program Found"
  echo "---"
  # Fall back to "N/A" when jq fails (e.g. malformed JSON) so the script's
  # `set -e` does not abort the rest of the report; jq's own error message
  # still reaches stderr.
  score=$(jq -r '.combined_score' "$metrics_file") || score="N/A"
  correct=$(jq -r '.correct' "$metrics_file") || correct="N/A"
  generation=$(jq -r '.generation // "N/A"' "$metrics_file") || generation="N/A"
  echo " Score: $score"
  echo " Correct: $correct"
  echo " Generation: $generation"
  echo ""
}

report_best_program "$RESULTS_DIR"
# 2. Check eval agent memory
#######################################
# Report which eval-agent memory artifacts exist under a results directory.
# Arguments: $1 - results directory
# Outputs:   summary on stdout; inspects $1/eval_agent_memory
# Returns:   0
#######################################
report_eval_agent_memory() {
  local memory_dir="$1/eval_agent_memory"
  local metric_count

  if [[ ! -d "$memory_dir" ]]; then
    echo "⚠️ No eval agent memory found"
    echo ""
    return 0
  fi

  echo "✅ Eval Agent Memory Found"
  echo "---"
  # NOTE(review): the "π" glyphs below are kept verbatim; they look like
  # mis-decoded emoji whose original cannot be recovered from here.
  if [[ -f "$memory_dir/EVAL_AGENTS.md" ]]; then
    echo " π EVAL_AGENTS.md: $(wc -l < "$memory_dir/EVAL_AGENTS.md") lines"
  fi
  if [[ -f "$memory_dir/auxiliary_metrics.py" ]]; then
    echo " π auxiliary_metrics.py: Found"
    # Count top-level evaluate_* function definitions. grep -c already prints
    # "0" when nothing matches but exits 1, so the old `|| echo "0"` produced
    # "0<newline>0". `|| true` only neutralizes the exit status.
    metric_count=$(grep -c "^def evaluate_" "$memory_dir/auxiliary_metrics.py" || true)
    echo " π Auxiliary metrics: ${metric_count:-0}"
  fi
  if [[ -f "$memory_dir/service_state.json" ]]; then
    echo " 💾 service_state.json: Found"
  fi
  echo ""
}

report_eval_agent_memory "$RESULTS_DIR"
# 3. Check generations
# Count the gen_* directories that sit directly inside the results directory
# and print the total.
NUM_GENS=$(
  find "$RESULTS_DIR" -maxdepth 1 -type d -name "gen_*" \
    | wc -l
)
printf '%s\n' \
  "π Generations" \
  "---" \
  " Total generations: $NUM_GENS" \
  ""
# 4. Sample a few metrics files
#######################################
# Print one summary line for each of the last three gen_* directories
# (version-sorted) that contain results/metrics.json.
# Arguments: $1 - results directory
# Outputs:   summary on stdout
# Returns:   0
#######################################
print_sample_metrics() {
  local results_dir=$1
  local gen_dir metrics_file gen_num score correct aux_count desc_count

  # NOTE(review): "π" is kept verbatim; it looks like a mis-decoded emoji.
  echo "π Sample Metrics (last 3 generations)"
  echo "---"
  # Read paths line by line instead of `for x in $(find ...)`, which split
  # any path containing whitespace into several bogus entries.
  while IFS= read -r gen_dir; do
    metrics_file="$gen_dir/results/metrics.json"
    [[ -f "$metrics_file" ]] || continue

    gen_num=${gen_dir##*/}
    gen_num=${gen_num#gen_}
    # A single unreadable metrics file should not abort the whole report.
    score=$(jq -r '.combined_score' "$metrics_file") || score="N/A"
    # jq's `//` treats false like null, so `.correct // "N/A"` reported
    # "N/A" for programs that are explicitly incorrect. Only substitute the
    # placeholder when the value is really missing.
    correct=$(jq -r '.correct | if . == null then "N/A" else . end' "$metrics_file") || correct="N/A"
    aux_count=$(jq -r '.auxiliary | length' "$metrics_file" 2>/dev/null || echo "0")
    desc_count=$(jq -r '.auxiliary_descriptions | length' "$metrics_file" 2>/dev/null || echo "0")
    echo " Gen $gen_num: score=$score, correct=$correct, aux_metrics=$aux_count, descriptions=$desc_count"
  done < <(find "$results_dir" -maxdepth 1 -type d -name "gen_*" | sort -V | tail -3)
  echo ""
}

print_sample_metrics "$RESULTS_DIR"
# 5. Check database
# When the SQLite file exists, report the total number of rows in `programs`
# and how many of them have correct=1. Each count degrades to "N/A" if the
# sqlite3 call fails; its stderr is discarded.
if [[ -f "$RESULTS_DIR/evolution_db.sqlite" ]]; then
  TOTAL_PROGRAMS=$(
    sqlite3 "$RESULTS_DIR/evolution_db.sqlite" \
      "SELECT COUNT(*) FROM programs;" 2>/dev/null \
      || echo "N/A"
  )
  CORRECT_PROGRAMS=$(
    sqlite3 "$RESULTS_DIR/evolution_db.sqlite" \
      "SELECT COUNT(*) FROM programs WHERE correct=1;" 2>/dev/null \
      || echo "N/A"
  )
  printf '%s\n' \
    "πΎ Database Statistics" \
    "---" \
    " Total programs: $TOTAL_PROGRAMS" \
    " Correct programs: $CORRECT_PROGRAMS" \
    ""
fi
# 6. Check if auxiliary metrics were used
#######################################
# Inspect the metrics.json of the last (version-sorted) gen_* directory and
# report whether .primary.public holds keys starting with "aux_" and whether
# .primary.text_feedback is non-empty.
# Arguments: $1 - results directory
# Outputs:   summary on stdout; nothing beyond the heading when no
#            generation metrics file exists
# Returns:   0
#######################################
check_aux_integration() {
  local results_dir=$1
  local recent_gen metrics_file aux_count feedback_len

  # NOTE(review): "π" is kept verbatim; it looks like a mis-decoded emoji.
  echo "π Auxiliary Metrics Integration Check"
  echo "---"

  # Check a recent generation for auxiliary metrics in public_metrics
  recent_gen=$(find "$results_dir" -maxdepth 1 -type d -name "gen_*" | sort -V | tail -1)
  metrics_file="$recent_gen/results/metrics.json"

  # The -n guard matters: with no gen_* directory the path would collapse to
  # /results/metrics.json at the filesystem root.
  if [[ -n "$recent_gen" && -f "$metrics_file" ]]; then
    aux_count=$(jq -r '.primary.public | keys | map(select(startswith("aux_"))) | length' "$metrics_file" 2>/dev/null || echo "0")
    # Plain `[` is kept on purpose: it rejects non-integer text instead of
    # evaluating it arithmetically the way `[[ ... -gt ... ]]` would.
    if [ "$aux_count" -gt 0 ]; then
      echo " ✅ Auxiliary metrics merged into primary.public"
      echo " π Found $aux_count aux_ metrics"
    else
      echo " ⚠️ No aux_ metrics found in primary.public"
    fi

    feedback_len=$(jq -r '.primary.text_feedback // "" | length' "$metrics_file" 2>/dev/null || echo "0")
    if [ "$feedback_len" -gt 0 ]; then
      echo " ✅ text_feedback present (likely contains metric descriptions)"
    else
      echo " ⚠️ No text_feedback found"
    fi
  fi
  echo ""
}

check_aux_integration "$RESULTS_DIR"
# Closing banner and pointers to the detailed artifacts.
# The previous `echo "=" * 80` was a Python idiom: in shell the unquoted `*`
# is a glob, so it printed "=", every file name in the current directory and
# "80". Build a real 80-column rule instead. A stray trailing "|" that made
# the end of the script a syntax error has been dropped as well.
printf -v HRULE '%*s' 80 ''
HRULE=${HRULE// /=}
echo "$HRULE"
echo "Analysis complete!"
echo "$HRULE"
echo ""
echo "To view detailed results:"
echo " - Best program: $RESULTS_DIR/best/main.py"
echo " - Agent documentation: $RESULTS_DIR/eval_agent_memory/EVAL_AGENTS.md"
echo " - All metrics: find $RESULTS_DIR -name 'metrics.json'"
echo ""