#!/bin/bash
# Check and analyze results
#
# Summarizes a results directory: best program metrics, eval agent memory,
# number of generations, metrics of the last three generations, database
# statistics, and whether auxiliary metrics show up in the newest generation.
#
# Usage: <this script> [results_directory]
#   Without an argument, the last directory (reverse lexical order) matching
#   "*eval_service*" under examples/circle_packing/results is used.
#   NOTE(review): "most recent" therefore assumes directory names sort
#   chronologically — confirm against how the directories are named.
#
# JSON fields are read with jq and database counts with sqlite3. When one of
# those commands fails, the affected value falls back to a default instead of
# aborting the whole report.

set -e

# Print a horizontal rule made of 80 "=" characters.
print_rule() {
  local rule
  printf -v rule '%*s' 80 ''
  printf '%s\n' "${rule// /=}"
}

# value_or DEFAULT CMD [ARGS...]
# Print the stdout of CMD; if CMD fails, print DEFAULT instead (never both).
# stderr of CMD is discarded on purpose: a failure is reported via DEFAULT.
value_or() {
  local fallback=$1 out
  shift
  if out=$("$@" 2>/dev/null); then
    printf '%s\n' "$out"
  else
    printf '%s\n' "$fallback"
  fi
}

# Succeed only when $1 is a decimal integer greater than zero.
is_positive() {
  [[ "$1" =~ ^[0-9]+$ ]] && ((10#$1 > 0))
}

echo "=================================="
echo "📊 Results Analysis"
echo "=================================="

# Find the most recent results directory
if [ $# -eq 0 ]; then
  RESULTS_DIR=$(find examples/circle_packing/results -type d -name "*eval_service*" | sort -r | head -1)
  if [ -z "$RESULTS_DIR" ]; then
    echo "❌ No results found"
    echo ""
    echo "Usage: $0 [results_directory]"
    exit 1
  fi
  echo "📁 Using most recent results: $RESULTS_DIR"
else
  RESULTS_DIR="$1"
fi
echo ""

# Check if results exist
if [ ! -d "$RESULTS_DIR" ]; then
  echo "❌ Directory not found: $RESULTS_DIR"
  exit 1
fi

if ! command -v jq >/dev/null 2>&1; then
  echo "⚠️ jq not found; JSON values will show their defaults" >&2
fi

print_rule
echo "📂 Results Directory: $RESULTS_DIR"
print_rule
echo ""

# 1. Check best program
best_metrics="$RESULTS_DIR/best/results/metrics.json"
if [ -f "$best_metrics" ]; then
  echo "✅ Best Program Found"
  echo "---"
  BEST_SCORE=$(value_or "N/A" jq -r '.combined_score' "$best_metrics")
  BEST_CORRECT=$(value_or "N/A" jq -r '.correct' "$best_metrics")
  BEST_GEN=$(value_or "N/A" jq -r '.generation // "N/A"' "$best_metrics")
  echo " Score: $BEST_SCORE"
  echo " Correct: $BEST_CORRECT"
  echo " Generation: $BEST_GEN"
  echo ""
else
  echo "❌ No best program found"
  echo ""
fi

# 2. Check eval agent memory
memory_dir="$RESULTS_DIR/eval_agent_memory"
if [ -d "$memory_dir" ]; then
  echo "✅ Eval Agent Memory Found"
  echo "---"
  if [ -f "$memory_dir/EVAL_AGENTS.md" ]; then
    line_count=$(wc -l < "$memory_dir/EVAL_AGENTS.md")
    # Arithmetic expansion strips the padding some wc implementations emit.
    echo " 📄 EVAL_AGENTS.md: $((line_count)) lines"
  fi
  if [ -f "$memory_dir/auxiliary_metrics.py" ]; then
    echo " 🐍 auxiliary_metrics.py: Found"
    # Count evaluate_ functions. grep -c already prints "0" (with exit
    # status 1) when nothing matches, so only the status is neutralized.
    NUM_METRICS=$(grep -c "^def evaluate_" "$memory_dir/auxiliary_metrics.py" || true)
    echo " 📊 Auxiliary metrics: ${NUM_METRICS:-0}"
  fi
  if [ -f "$memory_dir/service_state.json" ]; then
    echo " 💾 service_state.json: Found"
  fi
  echo ""
else
  echo "⚠️ No eval agent memory found"
  echo ""
fi

# 3. Check generations
# List gen_* directories once (version-sorted) and reuse the list below.
# Reading line by line keeps paths containing spaces intact.
gen_dirs=()
while IFS= read -r gen_dir; do
  gen_dirs+=("$gen_dir")
done < <(find "$RESULTS_DIR" -maxdepth 1 -type d -name "gen_*" | sort -V)
NUM_GENS=${#gen_dirs[@]}

echo "📈 Generations"
echo "---"
echo " Total generations: $NUM_GENS"
echo ""

# 4. Sample a few metrics files
echo "📊 Sample Metrics (last 3 generations)"
echo "---"
first_sample=$((NUM_GENS > 3 ? NUM_GENS - 3 : 0))
for ((i = first_sample; i < NUM_GENS; i++)); do
  gen_dir=${gen_dirs[i]}
  gen_metrics="$gen_dir/results/metrics.json"
  if [ -f "$gen_metrics" ]; then
    GEN_NUM=$(basename "$gen_dir" | sed 's/gen_//')
    SCORE=$(value_or "N/A" jq -r '.combined_score' "$gen_metrics")
    # Not `.correct // "N/A"`: jq's // also replaces false, which would
    # hide incorrect programs behind "N/A".
    CORRECT=$(value_or "N/A" jq -r 'if .correct == null then "N/A" else .correct end' "$gen_metrics")
    HAS_AUX=$(value_or "0" jq -r '.auxiliary | length' "$gen_metrics")
    HAS_DESC=$(value_or "0" jq -r '.auxiliary_descriptions | length' "$gen_metrics")
    echo " Gen $GEN_NUM: score=$SCORE, correct=$CORRECT, aux_metrics=$HAS_AUX, descriptions=$HAS_DESC"
  fi
done
echo ""

# 5. Check database
db_file="$RESULTS_DIR/evolution_db.sqlite"
if [ -f "$db_file" ]; then
  echo "💾 Database Statistics"
  echo "---"
  TOTAL_PROGRAMS=$(value_or "N/A" sqlite3 "$db_file" "SELECT COUNT(*) FROM programs;")
  CORRECT_PROGRAMS=$(value_or "N/A" sqlite3 "$db_file" "SELECT COUNT(*) FROM programs WHERE correct=1;")
  echo " Total programs: $TOTAL_PROGRAMS"
  echo " Correct programs: $CORRECT_PROGRAMS"
  echo ""
fi

# 6. Check if auxiliary metrics were used
echo "🔍 Auxiliary Metrics Integration Check"
echo "---"
# Check a recent generation for auxiliary metrics in public_metrics
RECENT_GEN=""
if ((NUM_GENS > 0)); then
  RECENT_GEN=${gen_dirs[NUM_GENS - 1]}
fi
# The emptiness guard avoids probing "/results/metrics.json" at the
# filesystem root when there are no generations.
if [ -n "$RECENT_GEN" ] && [ -f "$RECENT_GEN/results/metrics.json" ]; then
  recent_metrics="$RECENT_GEN/results/metrics.json"

  HAS_AUX_IN_PRIMARY=$(value_or "0" jq -r '.primary.public | keys | map(select(startswith("aux_"))) | length' "$recent_metrics")
  if is_positive "$HAS_AUX_IN_PRIMARY"; then
    echo " ✅ Auxiliary metrics merged into primary.public"
    echo " 📊 Found $HAS_AUX_IN_PRIMARY aux_ metrics"
  else
    echo " ⚠️ No aux_ metrics found in primary.public"
  fi

  HAS_TEXT_FEEDBACK=$(value_or "0" jq -r '.primary.text_feedback // "" | length' "$recent_metrics")
  if is_positive "$HAS_TEXT_FEEDBACK"; then
    echo " ✅ text_feedback present (likely contains metric descriptions)"
  else
    echo " ⚠️ No text_feedback found"
  fi
fi

echo ""
print_rule
echo "Analysis complete!"
print_rule
echo ""
echo "To view detailed results:"
echo " - Best program: $RESULTS_DIR/best/main.py"
echo " - Agent documentation: $RESULTS_DIR/eval_agent_memory/EVAL_AGENTS.md"
echo " - All metrics: find $RESULTS_DIR -name 'metrics.json'"
echo ""