shinka-backup / scripts /dev /4_check_results.sh
JustinTX's picture
Add files using upload-large-folder tool
3f6526a verified
#!/bin/bash
# Check and analyze results.
#
# Usage: 4_check_results.sh [results_directory]
#
# Prints a human-readable summary of one results directory: the best program's
# metrics, eval agent memory files, generation count, a sample of per-generation
# metrics, database statistics and an auxiliary-metrics integration check.
#
# External tools used by the script: jq (metrics.json files) and sqlite3
# (evolution_db.sqlite).

# Abort on any unhandled command failure.
set -e

echo "=================================="
echo "📊 Results Analysis"
echo "=================================="
# Find the most recent results directory.
# With no argument, pick the *eval_service* directory whose path sorts last
# under the default results root (this is "most recent" only as far as the
# directory names sort chronologically). Otherwise use the first argument.
if [ $# -eq 0 ]; then
  # 2>/dev/null: a missing results root is reported by the friendly message
  # below rather than by find's raw error.
  RESULTS_DIR=$(
    find examples/circle_packing/results -type d -name "*eval_service*" 2>/dev/null \
      | sort -r \
      | head -n 1
  )
  if [ -z "$RESULTS_DIR" ]; then
    # Fatal diagnostics go to stderr so they are not mixed into captured output.
    {
      echo "❌ No results found"
      echo ""
      echo "Usage: $0 [results_directory]"
    } >&2
    exit 1
  fi
  echo "📁 Using most recent results: $RESULTS_DIR"
else
  RESULTS_DIR="$1"
fi
echo ""

# Check if results exist
if [ ! -d "$RESULTS_DIR" ]; then
  echo "❌ Directory not found: $RESULTS_DIR" >&2
  exit 1
fi
# Header naming the directory being analysed.
# An unquoted * after echo is expanded by the shell to the file names in the
# current directory, so the 80-character rule is built explicitly instead:
# pad an empty string to 80 spaces, then replace every space with '='.
printf -v SEPARATOR '%*s' 80 ''
SEPARATOR=${SEPARATOR// /=}

echo "$SEPARATOR"
echo "📂 Results Directory: $RESULTS_DIR"
echo "$SEPARATOR"
echo ""
# 1. Check best program
#######################################
# Print score, correctness and generation of the best program.
# Arguments: $1 - results directory
# Outputs:   summary lines on stdout
# Returns:   0 always; unreadable fields are shown as N/A
#######################################
report_best_program() {
  local metrics_file="$1/best/results/metrics.json"
  local score correct generation

  if [ ! -f "$metrics_file" ]; then
    echo "❌ No best program found"
    echo ""
    return 0
  fi

  echo "✅ Best Program Found"
  echo "---"
  # A failing jq (not installed, malformed JSON) would otherwise terminate the
  # whole script under `set -e` without any message; degrade to N/A instead.
  score=$(jq -r '.combined_score' "$metrics_file" 2>/dev/null) || score="N/A"
  correct=$(jq -r '.correct' "$metrics_file" 2>/dev/null) || correct="N/A"
  generation=$(jq -r '.generation // "N/A"' "$metrics_file" 2>/dev/null) || generation="N/A"
  echo " Score: $score"
  echo " Correct: $correct"
  echo " Generation: $generation"
  echo ""
}

report_best_program "$RESULTS_DIR"
# 2. Check eval agent memory
#######################################
# Report which eval agent memory files exist and how many evaluate_*
# functions auxiliary_metrics.py defines.
# Arguments: $1 - results directory
# Outputs:   summary lines on stdout
#######################################
report_eval_agent_memory() {
  local memory_dir="$1/eval_agent_memory"
  local num_metrics

  if [ ! -d "$memory_dir" ]; then
    echo "⚠️ No eval agent memory found"
    echo ""
    return 0
  fi

  echo "✅ Eval Agent Memory Found"
  echo "---"
  if [ -f "$memory_dir/EVAL_AGENTS.md" ]; then
    echo " 📄 EVAL_AGENTS.md: $(wc -l < "$memory_dir/EVAL_AGENTS.md") lines"
  fi
  if [ -f "$memory_dir/auxiliary_metrics.py" ]; then
    echo " 🐍 auxiliary_metrics.py: Found"
    # Count evaluate_ functions.
    # grep -c prints "0" AND exits 1 when nothing matches, so an `|| echo "0"`
    # fallback would produce two lines ("0" twice). `|| true` only neutralises
    # the exit status; the default covers grep printing nothing at all.
    num_metrics=$(grep -c "^def evaluate_" "$memory_dir/auxiliary_metrics.py" || true)
    echo " 📊 Auxiliary metrics: ${num_metrics:-0}"
  fi
  if [ -f "$memory_dir/service_state.json" ]; then
    echo " 💾 service_state.json: Found"
  fi
  echo ""
}

report_eval_agent_memory "$RESULTS_DIR"
# 3. Check generations
#######################################
# Print the number of gen_* directories directly inside a results directory.
# Arguments: $1 - results directory
# Outputs:   the count, without surrounding whitespace
#######################################
count_generations() {
  # Some wc implementations pad the number with leading spaces; strip them so
  # the value prints cleanly.
  find "$1" -maxdepth 1 -type d -name "gen_*" | wc -l | tr -d '[:space:]'
}

NUM_GENS=$(count_generations "$RESULTS_DIR")
echo "📈 Generations"
echo "---"
echo " Total generations: $NUM_GENS"
echo ""
# 4. Sample a few metrics files
#######################################
# Print one summary line for each of the last three gen_* directories
# (version-sorted by path) that contain results/metrics.json.
# Arguments: $1 - results directory
# Outputs:   summary lines on stdout
#######################################
report_sample_metrics() {
  local results_dir=$1
  local gen_dir metrics_file gen_num score correct num_aux num_desc

  echo "📊 Sample Metrics (last 3 generations)"
  echo "---"
  # Read find's output line by line: iterating over an unquoted $(find ...)
  # would split any path containing whitespace into several words.
  while IFS= read -r gen_dir; do
    metrics_file="$gen_dir/results/metrics.json"
    [ -f "$metrics_file" ] || continue

    # Directory name without its gen_ prefix.
    gen_num=${gen_dir##*/}
    gen_num=${gen_num#gen_}

    # Fall back instead of letting a jq failure abort the script under set -e.
    score=$(jq -r '.combined_score' "$metrics_file" 2>/dev/null) || score="N/A"
    correct=$(jq -r '.correct // "N/A"' "$metrics_file" 2>/dev/null) || correct="N/A"
    num_aux=$(jq -r '.auxiliary | length' "$metrics_file" 2>/dev/null) || num_aux="0"
    num_desc=$(jq -r '.auxiliary_descriptions | length' "$metrics_file" 2>/dev/null) || num_desc="0"
    echo " Gen $gen_num: score=$score, correct=$correct, aux_metrics=$num_aux, descriptions=$num_desc"
  done < <(find "$results_dir" -maxdepth 1 -type d -name "gen_*" | sort -V | tail -n 3)
  echo ""
}

report_sample_metrics "$RESULTS_DIR"
# 5. Check database
#######################################
# Print total and correct program counts from evolution_db.sqlite.
# Prints nothing when the database file does not exist.
# Arguments: $1 - results directory
# Outputs:   summary lines on stdout
#######################################
report_database_stats() {
  local db_file="$1/evolution_db.sqlite"
  local total_programs correct_programs

  [ -f "$db_file" ] || return 0

  echo "💾 Database Statistics"
  echo "---"
  # Any sqlite3 failure (tool missing, query error) is shown as N/A.
  total_programs=$(sqlite3 "$db_file" "SELECT COUNT(*) FROM programs;" 2>/dev/null) \
    || total_programs="N/A"
  correct_programs=$(sqlite3 "$db_file" "SELECT COUNT(*) FROM programs WHERE correct=1;" 2>/dev/null) \
    || correct_programs="N/A"
  echo " Total programs: $total_programs"
  echo " Correct programs: $correct_programs"
  echo ""
}

report_database_stats "$RESULTS_DIR"
# 6. Check if auxiliary metrics were used
#######################################
# Inspect the last (version-sorted) gen_* directory's metrics.json for aux_*
# keys under primary.public and for a non-empty primary.text_feedback.
# Arguments: $1 - results directory
# Outputs:   summary lines on stdout
#######################################
report_aux_integration() {
  local results_dir=$1
  local recent_gen metrics_file aux_count feedback_len

  echo "🔍 Auxiliary Metrics Integration Check"
  echo "---"
  # Check a recent generation for auxiliary metrics in public_metrics
  recent_gen=$(find "$results_dir" -maxdepth 1 -type d -name "gen_*" | sort -V | tail -n 1)
  metrics_file="$recent_gen/results/metrics.json"

  # The -n guard matters: with no generations the path would collapse to
  # /results/metrics.json at the filesystem root.
  if [ -n "$recent_gen" ] && [ -f "$metrics_file" ]; then
    aux_count=$(jq -r '.primary.public | keys | map(select(startswith("aux_"))) | length' \
      "$metrics_file" 2>/dev/null) || aux_count=0
    # Anything that is not a plain non-negative integer counts as 0 so the
    # numeric comparison below cannot fail with "integer expression expected".
    case "$aux_count" in
      '' | *[!0-9]*) aux_count=0 ;;
    esac
    if [ "$aux_count" -gt 0 ]; then
      echo " ✅ Auxiliary metrics merged into primary.public"
      echo " 📊 Found $aux_count aux_ metrics"
    else
      echo " ⚠️ No aux_ metrics found in primary.public"
    fi

    feedback_len=$(jq -r '.primary.text_feedback // "" | length' \
      "$metrics_file" 2>/dev/null) || feedback_len=0
    case "$feedback_len" in
      '' | *[!0-9]*) feedback_len=0 ;;
    esac
    if [ "$feedback_len" -gt 0 ]; then
      echo " ✅ text_feedback present (likely contains metric descriptions)"
    else
      echo " ⚠️ No text_feedback found"
    fi
  fi
  echo ""
}

report_aux_integration "$RESULTS_DIR"
# Closing banner and pointers to the detailed artefacts.
# An unquoted * after echo is expanded by the shell to the file names in the
# current directory, so the 80-character rule is built explicitly instead:
# pad an empty string to 80 spaces, then replace every space with '='.
printf -v SEPARATOR '%*s' 80 ''
SEPARATOR=${SEPARATOR// /=}

echo "$SEPARATOR"
echo "Analysis complete!"
echo "$SEPARATOR"
echo ""
echo "To view detailed results:"
echo " - Best program: $RESULTS_DIR/best/main.py"
echo " - Agent documentation: $RESULTS_DIR/eval_agent_memory/EVAL_AGENTS.md"
echo " - All metrics: find $RESULTS_DIR -name 'metrics.json'"
echo ""