augustocsc's picture
GPT-2 Medium trained on prefix dataset (682K)
3742716 verified
#!/bin/bash
# Automatic Model Analysis Script
#
# Runs evaluation and generation analysis after training, pulls metrics out
# of the training log, and writes a Markdown report.
#
# Usage: analyze_model.sh [MODEL_PATH] [DATA_COLUMN]
#   MODEL_PATH   model to analyze   (default: ./output/Se124M_700K_infix)
#   DATA_COLUMN  dataset column     (default: i_prompt_n)

# Stop at the first unhandled command failure.
set -e

# Colors: ANSI escape sequences, stored unexpanded and interpreted later by
# `echo -e` inside the print helpers.
NC='\033[0m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
# Print an informational message to stdout, prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted by `echo -e`)
print_status() {
  local message=$1
  echo -e "${GREEN}[INFO]${NC} ${message}"
}
# Print a section banner to stdout: a blank line, the title framed by two
# rules of '=' characters (in blue), and a trailing blank line.
# Globals:   BLUE, NC (read)
# Arguments: $1 - banner title
print_header() {
  local rule='========================================'
  echo -e "\n${BLUE}${rule}\n$1\n${rule}${NC}\n"
}
# Parameters
# Both positional arguments are optional and fall back to the defaults below
# when missing or empty:
#   $1 - path of the model to analyze
#   $2 - dataset column handed to the evaluation script
MODEL_PATH=${1:-./output/Se124M_700K_infix}
DATA_COLUMN=${2:-i_prompt_n}

# Fixed settings for this analysis run.
DATASET_REPO='augustocsc/sintetico_natural'
DATA_DIR='700K'
NUM_SAMPLES=500       # samples passed to the evaluation step
NUM_GENERATIONS=100   # generations requested in the generation step
# Directories
PROJECT_DIR='/home/ubuntu/seriguela'
# The timestamp suffix gives every run its own results directory.
OUTPUT_DIR="${HOME}/analysis_results_$(date '+%Y%m%d_%H%M%S')"
mkdir -p "$OUTPUT_DIR"

# Work from the project root (the scripts/ and venv/ paths below are relative
# to it) and activate the project's virtual environment.
cd "$PROJECT_DIR"
. venv/bin/activate

print_header "Automatic Model Analysis"
print_status "Model: $MODEL_PATH"
print_status "Output: $OUTPUT_DIR"
echo
# =============================================================================
# 1. EVALUATE MODEL
# =============================================================================
print_header "Step 1: Model Evaluation"
print_status "Running evaluation on $NUM_SAMPLES samples..."
# stdout and stderr are shown live and also saved to evaluation.log.
python scripts/evaluate.py \
  --model_path "$MODEL_PATH" \
  --dataset_repo_id "$DATASET_REPO" \
  --data_dir "$DATA_DIR" \
  --data_column "$DATA_COLUMN" \
  --num_samples "$NUM_SAMPLES" \
  --output_dir "$OUTPUT_DIR/evaluation" \
  --temperature 0.7 \
  --seed 42 \
  2>&1 | tee "$OUTPUT_DIR/evaluation.log"
# After a pipeline `$?` holds the status of the LAST stage (tee), which would
# hide a failing evaluation. PIPESTATUS[0] is python's own exit status; it has
# to be read by the very next command, before anything else overwrites it.
EVAL_STATUS=${PIPESTATUS[0]}
if [ "$EVAL_STATUS" -eq 0 ]; then
  print_status "✅ Evaluation completed"
else
  # Deliberately non-fatal: the remaining steps still run and are reported.
  print_status "⚠️ Evaluation had issues (exit code $EVAL_STATUS)"
fi
# =============================================================================
# 2. GENERATE SAMPLES
# =============================================================================
print_header "Step 2: Sample Generation & Validation"
print_status "Generating $NUM_GENERATIONS samples with validation..."
# stdout and stderr are shown live and also saved to generation.log.
python scripts/generate.py \
  --model_path "$MODEL_PATH" \
  --num_generations "$NUM_GENERATIONS" \
  --validate \
  --output_file "$OUTPUT_DIR/generations.txt" \
  --temperature 0.8 \
  --top_p 0.95 \
  --seed 42 \
  2>&1 | tee "$OUTPUT_DIR/generation.log"
# After a pipeline `$?` holds the status of the LAST stage (tee), which would
# hide a failing generation run. PIPESTATUS[0] is python's own exit status; it
# has to be read by the very next command, before anything else overwrites it.
GEN_STATUS=${PIPESTATUS[0]}
if [ "$GEN_STATUS" -eq 0 ]; then
  print_status "✅ Generation completed"
else
  # Deliberately non-fatal: the remaining steps still run and are reported.
  print_status "⚠️ Generation had issues (exit code $GEN_STATUS)"
fi
# =============================================================================
# 3. ANALYZE TRAINING LOGS
# =============================================================================

# Print the step count N taken from the last "N/<total>" progress entry found
# in a log file.
# Arguments: $1 - log file path
#            $2 - total step count expected after the slash
# Outputs:   N on stdout, or 0 when there is no such entry or the file cannot
#            be read
extract_last_step() {
  local log_file=$1
  local total=$2
  local last_entry
  # -o prints every match on its own line, so the newest entry is found even
  # when many progress updates share one physical line. Matching the whole
  # "N/<total>" token keeps all digits of N (a greedy `.*` in front of the
  # digit group would leave only the final digit).
  last_entry=$(grep -oE "[0-9]+/${total}" "$log_file" 2>/dev/null | tail -n 1)
  last_entry=${last_entry%%/*}
  printf '%s\n' "${last_entry:-0}"
}

print_header "Step 3: Training Log Analysis"
print_status "Extracting training metrics..."
TRAINING_LOG="$HOME/training_success.log"
if [ -f "$TRAINING_LOG" ]; then
  # Extract loss values. grep exits non-zero when nothing matches; that is not
  # an error here, hence `|| true` under `set -e`.
  grep -E "'loss':|train_loss|eval_loss" "$TRAINING_LOG" > "$OUTPUT_DIR/training_metrics.txt" 2>/dev/null || true
  # Extract epoch summaries (last 20 matching lines)
  grep -E "epoch.*loss" "$TRAINING_LOG" | tail -20 > "$OUTPUT_DIR/epoch_summary.txt" 2>/dev/null || true
  # Count total steps: last "N/21882" progress entry in the log
  TOTAL_STEPS=$(extract_last_step "$TRAINING_LOG" 21882)
  print_status "Total training steps: $TOTAL_STEPS"
else
  print_status "Training log not found: $TRAINING_LOG (skipping log analysis)"
fi
# =============================================================================
# 4. CREATE SUMMARY REPORT
# =============================================================================

# Copy stdin to stdout, or print the fallback text when stdin is empty.
# Used instead of `cmd | head || echo fallback`, where the fallback can never
# fire because the pipeline's status is that of its last stage.
# Arguments: $1 - fallback text
or_fallback() {
  local fallback=$1
  local content
  content=$(cat)
  printf '%s\n' "${content:-$fallback}"
}

# Render the Markdown analysis report to stdout.
# Globals: OUTPUT_DIR (read), HOME (read)
# The here-document delimiter is unquoted on purpose so that $(...) and
# $OUTPUT_DIR are expanded while the report is written. Literal backticks are
# therefore escaped (\`) or inserted through ${fence}.
render_report() {
  local fence='```'
  cat << EOFREPORT
# Training Analysis Report
**Generated:** $(date)
## 📊 Model Information
- **Architecture:** GPT-2 Small (124M parameters)
- **Training Method:** LoRA (294K trainable parameters, 0.24%)
- **Dataset:** 700K samples (infix notation)
- **Training Duration:** $(grep "Training Duration:" "$HOME/training_notification.txt" 2>/dev/null | head -n 1 | or_fallback "N/A")
## 📈 Training Metrics
### Loss Progression
${fence}
$(tail -n 20 "$OUTPUT_DIR/training_metrics.txt" 2>/dev/null | or_fallback "No metrics available")
${fence}
### Epoch Summary
${fence}
$(cat "$OUTPUT_DIR/epoch_summary.txt" 2>/dev/null | or_fallback "No epoch data available")
${fence}
## 🎯 Evaluation Results
### Performance Metrics
${fence}
$(grep -E "Accuracy|Loss|Perplexity" "$OUTPUT_DIR/evaluation.log" 2>/dev/null | or_fallback "Check evaluation.log for details")
${fence}
### Sample Predictions
${fence}
$(head -n 50 "$OUTPUT_DIR"/evaluation/*.txt 2>/dev/null | head -n 20 | or_fallback "No evaluation samples found")
${fence}
## 🔮 Generation Quality
### Validation Results
${fence}
$(grep -E "Valid:|Success|Failed" "$OUTPUT_DIR/generation.log" 2>/dev/null | head -n 20 | or_fallback "Check generation.log")
${fence}
### Sample Generations
${fence}
$(head -n 30 "$OUTPUT_DIR/generations.txt" 2>/dev/null | or_fallback "No generations file found")
${fence}
## 📁 Output Files
- Evaluation results: \`evaluation/\`
- Generated samples: \`generations.txt\`
- Full logs: \`evaluation.log\`, \`generation.log\`
- Training metrics: \`training_metrics.txt\`
## 🔗 Resources
- **Wandb Dashboard:** https://wandb.ai/symbolic-gression/seriguela_700K_test
- **HuggingFace Model:** https://huggingface.co/augustocsc/Se124M_700K_infix
- **Analysis Directory:** $OUTPUT_DIR
---
*Generated automatically by analyze_model.sh*
EOFREPORT
}

print_header "Step 4: Creating Analysis Report"
# Written in a single pass. No `eval` is involved, so text copied from logs
# into the report is never re-interpreted by the shell.
render_report > "$OUTPUT_DIR/ANALYSIS_REPORT.md"
print_status "Report created: $OUTPUT_DIR/ANALYSIS_REPORT.md"
# =============================================================================
# 5. FINAL SUMMARY
# =============================================================================

# Print the number of lines in a file that match a pattern.
# Arguments: $1 - grep pattern
#            $2 - file path
# Outputs:   the count on stdout; 0 when nothing matches or the file is missing
count_matching_lines() {
  local pattern=$1
  local file=$2
  local count
  # `grep -c` already prints 0 when nothing matches but exits 1; appending
  # `|| echo 0` inside the substitution would yield two lines ("0" twice).
  # Ignore the status here and default only a truly empty result.
  count=$(grep -c -- "$pattern" "$file" 2>/dev/null) || true
  printf '%s\n' "${count:-0}"
}

print_header "Analysis Complete!"
echo ""
print_status "All results saved to: $OUTPUT_DIR"
print_status "Main report: $OUTPUT_DIR/ANALYSIS_REPORT.md"
echo ""
print_status "Key files:"
echo " - Evaluation: $OUTPUT_DIR/evaluation.log"
echo " - Generation: $OUTPUT_DIR/generation.log"
echo " - Metrics: $OUTPUT_DIR/training_metrics.txt"
echo " - Report: $OUTPUT_DIR/ANALYSIS_REPORT.md"
echo ""
print_status "View the full report with:"
echo " cat $OUTPUT_DIR/ANALYSIS_REPORT.md"
echo ""
# Create a quick summary
# NOTE(review): presumably the evaluation script marks successes with "✅" in
# its output - confirm against scripts/evaluate.py.
EVAL_SUCCESS=$(count_matching_lines "✅" "$OUTPUT_DIR/evaluation.log")
GEN_SUCCESS=$(count_matching_lines "Valid" "$OUTPUT_DIR/generation.log")
print_header "Quick Summary"
echo "Evaluation samples processed: $NUM_SAMPLES"
echo "Generations created: $NUM_GENERATIONS"
echo "Evaluation log lines marked with ✅: $EVAL_SUCCESS"
echo "Generation log lines containing 'Valid': $GEN_SUCCESS"
echo "Check logs for detailed metrics and quality assessment"
echo ""
print_status "Done!"