gpt2_medium_prefix_682k/scripts/aws/analyze_model.sh

GPT-2 Medium trained on prefix dataset (682K)

3742716 verified 5 days ago

6.4 kB

	#!/bin/bash
	# Automatic Model Analysis Script
	# Runs evaluation and generation analysis after training.
	#
	# Usage: analyze_model.sh [MODEL_PATH] [DATA_COLUMN]

	set -e

	# Colors
	# Stored as real ESC bytes (ANSI-C quoting) so they can be emitted with a
	# plain '%s' and never require escape interpretation at print time.
	GREEN=$'\033[0;32m'
	YELLOW=$'\033[1;33m'
	BLUE=$'\033[0;34m'
	NC=$'\033[0m'

	# print_status MESSAGE
	# Writes "[INFO] MESSAGE" to stdout with a green tag. MESSAGE is printed
	# verbatim: backslashes in it (for example inside a path) are not interpreted.
	print_status() { printf '%s[INFO]%s %s\n' "$GREEN" "$NC" "$1"; }

	# print_header TITLE
	# Writes a blue banner to stdout: a blank line, a rule, TITLE, a rule, and a
	# trailing blank line. TITLE is printed verbatim.
	print_header() {
	  printf '\n%s========================================\n%s\n========================================%s\n\n' \
	    "$BLUE" "$1" "$NC"
	}

	# Parameters
	# Positional arguments:
	#   $1  model path (a relative path is resolved against PROJECT_DIR, because
	#       the script changes into that directory below)
	#   $2  dataset column passed to the evaluation script
	MODEL_PATH="${1:-./output/Se124M_700K_infix}"
	DATA_COLUMN="${2:-i_prompt_n}"

	# The settings below keep their previous values by default; each one can be
	# overridden by exporting a variable of the same name before running.
	DATASET_REPO="${DATASET_REPO:-augustocsc/sintetico_natural}"
	DATA_DIR="${DATA_DIR:-700K}"
	NUM_SAMPLES="${NUM_SAMPLES:-500}"
	NUM_GENERATIONS="${NUM_GENERATIONS:-100}"

	# Directories
	PROJECT_DIR="${PROJECT_DIR:-/home/ubuntu/seriguela}"
	OUTPUT_DIR="$HOME/analysis_results_$(date +%Y%m%d_%H%M%S)"

	# Validate the environment before creating anything, so a misconfigured host
	# fails with a clear message and does not leave an empty results directory.
	if [ ! -d "$PROJECT_DIR" ]; then
	  echo "[ERROR] Project directory not found: $PROJECT_DIR" >&2
	  exit 1
	fi
	if [ ! -f "$PROJECT_DIR/venv/bin/activate" ]; then
	  echo "[ERROR] Virtualenv not found: $PROJECT_DIR/venv/bin/activate" >&2
	  exit 1
	fi

	mkdir -p "$OUTPUT_DIR"

	cd "$PROJECT_DIR"
	source venv/bin/activate

	print_header "Automatic Model Analysis"
	print_status "Model: $MODEL_PATH"
	print_status "Output: $OUTPUT_DIR"
	echo ""

	# =============================================================================
	# 1. EVALUATE MODEL
	# =============================================================================
	# Runs scripts/evaluate.py and mirrors its combined stdout/stderr to both the
	# terminal and $OUTPUT_DIR/evaluation.log.
	print_header "Step 1: Model Evaluation"
	print_status "Running evaluation on $NUM_SAMPLES samples..."

	# errexit is suspended around the pipeline so that neither a tee failure nor
	# (should pipefail ever be enabled) a python failure aborts the script before
	# the outcome is reported. PIPESTATUS[0] is read because the pipeline's own
	# exit status is tee's, not python's.
	set +e
	python scripts/evaluate.py \
	  --model_path "$MODEL_PATH" \
	  --dataset_repo_id "$DATASET_REPO" \
	  --data_dir "$DATA_DIR" \
	  --data_column "$DATA_COLUMN" \
	  --num_samples "$NUM_SAMPLES" \
	  --output_dir "$OUTPUT_DIR/evaluation" \
	  --temperature 0.7 \
	  --seed 42 \
	  2>&1 | tee "$OUTPUT_DIR/evaluation.log"
	EVAL_EXIT_CODE=${PIPESTATUS[0]}
	set -e

	if [ "$EVAL_EXIT_CODE" -eq 0 ]; then
	  print_status "✅ Evaluation completed"
	else
	  print_status "⚠️ Evaluation had issues"
	fi

	# =============================================================================
	# 2. GENERATE SAMPLES
	# =============================================================================
	# Runs scripts/generate.py with validation enabled and mirrors its combined
	# stdout/stderr to both the terminal and $OUTPUT_DIR/generation.log.
	print_header "Step 2: Sample Generation & Validation"
	print_status "Generating $NUM_GENERATIONS samples with validation..."

	# errexit is suspended around the pipeline so that neither a tee failure nor
	# (should pipefail ever be enabled) a python failure aborts the script before
	# the outcome is reported. PIPESTATUS[0] is read because the pipeline's own
	# exit status is tee's, not python's.
	set +e
	python scripts/generate.py \
	  --model_path "$MODEL_PATH" \
	  --num_generations "$NUM_GENERATIONS" \
	  --validate \
	  --output_file "$OUTPUT_DIR/generations.txt" \
	  --temperature 0.8 \
	  --top_p 0.95 \
	  --seed 42 \
	  2>&1 | tee "$OUTPUT_DIR/generation.log"
	GEN_EXIT_CODE=${PIPESTATUS[0]}
	set -e

	if [ "$GEN_EXIT_CODE" -eq 0 ]; then
	  print_status "✅ Generation completed"
	else
	  print_status "⚠️ Generation had issues"
	fi

	# =============================================================================
	# 3. ANALYZE TRAINING LOGS
	# =============================================================================
	# Extracts loss lines, epoch summaries and the last reported step number from
	# the training log, when that log exists. Nothing is written otherwise.
	print_header "Step 3: Training Log Analysis"
	print_status "Extracting training metrics..."

	TRAINING_LOG="$HOME/training_success.log"
	# Denominator of the "<step>/<total>" progress counter searched for in the
	# log. Defaults to the previously hard-coded value; override via environment.
	EXPECTED_TOTAL_STEPS="${EXPECTED_TOTAL_STEPS:-21882}"

	if [ -f "$TRAINING_LOG" ]; then
	  # Extract loss values. grep exits non-zero when nothing matches, which is
	  # not an error here, hence the "|| true".
	  grep -E "'loss':|train_loss|eval_loss" "$TRAINING_LOG" \
	    > "$OUTPUT_DIR/training_metrics.txt" 2>/dev/null || true

	  # Extract epoch summaries (last 20 matching lines)
	  grep -E "epoch.*loss" "$TRAINING_LOG" 2>/dev/null \
	    | tail -n 20 > "$OUTPUT_DIR/epoch_summary.txt" || true

	  # Count total steps: -o isolates each "<step>/<total>" occurrence, so the
	  # full step number is kept and several counters on one line are handled.
	  TOTAL_STEPS=$(grep -oE "[0-9]+/${EXPECTED_TOTAL_STEPS}" "$TRAINING_LOG" 2>/dev/null \
	    | tail -n 1 | cut -d/ -f1 || true)
	  # The pipeline ends in cut, which succeeds on empty input, so the "no match"
	  # case has to be detected by emptiness rather than by exit status.
	  TOTAL_STEPS="${TOTAL_STEPS:-0}"

	  print_status "Total training steps: $TOTAL_STEPS"
	fi

	# =============================================================================
	# 4. CREATE SUMMARY REPORT
	# =============================================================================
	# Assembles $OUTPUT_DIR/ANALYSIS_REPORT.md from the logs and result files
	# produced by the earlier steps.
	print_header "Step 4: Creating Analysis Report"

	REPORT_FILE="$OUTPUT_DIR/ANALYSIS_REPORT.md"

	# Each dynamic section is collected up front. A missing file or an empty
	# result is expected, so failures are ignored here and replaced by a
	# placeholder through the ${VAR:-text} defaults in the template below.
	REPORT_DURATION=$(grep "Training Duration:" "$HOME/training_notification.txt" 2>/dev/null | head -n 1 || true)
	REPORT_LOSS=$(tail -n 20 "$OUTPUT_DIR/training_metrics.txt" 2>/dev/null || true)
	REPORT_EPOCHS=$(cat "$OUTPUT_DIR/epoch_summary.txt" 2>/dev/null || true)
	REPORT_EVAL_METRICS=$(grep -E "Accuracy|Loss|Perplexity" "$OUTPUT_DIR/evaluation.log" 2>/dev/null || true)
	REPORT_EVAL_SAMPLES=$(head -n 50 "$OUTPUT_DIR"/evaluation/*.txt 2>/dev/null | head -n 20 || true)
	REPORT_GEN_VALIDATION=$(grep -E "Valid:|Success|Failed" "$OUTPUT_DIR/generation.log" 2>/dev/null | head -n 20 || true)
	REPORT_GEN_SAMPLES=$(head -n 30 "$OUTPUT_DIR/generations.txt" 2>/dev/null || true)

	# The delimiter is unquoted so $(date) and the variables expand in one pass,
	# with no eval. "<<-" strips leading tabs from the template and its
	# terminator. Backticks are escaped because an unquoted here-document would
	# otherwise treat them as command substitution.
	cat > "$REPORT_FILE" <<-EOFREPORT
	# Training Analysis Report
	Generated: $(date)

	## 📊 Model Information
	- Architecture: GPT-2 Small (124M parameters)
	- Training Method: LoRA (294K trainable parameters, 0.24%)
	- Dataset: 700K samples (infix notation)
	- Training Duration: ${REPORT_DURATION:-N/A}

	## 📈 Training Metrics

	### Loss Progression
	\`\`\`
	${REPORT_LOSS:-No metrics available}
	\`\`\`

	### Epoch Summary
	\`\`\`
	${REPORT_EPOCHS:-No epoch data available}
	\`\`\`

	## 🎯 Evaluation Results

	### Performance Metrics
	\`\`\`
	${REPORT_EVAL_METRICS:-Check evaluation.log for details}
	\`\`\`

	### Sample Predictions
	\`\`\`
	${REPORT_EVAL_SAMPLES:-No evaluation samples found}
	\`\`\`

	## 🔮 Generation Quality

	### Validation Results
	\`\`\`
	${REPORT_GEN_VALIDATION:-Check generation.log}
	\`\`\`

	### Sample Generations
	\`\`\`
	${REPORT_GEN_SAMPLES:-No generations file found}
	\`\`\`

	## 📁 Output Files
	- Evaluation results: \`evaluation/\`
	- Generated samples: \`generations.txt\`
	- Full logs: \`evaluation.log\`, \`generation.log\`
	- Training metrics: \`training_metrics.txt\`

	## 🔗 Resources
	- Wandb Dashboard: https://wandb.ai/symbolic-gression/seriguela_700K_test
	- HuggingFace Model: https://huggingface.co/augustocsc/Se124M_700K_infix
	- Analysis Directory: $OUTPUT_DIR

	---
	Generated automatically by analyze_model.sh
	EOFREPORT

	print_status "Report created: $REPORT_FILE"

	# =============================================================================
	# 5. FINAL SUMMARY
	# =============================================================================
	# Prints where the results were written, followed by a short recap.
	print_header "Analysis Complete!"
	echo ""
	print_status "All results saved to: $OUTPUT_DIR"
	print_status "Main report: $OUTPUT_DIR/ANALYSIS_REPORT.md"
	echo ""
	print_status "Key files:"
	echo " - Evaluation: $OUTPUT_DIR/evaluation.log"
	echo " - Generation: $OUTPUT_DIR/generation.log"
	echo " - Metrics: $OUTPUT_DIR/training_metrics.txt"
	echo " - Report: $OUTPUT_DIR/ANALYSIS_REPORT.md"
	echo ""
	print_status "View the full report with:"
	echo " cat $OUTPUT_DIR/ANALYSIS_REPORT.md"
	echo ""

	# Create a quick summary
	# grep -c prints "0" and also exits non-zero when nothing matches, so an
	# 'echo "0"' fallback would produce two lines ("0" twice). The exit status is
	# ignored instead, and 0 is substituted only when grep printed nothing at all
	# (for example because the log file does not exist).
	EVAL_SUCCESS=$(grep -c "✅" "$OUTPUT_DIR/evaluation.log" 2>/dev/null || true)
	EVAL_SUCCESS="${EVAL_SUCCESS:-0}"
	GEN_SUCCESS=$(grep -c "Valid" "$OUTPUT_DIR/generation.log" 2>/dev/null || true)
	GEN_SUCCESS="${GEN_SUCCESS:-0}"

	print_header "Quick Summary"
	echo "Evaluation samples processed: $NUM_SAMPLES"
	echo "Generations created: $NUM_GENERATIONS"
	echo "Lines marked ✅ in evaluation.log: $EVAL_SUCCESS"
	echo "Lines matching 'Valid' in generation.log: $GEN_SUCCESS"
	echo "Check logs for detailed metrics and quality assessment"
	echo ""
	print_status "Done!"