#!/bin/bash
#
# Compare two model checkpoints by running scripts/compare_models.py.
#
# Steps performed, in order:
#   1. Print a banner naming the two models.
#   2. Activate the virtualenv at ~/seriguela/venv and cd into ~/seriguela.
#   3. Mirror all further stdout/stderr into a timestamped log file
#      (created in the project directory, since it is a relative path).
#   4. Report GPU availability via nvidia-smi (informational only).
#   5. Run the comparison and print where the results were written.
#
# Usage: run with no arguments.
# Exit status: 0 on success; non-zero if setup or the comparison fails.

# Stop on the first failing command and treat a pipeline as failed when any
# stage fails. `-u` is deliberately left off because the sourced virtualenv
# activate script is outside this file's control and may read unset variables.
set -eo pipefail

# Settings shared between the messages and the python invocation, so the
# banner text cannot drift from what is actually evaluated.
readonly PROJECT_DIR="$HOME/seriguela"
readonly VENV_ACTIVATE="$PROJECT_DIR/venv/bin/activate"
readonly MODEL1="augustocsc/Se124M_700K_infix"
readonly MODEL2="augustocsc/Se124M_700K_infix_v2"
readonly NUM_SAMPLES=500
readonly OUTPUT_DIR="./evaluation_results/comparison"

# Print an error message to stderr and exit with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

echo "=========================================="
echo "Model Comparison: v1 vs v2"
echo "=========================================="
echo "Model 1: ${MODEL1} (original)"
echo "Model 2: ${MODEL2} (with <|endofex|> token)"
echo "=========================================="
echo ""

# Environment setup: fail with a clear message instead of a bare shell error.
[[ -f "$VENV_ACTIVATE" ]] || die "virtualenv activate script not found: $VENV_ACTIVATE"
# shellcheck source=/dev/null
source "$VENV_ACTIVATE"
cd "$PROJECT_DIR" || die "cannot cd to $PROJECT_DIR"

# From here on, everything written to stdout or stderr is also appended to
# the log file through tee.
LOG_FILE="evaluation_$(date +%Y%m%d_%H%M%S).log"
readonly LOG_FILE
exec > >(tee -a "$LOG_FILE") 2>&1

echo "[$(date)] Starting evaluation..."
echo ""

# GPU check: a missing or failing nvidia-smi only produces a warning; the
# evaluation still runs.
echo "Checking GPU..."
if nvidia-smi &> /dev/null; then
  nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader
  echo ""
else
  echo "WARNING: No GPU detected. Evaluation will be slow." >&2
  echo "" >&2
fi

echo "Running model comparison..."
echo "This will evaluate both models on ${NUM_SAMPLES} samples from the test set."
echo ""

# Arguments are kept in an array so values containing spaces or shell
# metacharacters (e.g. "<|endofex|>") are passed through verbatim.
compare_args=(
  --model1 "$MODEL1"
  --model2 "$MODEL2"
  --model1_name "Original (no end token)"
  --model2_name "V2 (with <|endofex|>)"
  --num_samples "$NUM_SAMPLES"
  --dataset_repo_id augustocsc/sintetico_natural
  --data_dir 700K
  --data_column i_prompt_n
  --output_dir "$OUTPUT_DIR"
)

# Capture the status explicitly so a failure is reported (and logged) before
# exiting, rather than the script silently stopping under `set -e`.
status=0
python scripts/compare_models.py "${compare_args[@]}" || status=$?
if (( status != 0 )); then
  echo "ERROR: model comparison failed with exit code ${status}. See log file: $LOG_FILE" >&2
  exit "$status"
fi

echo ""
echo "=========================================="
echo "Evaluation Complete!"
echo "=========================================="
echo "Results saved to: ${OUTPUT_DIR}"
echo "Log file: $LOG_FILE"
echo ""
echo "To view results:"
echo " cat ${OUTPUT_DIR}/comparison_*.json | jq"
echo ""