#!/bin/bash
#
# EXP-B: complete setup and training run for the EOS-format experiment.
#
# Runs three steps from the project directory, inside its virtualenv:
#   1. scripts/data/prepare_experiment_data.py  -> ./data/experiments/exp_b_eos/*.csv
#   2. scripts/train_experiment.py              -> ./output/exp_b_eos
#   3. scripts/evaluate_experiments.py          -> ./output/exp_b_eos/evaluation_results.json
#
# On success the marker file /home/ubuntu/.exp_b_complete is created.
# Any failing step aborts the script (set -e) and leaves no marker behind.
#
# Usage: bash <this script>   (takes no arguments)

# Abort on the first failing command. '-u' is intentionally not enabled
# because venv/bin/activate is sourced below and may read unset variables.
set -e

readonly PROJECT_DIR="/home/ubuntu/seriguela"
readonly DATA_BASE_DIR="./data/experiments"
readonly EXP_DATA_DIR="${DATA_BASE_DIR}/exp_b_eos"
readonly TRAIN_FILE="${EXP_DATA_DIR}/train.csv"
readonly VALIDATION_FILE="${EXP_DATA_DIR}/validation.csv"
readonly OUTPUT_DIR="./output/exp_b_eos"
readonly RESULTS_FILE="${OUTPUT_DIR}/evaluation_results.json"
readonly COMPLETE_MARKER="/home/ubuntu/.exp_b_complete"

# Drop a marker left over from an earlier run so that a failure in this run
# cannot be mistaken for a completed experiment.
rm -f -- "$COMPLETE_MARKER"

echo "=============================================="
echo "EXP-B: Complete Setup and Training"
echo "EOS Format with <|endoftext|> marker"
echo "=============================================="
echo "Started: $(date)"
echo ""

cd "$PROJECT_DIR"

# Activate the project's virtual environment (path is relative to PROJECT_DIR).
source venv/bin/activate

# ---------------------------------------------------------------------------
# Step 1: data preparation
# ---------------------------------------------------------------------------
echo "[1/3] Preparing training data..."
echo "This will download from HuggingFace Hub and convert to EOS format"
echo ""

mkdir -p "$DATA_BASE_DIR"

python scripts/data/prepare_experiment_data.py \
  --dataset_repo_id augustocsc/sintetico_natural \
  --data_dir 700K \
  --data_column i_prompt_n \
  --output_base_dir "$DATA_BASE_DIR"

# The preparation script may exit 0 without producing the files this
# experiment needs, so verify every file the training step is given.
for required_file in "$TRAIN_FILE" "$VALIDATION_FILE"; do
  if [[ ! -f "$required_file" ]]; then
    echo "ERROR: Data preparation failed!" >&2
    echo "Missing expected file: $required_file" >&2
    exit 1
  fi
done

# NOTE(review): wc -l counts every line in the CSV, so a header row (if the
# file has one) is included in this figure - confirm whether to subtract 1.
TRAIN_COUNT=$(wc -l < "$TRAIN_FILE")
echo "Training samples: $TRAIN_COUNT"

# ---------------------------------------------------------------------------
# Step 2: training
# ---------------------------------------------------------------------------
echo ""
echo "[2/3] Starting training..."
echo "Output: $OUTPUT_DIR"
echo ""

# The W&B run name carries a timestamp so repeated runs get distinct names.
python scripts/train_experiment.py \
  --experiment_name "exp_b_eos" \
  --train_file "$TRAIN_FILE" \
  --validation_file "$VALIDATION_FILE" \
  --output_dir "$OUTPUT_DIR" \
  --end_marker "<|endoftext|>" \
  --use_native_eos \
  --num_train_epochs 3 \
  --per_device_train_batch_size 8 \
  --gradient_accumulation_steps 4 \
  --learning_rate 5e-5 \
  --block_size 128 \
  --fp16 \
  --wandb_project seriguela_experiments \
  --wandb_run_name "exp_b_eos_$(date +%Y%m%d_%H%M%S)"

# ---------------------------------------------------------------------------
# Step 3: evaluation
# ---------------------------------------------------------------------------
echo ""
echo "[3/3] Evaluating model..."
echo ""

python scripts/evaluate_experiments.py \
  --model_path "$OUTPUT_DIR" \
  --experiment_type eos \
  --num_samples 200 \
  --output_file "$RESULTS_FILE"

echo ""
echo "=============================================="
echo "EXP-B Complete!"
echo "=============================================="
echo "Finished: $(date)"
echo "Model: $OUTPUT_DIR"
echo "Results: $RESULTS_FILE"

# Signal successful completion; only reached when every step above succeeded.
touch "$COMPLETE_MARKER"