# Model provenance (from model card): augustocsc — GPT-2 Medium trained on prefix dataset (682K)
#!/bin/bash
# EXP-B: Training with GPT-2 EOS token (<|endoftext|>)
# Uses native GPT-2 EOS token (ID 50256)
#
# Requires: ./data/experiments/exp_b_eos/{train,validation}.csv
#           a virtualenv at ~/seriguela/venv
# Optional env: HF_TOKEN, WANDB_API_KEY (exported empty if unset so the
#               trainer sees a defined variable either way)
set -euo pipefail

echo "=============================================="
echo "EXP-B: EOS Token Format Training"
echo "=============================================="

cd "$HOME/seriguela"

# Activate virtual environment
# shellcheck disable=SC1091  # venv path only exists on the training host
source venv/bin/activate

# Check data exists — validate BOTH files up front, since validation.csv is
# also passed to the trainer and a missing file would otherwise fail late
# inside Python after startup overhead.
for csv in train.csv validation.csv; do
  if [ ! -f "./data/experiments/exp_b_eos/$csv" ]; then
    echo "ERROR: Training data not found!" >&2
    echo "Expected: ./data/experiments/exp_b_eos/$csv" >&2
    exit 1
  fi
done

# Count samples (note: raw line count — includes the CSV header row)
TRAIN_COUNT=$(wc -l < ./data/experiments/exp_b_eos/train.csv)
echo "Training samples: $TRAIN_COUNT"

# Training configuration
export WANDB_PROJECT="seriguela_experiments"
export HF_TOKEN="${HF_TOKEN:-}"
export WANDB_API_KEY="${WANDB_API_KEY:-}"

# Run training
# Effective batch size = 8 (per-device) * 4 (grad accumulation) = 32
echo ""
echo "Starting training..."
echo "Output: ./output/exp_b_eos"
echo ""
python scripts/train_experiment.py \
  --experiment_name "exp_b_eos" \
  --train_file ./data/experiments/exp_b_eos/train.csv \
  --validation_file ./data/experiments/exp_b_eos/validation.csv \
  --output_dir ./output/exp_b_eos \
  --end_marker "<|endoftext|>" \
  --use_native_eos \
  --num_train_epochs 3 \
  --per_device_train_batch_size 8 \
  --gradient_accumulation_steps 4 \
  --learning_rate 5e-5 \
  --block_size 128 \
  --fp16 \
  --wandb_project seriguela_experiments \
  --wandb_run_name "exp_b_eos_$(date +%Y%m%d_%H%M%S)"

echo ""
echo "=============================================="
echo "EXP-B Training Complete!"
echo "=============================================="
echo "Model saved to: ./output/exp_b_eos"