#!/bin/bash
#
# CogNet-1B environment setup.
#
# Steps performed, in order:
#   1. Install Python dependencies with pip. PyTorch is first requested from
#      the CUDA 12.1 wheel index; if that install fails, the default package
#      index is used instead.
#   2. Warn (on stderr) when HF_TOKEN is not set in the environment.
#   3. Create the data_1b/ and checkpoints_1b/ directories relative to the
#      current working directory.
# Finally a short list of follow-up commands is printed.
#
# Environment:
#   HF_TOKEN  Optional. Only checked for being non-empty; never printed.

# -e: abort on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail

echo "=========================================="
echo "CogNet-1B Setup"
echo "=========================================="

echo "[1/3] Installing dependencies..."
# Prefer the CUDA 12.1 build. pip's own error output is left visible so the
# reason for a fallback can be diagnosed, and the fallback is announced
# instead of happening silently.
if ! pip install torch --index-url https://download.pytorch.org/whl/cu121; then
  echo "WARNING: CUDA 12.1 torch install failed; falling back to the default index" >&2
  pip install torch
fi
pip install datasets huggingface_hub tokenizers safetensors

echo ""
echo "[2/3] Setting up HuggingFace token..."
# ${HF_TOKEN:-} keeps this check safe under `set -u` when the variable is unset.
if [[ -z "${HF_TOKEN:-}" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  {
    echo "WARNING: HF_TOKEN not set!"
    echo "Set it with: export HF_TOKEN=hf_your_token_here"
    echo "Or login with: huggingface-cli login"
  } >&2
fi

echo ""
echo "[3/3] Creating directories..."
mkdir -p data_1b checkpoints_1b

echo ""
echo "=========================================="
echo "Setup complete!"
echo ""
echo "Quick commands:"
echo " # Prepare datasets (downloads + tokenizes everything)"
echo " python train_ultra.py --skip-data-prep=False --max-steps 0"
echo ""
echo " # Train on single GPU"
echo " python train_ultra.py --max-steps 100000"
echo ""
echo " # Train on multi-GPU"
echo " torchrun --nproc_per_node=4 train_ultra.py --use-fsdp --max-steps 100000"
echo ""
echo " # Train on ACIL/RunPod"
echo " export HF_TOKEN=your_token"
echo " sbatch acil_submit.sh"
echo "=========================================="
|