Upload run.sh with huggingface_hub
Browse files
run.sh
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
#SBATCH --job-name=lfm2vl # optional
#SBATCH --nodes=1
#SBATCH --gres=gpu:8 # <-- change to your GPU count
#SBATCH --ntasks-per-node=8
#SBATCH --time=08:00:00
#
# Single-node launcher: starts ./finetune_custom_trainer.py under torchrun
# with one worker process per GPU and MLflow tracking written to ./mlruns.
#
# Usage:
#   sbatch run.sh
#
# Optional environment overrides:
#   NUM_GPU      worker processes to start (default: SLURM_GPUS_ON_NODE, else 8)
#   MASTER_PORT  torchrun rendezvous port   (default: 29500)
#
# NOTE: the #SBATCH directives above must stay ahead of the first executable
# command, which is why strict mode is enabled only after them.

set -euo pipefail

# Original author's note on this setting: "NCCL optimal".
# NOTE(review): confirm this value is actually wanted for this trainer.
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM=false

# MLflow settings, exported so the torchrun process and its workers inherit them.
export MLFLOW_TRACKING_URI="./mlruns/finetune_lfm"
export MLFLOW_OFFLINE_MODE=true
export MLFLOW_EXPERIMENT_NAME="lfm_further_hf"

# Must match --gres above. Prefer the GPU count Slurm reports for this node so
# the two cannot drift apart; fall back to the original hard-coded 8 when the
# script is run outside of a Slurm GPU allocation.
NUM_GPU="${NUM_GPU:-${SLURM_GPUS_ON_NODE:-8}}"
MASTER_PORT="${MASTER_PORT:-29500}"

# Fail early with a readable message instead of a torchrun argument error.
if [[ ! "${NUM_GPU}" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: NUM_GPU must be a positive integer, got "%s"\n' "${NUM_GPU}" >&2
  exit 1
fi
if [[ ! "${MASTER_PORT}" =~ ^[0-9]+$ ]]; then
  printf 'error: MASTER_PORT must be numeric, got "%s"\n' "${MASTER_PORT}" >&2
  exit 1
fi

torchrun \
  --nproc_per_node="${NUM_GPU}" \
  --master_port="${MASTER_PORT}" \
  ./finetune_custom_trainer.py
|