ambivalent02 committed on
Commit
4e569ae
·
verified ·
1 Parent(s): 080088d

Upload run.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. run.sh +17 -0
run.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#SBATCH --job-name=lfm2vl          # optional
#SBATCH --nodes=1
#SBATCH --gres=gpu:8               # <-- change to your GPU count
#SBATCH --ntasks-per-node=8
#SBATCH --time=08:00:00
#
# run.sh -- single-node launcher that starts ./finetune_custom_trainer.py
# under torchrun with one worker process per GPU.
#
# Optional environment overrides:
#   NUM_GPU      worker processes to spawn. Defaults to SLURM_GPUS_ON_NODE
#                when Slurm provides it, otherwise 8 (the value in --gres).
#   MASTER_PORT  torchrun rendezvous port (default: 29500). Override it when
#                several jobs may land on the same node.
#
# NOTE(review): torchrun is started once by this batch script and spawns the
# workers itself; confirm that --ntasks-per-node=8 is really what is wanted.

set -euo pipefail

# Print an error to stderr and abort.
die() {
  printf 'run.sh: %s\n' "$*" >&2
  exit 1
}

export CUDA_DEVICE_MAX_CONNECTIONS=1  # NCCL optimal
export TOKENIZERS_PARALLELISM=false

# Follow the actual allocation when available so the worker count cannot
# drift from --gres; fall back to the original hard-coded values otherwise.
NUM_GPU="${NUM_GPU:-${SLURM_GPUS_ON_NODE:-8}}"
MASTER_PORT="${MASTER_PORT:-29500}"
readonly TRAIN_SCRIPT="./finetune_custom_trainer.py"

# Fail early with a clear message instead of a launcher traceback.
[[ "${NUM_GPU}" =~ ^[1-9][0-9]*$ ]] \
  || die "NUM_GPU must be a positive integer, got '${NUM_GPU}'"
[[ "${MASTER_PORT}" =~ ^[1-9][0-9]*$ ]] && (( MASTER_PORT <= 65535 )) \
  || die "MASTER_PORT must be in 1-65535, got '${MASTER_PORT}'"
command -v torchrun >/dev/null \
  || die "torchrun not found on PATH"
[[ -f "${TRAIN_SCRIPT}" ]] \
  || die "training script not found: ${TRAIN_SCRIPT} (cwd: ${PWD})"

# The MLflow settings are passed as per-command assignments so they apply
# to this torchrun invocation only.
MLFLOW_TRACKING_URI="./mlruns/finetune_lfm" \
MLFLOW_OFFLINE_MODE=true \
MLFLOW_EXPERIMENT_NAME="lfm_further_hf" \
  torchrun \
    --nproc_per_node="${NUM_GPU}" \
    --master_port="${MASTER_PORT}" \
    "${TRAIN_SCRIPT}"