Refine CodonTranslator docs and public scripts
Browse files
slurm/rebuild_data_v3_cpu.sbatch
CHANGED
|
@@ -12,8 +12,10 @@
|
|
| 12 |
|
| 13 |
set -euo pipefail
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
MMSEQS_BIN=${MMSEQS_BIN:-$REPO_ROOT/MMseqs2/build/bin/mmseqs}
|
| 18 |
INPUT_GLOB=${INPUT_GLOB:-data_v2/*/*.parquet}
|
| 19 |
HELDOUT_GLOB=${HELDOUT_GLOB:-data_v2/test/*.parquet}
|
|
|
|
| 12 |
|
| 13 |
set -euo pipefail
|
| 14 |
|
| 15 |
+
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
| 16 |
+
# When launched via sbatch, Slurm runs a spooled copy of this script, so
# BASH_SOURCE[0] does not live inside the repository. Prefer the directory
# sbatch was invoked from when Slurm provides it; fall back to the script's
# parent directory for direct (non-Slurm) runs.
# NOTE(review): assumes jobs are submitted from the repository root; pass
# REPO_ROOT explicitly when submitting from elsewhere.
DEFAULT_REPO_ROOT=$(cd "${SLURM_SUBMIT_DIR:-${SCRIPT_DIR}/..}" && pwd)
|
| 17 |
+
REPO_ROOT=${REPO_ROOT:-${DEFAULT_REPO_ROOT}}
|
| 18 |
+
PYTHON_BIN=${PYTHON_BIN:-python}
|
| 19 |
MMSEQS_BIN=${MMSEQS_BIN:-$REPO_ROOT/MMseqs2/build/bin/mmseqs}
|
| 20 |
INPUT_GLOB=${INPUT_GLOB:-data_v2/*/*.parquet}
|
| 21 |
HELDOUT_GLOB=${HELDOUT_GLOB:-data_v2/test/*.parquet}
|
slurm/submit_train_v3_h200_8x_chain.sh
CHANGED
|
@@ -2,7 +2,9 @@
|
|
| 2 |
|
| 3 |
set -euo pipefail
|
| 4 |
|
| 5 |
-
cd
|
|
|
|
|
|
|
| 6 |
|
| 7 |
SEGMENTS=${SEGMENTS:-3}
|
| 8 |
SBATCH_SCRIPT=${SBATCH_SCRIPT:-slurm/train_v3_h200_8x_single.sbatch}
|
|
|
|
| 2 |
|
| 3 |
set -euo pipefail
|
| 4 |
|
| 5 |
+
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
| 6 |
+
REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
|
| 7 |
+
cd "${REPO_ROOT}"
|
| 8 |
|
| 9 |
SEGMENTS=${SEGMENTS:-3}
|
| 10 |
SBATCH_SCRIPT=${SBATCH_SCRIPT:-slurm/train_v3_h200_8x_single.sbatch}
|
slurm/train_v3_h200_8x_single.sbatch
CHANGED
|
@@ -2,13 +2,13 @@
|
|
| 2 |
# Single-node 8x H200 training entrypoint.
|
| 3 |
# Reserved single-node smoke-run example:
|
| 4 |
# sbatch --time=00:45:00 \
|
| 5 |
-
# --export=ALL,OUT_DIR=/
|
| 6 |
# slurm/train_v3_h200_8x_single.sbatch
|
| 7 |
# Full-run example:
|
| 8 |
# sbatch slurm/train_v3_h200_8x_single.sbatch
|
| 9 |
#
|
| 10 |
# Suggested W&B overrides:
|
| 11 |
-
# sbatch --export=ALL,WANDB_PROJECT=
|
| 12 |
# slurm/train_v3_h200_8x_single.sbatch
|
| 13 |
# If the environment is still configured for offline logging, override at submit time:
|
| 14 |
# sbatch --export=ALL,WANDB_MODE=online slurm/train_v3_h200_8x_single.sbatch
|
|
@@ -35,14 +35,16 @@ source ~/.bashrc
|
|
| 35 |
conda activate dna
|
| 36 |
set -u
|
| 37 |
|
| 38 |
-
cd
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
TRAIN_DATA=${TRAIN_DATA:-/
|
| 41 |
-
VAL_DATA=${VAL_DATA:-/
|
| 42 |
-
EMBED_DIR=${EMBED_DIR:-/
|
| 43 |
-
OUT_DIR=${OUT_DIR:-/
|
| 44 |
|
| 45 |
-
WANDB_PROJECT=${WANDB_PROJECT:-
|
| 46 |
WANDB_NAME=${WANDB_NAME:-$(basename "${OUT_DIR}")}
|
| 47 |
WANDB_RUN_ID=${WANDB_RUN_ID:-$(basename "${OUT_DIR}")}
|
| 48 |
WANDB_RESUME=${WANDB_RESUME:-allow}
|
|
|
|
| 2 |
# Single-node 8x H200 training entrypoint.
|
| 3 |
# Reserved single-node smoke-run example:
|
| 4 |
# sbatch --time=00:45:00 \
|
| 5 |
+
# --export=ALL,OUT_DIR=/path/to/outputs_codontranslator_smoke,MAX_STEPS=20,SAVE_STEPS=0,EVAL_INTERVAL=0 \
|
| 6 |
# slurm/train_v3_h200_8x_single.sbatch
|
| 7 |
# Full-run example:
|
| 8 |
# sbatch slurm/train_v3_h200_8x_single.sbatch
|
| 9 |
#
|
| 10 |
# Suggested W&B overrides:
|
| 11 |
+
# sbatch --export=ALL,WANDB_PROJECT=codontranslator,WANDB_NAME=codontranslator-run1 \
|
| 12 |
# slurm/train_v3_h200_8x_single.sbatch
|
| 13 |
# If the environment is still configured for offline logging, override at submit time:
|
| 14 |
# sbatch --export=ALL,WANDB_MODE=online slurm/train_v3_h200_8x_single.sbatch
|
|
|
|
| 35 |
conda activate dna
|
| 36 |
set -u
|
| 37 |
|
| 38 |
+
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
| 39 |
+
# Resolve the repository root, in order of preference:
#   1. REPO_ROOT supplied by the caller (e.g. sbatch --export=ALL,REPO_ROOT=...).
#   2. SLURM_SUBMIT_DIR, the directory sbatch was invoked from. Under sbatch,
#      BASH_SOURCE[0] is Slurm's spooled copy of this script, so the
#      script-relative path would point outside the repository.
#   3. The parent of the script directory, for direct (non-Slurm) runs.
# NOTE(review): option 2 assumes submission from the repository root, as in
# the usage examples at the top of this script.
REPO_ROOT=${REPO_ROOT:-$(cd "${SLURM_SUBMIT_DIR:-${SCRIPT_DIR}/..}" && pwd)}
|
| 40 |
+
# Only `set -u` is active here (no errexit), so fail explicitly rather than
# continuing in whatever directory the job happened to start in.
cd "${REPO_ROOT}" || { printf 'error: cannot cd to %s\n' "${REPO_ROOT}" >&2; exit 1; }
|
| 41 |
|
| 42 |
+
TRAIN_DATA=${TRAIN_DATA:-${REPO_ROOT}/data_v3_rebuild/train}
|
| 43 |
+
VAL_DATA=${VAL_DATA:-${REPO_ROOT}/data_v3_rebuild/val}
|
| 44 |
+
EMBED_DIR=${EMBED_DIR:-${REPO_ROOT}/embeddings_v2}
|
| 45 |
+
OUT_DIR=${OUT_DIR:-${REPO_ROOT}/outputs_codontranslator_h200_8x_bs48ga4}
|
| 46 |
|
| 47 |
+
WANDB_PROJECT=${WANDB_PROJECT:-codontranslator}
|
| 48 |
WANDB_NAME=${WANDB_NAME:-$(basename "${OUT_DIR}")}
|
| 49 |
WANDB_RUN_ID=${WANDB_RUN_ID:-$(basename "${OUT_DIR}")}
|
| 50 |
WANDB_RESUME=${WANDB_RESUME:-allow}
|