fix remote train processed cache path
Browse files- scripts/remote_train.sh +7 -4
scripts/remote_train.sh
CHANGED
|
@@ -41,8 +41,8 @@ export DEEPGENOPIX_NUM_WORKERS=4
|
|
| 41 |
|
| 42 |
MODE=$([ "${RESUME:-0}" = "1" ] && echo resume || echo train)
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
PROCESSED_SIGNATURE="$(uv run python -c "
| 46 |
from deepgenopix.notebook_support import build_experiment_config
|
| 47 |
cfg = build_experiment_config('$PRESET')
|
| 48 |
parts = [
|
|
@@ -53,12 +53,15 @@ parts = [
|
|
| 53 |
f'tf{str(cfg.test_frac).replace(\".\", \"p\")}',
|
| 54 |
f'seed{cfg.split_seed}',
|
| 55 |
f'mf{cfg.min_family_size}',
|
| 56 |
-
'ds$DATA_REVISION_SHORT',
|
| 57 |
]
|
| 58 |
print('_'.join(parts))
|
| 59 |
")"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
PIXEL_STRIDE="$(uv run python -c "from deepgenopix.notebook_support import build_experiment_config; print(build_experiment_config('$PRESET').pixel_stride_bp)")"
|
| 61 |
-
PROCESSED_DIR="data/processed/te_visuals/$PROCESSED_SIGNATURE"
|
| 62 |
mkdir -p "$PROCESSED_DIR"
|
| 63 |
|
| 64 |
# Try to pull an existing ETL cache (LMDB + registry + classes) by signature.
|
|
|
|
| 41 |
|
| 42 |
MODE=$([ "${RESUME:-0}" = "1" ] && echo resume || echo train)
|
| 43 |
|
| 44 |
+
# Local processed directory signature used by the trainer.
|
| 45 |
+
LOCAL_PROCESSED_SIGNATURE="$(uv run python -c "
|
| 46 |
from deepgenopix.notebook_support import build_experiment_config
|
| 47 |
cfg = build_experiment_config('$PRESET')
|
| 48 |
parts = [
|
|
|
|
| 53 |
f'tf{str(cfg.test_frac).replace(\".\", \"p\")}',
|
| 54 |
f'seed{cfg.split_seed}',
|
| 55 |
f'mf{cfg.min_family_size}',
|
|
|
|
| 56 |
]
|
| 57 |
print('_'.join(parts))
|
| 58 |
")"
|
| 59 |
+
|
| 60 |
+
# HF dataset cache key. Include the dataset revision so corrected split uploads
|
| 61 |
+
# cannot hit an LMDB produced from a stale dataset snapshot.
|
| 62 |
+
PROCESSED_SIGNATURE="${LOCAL_PROCESSED_SIGNATURE}_ds${DATA_REVISION_SHORT}"
|
| 63 |
PIXEL_STRIDE="$(uv run python -c "from deepgenopix.notebook_support import build_experiment_config; print(build_experiment_config('$PRESET').pixel_stride_bp)")"
|
| 64 |
+
PROCESSED_DIR="data/processed/te_visuals/$LOCAL_PROCESSED_SIGNATURE"
|
| 65 |
mkdir -p "$PROCESSED_DIR"
|
| 66 |
|
| 67 |
# Try to pull an existing ETL cache (LMDB + registry + classes) by signature.
|