vedatonuryilmaz committed on
Commit
bcad905
·
verified ·
1 Parent(s): d70c48c

fix remote train processed cache path

Browse files
Files changed (1) hide show
  1. scripts/remote_train.sh +7 -4
scripts/remote_train.sh CHANGED
@@ -41,8 +41,8 @@ export DEEPGENOPIX_NUM_WORKERS=4
41
 
42
  MODE=$([ "${RESUME:-0}" = "1" ] && echo resume || echo train)
43
 
44
- # Canonical cache key emitted by `deepgenopix matrix --json` for each preset.
45
- PROCESSED_SIGNATURE="$(uv run python -c "
46
  from deepgenopix.notebook_support import build_experiment_config
47
  cfg = build_experiment_config('$PRESET')
48
  parts = [
@@ -53,12 +53,15 @@ parts = [
53
  f'tf{str(cfg.test_frac).replace(\".\", \"p\")}',
54
  f'seed{cfg.split_seed}',
55
  f'mf{cfg.min_family_size}',
56
- 'ds$DATA_REVISION_SHORT',
57
  ]
58
  print('_'.join(parts))
59
  ")"
 
 
 
 
60
  PIXEL_STRIDE="$(uv run python -c "from deepgenopix.notebook_support import build_experiment_config; print(build_experiment_config('$PRESET').pixel_stride_bp)")"
61
- PROCESSED_DIR="data/processed/te_visuals/auto"
62
  mkdir -p "$PROCESSED_DIR"
63
 
64
  # Try to pull an existing ETL cache (LMDB + registry + classes) by signature.
 
41
 
42
  MODE=$([ "${RESUME:-0}" = "1" ] && echo resume || echo train)
43
 
44
+ # Local processed directory signature used by the trainer.
45
+ LOCAL_PROCESSED_SIGNATURE="$(uv run python -c "
46
  from deepgenopix.notebook_support import build_experiment_config
47
  cfg = build_experiment_config('$PRESET')
48
  parts = [
 
53
  f'tf{str(cfg.test_frac).replace(\".\", \"p\")}',
54
  f'seed{cfg.split_seed}',
55
  f'mf{cfg.min_family_size}',
 
56
  ]
57
  print('_'.join(parts))
58
  ")"
59
+
60
+ # HF dataset cache key. Include the dataset revision so corrected split uploads
61
+ # cannot hit an LMDB produced from a stale dataset snapshot.
62
+ PROCESSED_SIGNATURE="${LOCAL_PROCESSED_SIGNATURE}_ds${DATA_REVISION_SHORT}"
63
  PIXEL_STRIDE="$(uv run python -c "from deepgenopix.notebook_support import build_experiment_config; print(build_experiment_config('$PRESET').pixel_stride_bp)")"
64
+ PROCESSED_DIR="data/processed/te_visuals/$LOCAL_PROCESSED_SIGNATURE"
65
  mkdir -p "$PROCESSED_DIR"
66
 
67
  # Try to pull an existing ETL cache (LMDB + registry + classes) by signature.