icarus112 committed on
Commit
a8a5806
·
verified ·
1 Parent(s): f94e4a7

Update Feather H200 training runtime image

Browse files
overlay/scripts/run_domain_expanded_pretrain.sh CHANGED
@@ -224,7 +224,13 @@ else
224
  fi
225
  log "note=train.py consumes HYDRA_RESUME_CKPT and HYDRA_CKPT_INTERVAL env vars; launcher exports them automatically"
226
 
227
- if [[ "$NEED_PREPARE" -eq 1 ]]; then
 
 
 
 
 
 
228
  PREPARE_CMD=("${PYTHON_CMD[@]}" prepare.py --num-shards "$PREPARE_NUM_SHARDS" --download-workers "$DOWNLOAD_WORKERS")
229
  log "prepare_action=run command=${PREPARE_CMD[*]}"
230
  if [[ "$DRY_RUN" -eq 0 ]]; then
 
224
  fi
225
  log "note=train.py consumes HYDRA_RESUME_CKPT and HYDRA_CKPT_INTERVAL env vars; launcher exports them automatically"
226
 
227
+ if [[ "${HYDRA_USE_NEMOTRON:-0}" == "1" ]]; then
228
+ # Streaming Nemotron path (Super3 recipe) pulls tokens directly from HF at
229
+ # train-time via prepare_nemotron.make_dataloader. The disk-shard prepare.py
230
+ # download phase is redundant in this mode and wastes 20-30 min of paid GPU
231
+ # time on shard parquet transfers we'll never read.
232
+ log "prepare_action=skip reason=HYDRA_USE_NEMOTRON=1 (streaming at train-time)"
233
+ elif [[ "$NEED_PREPARE" -eq 1 ]]; then
234
  PREPARE_CMD=("${PYTHON_CMD[@]}" prepare.py --num-shards "$PREPARE_NUM_SHARDS" --download-workers "$DOWNLOAD_WORKERS")
235
  log "prepare_action=run command=${PREPARE_CMD[*]}"
236
  if [[ "$DRY_RUN" -eq 0 ]]; then