shivam2k3 committed on
Commit
1f4e468
·
1 Parent(s): 3165871

resume script: fix dataset module names + opt-out of hf_transfer

Browse files

run_resume_stage2.sh referenced data.build_holdout, but the actual
modules are train.make_sft_dataset and eval.make_holdout. Also
export HF_HUB_ENABLE_HF_TRANSFER=0 in both pipeline scripts so a
missing hf_transfer wheel can't take down model loading.

Made-with: Cursor

scripts/run_full_pipeline.sh CHANGED
@@ -11,6 +11,11 @@
11
  #
12
  set -euo pipefail
13
 
 
 
 
 
 
14
  echo "[1/6] Installing GPU stack ..."
15
  pip install -q --upgrade pip setuptools wheel
16
  # Python-3.10-safe `future`; legacy versions fail with
 
11
  #
12
  set -euo pipefail
13
 
14
+ # HF Spaces flips on HF_HUB_ENABLE_HF_TRANSFER, but the matching wheel
15
+ # is not always installed; opt out so AutoConfig/from_pretrained don't
16
+ # crash when hf_transfer is missing.
17
+ export HF_HUB_ENABLE_HF_TRANSFER=0
18
+
19
  echo "[1/6] Installing GPU stack ..."
20
  pip install -q --upgrade pip setuptools wheel
21
  # Python-3.10-safe `future`; legacy versions fail with
scripts/run_resume_stage2.sh CHANGED
@@ -11,6 +11,11 @@
11
  # repo has been re-cloned. Safe to re-run.
12
  set -euo pipefail
13
 
 
 
 
 
 
14
  # Reuse the install logic from run_full_pipeline.sh by sourcing only
15
  # steps 1 and 2. We'd rather duplicate a few lines than risk source-ing
16
  # a script that exits early if SFT adapter is missing.
@@ -28,7 +33,8 @@ pip install -q hf_transfer msgspec "torchao>=0.13.0" cut_cross_entropy || true
28
  pip install -q -r requirements.txt
29
 
30
  echo "[2/6] Building / verifying datasets ..."
31
- python -m data.build_holdout
 
32
 
33
  echo "[3/6] Skipped (SFT + Stage 1 already on Hub)."
34
 
 
11
  # repo has been re-cloned. Safe to re-run.
12
  set -euo pipefail
13
 
14
+ # Belt-and-suspenders: HF Spaces sets HF_HUB_ENABLE_HF_TRANSFER=1 which
15
+ # requires the `hf_transfer` wheel; if any dep install drops it we'd
16
+ # rather slow downloads than crash, so disable here.
17
+ export HF_HUB_ENABLE_HF_TRANSFER=0
18
+
19
  # Reuse the install logic from run_full_pipeline.sh by sourcing only
20
  # steps 1 and 2. We'd rather duplicate a few lines than risk source-ing
21
  # a script that exits early if SFT adapter is missing.
 
33
  pip install -q -r requirements.txt
34
 
35
  echo "[2/6] Building / verifying datasets ..."
36
+ python -m train.make_sft_dataset --n 600 --out data/sft_train.jsonl
37
+ python -m eval.make_holdout --out data/holdout.jsonl
38
 
39
  echo "[3/6] Skipped (SFT + Stage 1 already on Hub)."
40