resume script: fix dataset module names + opt-out of hf_transfer
Browse files
run_resume_stage2.sh referenced data.build_holdout, but the actual
modules are train.make_sft_dataset and eval.make_holdout. Also
export HF_HUB_ENABLE_HF_TRANSFER=0 in both pipeline scripts so a
missing hf_transfer wheel can't take down model loading.
Made-with: Cursor
scripts/run_full_pipeline.sh
CHANGED
|
@@ -11,6 +11,11 @@
|
|
| 11 |
#
|
| 12 |
set -euo pipefail
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
echo "[1/6] Installing GPU stack ..."
|
| 15 |
pip install -q --upgrade pip setuptools wheel
|
| 16 |
# Python-3.10-safe `future`; legacy versions fail with
|
|
|
|
| 11 |
#
|
| 12 |
set -euo pipefail
|
| 13 |
|
| 14 |
+
# HF Spaces flips on HF_HUB_ENABLE_HF_TRANSFER, but the matching wheel
|
| 15 |
+
# is not always installed; opt out so AutoConfig/from_pretrained don't
|
| 16 |
+
# crash when hf_transfer is missing.
|
| 17 |
+
export HF_HUB_ENABLE_HF_TRANSFER=0
|
| 18 |
+
|
| 19 |
echo "[1/6] Installing GPU stack ..."
|
| 20 |
pip install -q --upgrade pip setuptools wheel
|
| 21 |
# Python-3.10-safe `future`; legacy versions fail with
|
scripts/run_resume_stage2.sh
CHANGED
|
@@ -11,6 +11,11 @@
|
|
| 11 |
# repo has been re-cloned. Safe to re-run.
|
| 12 |
set -euo pipefail
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Reuse the install logic from run_full_pipeline.sh by sourcing only
|
| 15 |
# steps 1 and 2. We'd rather duplicate a few lines than risk source-ing
|
| 16 |
# a script that exits early if SFT adapter is missing.
|
|
@@ -28,7 +33,8 @@ pip install -q hf_transfer msgspec "torchao>=0.13.0" cut_cross_entropy || true
|
|
| 28 |
pip install -q -r requirements.txt
|
| 29 |
|
| 30 |
echo "[2/6] Building / verifying datasets ..."
|
| 31 |
-
python -m data.build_holdout
|
|
|
|
| 32 |
|
| 33 |
echo "[3/6] Skipped (SFT + Stage 1 already on Hub)."
|
| 34 |
|
|
|
|
| 11 |
# repo has been re-cloned. Safe to re-run.
|
| 12 |
set -euo pipefail
|
| 13 |
|
| 14 |
+
# Belt-and-suspenders: HF Spaces sets HF_HUB_ENABLE_HF_TRANSFER=1 which
|
| 15 |
+
# requires the `hf_transfer` wheel; if any dep install drops it we'd
|
| 16 |
+
# rather have slow downloads than crash, so disable it here.
|
| 17 |
+
export HF_HUB_ENABLE_HF_TRANSFER=0
|
| 18 |
+
|
| 19 |
# Reuse the install logic from run_full_pipeline.sh by sourcing only
|
| 20 |
# steps 1 and 2. We'd rather duplicate a few lines than risk source-ing
|
| 21 |
# a script that exits early if SFT adapter is missing.
|
|
|
|
| 33 |
pip install -q -r requirements.txt
|
| 34 |
|
| 35 |
echo "[2/6] Building / verifying datasets ..."
|
| 36 |
+
python -m train.make_sft_dataset --n 600 --out data/sft_train.jsonl
|
| 37 |
+
python -m eval.make_holdout --out data/holdout.jsonl
|
| 38 |
|
| 39 |
echo "[3/6] Skipped (SFT + Stage 1 already on Hub)."
|
| 40 |
|