Spaces:
Sleeping
fix(kaggle): pin torch via constraints file in REPAIR cell
Browse files
Step [3/6] (`pip install --force-reinstall bitsandbytes`) and step [4/6]
(`unsloth unsloth_zoo trl`) were re-resolving torch from default PyPI,
which is currently torch 2.11.0. That clobbered the cu128 torch 2.10.0
installed in step [2/6], producing:
torchvision 0.25.0+cu128 requires torch==2.10.0,
but you have torch 2.11.0 which is incompatible.
Fix:
* Write `/tmp/ermap_constraints.txt` immediately after step 2, pinning
`torch==2.10.0` and `torchvision==0.25.0`.
* Step 3 (bitsandbytes, "bnb") now uses `--no-deps` (bnb dlopens torch at
runtime, so resolving its install-time deps is unnecessary).
* Steps 4 + 5 pass `-c /tmp/ermap_constraints.txt` so pip can never
upgrade torch out from under us, regardless of what unsloth or trl
declare in their requirement lists.
Made-with: Cursor
- kaggle/build_notebook.py +16 -4
- kaggle/train_ermap_grpo_kaggle.ipynb +16 -4
|
@@ -172,21 +172,33 @@ get_ipython().system('pip install -q --no-cache-dir --force-reinstall '
|
|
| 172 |
'torch==2.10.0 torchvision==0.25.0 '
|
| 173 |
'--index-url https://download.pytorch.org/whl/cu128')
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# 3. Reinstall bitsandbytes against the now-pinned torch.
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
| 178 |
|
| 179 |
# 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and
|
| 180 |
# unsloth_zoo are released as a matched pair; if pip pulls a fresh
|
| 181 |
# unsloth_zoo against an old unsloth you get
|
| 182 |
# ImportError: cannot import name 'create_gradient_checkpointing_buffer'
|
| 183 |
-
|
|
|
|
| 184 |
get_ipython().system('pip install -q --upgrade --no-cache-dir '
|
|
|
|
| 185 |
'unsloth unsloth_zoo "trl>=0.18.2"')
|
| 186 |
|
| 187 |
# 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.
|
| 188 |
-
print("[5/6] Installing ER-MAP runtime deps...")
|
| 189 |
get_ipython().system('pip install -q --no-cache-dir '
|
|
|
|
| 190 |
'"groq>=0.18.0" "huggingface_hub>=0.25.0" '
|
| 191 |
'"gymnasium>=0.29.0" "openenv-core>=0.1.0"')
|
| 192 |
|
|
|
|
| 172 |
'torch==2.10.0 torchvision==0.25.0 '
|
| 173 |
'--index-url https://download.pytorch.org/whl/cu128')
|
| 174 |
|
| 175 |
+
# Write a pip constraints file so subsequent installs (bnb, unsloth, trl, etc.)
|
| 176 |
+
# can NEVER pull a different torch from default PyPI. Without this, step 3's
|
| 177 |
+
# `--force-reinstall bitsandbytes` and step 4's `unsloth` upgrade re-resolve
|
| 178 |
+
# torch from PyPI (currently 2.11.0), which breaks the cu128 torchvision pair.
|
| 179 |
+
with open("/tmp/ermap_constraints.txt", "w") as _cf:
|
| 180 |
+
_cf.write("torch==2.10.0\\ntorchvision==0.25.0\\n")
|
| 181 |
+
|
| 182 |
# 3. Reinstall bitsandbytes against the now-pinned torch.
|
| 183 |
+
# --no-deps because bnb just needs torch at RUNTIME (it dlopens torch's
|
| 184 |
+
# C++ lib) — its install-time deps don't include torch.
|
| 185 |
+
print("[3/6] Reinstalling bitsandbytes (--no-deps to preserve torch)...")
|
| 186 |
+
get_ipython().system('pip install -q --no-cache-dir --force-reinstall --no-deps bitsandbytes')
|
| 187 |
|
| 188 |
# 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and
|
| 189 |
# unsloth_zoo are released as a matched pair; if pip pulls a fresh
|
| 190 |
# unsloth_zoo against an old unsloth you get
|
| 191 |
# ImportError: cannot import name 'create_gradient_checkpointing_buffer'
|
| 192 |
+
# The constraint file blocks them from moving torch.
|
| 193 |
+
print("[4/6] Upgrading unsloth + unsloth_zoo + trl (constrained)...")
|
| 194 |
get_ipython().system('pip install -q --upgrade --no-cache-dir '
|
| 195 |
+
'-c /tmp/ermap_constraints.txt '
|
| 196 |
'unsloth unsloth_zoo "trl>=0.18.2"')
|
| 197 |
|
| 198 |
# 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.
|
| 199 |
+
print("[5/6] Installing ER-MAP runtime deps (constrained)...")
|
| 200 |
get_ipython().system('pip install -q --no-cache-dir '
|
| 201 |
+
'-c /tmp/ermap_constraints.txt '
|
| 202 |
'"groq>=0.18.0" "huggingface_hub>=0.25.0" '
|
| 203 |
'"gymnasium>=0.29.0" "openenv-core>=0.1.0"')
|
| 204 |
|
|
@@ -120,21 +120,33 @@
|
|
| 120 |
" 'torch==2.10.0 torchvision==0.25.0 '\n",
|
| 121 |
" '--index-url https://download.pytorch.org/whl/cu128')\n",
|
| 122 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
"# 3. Reinstall bitsandbytes against the now-pinned torch.\n",
|
| 124 |
-
"
|
| 125 |
-
"
|
|
|
|
|
|
|
| 126 |
"\n",
|
| 127 |
"# 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and\n",
|
| 128 |
"# unsloth_zoo are released as a matched pair; if pip pulls a fresh\n",
|
| 129 |
"# unsloth_zoo against an old unsloth you get\n",
|
| 130 |
"# ImportError: cannot import name 'create_gradient_checkpointing_buffer'\n",
|
| 131 |
-
"
|
|
|
|
| 132 |
"get_ipython().system('pip install -q --upgrade --no-cache-dir '\n",
|
|
|
|
| 133 |
" 'unsloth unsloth_zoo \"trl>=0.18.2\"')\n",
|
| 134 |
"\n",
|
| 135 |
"# 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.\n",
|
| 136 |
-
"print(\"[5/6] Installing ER-MAP runtime deps...\")\n",
|
| 137 |
"get_ipython().system('pip install -q --no-cache-dir '\n",
|
|
|
|
| 138 |
" '\"groq>=0.18.0\" \"huggingface_hub>=0.25.0\" '\n",
|
| 139 |
" '\"gymnasium>=0.29.0\" \"openenv-core>=0.1.0\"')\n",
|
| 140 |
"\n",
|
|
|
|
| 120 |
" 'torch==2.10.0 torchvision==0.25.0 '\n",
|
| 121 |
" '--index-url https://download.pytorch.org/whl/cu128')\n",
|
| 122 |
"\n",
|
| 123 |
+
"# Write a pip constraints file so subsequent installs (bnb, unsloth, trl, etc.)\n",
|
| 124 |
+
"# can NEVER pull a different torch from default PyPI. Without this, step 3's\n",
|
| 125 |
+
"# `--force-reinstall bitsandbytes` and step 4's `unsloth` upgrade re-resolve\n",
|
| 126 |
+
"# torch from PyPI (currently 2.11.0), which breaks the cu128 torchvision pair.\n",
|
| 127 |
+
"with open(\"/tmp/ermap_constraints.txt\", \"w\") as _cf:\n",
|
| 128 |
+
" _cf.write(\"torch==2.10.0\\ntorchvision==0.25.0\\n\")\n",
|
| 129 |
+
"\n",
|
| 130 |
"# 3. Reinstall bitsandbytes against the now-pinned torch.\n",
|
| 131 |
+
"# --no-deps because bnb just needs torch at RUNTIME (it dlopens torch's\n",
|
| 132 |
+
"# C++ lib) — its install-time deps don't include torch.\n",
|
| 133 |
+
"print(\"[3/6] Reinstalling bitsandbytes (--no-deps to preserve torch)...\")\n",
|
| 134 |
+
"get_ipython().system('pip install -q --no-cache-dir --force-reinstall --no-deps bitsandbytes')\n",
|
| 135 |
"\n",
|
| 136 |
"# 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and\n",
|
| 137 |
"# unsloth_zoo are released as a matched pair; if pip pulls a fresh\n",
|
| 138 |
"# unsloth_zoo against an old unsloth you get\n",
|
| 139 |
"# ImportError: cannot import name 'create_gradient_checkpointing_buffer'\n",
|
| 140 |
+
"# The constraint file blocks them from moving torch.\n",
|
| 141 |
+
"print(\"[4/6] Upgrading unsloth + unsloth_zoo + trl (constrained)...\")\n",
|
| 142 |
"get_ipython().system('pip install -q --upgrade --no-cache-dir '\n",
|
| 143 |
+
" '-c /tmp/ermap_constraints.txt '\n",
|
| 144 |
" 'unsloth unsloth_zoo \"trl>=0.18.2\"')\n",
|
| 145 |
"\n",
|
| 146 |
"# 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.\n",
|
| 147 |
+
"print(\"[5/6] Installing ER-MAP runtime deps (constrained)...\")\n",
|
| 148 |
"get_ipython().system('pip install -q --no-cache-dir '\n",
|
| 149 |
+
" '-c /tmp/ermap_constraints.txt '\n",
|
| 150 |
" '\"groq>=0.18.0\" \"huggingface_hub>=0.25.0\" '\n",
|
| 151 |
" '\"gymnasium>=0.29.0\" \"openenv-core>=0.1.0\"')\n",
|
| 152 |
"\n",
|