Uddiii committed on
Commit
112679c
·
1 Parent(s): 04688c1

fix(kaggle): pin torch via constraints file in REPAIR cell

Browse files

Step [3/6] (`pip install --force-reinstall bitsandbytes`) and step [4/6]
(`unsloth unsloth_zoo trl`) were re-resolving torch from the default PyPI
index, where the latest release is currently torch 2.11.0. That clobbered
the cu128 build of torch 2.10.0 installed in step [2/6], producing:

torchvision 0.25.0+cu128 requires torch==2.10.0,
but you have torch 2.11.0 which is incompatible.

Fix:
* Write `/tmp/ermap_constraints.txt` immediately after step 2, pinning
`torch==2.10.0` and `torchvision==0.25.0`.
* Step 3 (bnb) now uses `--no-deps` (bnb dlopens torch at runtime, so
install-time deps are unnecessary).
* Steps 4 + 5 pass `-c /tmp/ermap_constraints.txt` so pip can never
upgrade torch out from under us, regardless of what unsloth or trl
declare in their requirement lists.

Made-with: Cursor

kaggle/build_notebook.py CHANGED
@@ -172,21 +172,33 @@ get_ipython().system('pip install -q --no-cache-dir --force-reinstall '
172
  'torch==2.10.0 torchvision==0.25.0 '
173
  '--index-url https://download.pytorch.org/whl/cu128')
174
 
 
 
 
 
 
 
 
175
  # 3. Reinstall bitsandbytes against the now-pinned torch.
176
- print("[3/6] Reinstalling bitsandbytes...")
177
- get_ipython().system('pip install -q --no-cache-dir --force-reinstall bitsandbytes')
 
 
178
 
179
  # 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and
180
  # unsloth_zoo are released as a matched pair; if pip pulls a fresh
181
  # unsloth_zoo against an old unsloth you get
182
  # ImportError: cannot import name 'create_gradient_checkpointing_buffer'
183
- print("[4/6] Upgrading unsloth + unsloth_zoo + trl...")
 
184
  get_ipython().system('pip install -q --upgrade --no-cache-dir '
 
185
  'unsloth unsloth_zoo "trl>=0.18.2"')
186
 
187
  # 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.
188
- print("[5/6] Installing ER-MAP runtime deps...")
189
  get_ipython().system('pip install -q --no-cache-dir '
 
190
  '"groq>=0.18.0" "huggingface_hub>=0.25.0" '
191
  '"gymnasium>=0.29.0" "openenv-core>=0.1.0"')
192
 
 
172
  'torch==2.10.0 torchvision==0.25.0 '
173
  '--index-url https://download.pytorch.org/whl/cu128')
174
 
175
+ # Write a pip constraints file so subsequent installs (bnb, unsloth, trl, etc.)
176
+ # can NEVER pull a different torch from default PyPI. Without this, step 3's
177
+ # `--force-reinstall bitsandbytes` and step 4's `unsloth` upgrade re-resolve
178
+ # torch from PyPI (currently 2.11.0), which breaks the cu128 torchvision pair.
179
+ with open("/tmp/ermap_constraints.txt", "w") as _cf:
180
+ _cf.write("torch==2.10.0\\ntorchvision==0.25.0\\n")
181
+
182
  # 3. Reinstall bitsandbytes against the now-pinned torch.
183
+ # --no-deps because bnb just needs torch at RUNTIME (it dlopens torch's
184
+ # C++ lib) its install-time deps don't include torch.
185
+ print("[3/6] Reinstalling bitsandbytes (--no-deps to preserve torch)...")
186
+ get_ipython().system('pip install -q --no-cache-dir --force-reinstall --no-deps bitsandbytes')
187
 
188
  # 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and
189
  # unsloth_zoo are released as a matched pair; if pip pulls a fresh
190
  # unsloth_zoo against an old unsloth you get
191
  # ImportError: cannot import name 'create_gradient_checkpointing_buffer'
192
+ # The constraint file blocks them from moving torch.
193
+ print("[4/6] Upgrading unsloth + unsloth_zoo + trl (constrained)...")
194
  get_ipython().system('pip install -q --upgrade --no-cache-dir '
195
+ '-c /tmp/ermap_constraints.txt '
196
  'unsloth unsloth_zoo "trl>=0.18.2"')
197
 
198
  # 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.
199
+ print("[5/6] Installing ER-MAP runtime deps (constrained)...")
200
  get_ipython().system('pip install -q --no-cache-dir '
201
+ '-c /tmp/ermap_constraints.txt '
202
  '"groq>=0.18.0" "huggingface_hub>=0.25.0" '
203
  '"gymnasium>=0.29.0" "openenv-core>=0.1.0"')
204
 
kaggle/train_ermap_grpo_kaggle.ipynb CHANGED
@@ -120,21 +120,33 @@
120
  " 'torch==2.10.0 torchvision==0.25.0 '\n",
121
  " '--index-url https://download.pytorch.org/whl/cu128')\n",
122
  "\n",
 
 
 
 
 
 
 
123
  "# 3. Reinstall bitsandbytes against the now-pinned torch.\n",
124
- "print(\"[3/6] Reinstalling bitsandbytes...\")\n",
125
- "get_ipython().system('pip install -q --no-cache-dir --force-reinstall bitsandbytes')\n",
 
 
126
  "\n",
127
  "# 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and\n",
128
  "# unsloth_zoo are released as a matched pair; if pip pulls a fresh\n",
129
  "# unsloth_zoo against an old unsloth you get\n",
130
  "# ImportError: cannot import name 'create_gradient_checkpointing_buffer'\n",
131
- "print(\"[4/6] Upgrading unsloth + unsloth_zoo + trl...\")\n",
 
132
  "get_ipython().system('pip install -q --upgrade --no-cache-dir '\n",
 
133
  " 'unsloth unsloth_zoo \"trl>=0.18.2\"')\n",
134
  "\n",
135
  "# 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.\n",
136
- "print(\"[5/6] Installing ER-MAP runtime deps...\")\n",
137
  "get_ipython().system('pip install -q --no-cache-dir '\n",
 
138
  " '\"groq>=0.18.0\" \"huggingface_hub>=0.25.0\" '\n",
139
  " '\"gymnasium>=0.29.0\" \"openenv-core>=0.1.0\"')\n",
140
  "\n",
 
120
  " 'torch==2.10.0 torchvision==0.25.0 '\n",
121
  " '--index-url https://download.pytorch.org/whl/cu128')\n",
122
  "\n",
123
+ "# Write a pip constraints file so subsequent installs (bnb, unsloth, trl, etc.)\n",
124
+ "# can NEVER pull a different torch from default PyPI. Without this, step 3's\n",
125
+ "# `--force-reinstall bitsandbytes` and step 4's `unsloth` upgrade re-resolve\n",
126
+ "# torch from PyPI (currently 2.11.0), which breaks the cu128 torchvision pair.\n",
127
+ "with open(\"/tmp/ermap_constraints.txt\", \"w\") as _cf:\n",
128
+ " _cf.write(\"torch==2.10.0\\ntorchvision==0.25.0\\n\")\n",
129
+ "\n",
130
  "# 3. Reinstall bitsandbytes against the now-pinned torch.\n",
131
+ "# --no-deps because bnb just needs torch at RUNTIME (it dlopens torch's\n",
132
+ "# C++ lib) its install-time deps don't include torch.\n",
133
+ "print(\"[3/6] Reinstalling bitsandbytes (--no-deps to preserve torch)...\")\n",
134
+ "get_ipython().system('pip install -q --no-cache-dir --force-reinstall --no-deps bitsandbytes')\n",
135
  "\n",
136
  "# 4. Upgrade unsloth + unsloth_zoo + trl in lockstep. unsloth and\n",
137
  "# unsloth_zoo are released as a matched pair; if pip pulls a fresh\n",
138
  "# unsloth_zoo against an old unsloth you get\n",
139
  "# ImportError: cannot import name 'create_gradient_checkpointing_buffer'\n",
140
+ "# The constraint file blocks them from moving torch.\n",
141
+ "print(\"[4/6] Upgrading unsloth + unsloth_zoo + trl (constrained)...\")\n",
142
  "get_ipython().system('pip install -q --upgrade --no-cache-dir '\n",
143
+ " '-c /tmp/ermap_constraints.txt '\n",
144
  " 'unsloth unsloth_zoo \"trl>=0.18.2\"')\n",
145
  "\n",
146
  "# 5. ER-MAP runtime deps that aren't pre-installed on Kaggle.\n",
147
+ "print(\"[5/6] Installing ER-MAP runtime deps (constrained)...\")\n",
148
  "get_ipython().system('pip install -q --no-cache-dir '\n",
149
+ " '-c /tmp/ermap_constraints.txt '\n",
150
  " '\"groq>=0.18.0\" \"huggingface_hub>=0.25.0\" '\n",
151
  " '\"gymnasium>=0.29.0\" \"openenv-core>=0.1.0\"')\n",
152
  "\n",