IQuest-Coder-V1-40B-Instruct-heretic / patch_iquestcoder.py
trohrbaugh's picture
Upload patch_iquestcoder.py with huggingface_hub
4a382cf verified
#!/usr/bin/env python3
"""
Patch IQuestLab/IQuest-Coder models for transformers 5.x compatibility.
Fixes the meta-device RoPE bug where accelerate zeros out inv_freq during
model initialization, causing the model to produce only newlines/garbage.
Usage:
python patch_iquestcoder.py
This will find and patch all cached IQuest-Coder modeling files automatically.
Run this AFTER downloading the model (e.g. after a failed heretic run or
after running `huggingface-cli download IQuestLab/IQuest-Coder-V1-40B-Instruct`).
"""
import glob
import os
import re
import sys
# Pattern to find the forward method that needs patching.
# Group 1 captures the two decorator lines plus the `def forward(...)` signature
# line; group 2 captures the first body statement (the `inv_freq_expanded = ...`
# assignment). The match is literal, so any difference in the target file's
# signature or first statement makes the pattern miss.
ORIGINAL_PATTERN = re.compile(
r'( @torch\.no_grad\(\)\n'
r' @dynamic_rope_update\n'
r' def forward\(self, x: torch\.Tensor, position_ids: torch\.Tensor\)'
r' -> Tuple\[torch\.Tensor, torch\.Tensor\]:\n)'
r'( inv_freq_expanded = self\.inv_freq\[None, :, None\]\.float\(\)\.expand\(position_ids\.shape\[0\], -1, 1\)\.to\(x\.device\))'
)
# Substitution template used with ORIGINAL_PATTERN: re-emits the matched
# signature (\1), inserts a guard that recomputes inv_freq through
# self.rope_init_fn when the existing inv_freq is non-empty and all zeros,
# then re-emits the original first body statement (\2).
REPLACEMENT = (
r'\1'
r' # Lazy recompute: accelerate meta-device init leaves inv_freq as zeros\n'
r' if self.inv_freq is not None and self.inv_freq.numel() > 0 and (self.inv_freq == 0).all():\n'
r' inv_freq, self.attention_scaling = self.rope_init_fn(self.config, None)\n'
r' self.inv_freq = inv_freq.to(device=x.device, dtype=self.inv_freq.dtype)\n'
r' self.original_inv_freq = self.inv_freq\n'
r'\2'
)
# Check string to see if already patched. This text is part of the comment
# line that REPLACEMENT inserts, so finding it in a file means the patch was
# applied before.
PATCH_MARKER = "Lazy recompute: accelerate meta-device init"
# Search locations for cached model files. Each entry is a glob pattern;
# find_model_files() expands them with recursive `**` matching and also
# appends patterns derived from Hugging Face cache environment variables.
SEARCH_PATHS = [
os.path.expanduser("~/.cache/huggingface/hub/models--IQuestLab--*/**/modeling_iquestcoder.py"),
"/llm/huggingface/modules/transformers_modules/IQuestLab/**/modeling_iquestcoder.py",
# Common alternate HF cache locations
"/data/huggingface/**/modeling_iquestcoder.py",
"/scratch/**/modeling_iquestcoder.py",
]
def find_model_files():
    """Find all cached IQuest-Coder modeling files.

    Expands every glob pattern in the module-level ``SEARCH_PATHS`` list
    (with recursive ``**`` matching). Before searching, patterns derived from
    the ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``HUGGINGFACE_HUB_CACHE``
    environment variables are added to ``SEARCH_PATHS`` so that callers can
    report every location that was searched.

    Returns:
        A list of matching paths in discovery order. Paths that resolve to
        the same real file (e.g. through symlinks) are reported only once,
        using the first path under which the file was found.
    """
    # Register env-var based cache locations. Only add a pattern once so that
    # calling this function repeatedly does not keep growing SEARCH_PATHS
    # (which would also repeat the same expensive recursive glob).
    for env_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"):
        root = os.environ.get(env_var)
        if not root:
            continue
        pattern = os.path.join(root, "**/modeling_iquestcoder.py")
        if pattern not in SEARCH_PATHS:
            SEARCH_PATHS.append(pattern)

    # Glob each pattern and deduplicate on the resolved path, keeping the
    # first spelling encountered.
    seen = set()
    unique = []
    for pattern in SEARCH_PATHS:
        for path in glob.glob(pattern, recursive=True):
            real = os.path.realpath(path)
            if real not in seen:
                seen.add(real)
                unique.append(path)
    return unique
def patch_file(filepath):
    """Apply the RoPE lazy-recompute patch to one modeling file, in place.

    The file is read as UTF-8. If it already contains ``PATCH_MARKER`` it is
    left alone; otherwise ``ORIGINAL_PATTERN`` is substituted with
    ``REPLACEMENT`` and, when at least one substitution happened, the result
    is written back to the same path. A one-line status (SKIP / WARN / OK) is
    printed in every case.

    Returns:
        True if the file was rewritten, False if it was skipped or the
        pattern was not found.
    """
    with open(filepath, "r", encoding="utf-8") as src:
        original_text = src.read()

    # The marker is part of the inserted code, so its presence means this
    # file was already processed.
    if PATCH_MARKER in original_text:
        print(f" SKIP (already patched): {filepath}")
        return False

    patched_text, n_subs = ORIGINAL_PATTERN.subn(REPLACEMENT, original_text)
    if n_subs:
        with open(filepath, "w", encoding="utf-8") as dst:
            dst.write(patched_text)
        print(f" OK (patched {n_subs} location(s)): {filepath}")
        return True

    # Nothing matched: leave the file untouched and tell the user.
    print(f" WARN (pattern not found — may need manual patching): {filepath}")
    return False
def main():
    """Command-line entry point: locate cached modeling files and patch them.

    Prints a banner, then the result for each file found by
    ``find_model_files()`` followed by a summary line. When no files are
    found, prints the searched glob patterns from ``SEARCH_PATHS`` and exits
    with status 1.
    """
    print("IQuest-Coder RoPE patch for transformers 5.x")
    print("=" * 50)
    print()

    targets = find_model_files()
    if targets:
        print(f"Found {len(targets)} file(s):\n")
        # patch_file() prints its own per-file status; count the successes.
        n_patched = sum(1 for target in targets if patch_file(target))
        print()
        summary = (
            f"Done — patched {n_patched} file(s)."
            if n_patched
            else "No files needed patching."
        )
        print(summary)
        return

    # Nothing to patch: explain where we looked and signal failure.
    print("No IQuest-Coder model files found in cache.")
    print("Download the model first, then re-run this script.")
    print()
    print("Searched:")
    for searched in SEARCH_PATHS:
        print(f" {searched}")
    sys.exit(1)


# Run the patcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()