#!/usr/bin/env python3
"""
Patch IQuestLab/IQuest-Coder models for transformers 5.x compatibility.
Fixes the meta-device RoPE bug where accelerate zeros out inv_freq during
model initialization, causing the model to produce only newlines/garbage.
Usage:
python patch_iquestcoder.py
This will find and patch all cached IQuest-Coder modeling files automatically.
Run this AFTER downloading the model (e.g. after a failed heretic run or
after running `huggingface-cli download IQuestLab/IQuest-Coder-V1-40B-Instruct`).
"""
import glob
import os
import re
import sys
# Regex locating the RoPE forward() in modeling_iquestcoder.py.
# Group 1 captures the decorated method header (@torch.no_grad /
# @dynamic_rope_update / def forward...); group 2 captures the first body
# line (the inv_freq expansion).  The replacement re-inserts both groups
# with the lazy-recompute guard spliced between them.
# NOTE(review): the leading spaces inside these r'' fragments (and in
# REPLACEMENT below) look collapsed by whitespace mangling — in a real
# modeling file the method header is indented 4 spaces and its body 8.
# Confirm the pattern actually matches a cached file before relying on it.
ORIGINAL_PATTERN = re.compile(
r'( @torch\.no_grad\(\)\n'
r' @dynamic_rope_update\n'
r' def forward\(self, x: torch\.Tensor, position_ids: torch\.Tensor\)'
r' -> Tuple\[torch\.Tensor, torch\.Tensor\]:\n)'
r'( inv_freq_expanded = self\.inv_freq\[None, :, None\]\.float\(\)\.expand\(position_ids\.shape\[0\], -1, 1\)\.to\(x\.device\))'
)
# Replacement text: \1 and \2 restore the captured header/body; the inserted
# lines recompute inv_freq via rope_init_fn when accelerate's meta-device
# init left it as all zeros (the bug this script patches around).
REPLACEMENT = (
r'\1'
r' # Lazy recompute: accelerate meta-device init leaves inv_freq as zeros\n'
r' if self.inv_freq is not None and self.inv_freq.numel() > 0 and (self.inv_freq == 0).all():\n'
r' inv_freq, self.attention_scaling = self.rope_init_fn(self.config, None)\n'
r' self.inv_freq = inv_freq.to(device=x.device, dtype=self.inv_freq.dtype)\n'
r' self.original_inv_freq = self.inv_freq\n'
r'\2'
)
# Sentinel substring (part of the inserted comment above): if present in a
# file, it has already been patched and is skipped.
PATCH_MARKER = "Lazy recompute: accelerate meta-device init"
# Glob patterns for cached copies of the modeling file; env-var cache roots
# (HF_HOME etc.) are added at search time in find_model_files().
SEARCH_PATHS = [
os.path.expanduser("~/.cache/huggingface/hub/models--IQuestLab--*/**/modeling_iquestcoder.py"),
"/llm/huggingface/modules/transformers_modules/IQuestLab/**/modeling_iquestcoder.py",
# Common alternate HF cache locations
"/data/huggingface/**/modeling_iquestcoder.py",
"/scratch/**/modeling_iquestcoder.py",
]
def find_model_files():
    """Find all cached IQuest-Coder modeling files.

    Returns:
        list[str]: matching file paths, deduplicated by resolved
        (symlink-free) real path; the first path seen for each real file
        is kept.

    Cache roots named by HF_HOME / TRANSFORMERS_CACHE /
    HUGGINGFACE_HUB_CACHE are searched in addition to SEARCH_PATHS.
    The original implementation appended those env-derived globs to the
    module-level SEARCH_PATHS list itself, so every call grew the global
    with duplicate patterns; we build a local pattern list instead.
    """
    patterns = list(SEARCH_PATHS)
    for env_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"):
        root = os.environ.get(env_var)
        if root:
            patterns.append(os.path.join(root, "**/modeling_iquestcoder.py"))

    found = []
    for pattern in patterns:
        found.extend(glob.glob(pattern, recursive=True))

    # Deduplicate by real path so a symlinked cache entry is patched once.
    seen = set()
    unique = []
    for path in found:
        real = os.path.realpath(path)
        if real not in seen:
            seen.add(real)
            unique.append(path)
    return unique
def patch_file(filepath):
    """Apply the RoPE lazy-recompute patch to a modeling file.

    Returns True when the file was rewritten, False when it was already
    patched or the expected code could not be located.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        source = fh.read()

    # Idempotence guard: the patch inserts PATCH_MARKER, so its presence
    # means this file was handled on a previous run.
    if PATCH_MARKER in source:
        print(f" SKIP (already patched): {filepath}")
        return False

    patched_source, n_hits = ORIGINAL_PATTERN.subn(REPLACEMENT, source)
    if not n_hits:
        print(f" WARN (pattern not found — may need manual patching): {filepath}")
        return False

    with open(filepath, "w", encoding="utf-8") as fh:
        fh.write(patched_source)
    print(f" OK (patched {n_hits} location(s)): {filepath}")
    return True
def main():
    """Locate every cached modeling_iquestcoder.py and patch it in place."""
    print("IQuest-Coder RoPE patch for transformers 5.x")
    print("=" * 50)
    print()

    files = find_model_files()
    if not files:
        # Nothing cached yet: report where we looked, then exit non-zero.
        print("No IQuest-Coder model files found in cache.")
        print("Download the model first, then re-run this script.")
        print()
        print("Searched:")
        for pattern in SEARCH_PATHS:
            print(f" {pattern}")
        sys.exit(1)

    print(f"Found {len(files)} file(s):\n")
    patched_count = sum(1 for path in files if patch_file(path))
    print()
    if patched_count:
        print(f"Done — patched {patched_count} file(s).")
    else:
        print("No files needed patching.")


if __name__ == "__main__":
    main()
|