#!/usr/bin/env python3
"""
Convert chromadb/context-1 to MLX MXFP4 format.

chromadb/context-1 uses a different weight format than openai/gpt-oss-20b:
- Dense BF16 tensors (not quantized blocks)
- gate_up_proj shape: (experts, hidden, intermediate*2) - interleaved

This script handles the correct weight transformation for MLX.
"""
import os
import shutil
import subprocess
import sys

from huggingface_hub import snapshot_download
from safetensors import safe_open
from safetensors.torch import save_file


def convert_context1_to_mlx(output_dir: str, quantize: bool = True, q_bits: int = 4) -> None:
    """Convert chromadb/context-1 to MLX format.

    Downloads the checkpoint, de-interleaves/transposes the expert MLP
    weights into the layout the MLX converter expects, then shells out to
    ``mlx_lm convert`` for the final (optionally quantized) model.

    Args:
        output_dir: Directory the converted MLX model is written to.
        quantize: When True, pass ``-q`` to the MLX conversion step.
        q_bits: Quantization bit width forwarded to ``mlx_lm convert``.

    Raises:
        FileNotFoundError: If the snapshot has no single ``model.safetensors``
            (e.g. a sharded checkpoint, which this script does not handle).
        subprocess.CalledProcessError: If the MLX conversion step fails.
    """
    temp_dir = "/tmp/context1-mlx-converted"
    os.makedirs(temp_dir, exist_ok=True)

    print("=== Step 1: Download chromadb/context-1 ===")
    model_path = snapshot_download("chromadb/context-1")
    sf_path = os.path.join(model_path, "model.safetensors")
    # Fail early with a clear message instead of an opaque error inside
    # safe_open; sharded checkpoints (model-0000X-of-0000Y.safetensors)
    # are not supported here.
    if not os.path.exists(sf_path):
        raise FileNotFoundError(
            f"Expected a single-file checkpoint at {sf_path}; "
            "sharded checkpoints are not supported by this script."
        )

    print("\n=== Step 2: Transform weights ===")
    new_weights = {}
    with safe_open(sf_path, framework="pt") as f:
        for key in f.keys():
            tensor = f.get_tensor(key)
            if "mlp.experts.gate_up_proj_bias" in key:
                # Interleaved split: even indices = gate, odd = up
                gate_bias = tensor[:, ::2].contiguous()
                up_bias = tensor[:, 1::2].contiguous()
                new_weights[key.replace("gate_up_proj_bias", "gate_proj.bias")] = gate_bias
                new_weights[key.replace("gate_up_proj_bias", "up_proj.bias")] = up_bias
            elif "mlp.experts.gate_up_proj" in key and "bias" not in key:
                # Transpose: (experts, hidden, intermediate*2) -> (experts, intermediate*2, hidden)
                # Then interleaved split on middle dimension
                t = tensor.transpose(1, 2)
                gate_weight = t[:, ::2, :].contiguous()
                up_weight = t[:, 1::2, :].contiguous()
                new_weights[key.replace("gate_up_proj", "gate_proj.weight")] = gate_weight
                new_weights[key.replace("gate_up_proj", "up_proj.weight")] = up_weight
            elif "mlp.experts.down_proj_bias" in key:
                new_weights[key.replace("down_proj_bias", "down_proj.bias")] = tensor
            elif "mlp.experts.down_proj" in key and "bias" not in key:
                # Transpose down_proj's last two dims (presumably
                # (experts, intermediate, hidden) -> (experts, hidden, intermediate);
                # TODO confirm against the checkpoint's config).
                t = tensor.transpose(1, 2).contiguous()
                new_weights[key.replace("down_proj", "down_proj.weight")] = t
            else:
                new_weights[key] = tensor

    # Save transformed weights
    save_file(new_weights, os.path.join(temp_dir, "model.safetensors"))

    # Copy the config/tokenizer files the MLX converter needs next to the weights.
    for fname in ["config.json", "tokenizer.json", "tokenizer_config.json", "generation_config.json"]:
        src = os.path.join(model_path, fname)
        if os.path.exists(src):
            shutil.copy(src, temp_dir)

    print("\n=== Step 3: Convert to MLX ===")
    # sys.executable (rather than a bare "python") guarantees the conversion
    # runs under the same interpreter/virtualenv as this script.
    cmd = [sys.executable, "-m", "mlx_lm", "convert", "--hf-path", temp_dir, "--mlx-path", output_dir]
    if quantize:
        cmd.extend(["-q", "--q-bits", str(q_bits)])
    subprocess.run(cmd, check=True)

    print(f"\n=== Done! Model saved to {output_dir} ===")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Convert chromadb/context-1 to MLX")
    parser.add_argument("--output", "-o", default="./context1-mlx-mxfp4", help="Output directory")
    parser.add_argument("--no-quantize", action="store_true", help="Skip quantization (save as FP16)")
    parser.add_argument("--q-bits", type=int, default=4, help="Quantization bits (default: 4)")
    args = parser.parse_args()
    convert_context1_to_mlx(args.output, quantize=not args.no_quantize, q_bits=args.q_bits)