# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
import os
import subprocess
import shutil
from pathlib import Path

from huggingface_hub import HfApi, snapshot_download, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]  # Good balance of size/quality

print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)

# Step 1: Load and merge LoRA with base model
print("\n[1/4] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

# Merge LoRA weights into base model
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()

# Save merged model
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
print("Merged model saved!")

# Step 2: Clone llama.cpp for conversion
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path)
    ], check=True)

# Install conversion requirements
subprocess.run([
    "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt")
], check=True, capture_output=True)

# Step 3: Convert to GGUF
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# Convert to F16 GGUF first
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    "python", str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16"
], check=True)
print(f"Created: {f16_path}")

# Build llama.cpp for quantization
print("\nBuilding llama.cpp for quantization...")
subprocess.run(
    ["make", "-C", str(llama_cpp_path), "llama-quantize"],
    check=True, capture_output=True
)

# Quantize to different formats
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")

# Step 4: Upload to Hub
print("\n[4/4] Uploading to Hugging Face Hub...")
api = HfApi()

# Create repo
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)

# Create model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFMPEG commands from natural language descriptions with chain-of-thought reasoning.

## Usage

### LM Studio

```bash
lms import kingjux/ffmpeg-command-generator-gguf
```

### Ollama

```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFMPEG command examples with CoT reasoning.
"""

# Save and upload model card
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)

# Upload all files
for file in [card_path] + quantized_files:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

print("\n" + "=" * 50)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f" lms import {OUTPUT_REPO}")
print("=" * 50)
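# Usage note (assumption: this file is saved as convert_to_gguf.py; adjust the
# name to match your checkout). The "# /// script" block at the top is PEP 723
# inline metadata, so a runner that understands it (e.g. uv) can resolve the
# listed dependencies automatically:
#
#   uv run convert_to_gguf.py
#
# Otherwise, install the dependencies manually and run with plain Python:
#
#   pip install torch transformers peft huggingface_hub sentencepiece protobuf gguf
#   python convert_to_gguf.py
#
# Optional sanity check of a quantized artifact before upload (assumes
# llama-cpp-python is installed separately; not part of this script's deps):
#
#   from llama_cpp import Llama
#   llm = Llama(model_path="/tmp/gguf_output/ffmpeg-command-generator-q4_k_m.gguf")
#   print(llm("Convert video.mp4 to webm format", max_tokens=128)["choices"][0]["text"])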