File size: 10,462 Bytes

7040a6f

#!/usr/bin/env python3
"""
Alkaid A — Fine-tuning Script
Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
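Method: LoRA on a 4-bit quantized base by default (set load_in_4bit to False in CONFIG for 16-bit LoRA)
Note: train_on_responses_only is not applied by this script yet; the trainer is built without a response-only masking step.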

Requirements:
    pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages

Usage:
    # 1. Login to Hugging Face first:
    huggingface-cli login --token YOUR_HF_TOKEN

    # 2. Run training:
    python train_alkaid_a.py

    # 3. Push to Hugging Face Hub:
    python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""

import argparse
import json
import os
from pathlib import Path

# =============================================================================
# CONFIGURATION — Edit these values for your setup
# =============================================================================

# Single settings table read by the data-prep, model-setup, training and export
# functions in this file. Change values here rather than inside those functions.
CONFIG = {
    # Model
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,     # Given to both the model loader and the SFT config
    "load_in_4bit": True,       # Set False if you have 56GB+ VRAM for 16-bit

    # LoRA
    "lora_r": 16,               # Adapter rank
    "lora_alpha": 16,           # LoRA scaling factor (equal to the rank here)
    "lora_dropout": 0,
    # Attention projections (q/k/v/o) plus the MLP projections (gate/up/down)
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],

    # Training
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,   # 1 x 4 = 4 sequences per update per device
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,            # Set to positive number to override epochs
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",
    "lr_scheduler_type": "cosine",
    "fp16": False,              # Exactly one of fp16 / bf16 is enabled
    "bf16": True,
    "logging_steps": 10,
    "save_steps": 50,           # Checkpoint interval, in steps
    "seed": 42,                 # Reused for LoRA init, dataset shuffle and the trainer

    # Data
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",  # Hub dataset ID; its "train" split is loaded
    "custom_data_path": "alkaid_a_training_data.jsonl",  # Optional; skipped with a warning if the file is absent

    # Output
    "output_dir": "./alkaid_a_checkpoints",     # Intermediate trainer checkpoints
    "final_model_dir": "./alkaid_a_final",      # LoRA adapters; "_merged_16bit" and "_gguf" siblings are derived from it
}


# =============================================================================
# DATA PREPARATION
# =============================================================================

def format_example_dataset(example):
    """
    Turn one Opus reasoning record into a three-turn chat conversation.

    Reads the ``problem``, ``thinking`` and ``solution`` fields of *example*
    and returns ``{"messages": [...]}`` containing a fixed system prompt, the
    problem as the user turn, and an assistant turn whose reasoning is wrapped
    in <think> tags ahead of the solution.
    """
    persona = "".join([
        "You are Alkaid A, an advanced AI coding and deployment assistant. ",
        "You follow a rigorous multi-phase workflow including code review, ",
        "iterative debugging, deployment planning, security audits, versioned ",
        "releases, and comprehensive documentation.",
    ])

    # Reasoning sits inside <think>...</think> (matching the base model's
    # format), followed by a blank line and then the final answer.
    reply = "<think>\n{}\n</think>\n\n{}".format(
        example["thinking"], example["solution"]
    )

    turns = (
        ("system", persona),
        ("user", example["problem"]),
        ("assistant", reply),
    )
    return {"messages": [{"role": role, "content": content} for role, content in turns]}


def load_custom_data(path):
    """
    Load custom JSONL training data (already in messages format).

    Args:
        path: Location of a JSON Lines file; every non-blank line must hold
            one JSON document.

    Returns:
        A list of the decoded records in file order. Blank and
        whitespace-only lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a line is not valid JSON. The message is
            prefixed with ``<path>:<line number>:`` so the bad record can be
            found in a large file.
    """
    records = []
    # Decode explicitly instead of relying on the platform default encoding
    # (training text is rarely pure ASCII). "utf-8-sig" reads plain UTF-8 and
    # also swallows a leading BOM, which json.loads would otherwise reject.
    with open(path, "r", encoding="utf-8-sig") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Same exception type as before, with file/line context added.
                raise json.JSONDecodeError(
                    f"{path}:{lineno}: {err.msg}", err.doc, err.pos
                ) from err
    return records


def prepare_datasets(tokenizer, custom_repeat=3):
    """
    Combine example dataset + custom data into a single training set.

    The example dataset named in CONFIG is filtered by category and converted
    to chat messages; the optional custom JSONL file is loaded as-is. Both are
    rendered to plain text with the tokenizer's chat template, concatenated
    (custom data repeated to increase its weight) and shuffled.

    Args:
        tokenizer: Tokenizer whose ``apply_chat_template`` renders each
            ``messages`` list into one training string.
        custom_repeat: Number of copies of the custom data added to the mix.
            Defaults to 3, the value that used to be hard-coded.

    Returns:
        A shuffled dataset with a single ``text`` column.

    Raises:
        ValueError: If ``custom_repeat`` is less than 1.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    if custom_repeat < 1:
        raise ValueError(f"custom_repeat must be >= 1, got {custom_repeat}")

    # --- Load example dataset from Hugging Face ---
    dataset_id = CONFIG["example_dataset"]
    # Report the configured ID so the log stays right when CONFIG is edited.
    print(f"📦 Loading example dataset: {dataset_id}")
    example_ds = load_dataset(dataset_id, split="train")

    # Filter to coding/reasoning examples for best alignment.
    # A missing or null category is treated like "" and kept.
    wanted_categories = ("code", "math", "reasoning", "logic", "")
    example_ds = example_ds.filter(
        lambda x: (x.get("category") or "") in wanted_categories
    )
    print(f"   → {len(example_ds)} examples after filtering")

    # Convert to chat format
    example_ds = example_ds.map(format_example_dataset, remove_columns=example_ds.column_names)

    # --- Load custom data ---
    custom_ds = None
    custom_path = CONFIG["custom_data_path"]
    if os.path.exists(custom_path):
        print(f"📦 Loading custom data: {custom_path}")
        custom_data = load_custom_data(custom_path)
        if custom_data:
            custom_ds = Dataset.from_list(custom_data)
            print(f"   → {len(custom_ds)} custom examples loaded")
        else:
            # An empty file would yield a column-less dataset; skip it instead.
            print(f"⚠️  Custom data at {custom_path} is empty, using example dataset only")
    else:
        print(f"⚠️  Custom data not found at {custom_path}, using example dataset only")

    # --- Apply chat template ---
    def apply_template(example):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
        return {"text": text}

    def render(ds):
        # Keep only the rendered "text" column. Custom records may carry extra
        # keys (or differently shaped messages), and concatenation requires
        # every part to share the same columns.
        return ds.map(apply_template, remove_columns=ds.column_names)

    parts = [render(example_ds)]
    if custom_ds is not None:
        # Combine: custom data is repeated to increase its weight
        parts.extend([render(custom_ds)] * custom_repeat)

    combined = concatenate_datasets(parts) if len(parts) > 1 else parts[0]

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"✅ Total training examples: {len(combined)}")
    return combined


# =============================================================================
# MODEL SETUP
# =============================================================================

def setup_model():
    """
    Load the base model through Unsloth and wrap it with LoRA adapters.

    All settings come from CONFIG. Returns a ``(model, tokenizer)`` pair where
    the model is the adapter-wrapped one.
    """
    from unsloth import FastLanguageModel

    print(f"🔧 Loading model: {CONFIG['base_model']}")
    load_kwargs = {
        "model_name": CONFIG["base_model"],
        "max_seq_length": CONFIG["max_seq_length"],
        "load_in_4bit": CONFIG["load_in_4bit"],
        "dtype": None,  # Auto-detect
    }
    base_model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

    print("🔧 Attaching LoRA adapters")
    lora_kwargs = {
        "r": CONFIG["lora_r"],
        "target_modules": CONFIG["target_modules"],
        "lora_alpha": CONFIG["lora_alpha"],
        "lora_dropout": CONFIG["lora_dropout"],
        "bias": "none",
        "use_gradient_checkpointing": "unsloth",  # 30% less VRAM
        "random_state": CONFIG["seed"],
    }
    lora_model = FastLanguageModel.get_peft_model(base_model, **lora_kwargs)

    return lora_model, tokenizer


# =============================================================================
# TRAINING
# =============================================================================

def train(model, tokenizer, dataset):
    """
    Fine-tune *model* on *dataset* with TRL's SFTTrainer.

    Hyperparameters are read from CONFIG; the dataset's ``text`` column is
    used as the training text. Prints the final training loss and returns the
    trainer.
    """
    from trl import SFTTrainer, SFTConfig

    print("🚀 Starting training...")

    # These settings are handed to SFTConfig under the same name they have
    # in CONFIG.
    passthrough_keys = (
        "output_dir",
        "per_device_train_batch_size",
        "gradient_accumulation_steps",
        "warmup_steps",
        "num_train_epochs",
        "max_steps",
        "learning_rate",
        "optim",
        "lr_scheduler_type",
        "fp16",
        "bf16",
        "logging_steps",
        "save_steps",
        "seed",
        "max_seq_length",
    )
    sft_kwargs = {key: CONFIG[key] for key in passthrough_keys}
    sft_kwargs.update(
        save_total_limit=3,
        dataset_text_field="text",
        report_to="none",       # Set to "wandb" if you use Weights & Biases
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=SFTConfig(**sft_kwargs),
    )

    # Train
    result = trainer.train()
    print(f"✅ Training complete! Loss: {result.training_loss:.4f}")

    return trainer


# =============================================================================
# EXPORT & PUSH
# =============================================================================

def save_model(model, tokenizer, push=False, hub_id=None):
    """
    Save locally and optionally push to Hugging Face Hub.

    Writes three artifacts derived from CONFIG["final_model_dir"]: the LoRA
    adapters plus tokenizer, a merged 16-bit model (``*_merged_16bit``) and a
    Q4_K_M GGUF export (``*_gguf``). The GGUF step is best-effort: a failure
    is reported and does not stop the function.

    Args:
        model: The trained, adapter-wrapped Unsloth model.
        tokenizer: Tokenizer saved and exported alongside the model.
        push: When True, upload the merged 16-bit model to the Hub.
        hub_id: Target Hub repo ID; required for the upload to happen. If
            ``push`` is True without it, a warning is printed and nothing is
            uploaded.
    """
    final_dir = CONFIG["final_model_dir"]

    # Save LoRA adapters (small, fast)
    print(f"💾 Saving LoRA adapters to {final_dir}")
    model.save_pretrained(final_dir)
    tokenizer.save_pretrained(final_dir)

    # Save merged model in 16-bit (for deployment)
    merged_dir = f"{final_dir}_merged_16bit"
    print(f"💾 Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # Export GGUF for local inference (llama.cpp / Ollama / LM Studio)
    gguf_dir = f"{final_dir}_gguf"
    print(f"💾 Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        # Deliberately broad: the export is optional and must not discard the
        # artifacts already written above.
        print(f"⚠️  GGUF export failed (non-critical): {e}")

    # Push to Hub
    if push and hub_id:
        print(f"🚀 Pushing to Hugging Face Hub: {hub_id}")
        # push_to_hub_merged is the Unsloth call that takes (repo, tokenizer,
        # save_method), mirroring save_pretrained_merged above. The plain
        # push_to_hub does not accept a tokenizer in that position.
        model.push_to_hub_merged(hub_id, tokenizer, save_method="merged_16bit")
        print(f"✅ Model live at: https://huggingface.co/{hub_id}")
    elif push:
        print("⚠️  --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")

# =============================================================================
# MAIN
# =============================================================================

def main():
    """
    Command-line entry point: load the model, build the dataset, train, export.

    Flags:
        --push    Upload the merged model to the Hugging Face Hub after export.
        --hub_id  Hub repo ID to upload to; required when --push is given.

    Exits with argparse's usage error (status 2) when --push is given without
    --hub_id, before any model loading or training starts.
    """
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None, help="Hub repo ID (e.g., YourName/Alkaid-A)")
    args = parser.parse_args()

    # Fail fast: otherwise the missing repo ID is only noticed after the full
    # training and export run, and nothing gets uploaded.
    if args.push and not args.hub_id:
        parser.error("--push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")

    # Step 1: Load model
    model, tokenizer = setup_model()

    # Step 2: Prepare data
    dataset = prepare_datasets(tokenizer)

    # Step 3: Train (the returned trainer is not needed afterwards)
    train(model, tokenizer, dataset)

    # Step 4: Save & export
    save_model(model, tokenizer, push=args.push, hub_id=args.hub_id)

    banner = "=" * 60
    print("\n" + banner)
    print("🎉 Alkaid A training pipeline complete!")
    print(banner)
    print(f"  Checkpoints: {CONFIG['output_dir']}")
    print(f"  Final model: {CONFIG['final_model_dir']}")
    print(f"  GGUF export: {CONFIG['final_model_dir']}_gguf")
    if args.push and args.hub_id:
        print(f"  Hub: https://huggingface.co/{args.hub_id}")
    print(banner)


if __name__ == "__main__":
    main()