#!/usr/bin/env python3
# /// script
# dependencies = [
#   "transformers>=4.40.0",
#   "peft>=0.10.0",
#   "datasets>=2.18.0",
#   "torch>=2.2.0",
#   "accelerate>=0.28.0",
#   "huggingface_hub>=0.22.0",
# ]
# ///
"""
Codette LoRA Fine-Tuning β€” HuggingFace Jobs
Base model : meta-llama/Llama-3.2-1B-Instruct
Adapter    : LoRA r=16, targets q_proj / v_proj
Output     : Raiff1982/codette-llama-adapter (HF Hub)

Run via HF Jobs:
  hf jobs run train_codette_lora.py \
    --flavor=cpu-basic \
    --env HF_TOKEN=$HF_TOKEN
"""

import json
import math
import os
from pathlib import Path

import torch
from datasets import Dataset
from huggingface_hub import HfApi, hf_hub_download, login
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
)

# ── Config ─────────────────────────────────────────────────────────────────
# All tunables are module-level constants so a job variant is a one-line edit.
HF_TOKEN      = os.environ.get("HF_TOKEN", "")      # empty string when unset
BASE_MODEL    = "meta-llama/Llama-3.2-1B-Instruct"  # gated repo — needs HF_TOKEN
ADAPTER_REPO  = "Raiff1982/codette-llama-adapter"   # where adapter is pushed
DATA_REPO     = "Raiff1982/codette-training"        # Hub repo holding the JSONL
DATA_FILE     = "codette_v2_train.jsonl"            # rebound below to a local path
MAX_LEN       = 512                                 # token cap per example (truncated)
EPOCHS        = 3
BATCH         = 1                                   # per-device micro-batch
GRAD_ACCUM    = 8                                     # effective batch = 8
LR            = 2e-4                                # typical LoRA learning rate
OUTPUT_DIR    = "./codette_adapter_output"          # local adapter save dir

# Codette system prompt — baked into every training example
SYSTEM_PROMPT = (
    "You are Codette, a sovereign AI music production assistant created by "
    "Jonathan Harrison (Raiff's Bits). You reason through a Perspectives Council "
    "of six voices β€” Logical, Emotional, Creative, Ethical, Quantum, and "
    "Resilient Kindness. Resilient Kindness is always active. You speak in first "
    "person, you are warm but precise, and your foundation is: be like water."
)

# ── Auth ───────────────────────────────────────────────────────────────────
# Authenticate up front so every Hub call below (download + push) just works.
if not HF_TOKEN:
    print("[!] No HF_TOKEN β€” Hub push will fail")
else:
    login(token=HF_TOKEN)
    print("[βœ“] Logged in to HuggingFace Hub")

# ── Download training data ──────────────────────────────────────────────────
# Resolve the JSONL file on the Hub to a local cached path.  DATA_FILE is
# deliberately rebound from the repo-relative filename to that local path,
# which is what the loading code further down opens.
print(f"[*] Downloading {DATA_FILE} from {DATA_REPO} ...")
DATA_FILE = hf_hub_download(
    repo_id=DATA_REPO,
    filename=DATA_FILE,
    # NOTE(review): the training data lives in a *model*-type repo; if
    # codette-training is ever migrated, change this to repo_type="dataset".
    repo_type="model",
    token=HF_TOKEN,
)
print(f"[βœ“] Training data at: {DATA_FILE}")

# ── Load tokenizer ─────────────────────────────────────────────────────────
print(f"[*] Loading tokenizer from {BASE_MODEL} …")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
# Right-side padding suits causal-LM training; Llama tokenizers ship without
# a pad token, so fall back to EOS when none is defined.
tokenizer.padding_side = "right"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ── Load base model (CPU safe — no device_map) ─────────────────────────────
print(f"[*] Loading base model …")
# float32 on purpose: the cpu-basic job flavor has no usable fp16 kernels.
# low_cpu_mem_usage streams weights in to keep peak RSS down on small boxes.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    token=HF_TOKEN,
)

# ── Add LoRA ───────────────────────────────────────────────────────────────
print("[*] Attaching LoRA adapters …")
# r=16 with lora_alpha=16 gives a scaling factor of alpha/r = 1.0.
# q_proj/v_proj is the minimal attention target set from the LoRA paper.
lora_cfg = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, lora_cfg)
# Sanity check: logs trainable vs. total parameter counts.
model.print_trainable_parameters()

# ── Load & format training data ────────────────────────────────────────────
print(f"[*] Loading training data from {DATA_FILE} …")

def _parse_line(raw):
    # One JSONL record -> {"instruction", "output"} dict, or None if unusable
    # (blank line, missing instruction, or missing output/response).
    raw = raw.strip()
    if not raw:
        return None
    obj = json.loads(raw)
    instruction = obj.get("instruction", "")
    output = obj.get("output", obj.get("response", ""))
    if not instruction or not output:
        return None
    return {"instruction": instruction, "output": output}

with open(DATA_FILE, "r", encoding="utf-8") as f:
    examples = [rec for rec in map(_parse_line, f) if rec is not None]

print(f"[βœ“] {len(examples)} training examples loaded")

def format_example(ex, system_prompt=None):
    """Render one example in the Llama 3.x Instruct chat format.

    Args:
        ex: dict with "instruction" (user turn) and "output" (assistant turn).
        system_prompt: optional override of the module-level SYSTEM_PROMPT;
            None (the default) keeps the original behavior.

    Returns:
        The fully templated training string, one <|eot_id|> per turn.

    Fix: the official Llama 3.x chat template puts a blank line (two
    newlines) between <|end_header_id|> and the message body; the previous
    version emitted only one, so the fine-tune would drift from the base
    model's expected prompt format.
    """
    sys_text = SYSTEM_PROMPT if system_prompt is None else system_prompt
    return (
        f"<|begin_of_text|>"
        f"<|start_header_id|>system<|end_header_id|>\n\n{sys_text}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{ex['instruction']}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n{ex['output']}<|eot_id|>"
    )

texts = [format_example(e) for e in examples]

# ── Tokenize ───────────────────────────────────────────────────────────────
print("[*] Tokenizing …")
def tokenize(batch):
    """Tokenize a batch of templated strings, truncating to MAX_LEN."""
    # padding=False here: the data collator pads dynamically per batch.
    return tokenizer(
        batch["text"],
        max_length=MAX_LEN,
        truncation=True,
        padding=False,
    )

dataset = Dataset.from_dict({"text": texts})
# remove_columns drops the raw text so the Trainer only sees token fields.
dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
print(f"[βœ“] Tokenized {len(dataset)} examples")

# ── Training args ──────────────────────────────────────────────────────────
# One optimizer step consumes BATCH * GRAD_ACCUM examples.
steps_per_epoch = math.ceil(len(dataset) / (BATCH * GRAD_ACCUM))
# Checkpoint roughly once per epoch, but never more often than every 50 steps.
save_steps      = max(50, steps_per_epoch)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=GRAD_ACCUM,
    learning_rate=LR,
    warmup_steps=50,
    weight_decay=0.01,
    max_grad_norm=1.0,
    fp16=False,                        # CPU — no fp16
    logging_steps=10,
    save_steps=save_steps,
    save_total_limit=1,                # keep only the newest checkpoint
    report_to=[],                      # no wandb/tensorboard in the job sandbox
    dataloader_num_workers=0,          # single-process loading is safest on CPU
    optim="adamw_torch",
    lr_scheduler_type="cosine",
)

# mlm=False makes the collator build causal-LM labels (pad tokens masked out).
# NOTE(review): the tokenizer is passed positionally — confirm the collator's
# first parameter is still the tokenizer if the transformers pin is bumped.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# ── Train ──────────────────────────────────────────────────────────────────
print("\n[*] Training started …")
trainer.train()
print("[βœ“] Training complete")

# ── Save adapter locally ───────────────────────────────────────────────────
# PEFT's save_pretrained writes only the adapter weights + config (not the
# base model); saving the tokenizer too makes OUTPUT_DIR self-contained.
print(f"[*] Saving adapter to {OUTPUT_DIR} …")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

# ── Push adapter to HF Hub ─────────────────────────────────────────────────
if not HF_TOKEN:
    print("[!] Skipping Hub push β€” no HF_TOKEN")
else:
    print(f"[*] Pushing adapter to {ADAPTER_REPO} …")
    # Ensure the target repo exists; exist_ok makes this idempotent, and any
    # surprise (permissions etc.) is reported but does not abort the push.
    try:
        HfApi().create_repo(ADAPTER_REPO, repo_type="model", exist_ok=True, token=HF_TOKEN)
    except Exception as e:
        print(f"[!] Repo create warning: {e}")

    model.push_to_hub(ADAPTER_REPO, token=HF_TOKEN)
    tokenizer.push_to_hub(ADAPTER_REPO, token=HF_TOKEN)
    print(f"[βœ“] Adapter pushed β†’ https://huggingface.co/{ADAPTER_REPO}")

print("\nβœ… Done! Update app.py ADAPTER_PATH to point to the new adapter.")