# Qwen2.5-7B QLoRA Training on Colab

Google Colab Pro (A100) での学習用ノートブック

**推奨**: Colab Pro ($10/月) 以上、A100 GPU

## 1. 環境セットアップ

In [None]:
# Check which GPU this runtime was assigned.
!nvidia-smi

In [None]:
# Mount Google Drive (used to store checkpoints).
from google.colab import drive
drive.mount('/content/drive')

# Create the working directories on the mounted Drive
# (-p: no error if they already exist).
!mkdir -p /content/drive/MyDrive/qwen-training/checkpoints
!mkdir -p /content/drive/MyDrive/qwen-training/output

In [None]:
# Install dependencies (-q keeps pip output quiet).
# torch/torchvision, transformers, peft and trl are pinned to exact versions;
# datasets, bitsandbytes, accelerate, huggingface_hub and safetensors are not.
!pip install -q torch==2.2.0 torchvision==0.17.0
!pip install -q transformers==4.46.0 datasets peft==0.13.0 trl==0.11.0
!pip install -q bitsandbytes accelerate huggingface_hub safetensors

In [None]:
# Log in to Hugging Face.
from huggingface_hub import login
login() # enter your access token when prompted

## 2. 設定

In [None]:
# Configuration
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
OUTPUT_MODEL_ID = "hajimemat/qwen2.5-7b-glaive-fc-lora-colab" # can be changed
DATASET_NAME = "glaiveai/glaive-function-calling-v2"

# Save to Google Drive
CHECKPOINT_DIR = "/content/drive/MyDrive/qwen-training/checkpoints"
FINAL_OUTPUT_DIR = "/content/drive/MyDrive/qwen-training/output"

## 3. データセット準備

In [None]:
from datasets import load_dataset

def convert_glaive_to_chatml(example):
    """Render one dataset record as a single ChatML-formatted string.

    The optional ``system`` field becomes a ``system`` turn. The ``chat``
    field is scanned line by line: a line starting with ``USER:`` or
    ``ASSISTANT:`` opens a new turn, and every other line is appended to the
    turn currently open. Lines seen before the first speaker marker are
    dropped. Turns whose content is empty after stripping are skipped.

    Args:
        example: Mapping with optional ``"system"`` and ``"chat"`` string
            values.

    Returns:
        ``{"text": <rendered turns joined by newlines>}``; ``text`` is an
        empty string when nothing could be rendered.
    """
    # Speaker markers recognised at the start of a stripped line, paired with
    # the ChatML role they open.
    # NOTE(review): a line with any other marker is treated as a continuation
    # of the current turn - confirm that is intended for this dataset.
    role_prefixes = (("USER:", "user"), ("ASSISTANT:", "assistant"))

    parts = []

    def flush_turn(role, lines):
        """Append the buffered turn to ``parts`` unless it is empty."""
        if role is None:
            return
        content = "\n".join(lines).strip()
        if content:
            parts.append(f"<|im_start|>{role}\n{content}<|im_end|>")

    system_prompt = example.get("system")
    if system_prompt:
        parts.append(f"<|im_start|>system\n{system_prompt}<|im_end|>")

    current_role = None
    current_lines = []
    # `or ""` covers both a missing key and an explicit None value.
    for raw_line in (example.get("chat") or "").split("\n"):
        line = raw_line.strip()
        matched = next(
            ((prefix, role) for prefix, role in role_prefixes if line.startswith(prefix)),
            None,
        )
        if matched is not None:
            prefix, role = matched
            # A new speaker marker closes the previous turn.
            flush_turn(current_role, current_lines)
            current_role = role
            current_lines = [line[len(prefix):].strip()]
        elif current_role is not None:
            # Continuation line (possibly blank) of the open turn.
            current_lines.append(line)

    # Emit whatever turn was still open at the end of the chat.
    flush_turn(current_role, current_lines)
    return {"text": "\n".join(parts)}

# Load the raw training split, render every record to ChatML text, drop
# samples whose text is 50 characters or shorter, shuffle, and hold out 2%
# of the result for evaluation.
print(f"Loading dataset: {DATASET_NAME}")
dataset = load_dataset(DATASET_NAME, split="train")
print(f"Original: {len(dataset)} examples")

# Captured before mapping so that only the generated "text" column remains.
raw_columns = dataset.column_names
dataset = (
    dataset.map(convert_glaive_to_chatml, remove_columns=raw_columns, num_proc=4)
    .filter(lambda x: len(x["text"]) > 50)
    .shuffle(seed=42)
)
split = dataset.train_test_split(test_size=0.02, seed=42)

train_count = len(split["train"])
test_count = len(split["test"])
print(f"Train: {train_count}, Test: {test_count}")

## 4. モデル準備

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# QLoRA quantization settings: load weights in 4-bit NF4 with double
# quantization and use bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# LoRA settings: adapters are attached to the attention projections and to
# the MLP projections listed below.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + mlp_projections,
)

# Tokenizer: pad on the right, and fall back to the EOS token for padding
# when the tokenizer does not define a pad token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.padding_side = "right"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Model: load the quantized base model, prepare it for k-bit training, then
# wrap it with the LoRA adapters.
print(f"Loading model: {BASE_MODEL}")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=bnb_config,
    attn_implementation="sdpa",
    # NOTE(review): allows code shipped in the model repo to run - confirm
    # this is still required for BASE_MODEL.
    trust_remote_code=True,
)
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)
model.print_trainable_parameters()

## 5. 学習実行

In [None]:
from trl import SFTTrainer

# Optimizer, schedule and precision. Each device processes 4 sequences per
# step and accumulates gradients over 4 steps before an optimizer update.
optimization_settings = {
    "num_train_epochs": 1,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "gradient_accumulation_steps": 4,
    "learning_rate": 2e-4,
    "weight_decay": 0.01,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "optim": "paged_adamw_8bit",
    "bf16": True,
    "gradient_checkpointing": True,
}

# Logging, evaluation and checkpointing cadence. Checkpoints are written to
# CHECKPOINT_DIR every 200 steps, on the same schedule as evaluation.
bookkeeping_settings = {
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 3,
    "eval_strategy": "steps",
    "eval_steps": 200,
    "report_to": "none",
    "save_safetensors": True,
}

training_args = TrainingArguments(
    output_dir=CHECKPOINT_DIR,
    **optimization_settings,
    **bookkeeping_settings,
)

# Supervised fine-tuning on the "text" column, one example per sequence
# (no packing), with sequences limited to 1024 tokens.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=split["train"],
    eval_dataset=split["test"],
    peft_config=lora_config,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=1024,
    packing=False,
)

# Resume from the newest checkpoint in CHECKPOINT_DIR, if there is one.
import os
import re


def _find_latest_checkpoint(checkpoint_dir):
    """Return the path of the highest-numbered ``checkpoint-<step>`` directory.

    Only sub-directories whose name is exactly ``checkpoint-`` followed by
    digits are considered, so stray files or differently named folders
    cannot break the step-number parsing.

    Args:
        checkpoint_dir: Directory to scan.

    Returns:
        Path to the checkpoint directory with the largest step number, or
        ``None`` when ``checkpoint_dir`` does not exist or holds no such
        directory.
    """
    if not os.path.isdir(checkpoint_dir):
        return None

    name_pattern = re.compile(r"checkpoint-(\d+)")
    latest_step = -1
    latest_path = None
    for entry in os.listdir(checkpoint_dir):
        match = name_pattern.fullmatch(entry)
        if match is None:
            continue
        candidate = os.path.join(checkpoint_dir, entry)
        if not os.path.isdir(candidate):
            continue
        step = int(match.group(1))
        if step > latest_step:
            latest_step = step
            latest_path = candidate
    return latest_path


resume_from = _find_latest_checkpoint(CHECKPOINT_DIR)
if resume_from is not None:
    print(f"Resuming from: {resume_from}")

# Start training (from scratch when resume_from is None).
trainer.train(resume_from_checkpoint=resume_from)

## 6. 保存とアップロード

In [None]:
# Save the trained model and the tokenizer to FINAL_OUTPUT_DIR.
print(f"Saving to {FINAL_OUTPUT_DIR}")
trainer.save_model(FINAL_OUTPUT_DIR)
tokenizer.save_pretrained(FINAL_OUTPUT_DIR)

# Upload to Hugging Face as a private repo. This is best-effort: a failed
# upload is reported but not re-raised, because the files were already saved
# above.
print(f"Uploading to {OUTPUT_MODEL_ID}")
try:
    for artifact in (trainer.model, tokenizer):
        artifact.push_to_hub(OUTPUT_MODEL_ID, private=True)
    print(f"Done! https://huggingface.co/{OUTPUT_MODEL_ID}")
except Exception as upload_error:
    print(f"Upload failed: {upload_error}")
    print("Model saved locally in Google Drive")

## 7. クイックテスト(オプション)

In [None]:
# Quick inference sanity check with the fine-tuned model.
import torch
from peft import PeftModel

# The prompt uses the same layout that convert_glaive_to_chatml produces for
# training: "<|im_end|>" directly follows the turn content (no newline before
# it) and turns are separated by a single newline. The trailing assistant
# header is left open so the model writes the reply.
test_prompt = (
    "<|im_start|>system\n"
    "You are a helpful assistant with access to functions.<|im_end|>\n"
    "<|im_start|>user\n"
    "What's the weather in Tokyo?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

# Switch to eval mode so dropout (including the LoRA dropout) is disabled;
# the model is otherwise left in training mode after trainer.train().
model.eval()

inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
# No gradients are needed for generation.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        # temperature only takes effect when sampling, so request it explicitly.
        do_sample=True,
        temperature=0.7,
    )
# Keep special tokens so the ChatML structure of the reply is visible.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))