# finetune_lfm2_2.6b_FIXED.py
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
    GPT2Tokenizer,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_from_disk
from dataclasses import dataclass
from typing import Any, Dict, List
import wandb
import os
import warnings
warnings.filterwarnings('ignore')
print("=" * 80)
print("LFM2-2.6B FINE-TUNING - FIXED VERSION")
print("=" * 80)
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")
if torch.cuda.is_available():
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"GPU Memory: {gpu_memory_gb:.1f} GB")
import bitsandbytes as bnb  # imported here only to verify the install
print("βœ… BitsAndBytes OK")
# Initialize W&B
wandb.init(
    project="liquid-ai-hackathon-kokorochat",
    name="LFM2-2.6B-counselor-FIXED",
    config={
        "model": "LFM2-2.6B",
        "dataset": "KokoroChat-MultiTurn",
        "task": "psychological-counseling"
    }
)
print("\n" + "=" * 80)
print("LOADING MODEL (WITH FALLBACK)")
print("=" * 80)
LOCAL_MODEL_PATH = "./models/LFM2-2.6B"
HF_MODEL_NAME = "LiquidAI/LFM2-2.6B"
# 1. Load tokenizer with GPT2 fallback
print("\n1. Loading tokenizer...")
try:
    tokenizer = AutoTokenizer.from_pretrained(
        LOCAL_MODEL_PATH,
        trust_remote_code=True,
        local_files_only=True
    )
    print(" βœ… LFM2 tokenizer loaded!")
except Exception as e:
    print(f" ⚠️ LFM2 tokenizer failed: {str(e)[:100]}")
    print(" πŸ”„ Falling back to the GPT2 tokenizer...")
    # NOTE: GPT2's vocabulary does not match LFM2's; this fallback keeps the
    # script running, but a model fine-tuned with a mismatched tokenizer
    # will not produce usable outputs.
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    print(" βœ… GPT2 tokenizer loaded!")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # right-pad for training (generation typically prefers left)
# 2. QLoRA config
print("\n2. Configuring QLoRA...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
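# Rough memory intuition (an estimate, not a measurement): NF4 stores weights
# in ~4 bits, so the 2.6B-parameter base model takes roughly 1.3-1.5 GB for
# weights instead of ~5.2 GB in bf16; double quantization saves a bit more by
# also quantizing the quantization constants.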
# 3. Load model with proper fallback
print("\n3. Loading LFM2-2.6B model...")
# First, try to ensure we have the custom model files
print(" πŸ“₯ Checking for custom model files...")
# Check if modeling files exist
custom_files = ["modeling_lfm2.py", "configuration_lfm2.py"]
has_custom_files = all(
    os.path.exists(os.path.join(LOCAL_MODEL_PATH, f))
    for f in custom_files
)
if not has_custom_files:
    print(" ⚠️ Custom model files missing in local directory")
    print(" πŸ“₯ Need to download from HuggingFace with custom code...")
    # Download with custom code
    from huggingface_hub import snapshot_download
    print(" ⏳ Downloading model with custom code (one-time)...")
    snapshot_download(
        repo_id=HF_MODEL_NAME,
        local_dir=LOCAL_MODEL_PATH,
        local_dir_use_symlinks=False,
        ignore_patterns=[]  # Don't ignore anything
    )
    print(" βœ… Model downloaded with custom code!")
# Now load the model
print(" ⏳ Loading model (~2-4 minutes)...")
try:
    # Try local first with trust_remote_code
    model = AutoModelForCausalLM.from_pretrained(
        LOCAL_MODEL_PATH,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,  # CRITICAL!
        torch_dtype=torch.bfloat16,
        local_files_only=False  # Allow downloading custom code if needed
    )
    print(" βœ… Model loaded from local!")
except Exception as e:
    print(f" ⚠️ Local load failed: {str(e)[:100]}")
    print(" πŸ“₯ Loading directly from HuggingFace...")
    # Load from HuggingFace Hub
    model = AutoModelForCausalLM.from_pretrained(
        HF_MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16
    )
    print(" βœ… Model loaded from HuggingFace!")
# Prepare the quantized model for training: PEFT casts layer norms to fp32,
# enables input-embedding gradients, and wires up gradient checkpointing.
model = prepare_model_for_kbit_training(model)
model.config.use_cache = False  # KV cache is incompatible with gradient checkpointing
print(" βœ… Model prepared!")
# 4. LoRA - 2.6B configuration
print("\n4. Applying LoRA (2.6B config)...")
lora_config = LoraConfig(
    r=64,  # Higher for 2.6B
    lora_alpha=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
print("\nπŸ“Š Trainable Parameters:")
model.print_trainable_parameters()
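# With r=64 on all seven attention/MLP projections, trainable adapter weights
# are typically on the order of 1-3% of the 2.6B base parameters (a ballpark
# figure; the printout above shows the exact count for this model).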
# 5. Load dataset
print("\n5. Loading dataset...")
dataset = load_from_disk("./kokorochat_processed_multiturn")
print(f" βœ… Training: {len(dataset['train']):,}, Val: {len(dataset['test']):,}")
# 6. Data Collator (same as 1.2B)
@dataclass
class DataCollatorForCausalLM:
    tokenizer: Any
    max_length: int = 2048

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        texts = [f["text"] for f in features]
        batch = self.tokenizer(
            texts,
            max_length=self.max_length,
            padding=True,
            truncation=True,
            return_tensors="pt"
        )
        # Causal LM: labels are the inputs (shifted inside the model); mask pad
        # positions with -100 so they are ignored by the cross-entropy loss.
        batch["labels"] = batch["input_ids"].clone()
        batch["labels"][batch["labels"] == self.tokenizer.pad_token_id] = -100
        return batch

data_collator = DataCollatorForCausalLM(tokenizer=tokenizer)
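# Optional sanity check (a minimal sketch, left commented out): collate two
# dummy samples and confirm every pad position is excluded from the loss.
# _batch = data_collator([{"text": "hello"}, {"text": "a longer example sentence"}])
# assert (_batch["labels"][_batch["input_ids"] == tokenizer.pad_token_id] == -100).all()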
# 7. Training Configuration - 2.6B optimized
print("\n6. Configuring training (2.6B optimized)...")
gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
if gpu_memory_gb >= 70:
    per_device_batch = 2
    grad_accum = 16
    print(f" πŸš€ {gpu_memory_gb:.0f}GB GPU β†’ batch=2, accum=16")
else:
    per_device_batch = 1
    grad_accum = 32
    print(f" ⚑ {gpu_memory_gb:.0f}GB GPU β†’ batch=1, accum=32")
training_args = TrainingArguments(
    output_dir="./lfm2-2.6b-checkpoints-fixed",
    # Batch (memory-adjusted for 2.6B)
    per_device_train_batch_size=per_device_batch,
    per_device_eval_batch_size=per_device_batch,
    gradient_accumulation_steps=grad_accum,
    # Learning (optimized for 2.6B)
    num_train_epochs=3,  # 2.6B learns faster
    learning_rate=2e-4,  # Lower for stability
    warmup_steps=200,
    lr_scheduler_type="cosine",
    # Optimization
    fp16=False,
    bf16=True,
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    optim="paged_adamw_8bit",
    report_to="wandb",
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    logging_dir="./logs",
    remove_unused_columns=False,
    dataloader_num_workers=4,
    dataloader_pin_memory=True,
)
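# paged_adamw_8bit keeps optimizer state in 8-bit with paged CUDA memory,
# which cuts optimizer VRAM substantially versus full-precision AdamW
# (a bitsandbytes feature; exact savings depend on the setup).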
effective_batch = per_device_batch * grad_accum
steps_per_epoch = len(dataset['train']) // effective_batch
total_steps = steps_per_epoch * 3
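# steps_per_epoch counts optimizer updates (samples // effective batch), which
# is what Trainer reports on a single GPU; warmup_steps=200 should stay well
# below total_steps for the cosine schedule to behave as intended.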
print("\n" + "=" * 80)
print("πŸ“Š 2.6B TRAINING CONFIGURATION")
print("=" * 80)
print(f"\nβœ… Batch Config:")
print(f" Per-device: {per_device_batch}")
print(f" Gradient accum: {grad_accum}")
print(f" β†’ Effective: {effective_batch}")
print(f"\nβœ… Learning Config:")
print(f" Learning rate: 2e-4 (vs 3e-4 for 1.2B)")
print(f" Epochs: 3 (vs 4 for 1.2B)")
print(f" LoRA rank: 64 (vs 32 for 1.2B)")
print(f"\nβœ… Training Stats:")
print(f" Training samples: {len(dataset['train']):,}")
print(f" Steps per epoch: {steps_per_epoch:,}")
print(f" Total steps: {total_steps:,}")
print(f"\n⏱️ Estimated Time:")
if gpu_memory_gb >= 80:
    print(f" ~5-8 hours on {gpu_memory_gb:.0f}GB GPU")
else:
    print(f" ~8-12 hours on {gpu_memory_gb:.0f}GB GPU")
# 8. Trainer (same as 1.2B)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
)
# 9. Start training
print("\n" + "=" * 80)
print("πŸš€ STARTING 2.6B TRAINING")
print("=" * 80)
print(f"πŸ“Š Monitor: https://wandb.ai/sandeeptechiot-ai/liquid-ai-hackathon-kokorochat\n")
try:
    trainer.train()
    print("\nβœ… TRAINING COMPLETE!")
except KeyboardInterrupt:
    print("\n⚠️ Interrupted - saving...")
    trainer.save_model("./lfm2-2.6b-interrupted")
except Exception as e:
    print(f"\n❌ Error: {e}")
    import traceback
    traceback.print_exc()
    raise
# 10. Save
output_dir = "./lfm2-2.6b-counselor-final"
lora_dir = "./lfm2-2.6b-counselor-lora"
trainer.save_model(output_dir)   # best checkpoint (load_best_model_at_end=True)
tokenizer.save_pretrained(output_dir)
model.save_pretrained(lora_dir)  # LoRA adapter weights only
print(f"\nβœ… Model saved to: {output_dir}")
wandb.finish()
print("\n" + "=" * 80)
print("πŸŽ‰ 2.6B TRAINING COMPLETE!")
print("=" * 80)