| |
| import torch |
| from transformers import ( |
| AutoTokenizer, |
| AutoModelForCausalLM, |
| TrainingArguments, |
| Trainer, |
| BitsAndBytesConfig, |
| GPT2Tokenizer |
| ) |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training |
| from datasets import load_from_disk |
| from dataclasses import dataclass |
| from typing import Any, Dict, List |
| import wandb |
| import os |
| import warnings |
| warnings.filterwarnings('ignore') |
|
|
| print("=" * 80) |
| print("LFM2-2.6B FINE-TUNING - FIXED VERSION") |
| print("=" * 80) |
| print(f"PyTorch: {torch.__version__}") |
| print(f"CUDA: {torch.cuda.is_available()}") |
| print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}") |
|
|
| if torch.cuda.is_available(): |
| gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3) |
| print(f"GPU Memory: {gpu_memory_gb:.1f} GB") |
|
|
| import bitsandbytes as bnb |
| print("β
BitsAndBytes OK") |
|
|
| |
| wandb.init( |
| project="liquid-ai-hackathon-kokorochat", |
| name="LFM2-2.6B-counselor-FIXED", |
| config={ |
| "model": "LFM2-2.6B", |
| "dataset": "KokoroChat-MultiTurn", |
| "task": "psychological-counseling" |
| } |
| ) |
|
|
| print("\n" + "=" * 80) |
| print("LOADING MODEL (WITH FALLBACK)") |
| print("=" * 80) |
|
|
| LOCAL_MODEL_PATH = "./models/LFM2-2.6B" |
| HF_MODEL_NAME = "LiquidAI/LFM2-2.6B" |
|
|
| |
| print("\n1. Loading tokenizer...") |
| try: |
| tokenizer = AutoTokenizer.from_pretrained( |
| LOCAL_MODEL_PATH, |
| trust_remote_code=True, |
| local_files_only=True |
| ) |
| print(" β
LFM2 tokenizer loaded!") |
| except Exception as e: |
| print(f" β οΈ LFM2 tokenizer failed") |
| print(" π Using GPT2 tokenizer...") |
| tokenizer = GPT2Tokenizer.from_pretrained("gpt2") |
| print(" β
GPT2 tokenizer loaded!") |
|
|
| tokenizer.pad_token = tokenizer.eos_token |
| tokenizer.padding_side = "right" |
|
|
| |
| print("\n2. Configuring QLoRA...") |
| bnb_config = BitsAndBytesConfig( |
| load_in_4bit=True, |
| bnb_4bit_quant_type="nf4", |
| bnb_4bit_compute_dtype=torch.bfloat16, |
| bnb_4bit_use_double_quant=True, |
| ) |
|
|
| |
| print("\n3. Loading LFM2-2.6B model...") |
|
|
| |
| print(" π₯ Checking for custom model files...") |
|
|
| |
| custom_files = ["modeling_lfm2.py", "configuration_lfm2.py"] |
| has_custom_files = all( |
| os.path.exists(os.path.join(LOCAL_MODEL_PATH, f)) |
| for f in custom_files |
| ) |
|
|
| if not has_custom_files: |
| print(" β οΈ Custom model files missing in local directory") |
| print(" π₯ Need to download from HuggingFace with custom code...") |
| |
| |
| from huggingface_hub import snapshot_download |
| |
| print(" β³ Downloading model with custom code (one-time)...") |
| snapshot_download( |
| repo_id=HF_MODEL_NAME, |
| local_dir=LOCAL_MODEL_PATH, |
| local_dir_use_symlinks=False, |
| ignore_patterns=[] |
| ) |
| print(" β
Model downloaded with custom code!") |
|
|
| |
| print(" β³ Loading model (~2-4 minutes)...") |
|
|
| try: |
| |
| model = AutoModelForCausalLM.from_pretrained( |
| LOCAL_MODEL_PATH, |
| quantization_config=bnb_config, |
| device_map="auto", |
| trust_remote_code=True, |
| torch_dtype=torch.bfloat16, |
| local_files_only=False |
| ) |
| print(" β
Model loaded from local!") |
| |
| except Exception as e: |
| print(f" β οΈ Local load failed: {str(e)[:100]}") |
| print(" π₯ Loading directly from HuggingFace...") |
| |
| |
| model = AutoModelForCausalLM.from_pretrained( |
| HF_MODEL_NAME, |
| quantization_config=bnb_config, |
| device_map="auto", |
| trust_remote_code=True, |
| torch_dtype=torch.bfloat16 |
| ) |
| print(" β
Model loaded from HuggingFace!") |
|
|
| model = prepare_model_for_kbit_training(model) |
| model.config.use_cache = False |
| print(" β
Model prepared!") |
|
|
| |
| print("\n4. Applying LoRA (2.6B config)...") |
| lora_config = LoraConfig( |
| r=64, |
| lora_alpha=128, |
| target_modules=["q_proj", "k_proj", "v_proj", "o_proj", |
| "gate_proj", "up_proj", "down_proj"], |
| lora_dropout=0.05, |
| bias="none", |
| task_type="CAUSAL_LM" |
| ) |
|
|
| model = get_peft_model(model, lora_config) |
| print("\nπ Trainable Parameters:") |
| model.print_trainable_parameters() |
|
|
| |
| print("\n5. Loading dataset...") |
| dataset = load_from_disk("./kokorochat_processed_multiturn") |
| print(f" β
Training: {len(dataset['train']):,}, Val: {len(dataset['test']):,}") |
|
|
| |
| @dataclass |
| class DataCollatorForCausalLM: |
| tokenizer: Any |
| max_length: int = 2048 |
| |
| def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: |
| texts = [f["text"] for f in features] |
| batch = self.tokenizer( |
| texts, |
| max_length=self.max_length, |
| padding=True, |
| truncation=True, |
| return_tensors="pt" |
| ) |
| batch["labels"] = batch["input_ids"].clone() |
| batch["labels"][batch["labels"] == self.tokenizer.pad_token_id] = -100 |
| return batch |
|
|
data_collator = DataCollatorForCausalLM(tokenizer=tokenizer)

print("\n6. Configuring training (2.6B optimized)...")

# Size the batch by available VRAM. Guard the CUDA query: the original called
# get_device_properties(0) unconditionally here (unlike the guarded probe at
# startup) and would crash on a CPU-only host.
if torch.cuda.is_available():
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
else:
    gpu_memory_gb = 0.0

if gpu_memory_gb >= 70:
    per_device_batch = 2
    grad_accum = 16
    print(f"   🚀 {gpu_memory_gb:.0f}GB GPU → batch=2, accum=16")
else:
    per_device_batch = 1
    grad_accum = 32
    print(f"   ⚡ {gpu_memory_gb:.0f}GB GPU → batch=1, accum=32")
|
|
# Hyperparameters for the 2.6B model under QLoRA: bf16 compute, cosine decay,
# paged 8-bit AdamW, gradient checkpointing, and checkpoint selection on
# eval_loss. Grouped by concern for readability.
_batch_args = dict(
    per_device_train_batch_size=per_device_batch,
    per_device_eval_batch_size=per_device_batch,
    gradient_accumulation_steps=grad_accum,
)
_schedule_args = dict(
    num_train_epochs=3,
    learning_rate=2e-4,
    warmup_steps=200,
    lr_scheduler_type="cosine",
)
_eval_and_save_args = dict(
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)
_runtime_args = dict(
    fp16=False,
    bf16=True,
    optim="paged_adamw_8bit",
    report_to="wandb",
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    logging_dir="./logs",
    remove_unused_columns=False,
    dataloader_num_workers=4,
    dataloader_pin_memory=True,
)
training_args = TrainingArguments(
    output_dir="./lfm2-2.6b-checkpoints-fixed",
    **_batch_args,
    **_schedule_args,
    **_eval_and_save_args,
    **_runtime_args,
)
|
|
# Derived run statistics for the log banner.
effective_batch = per_device_batch * grad_accum
steps_per_epoch = len(dataset['train']) // effective_batch
total_steps = steps_per_epoch * 3  # keep in sync with num_train_epochs above

print("\n" + "=" * 80)
print("📋 2.6B TRAINING CONFIGURATION")
print("=" * 80)
print("\n✅ Batch Config:")
print(f"   Per-device: {per_device_batch}")
print(f"   Gradient accum: {grad_accum}")
print(f"   → Effective: {effective_batch}")

print("\n✅ Learning Config:")
print("   Learning rate: 2e-4 (vs 3e-4 for 1.2B)")
print("   Epochs: 3 (vs 4 for 1.2B)")
print("   LoRA rank: 64 (vs 32 for 1.2B)")

print("\n✅ Training Stats:")
print(f"   Training samples: {len(dataset['train']):,}")
print(f"   Steps per epoch: {steps_per_epoch:,}")
print(f"   Total steps: {total_steps:,}")

print("\n⏱️ Estimated Time:")
if gpu_memory_gb >= 80:
    print(f"   ~5-8 hours on {gpu_memory_gb:.0f}GB GPU")
else:
    print(f"   ~8-12 hours on {gpu_memory_gb:.0f}GB GPU")
|
|
| |
# Assemble the HF Trainer; tokenization happens lazily inside the collator,
# so the raw-text datasets are passed through unchanged.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
)

print("\n" + "=" * 80)
print("🚀 STARTING 2.6B TRAINING")
print("=" * 80)
print("📊 Monitor: https://wandb.ai/sandeeptechiot-ai/liquid-ai-hackathon-kokorochat\n")
|
|
# Run training. Ctrl-C saves a partial checkpoint instead of losing the run;
# any other error is logged with a traceback and re-raised so the failure is
# visible to the shell / scheduler.
try:
    trainer.train()
    print("\n✅ TRAINING COMPLETE!")
except KeyboardInterrupt:
    print("\n⚠️ Interrupted - saving...")
    trainer.save_model("./lfm2-2.6b-interrupted")
except Exception as e:
    print(f"\n❌ Error: {e}")
    import traceback
    traceback.print_exc()
    raise
|
|
| |
| output_dir = "./lfm2-2.6b-counselor-final" |
| lora_dir = "./lfm2-2.6b-counselor-lora" |
|
|
| trainer.save_model(output_dir) |
| tokenizer.save_pretrained(output_dir) |
| model.save_pretrained(lora_dir) |
|
|
| print(f"\nβ
Model saved to: {output_dir}") |
|
|
| wandb.finish() |
|
|
| print("\n" + "=" * 80) |
| print("π 2.6B TRAINING COMPLETE!") |
| print("=" * 80) |
|
|