# NOTE(review): the following header lines ("Spaces:", "Runtime error",
# "Runtime error") appear to be paste artifacts from an external results
# table, not source code; kept as comments so the file is valid Python.
import os
import argparse
import json
from datetime import datetime
from typing import Dict, List, Any

try:
    import datasets
    from transformers import AutoTokenizer, TrainingArguments
    from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
    from trl import SFTTrainer
    import torch
except ImportError:
    # Best-effort bootstrap: install the training stack and retry the imports.
    print("Installing required packages...")
    import subprocess
    import sys
    # Invoke pip through the *current* interpreter (`python -m pip`) instead of
    # a bare "pip" executable, which may be missing from PATH or belong to a
    # different Python installation/virtualenv than the one running this script.
    subprocess.check_call([sys.executable, "-m", "pip", "install",
                           "transformers>=4.36.0",
                           "peft>=0.7.0",
                           "datasets>=2.14.0",
                           "accelerate>=0.25.0",
                           "trl>=0.7.1",
                           "bitsandbytes>=0.40.0",
                           "torch>=2.0.0"])
    import datasets
    from transformers import AutoTokenizer, TrainingArguments
    from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
    from trl import SFTTrainer
    import torch
def load_model_and_tokenizer(model_name_or_path: str,
                             adapter_path: str = None,
                             quantize: bool = True,
                             token: str = None):
    """
    Load a causal-LM model and its tokenizer.

    Args:
        model_name_or_path: Hub ID or local path of the base model.
        adapter_path: Optional path to a PEFT adapter to layer on top of the
            base weights.
        quantize: If True (default), load the weights with 4-bit NF4
            quantization — needed to fit a very large model in GPU memory.
        token: Optional Hugging Face token for gated models.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    from transformers import BitsAndBytesConfig, AutoModelForCausalLM

    print(f"Loading model: {model_name_or_path}")

    # 4-bit NF4 quantization with double quantization, fp16 compute dtype.
    bnb_config = None
    if quantize:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=quantize,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        quantization_config=bnb_config,
        device_map="auto",
        token=token,
    )

    if adapter_path:
        # Layer a previously trained PEFT adapter on top of the base weights.
        print(f"Loading adapter from {adapter_path}")
        from peft import PeftModel
        model = PeftModel.from_pretrained(model, adapter_path)

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token=token)
    # Many causal LMs ship without a pad token; reuse EOS so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer
def prepare_dataset(data_path: str):
    """
    Load train/validation splits from JSON files under *data_path*.

    Expects ``train.json`` and ``validation.json`` inside the directory,
    each holding a JSON list of example records.

    Returns:
        A ``(train_dataset, eval_dataset)`` pair of ``datasets.Dataset``.

    Raises:
        ValueError: if *data_path* is not a directory or either file is missing.
    """
    # Guard clauses: validate the directory and both split files up front.
    if not os.path.isdir(data_path):
        raise ValueError(f"Data path {data_path} is not a directory")

    train_path = os.path.join(data_path, "train.json")
    val_path = os.path.join(data_path, "validation.json")
    if not (os.path.exists(train_path) and os.path.exists(val_path)):
        raise ValueError(f"Training data files not found in {data_path}")

    with open(train_path, 'r', encoding='utf-8') as f:
        train_records = json.load(f)
    with open(val_path, 'r', encoding='utf-8') as f:
        val_records = json.load(f)

    train_dataset = datasets.Dataset.from_list(train_records)
    eval_dataset = datasets.Dataset.from_list(val_records)

    print(f"Loaded {len(train_dataset)} training examples and {len(eval_dataset)} validation examples")
    return train_dataset, eval_dataset
def finetune(
    model_name: str,
    dataset_path: str,
    output_dir: str,
    hub_model_id: str = None,
    hf_token: str = None,
    use_peft: bool = True,
    num_train_epochs: int = 3,
    learning_rate: float = 2e-5,
    bf16: bool = True,
    quantize: bool = True,
    max_seq_length: int = 2048,
    gradient_accumulation_steps: int = 2
):
    """Fine-tune the model with PEFT on the provided dataset.

    Args:
        model_name: Base model Hub ID or local path.
        dataset_path: Directory containing train.json and validation.json.
        output_dir: Where to save checkpoints; auto-generated if falsy.
        hub_model_id: Optional Hub repo to push the model to.
        hf_token: Hugging Face token (gated models / pushing to Hub).
        use_peft: Train LoRA adapters instead of full weights.
        num_train_epochs: Number of passes over the training data.
        learning_rate: Peak learning rate (cosine schedule).
        bf16: Use bfloat16 precision if available.
        quantize: Load the base model 4-bit quantized.
        max_seq_length: Maximum token length per training example.
        gradient_accumulation_steps: Steps to accumulate before an update.

    Returns:
        The output directory the fine-tuned model was saved to.
    """
    import inspect  # stdlib; used for cross-version keyword compatibility below

    # Set up output directory (timestamped default keeps runs from colliding).
    if not output_dir:
        output_dir = f"llama3-finetuned-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"
    os.makedirs(output_dir, exist_ok=True)

    # Load datasets and the base model.
    train_dataset, eval_dataset = prepare_dataset(dataset_path)
    model, tokenizer = load_model_and_tokenizer(
        model_name,
        quantize=quantize,
        token=hf_token
    )

    # Set up PEFT configuration if using PEFT.
    peft_config = None
    if use_peft:
        print("Setting up PEFT (Parameter-Efficient Fine-Tuning)")
        if quantize:
            # Required preparation step for training on top of k-bit
            # quantized weights.
            model = prepare_model_for_kbit_training(model)
        peft_config = LoraConfig(
            r=16,              # Rank dimension
            lora_alpha=32,     # Scale parameter
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM",
            target_modules=[
                "q_proj",
                "k_proj",
                "v_proj",
                "o_proj",
                "gate_proj",
                "up_proj",
                "down_proj"
            ]
        )

    # BUG FIX: transformers renamed `evaluation_strategy` to `eval_strategy`
    # (the old name was removed in 4.46); with the floating `transformers>=4.36`
    # pin this file installs, passing the old keyword raises a TypeError at
    # runtime. Pick whichever keyword the installed version accepts.
    ta_params = inspect.signature(TrainingArguments.__init__).parameters
    eval_key = "eval_strategy" if "eval_strategy" in ta_params else "evaluation_strategy"

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=1,  # Adjust based on GPU memory
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        weight_decay=0.01,
        max_grad_norm=0.3,
        logging_steps=10,
        optim="paged_adamw_32bit",
        lr_scheduler_type="cosine",
        eval_steps=0.1,   # Evaluate every 10% of training (fractional = ratio)
        save_strategy="steps",
        save_steps=0.1,   # Save every 10% of training
        save_total_limit=3,
        bf16=bf16,        # Use bfloat16 precision if available
        push_to_hub=bool(hub_model_id),
        hub_model_id=hub_model_id,
        hub_token=hf_token,
        **{eval_key: "steps"},
    )

    # BUG FIX: newer trl releases dropped SFTTrainer's `tokenizer` and
    # `max_seq_length` keywords; filter the kwargs down to what the installed
    # version's signature actually accepts to avoid a TypeError.
    trainer_kwargs = dict(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        peft_config=peft_config,
        tokenizer=tokenizer,
        max_seq_length=max_seq_length,
    )
    sft_params = inspect.signature(SFTTrainer.__init__).parameters
    if not any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sft_params.values()):
        trainer_kwargs = {k: v for k, v in trainer_kwargs.items() if k in sft_params}
    trainer = SFTTrainer(**trainer_kwargs)

    # Train the model.
    print("Starting training...")
    trainer.train()

    # Save the fine-tuned model (adapter weights when PEFT is enabled).
    print(f"Saving model to {output_dir}")
    trainer.save_model()

    # Push to hub if specified.
    if hub_model_id and hf_token:
        print(f"Pushing model to Hugging Face Hub: {hub_model_id}")
        trainer.push_to_hub()

    return output_dir
if __name__ == "__main__":
    # Command-line entry point: parse options and kick off fine-tuning.
    arg_parser = argparse.ArgumentParser(description="Fine-tune Llama 3.3 with your data")

    # Model / data locations.
    arg_parser.add_argument("--model_name", type=str, default="nvidia/Llama-3_3-Nemotron-Super-49B-v1",
                            help="Base model to fine-tune")
    arg_parser.add_argument("--dataset_path", type=str, required=True,
                            help="Path to the directory containing train.json and validation.json")
    arg_parser.add_argument("--output_dir", type=str, default=None,
                            help="Directory to save the fine-tuned model")
    arg_parser.add_argument("--hub_model_id", type=str, default=None,
                            help="Hugging Face Hub model ID to push the model to")
    arg_parser.add_argument("--hf_token", type=str, default=None,
                            help="Hugging Face token for accessing gated models and pushing to hub")

    # Feature toggles (flags disable the default-on behavior).
    arg_parser.add_argument("--no_peft", action='store_true',
                            help="Disable PEFT/LoRA (not recommended for large models)")
    arg_parser.add_argument("--no_quantize", action='store_true',
                            help="Disable quantization (requires much more VRAM)")
    arg_parser.add_argument("--no_bf16", action='store_true',
                            help="Disable bf16 precision")

    # Training hyperparameters.
    arg_parser.add_argument("--epochs", type=int, default=3,
                            help="Number of training epochs")
    arg_parser.add_argument("--learning_rate", type=float, default=2e-5,
                            help="Learning rate")
    arg_parser.add_argument("--max_seq_length", type=int, default=2048,
                            help="Maximum sequence length for training")
    arg_parser.add_argument("--gradient_accumulation_steps", type=int, default=2,
                            help="Gradient accumulation steps")

    cli_args = arg_parser.parse_args()

    # Fall back to the environment for the token when it isn't given on the CLI.
    token = cli_args.hf_token or os.environ.get("HF_TOKEN")

    finetune(
        model_name=cli_args.model_name,
        dataset_path=cli_args.dataset_path,
        output_dir=cli_args.output_dir,
        hub_model_id=cli_args.hub_model_id,
        hf_token=token,
        use_peft=not cli_args.no_peft,
        num_train_epochs=cli_args.epochs,
        learning_rate=cli_args.learning_rate,
        bf16=not cli_args.no_bf16,
        quantize=not cli_args.no_quantize,
        max_seq_length=cli_args.max_seq_length,
        gradient_accumulation_steps=cli_args.gradient_accumulation_steps
    )