# Spaces status: Runtime error
import os

import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import SFTTrainer
# Hub ID of the checkpoint to fine-tune. Any of these can be substituted:
#   'meta-llama/Meta-Llama-3-70B-Instruct'
#   'meta-llama/Llama-3.3-70B-Instruct'
#   'meta-llama/Meta-Llama-3-8B-Instruct'
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"
# Name under which the fine-tuned model is saved.
new_model = "llama-3-8b-custom"
# Load both training CSVs as a single "train" split.
# The paths are relative, so the CSV files must be reachable from the
# directory the script is run in (normally the script's own directory).
dataset = load_dataset(
    "csv",
    data_files=["data_training.csv", "data_training_1.csv"],
    split="train",
)
# 4-bit quantization settings used when loading the base model.
# Weights are stored as NF4; computation runs in float16.
compute_dtype = torch.float16
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
# Load the base model with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0},  # put the whole model on device index 0
    token=os.environ.get("HF_TOKEN"),  # access token read from the HF_TOKEN secret
)
# Disable the generation cache while training.
model.config.use_cache = False
# NOTE(review): presumably turns off emulation of the tensor-parallel
# pretraining layout — confirm against the model's config documentation.
model.config.pretraining_tp = 1
# Load the tokenizer that matches the base model.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
    token=os.environ.get("HF_TOKEN"),
)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# LoRA adapter settings handed to the trainer (rank 64, alpha 16,
# 10% dropout, no bias terms trained, causal language-modelling task).
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
# Training hyper-parameters.
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,  # -1: the run length is governed by num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,
    # The plain "constant" schedule applies no warmup, which silently made
    # warmup_ratio above a no-op; "constant_with_warmup" honours it and then
    # holds the learning rate constant.
    lr_scheduler_type="constant_with_warmup",
    report_to="tensorboard",
)
# Build the supervised fine-tuning trainer from the pieces defined above.
# NOTE(review): dataset_text_field, max_seq_length, tokenizer and packing are
# accepted directly by SFTTrainer only in some TRL releases — confirm against
# the installed trl version.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    # IMPORTANT: change "text" to the name of the CSV column that holds the
    # training data.
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)
# Run fine-tuning.
trainer.train()
# Save the fine-tuned model, and the tokenizer next to it so the output
# directory also carries the tokenizer configuration used for training
# (pad token and padding side were changed after loading).
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)