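"""Fine-tune a (quantized) causal LM with LoRA adapters on open-web-math,
pushing checkpoints to the Hugging Face Hub, and optionally launch an MMLU
evaluation of the result as a SLURM job."""
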
import argparse
import random
import subprocess

import torch
import transformers
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from transformers.trainer_callback import TrainerCallback

def generate_mmlu_slurm(model_path, hub_model_id, output_dir, num_gpus=1):
    model_short_name = model_path.split('/')[-1]
    filename = f"run_mmlu_{model_short_name}.sbatch"
    # Random port baked into the job name so concurrent jobs are distinguishable.
    port = random.randint(10000, 65535)
    content = f"""#!/bin/bash
#SBATCH --nodes=1
#SBATCH --gpus-per-node={num_gpus}
#SBATCH --time=24:00:00
#SBATCH --job-name={port}_mmlu_{model_short_name}
#SBATCH --mail-user=davisbrownr@gmail.com
#SBATCH --mail-type=ALL

source /opt/rh/devtoolset-10/enable
source /data/davis_brown/miniconda3/bin/activate
conda init
conda activate quip

# Evaluate the base model with the LoRA adapter applied via lm-eval's peft model_arg.
CUDA_VISIBLE_DEVICES=0 lm_eval \\
    --model hf \\
    --model_args pretrained={model_path},parallelize=True,peft={hub_model_id} \\
    --tasks mmlu \\
    --device cuda:0 \\
    --batch_size 8 \\
    --output_path={output_dir}/{hub_model_id} \\
    --num_fewshot 5
"""
    with open(filename, 'w') as f:
        f.write(content)
    print(f"Generated MMLU evaluation SLURM script: {filename}")
    return filename

def launch_mmlu_evaluation(model_path, hub_model_id, output_dir):
    slurm_script = generate_mmlu_slurm(model_path, hub_model_id, output_dir)
    try:
        subprocess.run(["sbatch", slurm_script], check=True)
        print(f"Submitted MMLU evaluation job: {slurm_script}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to submit MMLU evaluation job: {e}")

# Custom callback that saves and pushes the adapter to the Hub every
# `push_frequency` optimizer steps, so partial progress survives preemption.
class PushToHubCallback(TrainerCallback):
    def __init__(self, trainer, push_frequency):
        self.trainer = trainer
        self.push_frequency = push_frequency

    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % self.push_frequency == 0:
            self.trainer.save_model()
            self.trainer.push_to_hub(
                commit_message=f"Training in progress - Step {state.global_step}"
            )

def main(args):
    if args.only_mmlu:
        launch_mmlu_evaluation(args.model_id, args.hub_model_id, args.output_dir)
        return

    model_id = args.model_id
    output_dir = args.output_dir
    hub_model_id = args.hub_model_id

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", torch_dtype="auto", low_cpu_mem_usage=True
    )

    # Adapt all attention and MLP projections; 'lm_head' is deliberately left out.
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'down_proj', 'up_proj']
    config = LoraConfig(
        r=args.lora_rank,
        lora_alpha=args.lora_rank,
        target_modules=target_modules,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        use_rslora=True,
    )
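    # With use_rslora=True, PEFT scales the adapter by lora_alpha / sqrt(r) instead of
    # lora_alpha / r (rank-stabilized LoRA), keeping update magnitudes comparable across
    # ranks; since lora_alpha is set equal to r here, the effective scale is sqrt(r).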
    model = get_peft_model(model, config)
    model.print_trainable_parameters()
    # Needed so gradient checkpointing can backpropagate through the frozen base model's inputs.
    model.enable_input_require_grads()

    # data = load_dataset("togethercomputer/RedPajama-Data-1T-Sample")
    data = load_dataset("open-web-math/open-web-math")

    max_seq_length = args.max_seq_length
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.model_max_length = max_seq_length

    def preprocess_function(examples):
        return tokenizer(examples["text"], truncation=True, max_length=max_seq_length, padding="max_length")
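    # Tokenize the full training split eagerly; padding every example to max_seq_length
    # keeps tensor shapes static at the cost of wasted compute on short documents.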
    processed_dataset = data["train"].map(preprocess_function, batched=True)

    torch.cuda.empty_cache()
    trainer = transformers.Trainer(
        model=model,
        train_dataset=processed_dataset,
        args=TrainingArguments(
            per_device_train_batch_size=args.batch_size,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            gradient_checkpointing=True,
            warmup_steps=200,
            max_steps=args.max_steps,
            learning_rate=2e-4,
            bf16=True,
            logging_steps=25,
            output_dir=output_dir,
            optim="adamw_bnb_8bit",
            logging_first_step=True,
            push_to_hub=True,
            hub_model_id=hub_model_id,
        ),
        # mlm=False yields causal-LM labels: a copy of input_ids with pad tokens masked out.
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )
    # The KV cache is incompatible with gradient checkpointing, so disable it for training.
    model.config.use_cache = False

    push_frequency = 100
    trainer.add_callback(PushToHubCallback(trainer, push_frequency))

    trainer.train()
    final_push = trainer.push_to_hub(commit_message="Training complete")
    print(f"Training complete. Pushed to: {final_push}")
    # Optionally evaluate the adapted model on MMLU as a follow-up SLURM job.
    if args.run_mmlu:
        launch_mmlu_evaluation(model_id, hub_model_id, output_dir)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fine-tune a language model and/or run MMLU evaluation")
    parser.add_argument("--model_id", type=str, default="ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16",
                        help="Model ID to fine-tune or evaluate")
    parser.add_argument("--max_seq_length", type=int, default=2048, help="Maximum sequence length")
    parser.add_argument("--output_dir", type=str, required=True, help="Output directory for checkpoints and results")
    parser.add_argument("--hub_model_id", type=str,
                        default="davisrbr/ISTA-DASLab-Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16-hf-100000_r8_cont",
                        help="Hub model ID for pushing checkpoints / locating LoRA adapter weights")
    parser.add_argument("--batch_size", type=int, default=1, help="Per-device batch size")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=8, help="Gradient accumulation steps")
    parser.add_argument("--max_steps", type=int, default=50000, help="Maximum number of training steps")
    parser.add_argument("--run_mmlu", action="store_true", help="Run MMLU evaluation after training")
    parser.add_argument("--lora_rank", type=int, default=8, help="Rank of LoRA adaptation")
    parser.add_argument("--only_mmlu", action="store_true", help="Only run MMLU evaluation without training")
    parser.add_argument("--launch_slurm", action="store_true", help="Launch the entire script as a SLURM job")
    parser.add_argument("--num_gpus", type=int, default=4, help="Number of GPUs to use for training")
    parser.add_argument("--commit_hash", type=str, help="Specific commit hash to evaluate (for MMLU only)")
    args = parser.parse_args()
    main(args)
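# Example invocations (script name and paths are illustrative, not from the repo):
#   python finetune_lora.py --output_dir ./runs/llama3-aqlm-r8 --run_mmlu
#   python finetune_lora.py --output_dir ./runs/eval-only --only_mmlu
#
# Loading the pushed adapter later for inference (a sketch; IDs are the defaults above):
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained(args.model_id, device_map="auto")
#   model = PeftModel.from_pretrained(base, args.hub_model_id)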