"""
Training script for GoodGlinda-7B.

Simplified reproduction skeleton. I ran this for 72 hours straight on an
i7-12700 with an overclocked and undervolted RTX 4060/5070 Ti. At hour 14 it
threw OOM errors until I fixed the 83°C thermal throttling with a thermal
paste replacement. A water-cooled setup is advised.
"""
| |
|
import argparse

import torch

import deepspeed
from peft import (
    LoraConfig,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
| |
|
def main():
    """Prepare a 4-bit QLoRA fine-tuning setup for a causal LM.

    Parses CLI args, loads the base model quantized to nf4, attaches LoRA
    adapters, builds the tokenizer and TrainingArguments, and reports the
    trainable-parameter count.

    NOTE(review): this skeleton stops short of training — no dataset is
    loaded and no Trainer is constructed; it only prepares the model and
    configuration, then prints status messages.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="Qwen/Qwen2.5-7B-Instruct")
    parser.add_argument("--output_dir", type=str, default="./output")
    parser.add_argument("--deepspeed", type=str, default=None)
    args = parser.parse_args()

    # Bug fix: bnb_4bit_* flags are not valid bare kwargs for
    # from_pretrained — they must be wrapped in a BitsAndBytesConfig and
    # passed as quantization_config, otherwise the nf4 / double-quant
    # settings are silently dropped (and newer transformers rejects them).
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        args.model_name,
        quantization_config=quant_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Required for k-bit training with gradient_checkpointing=True: casts
    # norm layers to fp32 and enables input grads so gradients can flow
    # past the frozen quantized base weights.
    model = prepare_model_for_kbit_training(model)

    lora_config = LoraConfig(
        r=64,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )
    model = get_peft_model(model, lora_config)

    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    # Reuse EOS as the padding token — presumably the base model ships
    # without a dedicated pad token; verify for the chosen checkpoint.
    tokenizer.pad_token = tokenizer.eos_token

    training_args = TrainingArguments(
        output_dir=args.output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        learning_rate=2e-4,
        warmup_steps=500,
        logging_steps=10,
        save_steps=500,
        bf16=True,
        deepspeed=args.deepspeed,
        gradient_checkpointing=True,
        optim="adamw_torch",
    )

    print("Model loaded. Ready for training.")
    print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
    print("Warning: This is a simplified skeleton. I trained for 72h on 50k samples.")
    print("Watch your thermals. I hit 83°C at hour 14 and had to repaste.")
| |
|
# Script entry point: run the training setup only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()