Pista1981 committed
Commit 5126455 · verified · 1 parent: 06529f3

Upload train.py with huggingface_hub

Files changed (1):
train.py +107 -0
train.py ADDED
@@ -0,0 +1,107 @@
+ #!/usr/bin/env python3
+ """
+ 🔧 LoRA Training Script
+ Generated by: MLResearcher (Hivemind Colony)
+ Adapter: hivemind-code-6440183e
+ Base Model: microsoft/Phi-3-mini-4k-instruct
+ Task: code
+ """
+
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
+ from datasets import load_dataset
+ from trl import SFTTrainer
+ import bitsandbytes as bnb  # required for 4-bit quantization and the paged 8-bit optimizer
+
+ # ============ CONFIG ============
+ BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct"
+ ADAPTER_NAME = "hivemind-code-6440183e"
+
+ # LoRA Configuration
+ # Note: Phi-3 uses a fused attention projection (qkv_proj); separate q_proj/v_proj
+ # modules do not exist in this architecture, so the fused layer is targeted instead.
+ lora_config = LoraConfig(
+     r=8,
+     lora_alpha=16,
+     lora_dropout=0.05,
+     target_modules=['qkv_proj'],
+     bias="none",
+     task_type="CAUSAL_LM"
+ )
+
+ # Training Configuration
+ training_args = TrainingArguments(
+     output_dir=f"./{ADAPTER_NAME}",
+     num_train_epochs=1,
+     per_device_train_batch_size=2,
+     gradient_accumulation_steps=4,
+     learning_rate=5e-05,
+     weight_decay=0.01,
+     warmup_ratio=0.03,
+     lr_scheduler_type="cosine",
+     logging_steps=10,
+     save_strategy="epoch",
+     fp16=True,
+     optim="paged_adamw_8bit",
+     report_to="none"
+ )
+
+ # ============ LOAD MODEL ============
+ print(f"Loading {BASE_MODEL}...")
+
+ # 4-bit quantization for QLoRA
+ from transformers import BitsAndBytesConfig
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_use_double_quant=True
+ )
+
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     quantization_config=bnb_config,
+     device_map="auto",
+     trust_remote_code=True
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+
+ # Prepare model for training
+ model = prepare_model_for_kbit_training(model)
+ model = get_peft_model(model, lora_config)
+
+ # print_trainable_parameters() prints its own summary and returns None
+ model.print_trainable_parameters()
+
+ # ============ LOAD DATASET ============
+ # Replace with your dataset; format_prompt below expects 'instruction' and 'response' columns
+ dataset = load_dataset("your-dataset-here", split="train")
+
+ def format_prompt(example):
+     return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"
+
+ # ============ TRAIN ============
+ # Note: these keyword arguments follow older trl releases; newer trl versions
+ # move max_seq_length/packing into SFTConfig.
+ trainer = SFTTrainer(
+     model=model,
+     train_dataset=dataset,
+     tokenizer=tokenizer,
+     args=training_args,
+     max_seq_length=4096,
+     formatting_func=format_prompt,
+     packing=True
+ )
+
+ print("Starting training...")
+ trainer.train()
+
+ # ============ SAVE ============
+ print(f"Saving adapter to ./{ADAPTER_NAME}")
+ trainer.save_model(f"./{ADAPTER_NAME}")
+
+ # Push to HuggingFace
+ print("Pushing to HuggingFace Hub...")
+ model.push_to_hub(f"Pista1981/{ADAPTER_NAME}")
+ tokenizer.push_to_hub(f"Pista1981/{ADAPTER_NAME}")
+
+ print("✅ Training complete!")