KevinKeller committed on
Commit
27d693a
·
verified ·
1 Parent(s): 24e10bf

Upload train_pattern_selector.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_pattern_selector.py +81 -0
train_pattern_selector.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# /// script
# dependencies = ["trl>=0.17.0", "peft>=0.15.0", "datasets", "transformers", "accelerate", "bitsandbytes"]
# ///
"""Supervised fine-tuning of Qwen2.5-7B-Instruct on the cognitive pattern selector dataset.

The base model is loaded with 4-bit NF4 quantization, LoRA adapters are
attached to the attention and MLP projection layers, training runs through
TRL's ``SFTTrainer``, and the result is pushed to the Hugging Face Hub.
"""

import os
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

print("Loading dataset...")
dataset = load_dataset("KevinKeller/cognitive-pattern-selector-v1")
train_dataset = dataset["train"]
# The validation split is optional; ``.get`` yields None when it is absent.
eval_dataset = dataset.get("validation")
# A single flag drives the log line, the eval strategy and the trainer
# argument, so a missing or empty split disables evaluation consistently.
has_eval = eval_dataset is not None and len(eval_dataset) > 0

print(f"Train samples: {len(train_dataset)}")
if has_eval:
    print(f"Eval samples: {len(eval_dataset)}")

print("Loading model: Qwen/Qwen2.5-7B-Instruct...")
model_id = "Qwen/Qwen2.5-7B-Instruct"

# 4-bit quantization for fitting on A10G
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Fall back to EOS only when the checkpoint ships without a pad token.
# Overwriting unconditionally would replace a dedicated pad token if the
# tokenizer defines one.
# NOTE(review): the modified tokenizer is presumably what gets saved/pushed
# with the model, so the override would also leak downstream - confirm.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# LoRA config: adapters on every attention and MLP projection.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Training config
training_args = SFTConfig(
    output_dir="./pattern-selector-output",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch of 8 sequences per device
    learning_rate=2e-4,
    logging_steps=10,
    save_strategy="epoch",
    eval_strategy="epoch" if has_eval else "no",
    bf16=True,
    push_to_hub=True,
    hub_model_id="KevinKeller/cognitive-pattern-selector-qwen2.5-7b",
    report_to="none",
    # ``max_seq_length`` was renamed to ``max_length`` in TRL; with the
    # open-ended ``trl>=0.17.0`` pin the old keyword is rejected by newer
    # releases, so the current name is used.
    max_length=4096,
)

print("Starting training...")
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset if has_eval else None,
    peft_config=peft_config,
    processing_class=tokenizer,
    args=training_args,
)

trainer.train()
print("Training complete! Pushing to Hub...")
trainer.push_to_hub()
print("Done!")