moro72842 committed on
Commit
f112fdf
·
verified ·
1 Parent(s): e30e2da

Upload train_cybersec_sft.py

Browse files
Files changed (1) hide show
  1. train_cybersec_sft.py +134 -0
train_cybersec_sft.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cybersecurity SFT Training Script
3
+
4
+ Base model: Qwen/Qwen2.5-Coder-7B-Instruct
5
+ Method: LoRA SFT with TRL SFTTrainer (set USE_LORA = False for full fine-tuning)
6
+ Dataset: moro72842/cybersecurity-sft-dataset (curated)
7
+ Recipe: CyberPal 2.0 inspired (LR 4e-5, warmup 0.15, 2 epochs)
8
+
9
+ Usage:
10
+ pip install transformers trl torch datasets trackio accelerate peft bitsandbytes
11
+ python train_cybersec_sft.py
12
+
13
+ Hardware: A10G (24GB) or A100 (80GB) recommended
14
+ """
15
+
16
+ import os
17
+ import json
18
+ import torch
19
+ from datasets import load_dataset
20
+ from transformers import AutoTokenizer, AutoModelForCausalLM
21
+ from trl import SFTTrainer, SFTConfig
22
+ from peft import LoraConfig
23
+
24
# ============ CONFIGURATION ============
# Hub identifiers and local paths used by main().
MODEL_NAME = "Qwen/Qwen2.5-Coder-7B-Instruct"
DATASET_NAME = "moro72842/cybersecurity-sft-dataset"
OUTPUT_DIR = "./cybersec-coder-7b-sft"
HUB_MODEL_ID = "moro72842/CyberCoder-7B-v1"

# Hyperparameters (CyberPal 2.0 inspired)
LEARNING_RATE = 4e-5
NUM_EPOCHS = 2
MAX_SEQ_LENGTH = 4096
WARMUP_RATIO = 0.15
# Per-device effective batch is PER_DEVICE_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS
# (2 * 8 = 16) sequences per optimizer step.
PER_DEVICE_BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 8
WEIGHT_DECAY = 0.01

# LoRA config: when USE_LORA is False, main() passes peft_config=None to the trainer.
USE_LORA = True
LORA_R = 64
LORA_ALPHA = 128
LORA_DROPOUT = 0.05
44
+
45
def _normalize_messages(example):
    """Make sure ``example["messages"]`` holds a list of chat messages.

    If the field is a string it is decoded as JSON. When decoding fails, or
    the decoded value is not a list, the raw string is wrapped as a single
    user turn so the row still has the conversational shape. Non-string
    values are passed through unchanged.

    Defined at module level (rather than nested in ``main``) so that
    ``Dataset.map(..., num_proc=N)`` worker processes can import it.
    """
    msgs = example["messages"]
    if isinstance(msgs, str):
        raw = msgs
        try:
            msgs = json.loads(raw)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; anything else
            # (KeyboardInterrupt, MemoryError, ...) should propagate.
            msgs = None
        if not isinstance(msgs, list):
            msgs = [{"role": "user", "content": raw}]
    example["messages"] = msgs
    return example


def main():
    """Run the end-to-end cybersecurity SFT job.

    Steps, in order:
      1. Print the run configuration.
      2. Best-effort initialise trackio (failure is reported, not fatal).
      3. Load the ``train`` split of ``DATASET_NAME`` and normalise the
         ``messages`` column.
      4. Hold out 2% of the rows for evaluation (seed 42).
      5. Build the optional LoRA config and the ``SFTConfig``.
      6. Train with ``SFTTrainer``, print the resulting metrics, save the
         model and push it to ``HUB_MODEL_ID``.
    """
    print("=" * 60)
    print("CYBERSECURITY SFT TRAINING")
    print("=" * 60)
    print(f"Model: {MODEL_NAME}")
    print(f"Dataset: {DATASET_NAME}")
    print(f"Output: {HUB_MODEL_ID}")
    if USE_LORA:
        print(f"LoRA: r={LORA_R}, alpha={LORA_ALPHA}")
    else:
        print("LoRA: disabled (full fine-tuning)")
    print(f"LR: {LEARNING_RATE}, Epochs: {NUM_EPOCHS}")
    print(f"Max seq length: {MAX_SEQ_LENGTH}")

    # Setup trackio. Deliberately best-effort: a missing or misconfigured
    # tracker must not prevent training from starting.
    try:
        import trackio
        trackio.init(project="cybersec-sft", name="cybercoder-7b-v1")
        print("Trackio initialized")
    except Exception as exc:
        print(f"Trackio init warning: {exc}")

    # Load dataset
    print("\nLoading dataset...")
    dataset = load_dataset(DATASET_NAME, split="train")
    print(f"Dataset loaded: {len(dataset)} examples")

    # Preprocess: guarantee every row carries a list under "messages".
    dataset = dataset.map(_normalize_messages, num_proc=4)
    split = dataset.train_test_split(test_size=0.02, seed=42)
    train_dataset = split["train"]
    eval_dataset = split["test"]
    print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")

    # LoRA config (None means the trainer receives no PEFT config).
    peft_config = None
    if USE_LORA:
        peft_config = LoraConfig(
            r=LORA_R,
            lora_alpha=LORA_ALPHA,
            lora_dropout=LORA_DROPOUT,
            bias="none",
            task_type="CAUSAL_LM",
            target_modules=[
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ],
        )

    # Training config
    # NOTE(review): `max_seq_length` has been renamed in newer TRL releases;
    # confirm the keyword against the installed TRL version.
    training_args = SFTConfig(
        output_dir=OUTPUT_DIR,
        learning_rate=LEARNING_RATE,
        num_train_epochs=NUM_EPOCHS,
        per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,
        per_device_eval_batch_size=PER_DEVICE_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        weight_decay=WEIGHT_DECAY,
        warmup_ratio=WARMUP_RATIO,
        max_seq_length=MAX_SEQ_LENGTH,
        packing=True,
        bf16=True,
        gradient_checkpointing=True,
        logging_strategy="steps",
        logging_steps=10,
        logging_first_step=True,
        disable_tqdm=True,
        eval_strategy="steps",
        eval_steps=200,
        save_strategy="steps",
        save_steps=500,
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=HUB_MODEL_ID,
        hub_strategy="every_save",
        dataloader_num_workers=4,
        dataloader_pin_memory=True,
        lr_scheduler_type="cosine",
        seed=42,
        report_to="all",
    )

    # Train
    # NOTE(review): the model is handed over by name, so the trainer decides
    # how it is loaded (including dtype). If memory is tight, consider setting
    # `model_init_kwargs` on SFTConfig -- verify against the TRL docs.
    trainer = SFTTrainer(
        model=MODEL_NAME,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        peft_config=peft_config,
    )

    # Report how much of the network is actually being updated.
    model = trainer.model
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

    print("\nStarting training...")
    train_result = trainer.train()

    for key, value in train_result.metrics.items():
        print(f" {key}: {value}")

    trainer.save_model()
    trainer.push_to_hub(commit_message="Final cybersecurity SFT model")
    print(f"\nModel: https://huggingface.co/{HUB_MODEL_ID}")
132
+
133
# Script entry point: run the training job only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()