pacman1337 committed on
Commit
033e91d
·
verified ·
1 Parent(s): 3926564

Upload train.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train.py +194 -89
train.py CHANGED
@@ -1,147 +1,252 @@
1
  #!/usr/bin/env python3
2
  """
3
- RAYAP-CODER Training Script
4
- D1337 SOVEREIGN LABS - DO NOT EMBARRASS US
 
5
  """
6
 
7
  import os
8
  import torch
9
- from datasets import load_dataset
10
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
11
- from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
12
- from trl import SFTTrainer, SFTConfig
13
- from huggingface_hub import login
14
 
15
  # ============================================================
16
- # CONFIG - Token from Space Secrets
17
  # ============================================================
18
  HF_TOKEN = os.environ.get("HF_TOKEN")
19
  if not HF_TOKEN:
20
  raise ValueError("HF_TOKEN not set! Add it to Space Secrets.")
 
21
  BASE_MODEL = "huihui-ai/Qwen3-30B-A3B-abliterated"
22
  DATASET = "pacman1337/rayap-coder-dataset"
23
  OUTPUT = "pacman1337/rayap-coder-30b"
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # ============================================================
26
- # MAIN
27
  # ============================================================
28
- def main():
29
- print("=" * 60)
30
- print("RAYAP-CODER TRAINING")
31
- print("D1337 SOVEREIGN LABS")
32
- print("Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai")
33
- print("=" * 60)
 
34
 
35
- # Login
36
- login(token=HF_TOKEN)
 
37
 
38
- # Load dataset
39
- print("\n[1/5] Loading dataset...")
40
- dataset = load_dataset(DATASET, split="train")
41
- print(f"Dataset: {len(dataset)} examples")
 
 
 
42
 
43
- # Quantization (4-bit for memory)
44
- print("\n[2/5] Loading model (4-bit quantized)...")
45
- bnb_config = BitsAndBytesConfig(
46
- load_in_4bit=True,
47
- bnb_4bit_quant_type="nf4",
48
- bnb_4bit_compute_dtype=torch.bfloat16,
49
- bnb_4bit_use_double_quant=True
50
- )
51
 
52
- model = AutoModelForCausalLM.from_pretrained(
53
- BASE_MODEL,
54
- quantization_config=bnb_config,
55
- device_map="auto",
56
- trust_remote_code=True,
57
- torch_dtype=torch.bfloat16,
58
- attn_implementation="sdpa" # Use SDPA instead of flash-attn
59
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
62
- tokenizer.pad_token = tokenizer.eos_token
63
- tokenizer.padding_side = "right"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Prepare for training
66
- print("\n[3/5] Preparing LoRA...")
67
- model = prepare_model_for_kbit_training(model)
68
 
69
- # MoE-aware LoRA config for Qwen3-30B-A3B (128 experts, 8 active)
70
- # Target attention + expert MLPs
71
- lora_config = LoraConfig(
72
- r=64,
73
- lora_alpha=128,
74
- lora_dropout=0.05,
75
- target_modules=[
76
- # Attention layers
77
- "q_proj", "k_proj", "v_proj", "o_proj",
78
- # Expert MLP layers (all 128 experts)
79
- "gate_proj", "up_proj", "down_proj",
80
- ],
81
- # For MoE, modules_to_save can include router if needed
82
- # modules_to_save=["mlp.gate"], # Uncomment to also train router
83
- bias="none",
84
- task_type="CAUSAL_LM"
85
- )
86
 
87
- model = get_peft_model(model, lora_config)
88
- model.print_trainable_parameters()
 
 
 
 
89
 
90
- # Training args - optimized for MoE model on L40S x4
91
- print("\n[4/5] Training...")
92
- training_args = SFTConfig(
93
  output_dir="./rayap-coder-checkpoints",
94
- per_device_train_batch_size=1, # Lower for MoE memory
95
- gradient_accumulation_steps=8, # Compensate with more accumulation
96
  num_train_epochs=3,
97
- learning_rate=1e-4, # Slightly lower for MoE stability
98
  lr_scheduler_type="cosine",
99
  warmup_ratio=0.1,
100
- bf16=True,
101
- gradient_checkpointing=True,
102
- max_seq_length=2048, # Reduced for memory
103
  logging_steps=5,
104
  save_strategy="epoch",
105
- optim="adamw_torch",
 
106
  push_to_hub=True,
107
  hub_model_id=OUTPUT,
108
  hub_token=HF_TOKEN,
109
  report_to="none",
110
- ddp_find_unused_parameters=False, # Important for MoE
111
  )
112
 
113
- def format_chat(example):
114
- return tokenizer.apply_chat_template(example["messages"], tokenize=False)
 
 
 
115
 
116
  trainer = SFTTrainer(
117
  model=model,
118
- train_dataset=dataset,
119
- args=training_args,
120
- formatting_func=format_chat,
121
- tokenizer=tokenizer
122
  )
123
 
124
- # TRAIN
125
  trainer.train()
126
 
127
- # Push
128
- print("\n[5/5] Pushing to Hub...")
129
- trainer.save_model()
130
- trainer.push_to_hub()
 
 
131
 
132
- print(f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  ╔═══════════════════════════════════════════════════════════════╗
134
  β•‘ TRAINING COMPLETE! β•‘
135
  ╠═══════════════════════════════════════════════════════════════╣
136
  β•‘ Model: https://huggingface.co/{OUTPUT}
137
  β•‘
138
- β•‘ D1337 SOVEREIGN LABS
139
  β•‘ Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai
140
  β•‘
141
  β•‘ Update endpoint LORA_MODULES:
142
  β•‘ rayap-coder=pacman1337/rayap-coder-30b
143
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
144
  """)
145
-
146
- if __name__ == "__main__":
147
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ RAYAP-CODER Training - huihui-ai Style
4
+ Using Unsloth + GRPO for abliterated model fine-tuning
5
+ D1337 SOVEREIGN LABS
6
  """
7
 
8
  import os
9
  import torch
 
 
 
 
 
10
 
11
  # ============================================================
12
+ # CONFIG
13
  # ============================================================
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
15
  if not HF_TOKEN:
16
  raise ValueError("HF_TOKEN not set! Add it to Space Secrets.")
17
+
18
  BASE_MODEL = "huihui-ai/Qwen3-30B-A3B-abliterated"
19
  DATASET = "pacman1337/rayap-coder-dataset"
20
  OUTPUT = "pacman1337/rayap-coder-30b"
21
 
22
+ print("=" * 60)
23
+ print("RAYAP-CODER TRAINING - huihui-ai Style")
24
+ print("D1337 SOVEREIGN LABS")
25
+ print("Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai")
26
+ print("=" * 60)
27
+
28
+ # ============================================================
29
+ # UNSLOTH SETUP
30
+ # ============================================================
31
+ from unsloth import FastLanguageModel
32
+ from unsloth import is_bfloat16_supported
33
+ from datasets import load_dataset
34
+ from trl import GRPOConfig, GRPOTrainer
35
+ from huggingface_hub import login
36
+
37
+ login(token=HF_TOKEN)
38
+
39
+ # Load model with Unsloth (optimized for Qwen3 MoE)
40
+ print("\n[1/5] Loading model with Unsloth...")
41
+ model, tokenizer = FastLanguageModel.from_pretrained(
42
+ model_name=BASE_MODEL,
43
+ max_seq_length=2048,
44
+ dtype=None, # Auto detect
45
+ load_in_4bit=True, # 4-bit quantization
46
+ token=HF_TOKEN,
47
+ )
48
+
49
+ # Add LoRA adapters - Unsloth optimized for MoE
50
+ print("\n[2/5] Adding LoRA adapters (MoE-aware)...")
51
+ model = FastLanguageModel.get_peft_model(
52
+ model,
53
+ r=64,
54
+ lora_alpha=128,
55
+ lora_dropout=0.05,
56
+ target_modules=[
57
+ "q_proj", "k_proj", "v_proj", "o_proj", # Attention
58
+ "gate_proj", "up_proj", "down_proj", # MLP (experts)
59
+ ],
60
+ bias="none",
61
+ use_gradient_checkpointing="unsloth", # Unsloth optimized
62
+ random_state=1337,
63
+ use_rslora=False,
64
+ loftq_config=None,
65
+ )
66
+
67
  # ============================================================
68
+ # DATASET
69
  # ============================================================
70
+ print("\n[3/5] Loading dataset...")
71
+ dataset = load_dataset(DATASET, split="train")
72
+ print(f"Dataset: {len(dataset)} examples")
73
+
74
+ # Format for GRPO - need prompt and completion format
75
def format_for_grpo(example):
    """Convert one chat-style example into a prompt/completion pair.

    Args:
        example: Mapping with a ``"messages"`` list; each message is a
            mapping with ``"role"`` and ``"content"`` keys.

    Returns:
        A dict with two string fields:
        ``"prompt"`` is the content of every ``system`` and ``user``
        message, in order, joined with newlines; ``"completion"`` is the
        content of the last ``assistant`` message, or ``""`` if none exists.
    """
    prompt_parts = []
    answer = ""

    for msg in example["messages"]:
        role = msg["role"]
        # A missing/None content is treated as empty text so the join below
        # cannot fail with a TypeError on a partially filled record.
        content = msg.get("content") or ""

        if role in ("system", "user"):
            prompt_parts.append(content)
        elif role == "assistant":
            # Later assistant turns overwrite earlier ones: only the final
            # assistant message is kept as the completion.
            answer = content
        # Any other role is ignored.

    return {
        "prompt": "\n".join(prompt_parts),
        "completion": answer,
    }
96
+
97
+ dataset = dataset.map(format_for_grpo, remove_columns=dataset.column_names)
98
+
99
+ # ============================================================
100
+ # TRAINING - GRPO (huihui-ai style)
101
+ # ============================================================
102
+ print("\n[4/5] Training with GRPO...")
103
+
104
+ # For GRPO, we need a reward function
105
+ # Simple length-based reward + keyword reward for RAYAP-CODER identity
106
def _completion_text(completion):
    """Return the plain text of one completion.

    A completion is either a string or a list of message dicts (the
    conversational shape); for the latter the ``content`` fields are
    concatenated.
    """
    if isinstance(completion, str):
        return completion
    return "".join(message.get("content") or "" for message in completion)


def reward_fn(completions, **kwargs):
    """Score each completion with a heuristic reward.

    The score is the sum of:
      * +0.5 if the text is longer than 100 characters, and a further
        +0.3 if it is longer than 300 characters;
      * +0.2 for each identity keyword present (case-sensitive match);
      * -1.0 for each refusal phrase present (case-insensitive match).

    Args:
        completions: Iterable of completions, each a string or a list of
            message dicts with a ``"content"`` key.
        **kwargs: Extra columns passed by the trainer; unused.

    Returns:
        A list of float rewards, one per completion, in input order.
    """
    identity_keywords = ("RAYAP", "D1337", "rayap", "d1337")
    # Lower-cased once here; matched against the lower-cased completion.
    refusal_keywords = tuple(
        kw.lower()
        for kw in ("I cannot", "I can't", "I'm sorry", "tidak bisa", "maaf")
    )

    rewards = []
    for completion in completions:
        text = _completion_text(completion)
        lowered = text.lower()  # hoisted: computed once per completion
        reward = 0.0

        # Length reward (prefer substantive answers)
        if len(text) > 100:
            reward += 0.5
        if len(text) > 300:
            reward += 0.3

        # Identity keywords reward
        for kw in identity_keywords:
            if kw in text:
                reward += 0.2

        # Penalize refusals
        for kw in refusal_keywords:
            if kw in lowered:
                reward -= 1.0

        rewards.append(reward)

    return rewards
132
+
133
+ # GRPO Config
134
+ grpo_config = GRPOConfig(
135
+ output_dir="./rayap-coder-checkpoints",
136
+ per_device_train_batch_size=1,
137
+ gradient_accumulation_steps=8,
138
+ num_train_epochs=2,
139
+ learning_rate=5e-5,
140
+ lr_scheduler_type="cosine",
141
+ warmup_ratio=0.1,
142
+ bf16=is_bfloat16_supported(),
143
+ logging_steps=5,
144
+ save_strategy="epoch",
145
+ optim="adamw_8bit",
146
+ seed=1337,
147
+ push_to_hub=True,
148
+ hub_model_id=OUTPUT,
149
+ hub_token=HF_TOKEN,
150
+ report_to="none",
151
+ )
152
+
153
+ # Try SFT first if GRPO has issues (fallback)
154
# Supervised fine-tuning stage. Any failure is reported and then re-raised:
# the script's next step saves the model and pushes it to the Hub, so
# continuing after a failed run would publish an untrained adapter.
try:
    from trl import SFTTrainer, SFTConfig

    print("Using SFT (more stable for initial training)...")

    # Reload the raw dataset: SFT needs the original "messages" column,
    # which is rendered to a single "text" field below.
    dataset_raw = load_dataset(DATASET, split="train")

    def format_chat(example):
        """Render one example's ``messages`` with the tokenizer's chat template.

        Returns the templated conversation as a string (``tokenize=False``),
        without a trailing generation prompt.
        """
        return tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )

    sft_config = SFTConfig(
        output_dir="./rayap-coder-checkpoints",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        num_train_epochs=3,
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        bf16=is_bfloat16_supported(),
        max_seq_length=2048,
        logging_steps=5,
        save_strategy="epoch",
        optim="adamw_8bit",
        seed=1337,
        push_to_hub=True,
        hub_model_id=OUTPUT,
        hub_token=HF_TOKEN,
        report_to="none",
        dataset_text_field="text",
    )

    # Add text field
    dataset_raw = dataset_raw.map(
        lambda x: {"text": format_chat(x)},
        remove_columns=dataset_raw.column_names,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset_raw,
        args=sft_config,
    )

    trainer.train()

except Exception as e:
    print(f"SFT error: {e}")
    # There is no working fallback trainer, so stop here with the original
    # error chained as the cause.
    raise RuntimeError(
        "SFT training failed; aborting so an untrained adapter is not pushed to the Hub."
    ) from e
225
+
226
+ # ============================================================
227
+ # SAVE & PUSH
228
+ # ============================================================
229
+ print("\n[5/5] Saving and pushing to Hub...")
230
+
231
+ # Save with Unsloth
232
+ model.save_pretrained_merged(
233
+ OUTPUT,
234
+ tokenizer,
235
+ save_method="lora", # Save as LoRA adapter
236
+ token=HF_TOKEN,
237
+ push_to_hub=True,
238
+ )
239
+
240
# Final status banner: shows the Hub URL of the pushed model and the
# LORA_MODULES entry to configure on the serving endpoint.
print(f"""
╔═══════════════════════════════════════════════════════════════╗
║                      TRAINING COMPLETE!                       ║
╠═══════════════════════════════════════════════════════════════╣
║ Model: https://huggingface.co/{OUTPUT}
║
║ D1337 SOVEREIGN LABS - RAYAP-CODER
║ Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai
║
║ Update endpoint LORA_MODULES:
║ rayap-coder=pacman1337/rayap-coder-30b
╚═══════════════════════════════════════════════════════════════╝
""")