|
|
|
|
|
""" |
|
|
RAYAP-CODER Training - huihui-ai Style |
|
|
Using Unsloth + GRPO for abliterated model fine-tuning |
|
|
D1337 SOVEREIGN LABS |
|
|
""" |
|
|
|
|
|
import os |
|
|
import torch |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face credentials must be supplied via the environment (Space
# Secrets); fail fast before any heavy imports or downloads happen.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not set! Add it to Space Secrets.")

# Run configuration: base checkpoint, training data repo, and the Hub repo
# that checkpoints/adapters are pushed to.
BASE_MODEL = "huihui-ai/Qwen3-30B-A3B-abliterated"
DATASET = "pacman1337/rayap-coder-dataset"
OUTPUT = "pacman1337/rayap-coder-30b"
|
|
|
|
|
# Startup banner identifying the training run (stdout only).
for banner_line in (
    "=" * 60,
    "RAYAP-CODER TRAINING - huihui-ai Style",
    "D1337 SOVEREIGN LABS",
    "Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai",
    "=" * 60,
):
    print(banner_line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE: these heavy third-party imports intentionally sit below the HF_TOKEN
# check above, so a missing token fails before CUDA/Unsloth initialization.
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer
from huggingface_hub import login

# Authenticate once so later load_dataset / push_to_hub calls are authorized.
login(token=HF_TOKEN)
|
|
|
|
|
|
|
|
print("\n[1/5] Loading model with Unsloth...")
# Load the base model quantized to 4-bit (QLoRA-style) via Unsloth's loader.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL,
    max_seq_length=2048,   # matches max_seq_length in the SFTConfig below
    dtype=None,            # let Unsloth pick the dtype for the hardware
    load_in_4bit=True,
    token=HF_TOKEN,        # required if the base repo is gated/private
)
|
|
|
|
|
|
|
|
print("\n[2/5] Adding LoRA adapters (MoE-aware)...")
# Wrap the quantized model with LoRA adapters on the attention and MLP
# projection layers; only these adapters are trained.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,              # LoRA rank
    lora_alpha=128,    # scaling factor (alpha / r = 2.0)
    lora_dropout=0.05,
    target_modules=[
        # Attention projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        # MLP projections
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none",
    use_gradient_checkpointing="unsloth",  # Unsloth's memory-saving variant
    random_state=1337,
    use_rslora=False,
    loftq_config=None,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("\n[3/5] Loading dataset...")
# Pull the full training split from the Hub (authorized by the login above).
dataset = load_dataset(DATASET, split="train")
print(f"Dataset: {len(dataset)} examples")
|
|
|
|
|
|
|
|
def format_for_grpo(example):
    """Convert a chat-format example into a GRPO prompt/completion pair.

    The contents of system and user messages are joined with newlines (in
    their original order) to form the prompt; the content of the last
    assistant message becomes the completion ("" if none is present).
    """
    context_chunks = []
    target = ""

    for message in example["messages"]:
        role = message["role"]
        if role in ("system", "user"):
            context_chunks.append(message["content"])
        elif role == "assistant":
            # Later assistant turns overwrite earlier ones, so only the
            # final assistant reply survives as the completion.
            target = message["content"]

    return {"prompt": "\n".join(context_chunks), "completion": target}
|
|
|
|
|
dataset = dataset.map(format_for_grpo, remove_columns=dataset.column_names) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("\n[4/5] Training with GRPO...") |
|
|
|
|
|
|
|
|
|
|
|
def reward_fn(completions, **kwargs):
    """Heuristic GRPO reward: favor long, on-brand answers; punish refusals.

    Per-completion scoring:
      * +0.5 if longer than 100 chars, plus an extra +0.3 if longer than 300
      * +0.2 for each identity keyword present (case-sensitive substring)
      * -1.0 for each refusal phrase present (case-insensitive substring)

    Returns a list with one float score per completion.
    """
    identity_terms = ("RAYAP", "D1337", "rayap", "d1337")
    refusal_terms = ("I cannot", "I can't", "I'm sorry", "tidak bisa", "maaf")

    scores = []
    for text in completions:
        score = 0.0

        # Length bonuses stack: a >300-char answer earns 0.5 + 0.3.
        if len(text) > 100:
            score += 0.5
        if len(text) > 300:
            score += 0.3

        # Brand/identity mentions — matched case-sensitively, which is why
        # both upper- and lower-case variants are listed.
        for term in identity_terms:
            if term in text:
                score += 0.2

        # Refusal phrases (English and Indonesian), matched case-insensitively.
        lowered = text.lower()
        for term in refusal_terms:
            if term.lower() in lowered:
                score -= 1.0

        scores.append(score)

    return scores
|
|
|
|
|
|
|
|
# GRPO training configuration.
# NOTE(review): grpo_config is never passed to a GRPOTrainer anywhere in
# this file — the try block below runs SFT instead — so this object is
# currently dead configuration. Either wire it to a GRPOTrainer or remove it.
grpo_config = GRPOConfig(
    output_dir="./rayap-coder-checkpoints",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,   # effective batch size of 8
    num_train_epochs=2,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    bf16=is_bfloat16_supported(),    # bf16 only where the hardware supports it
    logging_steps=5,
    save_strategy="epoch",
    optim="adamw_8bit",              # bitsandbytes 8-bit AdamW to save VRAM
    seed=1337,
    push_to_hub=True,
    hub_model_id=OUTPUT,
    hub_token=HF_TOKEN,
    report_to="none",
)
|
|
|
|
|
|
|
|
# --- Training --------------------------------------------------------------
# Primary path: plain SFT over the chat-templated dataset. The GRPO reward
# setup above is prepared but not used by this path.
try:
    from trl import SFTTrainer, SFTConfig

    print("Using SFT (more stable for initial training)...")

    # Reload the raw chat split; the GRPO-formatted `dataset` is not used here.
    dataset_raw = load_dataset(DATASET, split="train")

    def format_chat(example):
        # Render the message list into one training string with the
        # tokenizer's chat template (no trailing generation prompt).
        return tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False
        )

    sft_config = SFTConfig(
        output_dir="./rayap-coder-checkpoints",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,   # effective batch size of 8
        num_train_epochs=3,
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        bf16=is_bfloat16_supported(),
        max_seq_length=2048,             # matches the model's max_seq_length
        logging_steps=5,
        save_strategy="epoch",
        optim="adamw_8bit",
        seed=1337,
        push_to_hub=True,
        hub_model_id=OUTPUT,
        hub_token=HF_TOKEN,
        report_to="none",
        dataset_text_field="text",       # consume the "text" column built below
    )

    # Flatten every example into the single "text" field SFTTrainer expects.
    dataset_raw = dataset_raw.map(
        lambda x: {"text": format_chat(x)},
        remove_columns=dataset_raw.column_names
    )

    # NOTE(review): newer TRL releases renamed `tokenizer=` to
    # `processing_class=` — confirm against the pinned trl version.
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset_raw,
        args=sft_config,
    )

    trainer.train()

except Exception as e:
    # Broad catch: any SFT failure (import, config, or runtime) lands here.
    print(f"SFT error: {e}")
    print("Trying basic training...")

    from transformers import TrainingArguments, Trainer

    # NOTE(review): this fallback builds TrainingArguments but never
    # constructs a Trainer or calls .train() — if SFT fails, NO training
    # happens and the script proceeds straight to the save step below.
    # The `Trainer` import is likewise unused.
    training_args = TrainingArguments(
        output_dir="./rayap-coder-checkpoints",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        num_train_epochs=3,
        learning_rate=2e-4,
        bf16=True,                       # unconditional here, unlike the SFT path
        logging_steps=5,
        save_strategy="epoch",
        push_to_hub=True,
        hub_model_id=OUTPUT,
        hub_token=HF_TOKEN,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("\n[5/5] Saving and pushing to Hub...")

# Save and upload the trained weights. save_method="lora" exports only the
# adapter weights rather than a merged full-precision model, keeping the
# Hub artifact small and loadable as a LoRA module.
model.save_pretrained_merged(
    OUTPUT,
    tokenizer,
    save_method="lora",
    token=HF_TOKEN,
    push_to_hub=True,
)
|
|
|
|
|
# Final summary banner: model URL plus the serving-endpoint LoRA mapping.
# Fix: the original literal contained mojibake (runs of "β" where Unicode
# box-drawing characters belonged); the intended box is restored here.
print(f"""
╔════════════════════════════════════════════════════════════════╗
║              TRAINING COMPLETE!                                ║
╠════════════════════════════════════════════════════════════════╣
║ Model: https://huggingface.co/{OUTPUT}
║
║ D1337 SOVEREIGN LABS - RAYAP-CODER
║ Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai
║
║ Update endpoint LORA_MODULES:
║   rayap-coder=pacman1337/rayap-coder-30b
╚════════════════════════════════════════════════════════════════╝
""")
|
|
|