# Uploaded by pacman1337 via huggingface_hub (commit 033e91d, verified)
#!/usr/bin/env python3
"""
RAYAP-CODER Training - huihui-ai Style
Using Unsloth + GRPO for abliterated model fine-tuning
D1337 SOVEREIGN LABS
"""
import os
import torch
# ============================================================
# CONFIG
# ============================================================
# Hub credentials and repo identifiers for this training run.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast: every Hub interaction below (load, push) needs the token.
    raise ValueError("HF_TOKEN not set! Add it to Space Secrets.")

BASE_MODEL = "huihui-ai/Qwen3-30B-A3B-abliterated"
DATASET = "pacman1337/rayap-coder-dataset"
OUTPUT = "pacman1337/rayap-coder-30b"

# Startup banner.
print(
    "=" * 60,
    "RAYAP-CODER TRAINING - huihui-ai Style",
    "D1337 SOVEREIGN LABS",
    "Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai",
    "=" * 60,
    sep="\n",
)
# ============================================================
# UNSLOTH SETUP
# ============================================================
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer
from huggingface_hub import login
# Authenticate with the Hub so gated model downloads and later pushes work.
login(token=HF_TOKEN)
# Load model with Unsloth (optimized for Qwen3 MoE)
print("\n[1/5] Loading model with Unsloth...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL,
    max_seq_length=2048,  # context length used for training samples
    dtype=None,  # Auto detect
    load_in_4bit=True,  # 4-bit quantization — fits the 30B model in limited VRAM
    token=HF_TOKEN,  # forwarded for Hub auth on the model download
)
# Add LoRA adapters - Unsloth optimized for MoE
print("\n[2/5] Adding LoRA adapters (MoE-aware)...")
model = FastLanguageModel.get_peft_model(
    model,
    r=64,  # LoRA rank
    lora_alpha=128,  # LoRA scaling parameter
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention
        "gate_proj", "up_proj", "down_proj",  # MLP (experts)
    ],
    bias="none",  # bias terms stay frozen
    use_gradient_checkpointing="unsloth",  # Unsloth optimized
    random_state=1337,
    use_rslora=False,
    loftq_config=None,
)
# ============================================================
# DATASET
# ============================================================
print("\n[3/5] Loading dataset...")
# Pull the chat-format training split from the Hub.
dataset = load_dataset(DATASET, split="train")
print(f"Dataset: {len(dataset)} examples")
# Format for GRPO - need prompt and completion format
def format_for_grpo(example):
    """Flatten one chat-format example into GRPO's prompt/completion pair.

    System and user turns are joined (newline-separated, in order) into the
    prompt; the last assistant turn seen becomes the completion. Examples
    with no assistant turn yield an empty completion.
    """
    prompt_chunks = []
    completion = ""
    for turn in example["messages"]:
        role = turn["role"]
        if role in ("system", "user"):
            prompt_chunks.append(turn["content"])
        elif role == "assistant":
            completion = turn["content"]
    return {"prompt": "\n".join(prompt_chunks), "completion": completion}
# Replace the raw chat columns with flat prompt/completion pairs for GRPO.
dataset = dataset.map(format_for_grpo, remove_columns=dataset.column_names)
# ============================================================
# TRAINING - GRPO (huihui-ai style)
# ============================================================
print("\n[4/5] Training with GRPO...")
# For GRPO, we need a reward function
# Simple length-based reward + keyword reward for RAYAP-CODER identity
def reward_fn(completions, **kwargs):
    """Score completions: reward substance and brand identity, punish refusals.

    Per completion: +0.5 if longer than 100 chars, a further +0.3 past 300;
    +0.2 per identity keyword found (case-sensitive); -1.0 per refusal
    phrase found (case-insensitive). Returns one float per completion.
    """
    identity_keywords = ["RAYAP", "D1337", "rayap", "d1337"]
    refusal_keywords = ["I cannot", "I can't", "I'm sorry", "tidak bisa", "maaf"]

    def _score(text):
        lowered = text.lower()
        score = 0.0
        # Prefer substantive answers.
        if len(text) > 100:
            score += 0.5
        if len(text) > 300:
            score += 0.3
        # Identity keywords (exact-case match, so RAYAP and rayap each count).
        for kw in identity_keywords:
            if kw in text:
                score += 0.2
        # Refusal phrases, matched case-insensitively.
        for kw in refusal_keywords:
            if kw.lower() in lowered:
                score -= 1.0
        return score

    return [_score(c) for c in completions]
# GRPO Config
# NOTE(review): grpo_config (and reward_fn above) are built but no
# GRPOTrainer is ever constructed in this script — training actually runs
# via the SFT try-block below. Confirm whether GRPO was meant to run or
# whether this config is dead code to be removed.
grpo_config = GRPOConfig(
    output_dir="./rayap-coder-checkpoints",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # effective batch size of 8 with batch size 1
    num_train_epochs=2,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    bf16=is_bfloat16_supported(),  # bf16 only where the hardware supports it
    logging_steps=5,
    save_strategy="epoch",
    optim="adamw_8bit",  # memory-efficient 8-bit optimizer
    seed=1337,
    push_to_hub=True,
    hub_model_id=OUTPUT,
    hub_token=HF_TOKEN,
    report_to="none",
)
# Try SFT first if GRPO has issues (fallback)
try:
    from trl import SFTTrainer, SFTConfig
    print("Using SFT (more stable for initial training)...")

    # Reformat dataset for SFT: re-load the raw split (the GRPO-mapped
    # `dataset` above has already dropped the "messages" column).
    dataset_raw = load_dataset(DATASET, split="train")

    def format_chat(example):
        # Render one chat example to a single training string via the
        # model's chat template (no tokenization at this stage).
        return tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False
        )

    sft_config = SFTConfig(
        output_dir="./rayap-coder-checkpoints",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,  # effective batch size of 8
        num_train_epochs=3,
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        bf16=is_bfloat16_supported(),
        max_seq_length=2048,  # must match the model's load-time max_seq_length
        logging_steps=5,
        save_strategy="epoch",
        optim="adamw_8bit",
        seed=1337,
        push_to_hub=True,
        hub_model_id=OUTPUT,
        hub_token=HF_TOKEN,
        report_to="none",
        dataset_text_field="text",  # column added by the map below
    )

    # Add text field
    dataset_raw = dataset_raw.map(
        lambda x: {"text": format_chat(x)},
        remove_columns=dataset_raw.column_names
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset_raw,
        args=sft_config,
    )
    trainer.train()
except Exception as e:
    # NOTE(review): this broad `except Exception` hides real failures, and
    # the fallback below only constructs TrainingArguments — no Trainer is
    # built and .train() is never called. If SFT fails, execution falls
    # through to the save step and pushes an UNTRAINED adapter. Either
    # implement the fallback or re-raise here.
    print(f"SFT error: {e}")
    print("Trying basic training...")
    # Ultra basic fallback
    from transformers import TrainingArguments, Trainer
    training_args = TrainingArguments(
        output_dir="./rayap-coder-checkpoints",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        num_train_epochs=3,
        learning_rate=2e-4,
        bf16=True,  # unconditional here, unlike the is_bfloat16_supported() checks above
        logging_steps=5,
        save_strategy="epoch",
        push_to_hub=True,
        hub_model_id=OUTPUT,
        hub_token=HF_TOKEN,
    )
# ============================================================
# SAVE & PUSH
# ============================================================
print("\n[5/5] Saving and pushing to Hub...")
# Save with Unsloth.
# NOTE(review): despite the "merged" method name, save_method="lora" saves
# only the adapter weights — confirm this matches how the serving endpoint
# (LORA_MODULES) expects the repo to be laid out.
model.save_pretrained_merged(
    OUTPUT,
    tokenizer,
    save_method="lora",  # Save as LoRA adapter
    token=HF_TOKEN,
    push_to_hub=True,
)
# Final status banner: published model URL plus the endpoint config hint.
# Fix: the original literal contained mojibake ("β•‘", "β•šβ•β•…") — UTF-8
# box-drawing characters that had been double-decoded; restored to the
# intended ║ / ╚ / ╝ glyphs so the banner renders correctly.
print(f"""
╔═══════════════════════════════════════════════════════════════╗
║ TRAINING COMPLETE!                                             ║
╠═══════════════════════════════════════════════════════════════╣
║ Model: https://huggingface.co/{OUTPUT}
║
║ D1337 SOVEREIGN LABS - RAYAP-CODER
║ Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai
║
║ Update endpoint LORA_MODULES:
║   rayap-coder=pacman1337/rayap-coder-30b
╚═══════════════════════════════════════════════════════════════╝
""")