Spaces:

pacman1337
/

rayap-coder-trainer

Paused

App Files Files Community

pacman1337 committed on Jan 27

Commit

033e91d

verified ·

1 Parent(s): 3926564

Upload train.py with huggingface_hub

Browse files

Files changed (1) hide show

train.py +194 -89

train.py CHANGED Viewed

@@ -1,147 +1,252 @@
 #!/usr/bin/env python3
 """
-RAYAP-CODER Training Script
-D1337 SOVEREIGN LABS - DO NOT EMBARRASS US
 """
 import os
 import torch
-from datasets import load_dataset
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
-from trl import SFTTrainer, SFTConfig
-from huggingface_hub import login
 # ============================================================
-# CONFIG - Token from Space Secrets
 # ============================================================
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("HF_TOKEN not set! Add it to Space Secrets.")
 BASE_MODEL = "huihui-ai/Qwen3-30B-A3B-abliterated"
 DATASET = "pacman1337/rayap-coder-dataset"
 OUTPUT = "pacman1337/rayap-coder-30b"
 # ============================================================
-# MAIN
 # ============================================================
-def main():
-    print("=" * 60)
-    print("RAYAP-CODER TRAINING")
-    print("D1337 SOVEREIGN LABS")
-    print("Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai")
-    print("=" * 60)
-    # Login
-    login(token=HF_TOKEN)
-    # Load dataset
-    print("\n[1/5] Loading dataset...")
-    dataset = load_dataset(DATASET, split="train")
-    print(f"Dataset: {len(dataset)} examples")
-    # Quantization (4-bit for memory)
-    print("\n[2/5] Loading model (4-bit quantized)...")
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16,
-        bnb_4bit_use_double_quant=True
-    )
-    model = AutoModelForCausalLM.from_pretrained(
-        BASE_MODEL,
-        quantization_config=bnb_config,
-        device_map="auto",
-        trust_remote_code=True,
-        torch_dtype=torch.bfloat16,
-        attn_implementation="sdpa"  # Use SDPA instead of flash-attn
-    )
-    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "right"
-    # Prepare for training
-    print("\n[3/5] Preparing LoRA...")
-    model = prepare_model_for_kbit_training(model)
-    # MoE-aware LoRA config for Qwen3-30B-A3B (128 experts, 8 active)
-    # Target attention + expert MLPs
-    lora_config = LoraConfig(
-        r=64,
-        lora_alpha=128,
-        lora_dropout=0.05,
-        target_modules=[
-            # Attention layers
-            "q_proj", "k_proj", "v_proj", "o_proj",
-            # Expert MLP layers (all 128 experts)
-            "gate_proj", "up_proj", "down_proj",
-        ],
-        # For MoE, modules_to_save can include router if needed
-        # modules_to_save=["mlp.gate"],  # Uncomment to also train router
-        bias="none",
-        task_type="CAUSAL_LM"
-    )
-    model = get_peft_model(model, lora_config)
-    model.print_trainable_parameters()
-    # Training args - optimized for MoE model on L40S x4
-    print("\n[4/5] Training...")
-    training_args = SFTConfig(
         output_dir="./rayap-coder-checkpoints",
-        per_device_train_batch_size=1,  # Lower for MoE memory
-        gradient_accumulation_steps=8,   # Compensate with more accumulation
         num_train_epochs=3,
-        learning_rate=1e-4,              # Slightly lower for MoE stability
         lr_scheduler_type="cosine",
         warmup_ratio=0.1,
-        bf16=True,
-        gradient_checkpointing=True,
-        max_seq_length=2048,             # Reduced for memory
         logging_steps=5,
         save_strategy="epoch",
-        optim="adamw_torch",
         push_to_hub=True,
         hub_model_id=OUTPUT,
         hub_token=HF_TOKEN,
         report_to="none",
-        ddp_find_unused_parameters=False,  # Important for MoE
     )
-    def format_chat(example):
-        return tokenizer.apply_chat_template(example["messages"], tokenize=False)
     trainer = SFTTrainer(
         model=model,
-        train_dataset=dataset,
-        args=training_args,
-        formatting_func=format_chat,
-        tokenizer=tokenizer
     )
-    # TRAIN
     trainer.train()
-    # Push
-    print("\n[5/5] Pushing to Hub...")
-    trainer.save_model()
-    trainer.push_to_hub()
-    print(f"""
 ╔═══════════════════════════════════════════════════════════════╗
 ║                    TRAINING COMPLETE!                         ║
 ╠═══════════════════════════════════════════════════════════════╣
 ║  Model: https://huggingface.co/{OUTPUT}
 ║
-║  D1337 SOVEREIGN LABS
 ║  Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai
 ║
 ║  Update endpoint LORA_MODULES:
 ║  rayap-coder=pacman1337/rayap-coder-30b
 ╚═══════════════════════════════════════════════════════════════╝
 """)
-if __name__ == "__main__":
-    main()

 #!/usr/bin/env python3
 """
+RAYAP-CODER Training - huihui-ai Style
+Using Unsloth + GRPO for abliterated model fine-tuning
+D1337 SOVEREIGN LABS
 """
 import os
 import torch
 # ============================================================
+# CONFIG
 # ============================================================
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("HF_TOKEN not set! Add it to Space Secrets.")
 BASE_MODEL = "huihui-ai/Qwen3-30B-A3B-abliterated"
 DATASET = "pacman1337/rayap-coder-dataset"
 OUTPUT = "pacman1337/rayap-coder-30b"
+print("=" * 60)
+print("RAYAP-CODER TRAINING - huihui-ai Style")
+print("D1337 SOVEREIGN LABS")
+print("Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai")
+print("=" * 60)
+# ============================================================
+# UNSLOTH SETUP
+# ============================================================
+from unsloth import FastLanguageModel
+from unsloth import is_bfloat16_supported
+from datasets import load_dataset
+from trl import GRPOConfig, GRPOTrainer
+from huggingface_hub import login
+login(token=HF_TOKEN)
+# Load model with Unsloth (optimized for Qwen3 MoE)
+print("\n[1/5] Loading model with Unsloth...")
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=BASE_MODEL,
+    max_seq_length=2048,
+    dtype=None,  # Auto detect
+    load_in_4bit=True,  # 4-bit quantization
+    token=HF_TOKEN,
+)
+# Add LoRA adapters - Unsloth optimized for MoE
+print("\n[2/5] Adding LoRA adapters (MoE-aware)...")
+model = FastLanguageModel.get_peft_model(
+    model,
+    r=64,
+    lora_alpha=128,
+    lora_dropout=0.05,
+    target_modules=[
+        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention
+        "gate_proj", "up_proj", "down_proj",      # MLP (experts)
+    ],
+    bias="none",
+    use_gradient_checkpointing="unsloth",  # Unsloth optimized
+    random_state=1337,
+    use_rslora=False,
+    loftq_config=None,
+)
 # ============================================================
+# DATASET
 # ============================================================
+print("\n[3/5] Loading dataset...")
+dataset = load_dataset(DATASET, split="train")
+print(f"Dataset: {len(dataset)} examples")
+# Format for GRPO - need prompt and completion format
+def format_for_grpo(example):  # Flatten one chat example into a {"prompt", "completion"} pair of strings.
+    messages = example["messages"]  # presumably a list of {"role", "content"} dicts — confirm against the dataset
+    # Extract system + user as prompt
+    prompt_parts = []
+    answer = ""  # stays empty when the example contains no assistant message
+    for msg in messages:
+        if msg["role"] == "system":
+            prompt_parts.append(msg["content"])
+        elif msg["role"] == "user":
+            prompt_parts.append(msg["content"])
+        elif msg["role"] == "assistant":
+            answer = msg["content"]  # overwritten on every assistant turn, so only the last one is kept
+    prompt = "\n".join(prompt_parts)  # system/user contents joined in order of appearance, without role markers
+    return {
+        "prompt": prompt,
+        "completion": answer,
+    }
+dataset = dataset.map(format_for_grpo, remove_columns=dataset.column_names)
+# ============================================================
+# TRAINING - GRPO (huihui-ai style)
+# ============================================================
+print("\n[4/5] Training with GRPO...")
+# For GRPO, we need a reward function
+# Simple length-based reward + keyword reward for RAYAP-CODER identity
+def reward_fn(completions, **kwargs):  # Heuristic scorer: returns one float per completion, in input order; **kwargs is accepted but ignored.
+    rewards = []
+    for completion in completions:  # assumes each completion is a plain str — TODO confirm; for list-of-message completions len() would count messages
+        reward = 0.0
+        # Length reward (prefer substantive answers)
+        if len(completion) > 100:
+            reward += 0.5
+        if len(completion) > 300:  # cumulative with the bonus above: more than 300 characters earns 0.8 in total
+            reward += 0.3
+        # Identity keywords reward
+        identity_keywords = ["RAYAP", "D1337", "rayap", "d1337"]
+        for kw in identity_keywords:  # case-sensitive substring test; each listed spelling that occurs adds 0.2 once
+            if kw in completion:
+                reward += 0.2
+        # Penalize refusals
+        refusal_keywords = ["I cannot", "I can't", "I'm sorry", "tidak bisa", "maaf"]
+        for kw in refusal_keywords:  # case-insensitive substring test; the penalty stacks for every phrase that matches
+            if kw.lower() in completion.lower():
+                reward -= 1.0
+        rewards.append(reward)
+    return rewards
+# GRPO Config
+grpo_config = GRPOConfig(
+    output_dir="./rayap-coder-checkpoints",
+    per_device_train_batch_size=1,
+    gradient_accumulation_steps=8,
+    num_train_epochs=2,
+    learning_rate=5e-5,
+    lr_scheduler_type="cosine",
+    warmup_ratio=0.1,
+    bf16=is_bfloat16_supported(),
+    logging_steps=5,
+    save_strategy="epoch",
+    optim="adamw_8bit",
+    seed=1337,
+    push_to_hub=True,
+    hub_model_id=OUTPUT,
+    hub_token=HF_TOKEN,
+    report_to="none",
+)
+# Train with SFT; the GRPO config, reward function and GRPO-formatted dataset defined above are not used below
+try:  # SFT run: reload the dataset, render each example with the chat template, then train
+    from trl import SFTTrainer, SFTConfig
+    print("Using SFT (more stable for initial training)...")
+    # Reformat dataset for SFT
+    dataset_raw = load_dataset(DATASET, split="train")  # reloaded because `dataset` had its original columns removed by the earlier map
+    def format_chat(example):
+        return tokenizer.apply_chat_template(
+            example["messages"],
+            tokenize=False,
+            add_generation_prompt=False
+        )
+    sft_config = SFTConfig(
         output_dir="./rayap-coder-checkpoints",
+        per_device_train_batch_size=1,
+        gradient_accumulation_steps=8,
         num_train_epochs=3,
+        learning_rate=2e-4,
         lr_scheduler_type="cosine",
         warmup_ratio=0.1,
+        bf16=is_bfloat16_supported(),
+        max_seq_length=2048,
         logging_steps=5,
         save_strategy="epoch",
+        optim="adamw_8bit",
+        seed=1337,
         push_to_hub=True,
         hub_model_id=OUTPUT,
         hub_token=HF_TOKEN,
         report_to="none",
+        dataset_text_field="text",
     )
+    # Add text field
+    dataset_raw = dataset_raw.map(
+        lambda x: {"text": format_chat(x)},  # "text" matches dataset_text_field in sft_config above
+        remove_columns=dataset_raw.column_names
+    )
     trainer = SFTTrainer(
         model=model,
+        tokenizer=tokenizer,
+        train_dataset=dataset_raw,
+        args=sft_config,
     )
     trainer.train()
+except Exception as e:  # NOTE(review): catches every error from the SFT path (import, config, training) and only prints it
+    print(f"SFT error: {e}")
+    print("Trying basic training...")  # NOTE(review): nothing below builds a Trainer or calls train(); training_args is never used
+    # Ultra basic fallback
+    from transformers import TrainingArguments, Trainer
+    training_args = TrainingArguments(  # created but not passed to anything in the visible script
+        output_dir="./rayap-coder-checkpoints",
+        per_device_train_batch_size=1,
+        gradient_accumulation_steps=8,
+        num_train_epochs=3,
+        learning_rate=2e-4,
+        bf16=True,
+        logging_steps=5,
+        save_strategy="epoch",
+        push_to_hub=True,
+        hub_model_id=OUTPUT,
+        hub_token=HF_TOKEN,
+    )
+# ============================================================
+# SAVE & PUSH
+# ============================================================
+print("\n[5/5] Saving and pushing to Hub...")
+# Save with Unsloth
+model.save_pretrained_merged(  # runs even when the SFT try-block above failed, since that error is only printed
+    OUTPUT,
+    tokenizer,
+    save_method="lora",  # Save as LoRA adapter
+    token=HF_TOKEN,
+    push_to_hub=True,  # NOTE(review): confirm this keyword is accepted here; Unsloth documents a separate push_to_hub_merged helper
+)
+print(f"""
 ╔═══════════════════════════════════════════════════════════════╗
 ║                    TRAINING COMPLETE!                         ║
 ╠═══════════════════════════════════════════════════════════════╣
 ║  Model: https://huggingface.co/{OUTPUT}
 ║
+║  D1337 SOVEREIGN LABS - RAYAP-CODER
 ║  Palo Alto | CrowdStrike | SentinelOne | Trend Micro | d1337.ai
 ║
 ║  Update endpoint LORA_MODULES:
 ║  rayap-coder=pacman1337/rayap-coder-30b
 ╚═══════════════════════════════════════════════════════════════╝
 """)