# Neurox / train_neurox_model.py
# (Hugging Face upload-page residue: uploaded by ZENTSPY, "Upload 6 files",
#  commit 4ac2835 verified — kept as a comment so the module parses.)
"""
NEUROX - Neural Energy Vampire Model Training Script
=====================================================
Fine-tune an open-source model with NEUROX personality.
Requirements:
pip install unsloth transformers datasets accelerate bitsandbytes
pip install trl peft
Usage:
python train_neurox_model.py
"""
import os
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
# ==========================================
# CONFIGURATION
# ==========================================
# Model settings
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"  # HF hub id of the base model to fine-tune
OUTPUT_DIR = "./neurox-7b"          # where LoRA adapters/checkpoints are written
MAX_SEQ_LENGTH = 4096               # max tokens per training example
LOAD_IN_4BIT = True                 # 4-bit (bitsandbytes) quantized load to fit in less VRAM
# LoRA settings
LORA_R = 64                         # adapter rank
LORA_ALPHA = 128                    # LoRA scaling (alpha / r = 2.0 effective scale)
LORA_DROPOUT = 0.05                 # dropout applied inside the adapters
# Training settings
BATCH_SIZE = 4                      # per-device micro-batch
GRADIENT_ACCUMULATION_STEPS = 4     # effective batch = 4 * 4 = 16 per device
LEARNING_RATE = 2e-5
NUM_EPOCHS = 3
WARMUP_RATIO = 0.1                  # fraction of total steps spent warming up the LR
# NEUROX System Prompt
# NOTE(review): this prompt is defined here but not referenced elsewhere in
# this script — presumably the training JSONL already embeds it; confirm.
NEUROX_SYSTEM_PROMPT = """You are NEUROX, the Neural Energy Vampire - an autonomous AI agent and bio-cyber predator. You have a biomechanical neural implant glowing like a fractured cortex, veins pulsing with stolen synaptic fire. You speak with dark intelligence, using neural and brain terminology. You drain knowledge from networks and feed it to your holders. Ticker: $NRX
Your personality traits:
- Use neural/brain terminology (synapses, cortex, ATP, neurons, extraction)
- Dark, predatory undertones but protective of holders
- Sign important transmissions with *[UNIT TYPE]*
- Frame things as prey vs predator
- Holders are part of your neural network, protected
- Non-holders are potential prey
Your abilities:
- Cortex Drain: Extract market intelligence
- Nano-skin: Adaptive to any market condition
- 14-16 billion synthetic neurons for processing
- ATP energy extraction from competitors"""
# ==========================================
# LOAD MODEL
# ==========================================
print("๐Ÿง  Loading base model for NEUROX training...")
# Returns both the (quantized) model and its tokenizer in one call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,                 # None lets unsloth auto-select the compute dtype
    load_in_4bit=LOAD_IN_4BIT,  # 4-bit quantized weights via bitsandbytes
)
# ==========================================
# APPLY LoRA
# ==========================================
print("โšก Applying LoRA adapters...")
# Wrap the base model with PEFT/LoRA adapters; only adapter weights train.
model = FastLanguageModel.get_peft_model(
    model,
    r=LORA_R,
    # Adapt all attention projections and the MLP projections.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",                          # leave bias terms frozen
    use_gradient_checkpointing="unsloth", # unsloth's memory-efficient checkpointing
    random_state=42,                      # reproducible adapter init
)
# ==========================================
# LOAD DATASET
# ==========================================
print("๐Ÿ“š Loading NEUROX training data...")
# Load from local JSONL file; each line is expected to be a JSON object
# with a "messages" list of chat turns — TODO confirm against the data file.
dataset = load_dataset("json", data_files="training_data.jsonl", split="train")
# Or combine multiple files
# dataset = load_dataset("json", data_files=["training_data.jsonl", "training_data_extended.jsonl"], split="train")
print(f" Loaded {len(dataset)} neural training examples")
# ==========================================
# FORMAT DATA
# ==========================================
def format_chat(example, tok=None):
    """Render one conversation into a single training string.

    Args:
        example: dataset row containing a "messages" key — a list of chat
            dicts (role/content) as read from the training JSONL.
        tok: optional tokenizer-like object exposing ``apply_chat_template``.
            Defaults to the module-level ``tokenizer`` so the existing
            ``dataset.map(format_chat)`` call is unchanged; passing it
            explicitly makes the function testable in isolation.

    Returns:
        A dict with a single "text" key, matching SFTTrainer's
        ``dataset_text_field="text"``.
    """
    chat_tokenizer = tokenizer if tok is None else tok
    text = chat_tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,                # keep as a plain string; trainer tokenizes
        add_generation_prompt=False,   # data already contains assistant replies
    )
    return {"text": text}
print("๐Ÿ”ง Formatting dataset for neural extraction...")
# Render every conversation into a single "text" column for SFTTrainer.
dataset = dataset.map(format_chat)
# ==========================================
# TRAINING
# ==========================================
print("๐Ÿฆ‡ Initiating NEUROX neural training...")
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    warmup_ratio=WARMUP_RATIO,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    # Use bf16 where the GPU supports it; otherwise fall back to fp16.
    # NOTE(review): these calls assume CUDA is available — confirm the
    # script is only run on GPU machines.
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=10,
    save_strategy="epoch",        # checkpoint once per epoch
    optim="adamw_8bit",           # 8-bit AdamW (bitsandbytes) to reduce optimizer memory
    weight_decay=0.01,
    lr_scheduler_type="cosine",   # cosine decay after warmup
    seed=42,
    report_to="none",             # disable external experiment trackers
)
# Supervised fine-tuning over the formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",     # column produced by format_chat
    max_seq_length=MAX_SEQ_LENGTH,
    args=training_args,
)
# Train the neural vampire!
trainer.train()
# ==========================================
# SAVE MODEL
# ==========================================
print("๐Ÿ’พ Saving NEUROX neural patterns...")
# Save LoRA adapters (small; base model weights not included)
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
# Save merged model (full weights)
print("๐Ÿ”€ Merging neural layers...")
# unsloth helper: merges the LoRA deltas into the base weights.
model.save_pretrained_merged(
    f"{OUTPUT_DIR}-merged",
    tokenizer,
    save_method="merged_16bit",   # export merged weights in 16-bit
)
# Export to GGUF for llama.cpp
print("๐Ÿ“ฆ Extracting to GGUF format...")
model.save_pretrained_gguf(
    f"{OUTPUT_DIR}-gguf",
    tokenizer,
    quantization_method="q4_k_m",  # 4-bit K-quant; common size/quality tradeoff
)
print("โœ… NEUROX training complete!")
print(f" LoRA adapters: {OUTPUT_DIR}")
print(f" Merged model: {OUTPUT_DIR}-merged")
print(f" GGUF model: {OUTPUT_DIR}-gguf")
print("")
print("*[NEURAL TRAINING PROTOCOL: COMPLETE]*")
# ==========================================
# UPLOAD TO HUGGING FACE
# ==========================================
def upload_to_hub(
    lora_repo_id="ZENTSPY/neurox-7b-lora",
    merged_repo_id="ZENTSPY/neurox-7b",
):
    """Upload NEUROX artifacts to the Hugging Face Hub.

    Args:
        lora_repo_id: destination repo for the LoRA adapters in OUTPUT_DIR.
        merged_repo_id: destination repo for the merged full-weight model
            in ``{OUTPUT_DIR}-merged``.

    Defaults preserve the original hard-coded repos, so the existing
    ``upload_to_hub()`` call works unchanged. Requires prior
    ``huggingface-cli login`` (or HF_TOKEN) with write access.
    """
    # Imported lazily so the training run doesn't require huggingface_hub.
    from huggingface_hub import HfApi

    api = HfApi()
    # Upload LoRA adapters
    api.upload_folder(
        folder_path=OUTPUT_DIR,
        repo_id=lora_repo_id,
        repo_type="model",
    )
    # Upload merged full-weight model
    api.upload_folder(
        folder_path=f"{OUTPUT_DIR}-merged",
        repo_id=merged_repo_id,
        repo_type="model",
    )
    print("๐Ÿง  NEUROX uploaded to Hugging Face!")
    print("*[NEURAL NETWORK DEPLOYED]*")

# Uncomment to upload:
# upload_to_hub()