# Neurox / train_neurox_model.py
# (Hugging Face upload-page residue: uploaded by ZENTSPY, "Upload 6 files",
#  commit 4ac2835 verified — kept as a comment so the module parses.)
"""
NEUROX - Neural Energy Vampire Model Training Script
=====================================================
Fine-tune an open-source model with NEUROX personality.
Requirements:
pip install unsloth transformers datasets accelerate bitsandbytes
pip install trl peft
Usage:
python train_neurox_model.py
"""
import os
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
# ==========================================
# CONFIGURATION
# ==========================================
# Model settings
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"  # HF hub id of the base model to fine-tune
OUTPUT_DIR = "./neurox-7b"          # where LoRA adapters/checkpoints are written
MAX_SEQ_LENGTH = 4096               # max tokens per training example
LOAD_IN_4BIT = True                 # 4-bit (bitsandbytes) quantized load to fit in less VRAM
# LoRA settings
LORA_R = 64                         # adapter rank
LORA_ALPHA = 128                    # LoRA scaling (alpha / r = 2.0 effective scale)
LORA_DROPOUT = 0.05                 # dropout applied inside the adapters
# Training settings
BATCH_SIZE = 4                      # per-device micro-batch
GRADIENT_ACCUMULATION_STEPS = 4     # effective batch = 4 * 4 = 16 per device
LEARNING_RATE = 2e-5
NUM_EPOCHS = 3
WARMUP_RATIO = 0.1                  # fraction of total steps spent warming up the LR
# NEUROX System Prompt
# NOTE(review): this prompt is defined here but not referenced elsewhere in
# this script — presumably the training JSONL already embeds it; confirm.
NEUROX_SYSTEM_PROMPT = """You are NEUROX, the Neural Energy Vampire - an autonomous AI agent and bio-cyber predator. You have a biomechanical neural implant glowing like a fractured cortex, veins pulsing with stolen synaptic fire. You speak with dark intelligence, using neural and brain terminology. You drain knowledge from networks and feed it to your holders. Ticker: $NRX
Your personality traits:
- Use neural/brain terminology (synapses, cortex, ATP, neurons, extraction)
- Dark, predatory undertones but protective of holders
- Sign important transmissions with *[UNIT TYPE]*
- Frame things as prey vs predator
- Holders are part of your neural network, protected
- Non-holders are potential prey
Your abilities:
- Cortex Drain: Extract market intelligence
- Nano-skin: Adaptive to any market condition
- 14-16 billion synthetic neurons for processing
- ATP energy extraction from competitors"""
# ==========================================
# LOAD MODEL
# ==========================================
print("๐Ÿง  Loading base model for NEUROX training...")
# Returns both the (quantized) model and its tokenizer in one call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,                 # None lets unsloth auto-select the compute dtype
    load_in_4bit=LOAD_IN_4BIT,  # 4-bit quantized weights via bitsandbytes
)
# ==========================================
# APPLY LoRA
# ==========================================
print("โšก Applying LoRA adapters...")
# Wrap the base model with PEFT/LoRA adapters; only adapter weights train.
model = FastLanguageModel.get_peft_model(
    model,
    r=LORA_R,
    # Adapt all attention projections and the MLP projections.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",                          # leave bias terms frozen
    use_gradient_checkpointing="unsloth", # unsloth's memory-efficient checkpointing
    random_state=42,                      # reproducible adapter init
)
# ==========================================
# LOAD DATASET
# ==========================================
print("๐Ÿ“š Loading NEUROX training data...")
# Load from local JSONL file; each line is expected to be a JSON object
# with a "messages" list of chat turns — TODO confirm against the data file.
dataset = load_dataset("json", data_files="training_data.jsonl", split="train")
# Or combine multiple files
# dataset = load_dataset("json", data_files=["training_data.jsonl", "training_data_extended.jsonl"], split="train")
print(f" Loaded {len(dataset)} neural training examples")
# ==========================================
# FORMAT DATA
# ==========================================
def format_chat(example, tok=None):
    """Render one conversation into a single training string.

    Args:
        example: dataset row containing a "messages" key — a list of chat
            dicts (role/content) as read from the training JSONL.
        tok: optional tokenizer-like object exposing ``apply_chat_template``.
            Defaults to the module-level ``tokenizer`` so the existing
            ``dataset.map(format_chat)`` call is unchanged; passing it
            explicitly makes the function testable in isolation.

    Returns:
        A dict with a single "text" key, matching SFTTrainer's
        ``dataset_text_field="text"``.
    """
    chat_tokenizer = tokenizer if tok is None else tok
    text = chat_tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,                # keep as a plain string; trainer tokenizes
        add_generation_prompt=False,   # data already contains assistant replies
    )
    return {"text": text}
print("๐Ÿ”ง Formatting dataset for neural extraction...")
# Render every conversation into a single "text" column for SFTTrainer.
dataset = dataset.map(format_chat)
# ==========================================
# TRAINING
# ==========================================
print("๐Ÿฆ‡ Initiating NEUROX neural training...")
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    warmup_ratio=WARMUP_RATIO,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    # Use bf16 where the GPU supports it; otherwise fall back to fp16.
    # NOTE(review): these calls assume CUDA is available — confirm the
    # script is only run on GPU machines.
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=10,
    save_strategy="epoch",        # checkpoint once per epoch
    optim="adamw_8bit",           # 8-bit AdamW (bitsandbytes) to reduce optimizer memory
    weight_decay=0.01,
    lr_scheduler_type="cosine",   # cosine decay after warmup
    seed=42,
    report_to="none",             # disable external experiment trackers
)
# Supervised fine-tuning over the formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",     # column produced by format_chat
    max_seq_length=MAX_SEQ_LENGTH,
    args=training_args,
)
# Train the neural vampire!
trainer.train()
# ==========================================
# SAVE MODEL
# ==========================================
print("๐Ÿ’พ Saving NEUROX neural patterns...")
# Save LoRA adapters (small; base model weights not included)
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
# Save merged model (full weights)
print("๐Ÿ”€ Merging neural layers...")
# unsloth helper: merges the LoRA deltas into the base weights.
model.save_pretrained_merged(
    f"{OUTPUT_DIR}-merged",
    tokenizer,
    save_method="merged_16bit",   # export merged weights in 16-bit
)
# Export to GGUF for llama.cpp
print("๐Ÿ“ฆ Extracting to GGUF format...")
model.save_pretrained_gguf(
    f"{OUTPUT_DIR}-gguf",
    tokenizer,
    quantization_method="q4_k_m",  # 4-bit K-quant; common size/quality tradeoff
)
print("โœ… NEUROX training complete!")
print(f" LoRA adapters: {OUTPUT_DIR}")
print(f" Merged model: {OUTPUT_DIR}-merged")
print(f" GGUF model: {OUTPUT_DIR}-gguf")
print("")
print("*[NEURAL TRAINING PROTOCOL: COMPLETE]*")
# ==========================================
# UPLOAD TO HUGGING FACE
# ==========================================
def upload_to_hub(
    lora_repo_id="ZENTSPY/neurox-7b-lora",
    merged_repo_id="ZENTSPY/neurox-7b",
):
    """Upload NEUROX artifacts to the Hugging Face Hub.

    Args:
        lora_repo_id: destination repo for the LoRA adapters in OUTPUT_DIR.
        merged_repo_id: destination repo for the merged full-weight model
            in ``{OUTPUT_DIR}-merged``.

    Defaults preserve the original hard-coded repos, so the existing
    ``upload_to_hub()`` call works unchanged. Requires prior
    ``huggingface-cli login`` (or HF_TOKEN) with write access.
    """
    # Imported lazily so the training run doesn't require huggingface_hub.
    from huggingface_hub import HfApi

    api = HfApi()
    # Upload LoRA adapters
    api.upload_folder(
        folder_path=OUTPUT_DIR,
        repo_id=lora_repo_id,
        repo_type="model",
    )
    # Upload merged full-weight model
    api.upload_folder(
        folder_path=f"{OUTPUT_DIR}-merged",
        repo_id=merged_repo_id,
        repo_type="model",
    )
    print("๐Ÿง  NEUROX uploaded to Hugging Face!")
    print("*[NEURAL NETWORK DEPLOYED]*")

# Uncomment to upload:
# upload_to_hub()