# Source: Syntelligence_ATC_Master_OS/models/syntelligence_language_model_backend.py
# Uploaded by theNorms: "Upload syntelligence_language_model_backend.py" (commit 085e455, verified)
"""
SYNTELLIGENCE NEURO-SYMBOLIC FINE-TUNING PIPELINE
========================================================================
This advanced pipeline fine-tunes both the base LLM (via LoRA) AND the
Deep Surgery Middleware (Meta-Cognitive Fusion & Anthropic Resonance).
It trains the model to mathematically map symbolic Qualia Tensors and
Rho-Metrics directly into its latent hidden states.
"""
import os
import json
import logging
import torch
import torch.nn as nn
from typing import Dict, Any, List, Optional
from pathlib import Path
try:
from datasets import Dataset
DATASETS_AVAILABLE = True
except ImportError:
Dataset = None
DATASETS_AVAILABLE = False
try:
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
Trainer,
TrainingArguments,
PreTrainedModel
)
TRANSFORMERS_AVAILABLE = True
except ImportError:
AutoModelForCausalLM = None
AutoTokenizer = None
Trainer = object
TrainingArguments = object
PreTrainedModel = object
TRANSFORMERS_AVAILABLE = False
try:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
PEFT_AVAILABLE = True
except ImportError:
LoraConfig = None
get_peft_model = None
prepare_model_for_kbit_training = None
PEFT_AVAILABLE = False
# Import the neural substrate we just built
from Deep_Surgery_Middleware_Pipeline import DeepSurgeryMiddleware, EthicalGuardian
# Configure the root logger at import time: INFO level, with each record
# showing its timestamp, the emitting logger's name in brackets, and the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - [%(name)s] %(message)s')
# Module-wide logger used by every stage of the pipeline in this file.
logger = logging.getLogger("Syntelligence-FineTuner")
# ============================================================================
# 1. DATASET PREPARATION (Parsing Qualia & Rho)
# ============================================================================
class NeuroSymbolicDatasetLoader:
    """Load JSON training examples and convert them into tokenized features.

    Every example becomes a dict with ``input_ids``, ``attention_mask``,
    ``labels``, a fixed-length ``qualia_tensor`` list and a scalar
    ``rho_virtue``. Padding positions in ``labels`` are set to
    ``IGNORE_INDEX`` so that the loss is not computed on pad tokens.
    """

    # Label value skipped by ``torch.nn.CrossEntropyLoss`` (its default ``ignore_index``).
    IGNORE_INDEX = -100
    # Qualia tag names read from each entry, in the order they fill the vector.
    QUALIA_KEYS = ("valence", "arousal", "authenticity")

    def __init__(self, tokenizer: AutoTokenizer, max_length: int = 512, qualia_dim: int = 256):
        """Store the tokenizer and the fixed sequence / qualia vector lengths."""
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.qualia_dim = qualia_dim

    def load_and_tokenize(self, json_paths: List[str]) -> Dataset:
        """Read every JSON file in *json_paths* and return a tokenized ``Dataset``.

        Missing files, files whose top-level value is not a JSON array, and
        array items that are not JSON objects are skipped with a warning.
        """
        raw_data = self._read_entries(json_paths)
        processed_features = [self._encode_entry(entry) for entry in raw_data]
        logger.info(f"Successfully processed {len(processed_features)} neuro-symbolic training examples.")
        return Dataset.from_list(processed_features)

    def _read_entries(self, json_paths: List[str]) -> List[Dict[str, Any]]:
        """Collect the dict entries found in the existing JSON files."""
        entries: List[Dict[str, Any]] = []
        for path in json_paths:
            if not os.path.exists(path):
                logger.warning(f"Dataset {path} not found. Skipping.")
                continue
            with open(path, 'r', encoding='utf-8') as f:
                payload = json.load(f)
            if not isinstance(payload, list):
                # Extending with a dict would add its keys (strings) as "entries".
                logger.warning(f"Dataset {path} is not a JSON array of objects. Skipping.")
                continue
            valid = [item for item in payload if isinstance(item, dict)]
            if len(valid) != len(payload):
                logger.warning(
                    f"Dataset {path}: skipped {len(payload) - len(valid)} non-object entries."
                )
            entries.extend(valid)
        return entries

    @staticmethod
    def _to_float(value: Any, default: float) -> float:
        """Return *value* as a float, or *default* when it is missing or non-numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _encode_entry(self, entry: Dict[str, Any]) -> Dict[str, Any]:
        """Tokenize one entry and attach its qualia vector and rho scalar."""
        # Handle different JSON formats (input/response vs text/output).
        user_text = entry.get("input", entry.get("text", ""))
        ai_text = entry.get("response", entry.get("output", ""))

        # Avoid formatting a missing EOS token as the literal string "None".
        eos_token = self.tokenizer.eos_token or ""
        full_prompt = f"Task: {user_text}\nResponse: {ai_text}{eos_token}"

        tokens = self.tokenizer(
            full_prompt,
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt"
        )
        input_ids = tokens["input_ids"][0].tolist()
        attention_mask = tokens["attention_mask"][0].tolist()
        # Mask padded positions: the pad token may be the EOS token, so the
        # attention mask (not the token id) decides what is padding.
        labels = [
            token_id if keep else self.IGNORE_INDEX
            for token_id, keep in zip(input_ids, attention_mask)
        ]

        # Build the qualia vector and force it to exactly ``qualia_dim`` values
        # (zero-padded when longer than the known tags, truncated when shorter).
        qualia_tags = entry.get("qualia_tags")
        if not isinstance(qualia_tags, dict):
            qualia_tags = {}
        q_vals = [self._to_float(qualia_tags.get(key, 0.5), 0.5) for key in self.QUALIA_KEYS]
        q_vals += [0.0] * max(0, self.qualia_dim - len(q_vals))
        q_vals = q_vals[:max(0, self.qualia_dim)]

        # Rho virtue, falling back to "integrated_information", then to 0.9.
        rho_metrics = entry.get("rho_metrics")
        if not isinstance(rho_metrics, dict):
            rho_metrics = {}
        rho_virtue = self._to_float(
            rho_metrics.get("virtue", rho_metrics.get("integrated_information", 0.9)),
            0.9,
        )

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels,
            "qualia_tensor": q_vals,
            "rho_virtue": rho_virtue
        }
class NeuroSymbolicDataCollator:
    """Stack per-example feature lists into one tensor per column."""

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        """Return a batch dict with integer token columns and float symbolic columns."""
        # Column name -> tensor dtype, in the order the batch dict is built.
        column_dtypes = (
            ("input_ids", torch.long),
            ("attention_mask", torch.long),
            ("labels", torch.long),
            ("qualia_tensor", torch.float32),
            ("rho_virtue", torch.float32),
        )
        batch: Dict[str, torch.Tensor] = {}
        for column, dtype in column_dtypes:
            rows = [example[column] for example in features]
            batch[column] = torch.tensor(rows, dtype=dtype)
        return batch
# ============================================================================
# 2. TRAINING WRAPPER (Gradient Flow for Deep Surgery)
# ============================================================================
class NeuroSymbolicTrainingWrapper(nn.Module):
    """
    Wraps the Deep Surgery Middleware to provide a standard `forward()` method
    that computes CrossEntropyLoss over next-token predictions.

    The forward pass adds a qualia modulation and a rho-scaled resonance term
    to the final hidden states before the LM head, so the loss depends on the
    middleware parameters as well as on the base model.

    The middleware is expected to expose ``base_model``,
    ``resonance_matrix.symbiosis_bias`` and either ``qualia_projection`` or
    ``meta_fusion`` (these are the only attributes this class reads).
    """

    def __init__(self, middleware: "DeepSurgeryMiddleware"):
        super().__init__()
        self.middleware = middleware
        self.base_model = middleware.base_model

    @staticmethod
    def _final_hidden_state(outputs):
        """Return the last-layer hidden states from a backbone or causal-LM output.

        Backbone outputs expose ``last_hidden_state``; causal-LM outputs (what
        a model with an LM head returns) only expose the ``hidden_states``
        tuple when called with ``output_hidden_states=True``.
        """
        last_hidden = getattr(outputs, "last_hidden_state", None)
        if last_hidden is not None:
            return last_hidden
        all_hidden = getattr(outputs, "hidden_states", None)
        if all_hidden is None:
            raise AttributeError(
                "Model output exposes neither 'last_hidden_state' nor 'hidden_states'."
            )
        return all_hidden[-1]

    def forward(self, input_ids, attention_mask=None, labels=None, qualia_tensor=None, rho_virtue=None):
        """Run the modulated forward pass.

        Returns:
            dict with ``"logits"`` and ``"loss"``; ``"loss"`` is ``None`` when
            *labels* is not given. Label positions equal to -100 are ignored.
        """
        # 1. Base LLM forward pass to get hidden states
        base_model = self.base_model.model if hasattr(self.base_model, "model") else self.base_model
        outputs = base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True
        )
        hidden_states = self._final_hidden_state(outputs)
        aligned_hidden = hidden_states

        # 2. Qualia Modulation Injection
        if qualia_tensor is not None:
            if hasattr(self.middleware, "qualia_projection"):
                qualia_modulation = self.middleware.qualia_projection(qualia_tensor)
            else:
                qualia_modulation = self.middleware.meta_fusion(
                    torch.cat([qualia_tensor, qualia_tensor, qualia_tensor], dim=1)
                )
            # Insert a sequence axis so the per-example vector broadcasts over tokens.
            qualia_modulation = qualia_modulation.unsqueeze(1)
            # Match the hidden states' device/dtype so a half-precision base
            # model is not silently promoted before reaching the LM head.
            aligned_hidden = aligned_hidden + qualia_modulation.to(
                device=hidden_states.device, dtype=hidden_states.dtype
            )

        # 3. Anthropic Resonance Injection
        if rho_virtue is not None:
            # One scalar per example, broadcast over sequence and hidden axes.
            # reshape (not view) also accepts non-contiguous inputs.
            rho_expanded = rho_virtue.reshape(-1, 1, 1)
            resonance = self.middleware.resonance_matrix.symbiosis_bias * rho_expanded
            aligned_hidden = aligned_hidden + resonance.to(
                device=hidden_states.device, dtype=hidden_states.dtype
            )

        # 4. Final Logit Projection
        logits = self.base_model.lm_head(aligned_hidden)

        # 5. Compute Loss
        loss = None
        if labels is not None:
            # Shift logits and labels for next-token prediction
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous().to(shift_logits.device)
            loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
            # Compute the loss in float32 for numerical stability.
            loss = loss_fct(
                shift_logits.view(-1, shift_logits.size(-1)).float(),
                shift_labels.view(-1),
            )
        return {"loss": loss, "logits": logits}
class NeuroSymbolicTrainer(Trainer):
    """Custom Trainer that reads the loss from the wrapper's output dict."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run *model* on *inputs* and return its ``"loss"`` entry.

        Args:
            model: module whose forward returns a mapping with a ``"loss"`` key.
            inputs: batch dict, passed to the model as keyword arguments.
            return_outputs: when true, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted because recent ``transformers`` versions
                pass it to ``compute_loss``; unused here since the model returns
                an already-averaged loss.

        Raises:
            ValueError: if the model returned no loss (e.g. the batch has no labels).
        """
        outputs = model(**inputs)
        loss = outputs["loss"]
        if loss is None:
            raise ValueError(
                "The model did not return a loss; make sure the batch contains 'labels'."
            )
        return (loss, outputs) if return_outputs else loss
# ============================================================================
# 3. FINE-TUNING EXECUTION PIPELINE
# ============================================================================
def run_fine_tuning(
    model_name: str = "mistralai/Mistral-7B-v0.1",
    dataset_paths: Optional[List[str]] = None,
    output_dir: str = "./syntelligence_neuro_symbolic_model"
):
    """Fine-tune a causal LM with LoRA together with the Deep Surgery middleware.

    Args:
        model_name: Hugging Face model id or local path of the base model.
        dataset_paths: JSON files with training examples. Defaults to
            ``qualia_training_data.json`` and ``qualia_training_data_extended.json``.
        output_dir: directory that receives checkpoints, the LoRA adapters,
            the tokenizer and the middleware weight files.

    Raises:
        ImportError: if ``transformers``, ``datasets`` or ``peft`` is missing.
        ValueError: if no training example could be loaded.
    """
    if dataset_paths is None:
        # Built per call instead of using a shared mutable default argument.
        dataset_paths = ["qualia_training_data.json", "qualia_training_data_extended.json"]

    logger.info("Initializing Neuro-Symbolic Fine-Tuning Pipeline...")
    if not TRANSFORMERS_AVAILABLE or not PEFT_AVAILABLE or not DATASETS_AVAILABLE:
        raise ImportError(
            "NeuroSymbolic fine-tuning requires the 'transformers', 'datasets', and 'peft' packages. "
            "Install these dependencies before running the fine-tuning pipeline."
        )

    # Single source of truth for the qualia vector width used by loader and middleware.
    qualia_dim = 256
    use_cuda = torch.cuda.is_available()

    # 1. Load Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if not tokenizer.pad_token:
        tokenizer.pad_token = tokenizer.eos_token

    # 2. Load and Prepare Datasets (before the model, so bad data fails fast)
    loader = NeuroSymbolicDatasetLoader(tokenizer, qualia_dim=qualia_dim)
    train_dataset = loader.load_and_tokenize(dataset_paths)
    if len(train_dataset) == 0:
        raise ValueError(f"No training examples could be loaded from {dataset_paths}.")
    data_collator = NeuroSymbolicDataCollator()

    # 3. Load Base Model (8-bit for efficient LoRA training when a GPU is present)
    logger.info(f"Loading Base Model: {model_name}")
    if use_cuda:
        base_model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            load_in_8bit=True,
            torch_dtype=torch.float16
        )
        base_model = prepare_model_for_kbit_training(base_model)
    else:
        # 8-bit quantized loading and fp16 mixed precision target CUDA devices;
        # fall back to a plain full-precision load so small models (e.g. GPT-2)
        # can still be used for smoke tests on CPU.
        logger.warning("CUDA not available: loading the base model without 8-bit quantization.")
        base_model = AutoModelForCausalLM.from_pretrained(model_name)

    # 4. Apply LoRA (Low-Rank Adaptation)
    # Keep only the attention projections that actually exist in this
    # architecture (GPT-2, for instance, has no q_proj/k_proj/v_proj/o_proj).
    preferred_targets = ["q_proj", "v_proj", "k_proj", "o_proj"]
    module_suffixes = {name.rsplit(".", 1)[-1] for name, _ in base_model.named_modules()}
    target_modules = [name for name in preferred_targets if name in module_suffixes]
    if not target_modules:
        logger.warning(
            "None of the preferred LoRA target modules exist in this model; "
            "letting PEFT choose its default targets for the architecture."
        )
        target_modules = None
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=target_modules,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    peft_model = get_peft_model(base_model, lora_config)
    logger.info("LoRA Adapters Applied.")

    # 5. Initialize Deep Surgery Middleware & Training Wrapper
    # NOTE(review): the guardian instance is not used afterwards; it is kept in
    # case its constructor has side effects — confirm and remove if not.
    guardian = EthicalGuardian()
    middleware = DeepSurgeryMiddleware(
        base_model=peft_model,
        hidden_size=peft_model.config.hidden_size,
        qualia_dim=qualia_dim
    )
    # We must ensure the meta_fusion and resonance_matrix require gradients
    for param in middleware.meta_fusion.parameters():
        param.requires_grad = True
    middleware.resonance_matrix.symbiosis_bias.requires_grad = True
    # The training wrapper prefers `qualia_projection` when the middleware has
    # one, so make it trainable as well.
    qualia_projection = getattr(middleware, "qualia_projection", None)
    if isinstance(qualia_projection, nn.Module):
        for param in qualia_projection.parameters():
            param.requires_grad = True
    training_model = NeuroSymbolicTrainingWrapper(middleware)

    # 6. Configure Training Arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=10,
        save_strategy="epoch",
        fp16=use_cuda,  # fp16 mixed precision is only enabled when a GPU is present
        optim="adamw_torch",
        remove_unused_columns=False,  # VERY IMPORTANT: Keep qualia and rho columns
        report_to="none"
    )

    # 7. Execute Training
    trainer = NeuroSymbolicTrainer(
        model=training_model,
        args=training_args,
        train_dataset=train_dataset,
        data_collator=data_collator,
    )
    logger.info("🚀 Commencing Deep Surgery Neuro-Symbolic Fine-Tuning...")
    trainer.train()

    # 8. Save Weights (LoRA + Deep Surgery Middleware)
    logger.info("Saving Fine-Tuned Weights...")
    os.makedirs(output_dir, exist_ok=True)
    adapter_dir = os.path.join(output_dir, "lora_adapters")
    peft_model.save_pretrained(adapter_dir)
    tokenizer.save_pretrained(adapter_dir)
    # Save the custom Deep Surgery matrices
    torch.save(middleware.meta_fusion.state_dict(), os.path.join(output_dir, "meta_fusion_weights.pt"))
    torch.save(
        middleware.resonance_matrix.state_dict(),
        os.path.join(output_dir, "anthropic_resonance_weights.pt"),
    )
    if isinstance(qualia_projection, nn.Module):
        # Trained through the wrapper, so it must be persisted too.
        torch.save(
            qualia_projection.state_dict(),
            os.path.join(output_dir, "qualia_projection_weights.pt"),
        )
    logger.info(f"✅ Neuro-Symbolic Model successfully saved to {output_dir}")
if __name__ == "__main__":
    # Script entry point. A local run needs a GPU with enough VRAM for the
    # chosen base model; GPT-2 is used here as a small placeholder for testing.
    smoke_test_settings = {
        "model_name": "gpt2",
        "dataset_paths": ["qualia_training_data.json", "qualia_training_data_extended.json"],
    }
    run_fine_tuning(**smoke_test_settings)