"""Fine-tune ProSavantEngine_Phi9_4 with a Φ-weighted spectral-coherence term.

Loads a masked-LM checkpoint and a JSONL corpus, tokenizes to fixed-length
inputs, and trains with a custom Trainer whose loss subtracts a small
golden-ratio-weighted FFT coherence score computed from hidden states.
"""

from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
import numpy as np
import torch
from scipy.fft import fft, fftfreq

phi = (1 + np.sqrt(5)) / 2  # golden ratio — weights the frequency bins below

# -----------------------------
# Load model + dataset
# -----------------------------
model_name = "antonypamo/ProSavantEngine_Phi9_4"
dataset = load_dataset(
    "antonypamo/savantorganized",
    data_files="data/corpus_balanced_phi95.jsonl",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)
# Force encoder-style (bidirectional) behavior for masked-LM training.
model.config.is_decoder = False
model.config.add_cross_attention = False

# -----------------------------
# Tokenize
# -----------------------------
def tokenize_function(examples):
    """Tokenize a batch of raw-text examples to fixed-length (128) inputs."""
    return tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

tokenized = dataset["train"].map(
    tokenize_function, batched=True, remove_columns=["text"]
)
tokenized.set_format("torch")

# BUGFIX: masked-LM training requires a `labels` tensor. The original script
# supplied none, so `outputs.loss` was None and `compute_loss` crashed on
# `base_loss - ...`. This collator randomly masks tokens (default 15%) and
# builds the labels tensor for each batch.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True)

# -----------------------------
# Φ-weighted resonance metric
# -----------------------------
def phi_weighted_coherence(hidden_states):
    """Cosine similarity between the FFT magnitude spectrum of the pooled
    hidden signal and a cos²(π·f/φ) weighting template.

    hidden_states: assumed shape (batch, seq, dim) — TODO confirm against
    the model's hidden_states layout.
    Returns a float; 0.0 when either spectrum has zero norm (BUGFIX: the
    original divided by zero and produced NaN in that case).
    """
    h = hidden_states.mean(1).detach().cpu().numpy()   # pool over seq → (batch, dim)
    s = np.abs(fft(h.mean(axis=1)))                    # spectrum along the batch axis
    f = fftfreq(len(s), d=1.0)[: len(s) // 2]          # positive frequencies only
    a = s[: len(f)]
    w = np.cos(f * np.pi / phi) ** 2
    denom = np.linalg.norm(a) * np.linalg.norm(w)
    if denom == 0.0:
        return 0.0
    return float(np.dot(a, w) / denom)


class PhiTrainer(Trainer):
    """Trainer that subtracts a small Φ-coherence bonus from the MLM loss.

    NOTE(review): the coherence score is computed on detached numpy arrays,
    so it carries no gradient — it only shifts the logged loss value by a
    constant per step. Confirm whether a differentiable version was intended.
    """

    # `**kwargs` absorbs extra arguments (e.g. `num_items_in_batch`) that
    # newer transformers versions pass to compute_loss.
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        outputs = model(**inputs, output_hidden_states=True)
        base_loss = outputs.loss
        # Average hidden states across all layers before scoring.
        hidden = torch.stack(outputs.hidden_states).mean(dim=0)
        phi_score = phi_weighted_coherence(hidden)
        loss = base_loss - 0.01 * torch.tensor(phi_score, device=base_loss.device)
        return (loss, outputs) if return_outputs else loss


# -----------------------------
# TrainingArguments
# -----------------------------
training_args = TrainingArguments(
    output_dir="./outputs_phi9_4",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=50,
    push_to_hub=True,
    hub_model_id="antonypamo/ProSavantEngine_Phi9_4",
)

# -----------------------------
# Run Trainer
# -----------------------------
trainer = PhiTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    eval_dataset=tokenized.select(range(512)),  # small held-out slice for eval
    data_collator=data_collator,  # BUGFIX: supplies masked labels (see above)
)
trainer.train()
trainer.push_to_hub(
    commit_message="Φ9.4 — Fine-tuned with geometric resonance alignment"
)