import os
import sys
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
# --- Architectural Paths ---
PROJECT_DIR = "<your project>"  # TODO: replace placeholder with the absolute path to your project root
ADAPTER_DIR = os.path.join(PROJECT_DIR, "adapters")  # destination for the extracted LoRA tensors
MODEL_ID = "mlabonne/gemma-3-12b-it-abliterated"  # 12B base model pulled from the HF Hub
DATASET_ID = "iamtarun/python_code_instructions_18k_alpaca"  # Alpaca-format instruction dataset (~18k rows)
# Ensure the hot-swap directory exists
os.makedirs(ADAPTER_DIR, exist_ok=True)
# You must import your custom O-TITANS trainer class here to apply the orthogonal loss penalty.
# Assuming you have it saved in your repository or the Polymath folder.
sys.path.append(PROJECT_DIR)
# NOTE(review): inject_orthogonal_memory is imported but not called anywhere in this
# script — presumably kept for the OrthogonalTrainer path below; confirm before removing.
from otitans_surgery import inject_orthogonal_memory
# from otitans_train import OrthogonalTrainer <-- Uncomment and use this if you have the custom loss class ready
def main():
    """Fine-tune a rank-16 LoRA adapter (q_proj/v_proj only) on the 12B base
    model over the Alpaca Python-instruction dataset, then save just the LoRA
    tensors to ADAPTER_DIR for hot-swapping.

    Side effects: downloads model/dataset from the HF Hub, writes checkpoints
    under PROJECT_DIR and the final adapter file under ADAPTER_DIR.
    """
    print(f"[*] Waking the Forge for Python Expert Engram...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    if tokenizer.pad_token is None:
        # Fall back to EOS for padding; pad positions are excluded from the
        # loss below, so this does not teach the model to emit EOS runs.
        tokenizer.pad_token = tokenizer.eos_token

    print(f"[*] Pulling Dataset: {DATASET_ID}")
    dataset = load_dataset(DATASET_ID, split="train")

    # 1. The Catalyst: System Override Injection
    # We bake this into every single training example to permanently shift the model's coding paradigm.
    EXPERT_SYSTEM_PROMPT = "You are the Polymath Python Expert. You do not output textbook examples; you output production-grade, highly optimized, and architecturally sound Python code. Prioritize advanced libraries (e.g., asyncio), secure protocols, and robust error handling."

    def format_and_tokenize(examples):
        """Batched datasets.map callback: render chat-templated text and
        tokenize to fixed length, with pad positions masked out of the labels.
        """
        formatted_texts = []
        # Alpaca dataset uses 'instruction', 'input', and 'output' columns
        for instruction, inp, output in zip(examples['instruction'], examples['input'], examples['output']):
            user_msg = instruction
            if inp.strip():
                user_msg += f"\n\nContext:\n{inp}"
            messages = [
                {"role": "system", "content": EXPERT_SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": output}
            ]
            # Apply the Gemma 3 chat template
            formatted_texts.append(tokenizer.apply_chat_template(messages, tokenize=False))

        tokenized = tokenizer(formatted_texts, truncation=True, max_length=2048, padding="max_length")
        # --- THE GEMMA 3 MULTIMODAL BYPASS ---
        # Force the vision tower to recognize all inputs as text tokens
        tokenized["token_type_ids"] = [[0] * len(ids) for ids in tokenized["input_ids"]]
        # BUGFIX: the original copied input_ids verbatim into labels, so the
        # cross-entropy loss was computed on every padded position (and, with
        # pad_token == eos_token, trained the model to predict EOS runs).
        # Mask pad positions with -100, the ignore index of the HF causal-LM loss.
        tokenized["labels"] = [
            [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
            for ids, attn in zip(tokenized["input_ids"], tokenized["attention_mask"])
        ]
        return tokenized

    print("[*] Formatting and injecting Expert System Prompt...")
    tokenized_datasets = dataset.map(
        format_and_tokenize,
        batched=True,
        remove_columns=dataset.column_names,
        desc="Tokenizing Dataset"
    )

    # 2. Load Foundation
    print("[*] Loading 12B Foundation Weights into VRAM...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )

    # 3. Apply the O-TITANS Surgical Constraints
    # Strictly isolating to q_proj and v_proj at rank 16.
    print("[*] Applying Orthogonal Penalty Matrix to Attention Vectors...")
    # NOTE: If your inject_orthogonal_memory function returns the model, use it here.
    # Otherwise, apply a standard LoRA targeted strictly at q_proj and v_proj.
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # 4. The Kiln Parameters
    training_args = TrainingArguments(
        output_dir=os.path.join(PROJECT_DIR, "temp_python_checkpoint"),
        per_device_train_batch_size=1,  # 12B model requires a micro-batch
        gradient_accumulation_steps=8,
        learning_rate=2e-5,
        num_train_epochs=1,  # 1 epoch over 18k is sufficient for a targeted engram
        logging_steps=50,
        bf16=True,
        report_to="none",
        optim="adamw_torch"
    )

    # If you have the OrthogonalTrainer class from your Platypus run, swap it here to enforce the math.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets,
    )
    print("\n[*] Commencing SFT. Forging the code_python engram...")
    trainer.train()

    # 5. Extract and format the specific adapter tensor
    final_output_path = os.path.join(ADAPTER_DIR, "otitans_code_python.pt")
    print(f"[*] Extracting specialized memory states to {final_output_path}...")
    # We only want to save our customized q_proj and v_proj weights, not the whole massive directory.
    adapter_state_dict = {k: v.cpu() for k, v in model.state_dict().items() if "lora" in k}
    torch.save(adapter_state_dict, final_output_path)
    print(f"[*] Engram Forge Complete. The Polymath Swarm is now armed.")


if __name__ == "__main__":
    main()