File size: 5,317 Bytes
9659b2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import sys
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

# --- Architectural Paths ---
# NOTE: PROJECT_DIR is a literal placeholder; replace it with the real project
# root before running, otherwise every path below is built from "<your project>".
PROJECT_DIR = "<your project>"
# Directory where main() writes the extracted adapter tensor file.
ADAPTER_DIR = os.path.join(PROJECT_DIR, "adapters")
# Identifiers passed to from_pretrained() / load_dataset() in main().
MODEL_ID = "mlabonne/gemma-3-12b-it-abliterated"
DATASET_ID = "iamtarun/python_code_instructions_18k_alpaca"

# Ensure the hot-swap directory exists.
# NOTE: this runs at import time, so merely importing this module creates the directory.
os.makedirs(ADAPTER_DIR, exist_ok=True)

# Project-local modules are resolved through PROJECT_DIR, so it must be appended to
# sys.path BEFORE the import below; do not move that import to the top of the file.
# NOTE(review): inject_orthogonal_memory is imported but never called in this file;
# the training below uses a plain PEFT LoRA adapter. Confirm whether that is intended.
sys.path.append(PROJECT_DIR)
from otitans_surgery import inject_orthogonal_memory
# from otitans_train import OrthogonalTrainer  <-- Uncomment and use this if you have the custom loss class ready

def main():
    """Fine-tune a rank-16 LoRA adapter on the Python instruction dataset and export it.

    Pipeline:
      1. Load the tokenizer for ``MODEL_ID`` and the ``train`` split of ``DATASET_ID``.
      2. Render every example as a system/user/assistant chat, tokenize it to a
         fixed length of 2048 and build labels that ignore padded positions.
      3. Load the base model in bfloat16 and wrap it with a LoRA adapter that
         targets ``q_proj`` and ``v_proj`` only.
      4. Train for one epoch with the stock ``Trainer``.
      5. Save only the tensors whose state-dict key contains ``"lora"`` to
         ``ADAPTER_DIR/otitans_code_python.pt``.

    Returns:
        None. Checkpoints are written under ``PROJECT_DIR/temp_python_checkpoint``
        and the final adapter tensors under ``ADAPTER_DIR``.
    """
    # Label value skipped by the loss (the ignore index used by Hugging Face
    # causal-LM heads / torch cross-entropy).
    IGNORE_INDEX = -100

    print("[*] Waking the Forge for Python Expert Engram...")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    if tokenizer.pad_token is None:
        # Fall back to EOS so fixed-length padding below is possible.
        tokenizer.pad_token = tokenizer.eos_token

    print(f"[*] Pulling Dataset: {DATASET_ID}")
    dataset = load_dataset(DATASET_ID, split="train")

    # 1. The Catalyst: System Override Injection
    # The same system prompt is baked into every training example.
    EXPERT_SYSTEM_PROMPT = "You are the Polymath Python Expert. You do not output textbook examples; you output production-grade, highly optimized, and architecturally sound Python code. Prioritize advanced libraries (e.g., asyncio), secure protocols, and robust error handling."

    def format_and_tokenize(examples):
        """Turn a batch of Alpaca rows into padded model inputs with masked labels."""
        formatted_texts = []
        # Alpaca dataset uses 'instruction', 'input', and 'output' columns
        for instruction, inp, output in zip(
            examples["instruction"], examples["input"], examples["output"]
        ):
            user_msg = instruction
            # Only append a context section when the optional input is non-blank.
            if inp and inp.strip():
                user_msg += f"\n\nContext:\n{inp}"

            messages = [
                {"role": "system", "content": EXPERT_SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": output},
            ]

            # Render the conversation to text with the tokenizer's chat template.
            formatted_texts.append(
                tokenizer.apply_chat_template(messages, tokenize=False)
            )

        # The rendered text already contains the template's special tokens, so
        # the tokenizer must not add its own again (that would duplicate BOS).
        tokenized = tokenizer(
            formatted_texts,
            truncation=True,
            max_length=2048,
            padding="max_length",
            add_special_tokens=False,
        )

        # --- THE GEMMA 3 MULTIMODAL BYPASS ---
        # All-zero token_type_ids flag every position as a text token.
        tokenized["token_type_ids"] = [
            [0] * len(ids) for ids in tokenized["input_ids"]
        ]

        # Labels mirror input_ids, except that padded positions are masked out.
        # attention_mask (not the pad id) decides what is padding, so genuine
        # EOS tokens still contribute even when pad_token == eos_token.
        tokenized["labels"] = [
            [tok if keep else IGNORE_INDEX for tok, keep in zip(ids, mask)]
            for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
        ]
        return tokenized

    print("[*] Formatting and injecting Expert System Prompt...")
    tokenized_datasets = dataset.map(
        format_and_tokenize,
        batched=True,
        remove_columns=dataset.column_names,
        desc="Tokenizing Dataset",
    )

    # 2. Load Foundation
    print("[*] Loading 12B Foundation Weights into VRAM...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # 3. Apply the adapter
    # NOTE: this is a standard PEFT LoRA on q_proj/v_proj at rank 16. No
    # orthogonality penalty is applied in this function; that would require the
    # project's custom trainer/injection code, which is not used here.
    print("[*] Applying Orthogonal Penalty Matrix to Attention Vectors...")
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32.0,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # 4. The Kiln Parameters
    training_args = TrainingArguments(
        output_dir=os.path.join(PROJECT_DIR, "temp_python_checkpoint"),
        per_device_train_batch_size=1,  # micro-batch of one sequence per device
        gradient_accumulation_steps=8,  # effective batch of 8 sequences per device
        learning_rate=2e-5,
        num_train_epochs=1,
        logging_steps=50,
        bf16=True,
        report_to="none",
        optim="adamw_torch",
    )

    # Stock Trainer: plain causal-LM loss. Swap in a custom trainer class here
    # if an additional loss term is required.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets,
    )

    print("\n[*] Commencing SFT. Forging the code_python engram...")
    trainer.train()

    # 5. Extract and format the specific adapter tensor
    final_output_path = os.path.join(ADAPTER_DIR, "otitans_code_python.pt")
    print(f"[*] Extracting specialized memory states to {final_output_path}...")

    # Keep only the LoRA tensors (moved to CPU) instead of the full model state.
    adapter_state_dict = {
        key: tensor.cpu()
        for key, tensor in model.state_dict().items()
        if "lora" in key
    }
    torch.save(adapter_state_dict, final_output_path)

    print("[*] Engram Forge Complete. The Polymath Swarm is now armed.")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()