| """ |
| RAE Training β Colab/Jupyter Quickstart |
| βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| Run this in Google Colab (free T4 GPU) or any Jupyter environment. |
| |
| This is the fastest path to running a RAE training experiment: |
| 1. Install deps (~2 min) |
| 2. Generate RAE-structured dataset (~1 min) |
| 3. Fine-tune with AutoTrain (~15-30 min on T4) |
| 4. Evaluate before/after (~5 min) |
| |
| The handwriting effect: training on RAE-structured data installs |
| richer internal representations, producing faster and more capable |
| inference β just as handwriting's slow encoding produces fast recall. |
| βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| """ |
|
|
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
|
|
import os


# --- Credentials -------------------------------------------------------
# SECURITY: never commit a real token to source control. Either replace
# the placeholders below, or (better) export HF_TOKEN / HF_USERNAME in
# your shell before running. setdefault() only fills in the placeholder
# when the variable is NOT already set, so a real token exported in the
# environment is preserved instead of being clobbered by the placeholder.
os.environ.setdefault("HF_TOKEN", "YOUR_HF_WRITE_TOKEN")
os.environ.setdefault("HF_USERNAME", "YOUR_HF_USERNAME")


# --- Training configuration --------------------------------------------
# Small instruct model that fine-tunes comfortably on a free Colab T4.
BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

PROJECT_NAME = "rae-cognitive-v1"  # also used as the Hub repo name on push
EPOCHS = 2
LORA_R = 16           # LoRA rank; alpha is derived as 2*r in the config cell
BATCH_SIZE = 1        # per-device; effective batch = BATCH_SIZE * GRADIENT_ACCUM
GRADIENT_ACCUM = 4
LEARNING_RATE = 5e-6
MAX_SEQ_LENGTH = 2048  # block size / model max length for SFT
|
|
| |
| |
| |
|
|
# System prompt prepended to every training example and to inference-time
# chats. It instructs the model to answer in four explicit XML-tagged
# phases (SATURATION / ABSTRACTION / DESCENT / INTEGRATION). The
# evaluate_rae_response() helper greps for these exact tag names, so do
# not rename them without updating the evaluator.
# NOTE(review): the "β" in the third line of the prompt looks like a
# mojibake'd em dash; confirm the intended character before shipping
# (changing the string changes the trained prompt, so it is left as-is).
RAE_SYSTEM_PROMPT: str = """You are an RAE-trained cognitive reasoner. For EVERY problem, you must
work through all four phases of the Recursive Abstraction Engine. Each phase
serves a distinct cognitive function β you cannot skip phases or collapse them.

<SATURATION>
Immerse in the problem space. Observe everything without categorizing.
- What are all the elements, constraints, relationships?
- What doesn't fit expected patterns? Flag anomalies.
- Encode the problem through multiple lenses (structural, temporal, causal).
Terminate when you can predict system behavior without conscious reasoning.
</SATURATION>

<ABSTRACTION>
Extract the minimal structure that explains your saturated understanding.
- What is the isomorphic structure across domains?
- What invariant is preserved under transformation?
- Compress: explain the underlying mechanism in one sentence.
- What assumption are we making that we don't realize?
</ABSTRACTION>

<DESCENT>
Project the abstract structure into concrete instantiations.
- If this model is correct, what must also be true?
- What's the most counterintuitive prediction?
- Build the simplest implementation that tests the core assumption.
- What would prove this wrong?
</DESCENT>

<INTEGRATION>
Incorporate results and prepare the knowledge update.
- What did we learn that changes our prior understanding?
- What's the confidence level and what would change it?
- Where should we look more deeply next?
- What's the new question this raises?
</INTEGRATION>"""
|
|
| |
| |
| |
|
|
import json
import random
from pathlib import Path
# NOTE(review): Path appears unused in the active code below (only the
# commented-out sections might need it) — confirm before removing.


# Seed problems spanning four domains (code / reasoning / analysis /
# creative). Each seed is expanded into several RAE-structured chat
# examples below; "domain" is carried into example metadata so the
# dataset can later be filtered or balanced per domain.
SEED_PROBLEMS = [
    {"prompt": "Implement an LRU cache with O(1) get/put that supports TTL expiration.", "domain": "code"},
    {"prompt": "Design a rate limiter supporting sliding window, token bucket, and leaky bucket through a unified interface.", "domain": "code"},
    {"prompt": "Write a parser for expressions with variables, arithmetic, and short-circuit boolean logic.", "domain": "code"},
    {"prompt": "Implement a B-tree with configurable order supporting range queries.", "domain": "code"},
    {"prompt": "Build a mark-and-sweep garbage collector that handles cyclic references.", "domain": "code"},
    {"prompt": "A hospital's mortality rate is 2x average but every surgeon is at or below average. Explain and recommend.", "domain": "reasoning"},
    {"prompt": "Teams using microservices ship 40% faster in year 1 but 20% slower by year 3. Explain the crossover.", "domain": "reasoning"},
    {"prompt": "Three AI labs show 99.9% safety benchmarks yet have public failures. Analyze the gap.", "domain": "reasoning"},
    {"prompt": "A city adds bike lanes and cycling fatalities increase 15% in year 1. Should they remove the lanes?", "domain": "reasoning"},
    {"prompt": "Medicare Advantage MLRs increase 200-400bps YoY while membership grows 8-12%. Structural or cyclical?", "domain": "analysis"},
    {"prompt": "A SaaS company's logo retention is 95% but NRR is 78%. Diagnose the dynamics.", "domain": "analysis"},
    {"prompt": "Compare transformer attention vs state space models for long clinical documents.", "domain": "analysis"},
    {"prompt": "A fraud model has 98% accuracy but 10,000 false positives/day. FP cost=$50, FN cost=$5,000. Optimize.", "domain": "analysis"},
    {"prompt": "Design a cognitive architecture for one-shot skill learning from video. Combine motor learning, program synthesis, and cognitive psychology.", "domain": "creative"},
    {"prompt": "Create an operationally testable framework for evaluating genuine understanding vs pattern matching in AI.", "domain": "creative"},
    {"prompt": "Propose a distributed consensus protocol using swarm intelligence. Define it formally.", "domain": "creative"},
]
|
|
def make_rae_example(problem):
    """Build one RAE-structured chat example from a seed problem.

    The assistant turn is a fixed four-phase scaffold (saturation ->
    abstraction -> descent -> integration) containing bracketed
    placeholders; the first 100 characters of the seed prompt are
    interpolated into the saturation phase. Returns a dict with
    "messages" (system/user/assistant chat turns) and "metadata".
    """
    scaffold = f"""<SATURATION>
Let me immerse deeply in this problem space without jumping to conclusions.

**Elements**: {problem['prompt'][:100]}...
The key components to observe are the explicit requirements, implicit constraints,
edge cases, and the domain assumptions that most people take for granted.

**Anomaly detection**: What patterns here defy standard expectations?
What would I find surprising if it turned out to be false?

**Multi-lens encoding**:
- Structural: How do the components relate topologically?
- Temporal: How does this system evolve under load/scale/time?
- Causal: What are the upstream drivers and downstream effects?

I can now model this system's behavior intuitively.
</SATURATION>

<ABSTRACTION>
Compressing everything from saturation into minimal structure:

**Core isomorphism**: This problem has the same shape as [analogous domain pattern].
The underlying mechanism is the tension between [fundamental tradeoff].

**Invariant**: What stays constant across all variations is [key invariant].

**One-sentence compression**: The solution requires [compressed insight].

**Hidden assumption**: We're implicitly assuming [hidden constraint] which may not hold.
</ABSTRACTION>

<DESCENT>
Projecting the abstraction into concrete, testable form:

**If the model is correct**:
1. Then [concrete prediction A] must hold
2. And [concrete prediction B] follows
3. Counterintuitively, [surprising consequence]

**Implementation**:
Here is the concrete solution/analysis/code based on the abstract structure...

[Detailed implementation appropriate to the domain]

**Falsification**: This would be wrong if [specific testable condition].
</DESCENT>

<INTEGRATION>
**Model update**: This deepens my understanding because [specific learning].
**Confidence**: Medium-high. Would increase with [specific evidence].
Would decrease if [specific disconfirmation].
**Next cycle**: The new question this raises is [specific next question].
**Transferable principle**: The general pattern here is [abstracted learning].
</INTEGRATION>"""

    conversation = [
        {"role": "system", "content": RAE_SYSTEM_PROMPT},
        {"role": "user", "content": problem["prompt"]},
        {"role": "assistant", "content": scaffold},
    ]
    return {
        "messages": conversation,
        "metadata": {"domain": problem["domain"]},
    }
|
|
| |
| print("Generating RAE training dataset...") |
| os.makedirs("data/rae_training_data", exist_ok=True) |
|
|
| all_examples = [] |
| for problem in SEED_PROBLEMS: |
| |
| for v in range(3): |
| example = make_rae_example(problem) |
| example["metadata"]["variation"] = v |
| all_examples.append(example) |
|
|
| random.shuffle(all_examples) |
| split = int(len(all_examples) * 0.9) |
| train = all_examples[:split] |
| val = all_examples[split:] |
|
|
| with open("data/rae_training_data/train.jsonl", "w") as f: |
| for ex in train: |
| f.write(json.dumps(ex) + "\n") |
|
|
| with open("data/rae_training_data/validation.jsonl", "w") as f: |
| for ex in val: |
| f.write(json.dumps(ex) + "\n") |
|
|
| print(f"β Generated {len(train)} train + {len(val)} validation examples") |
|
|
| |
| |
| |
|
|
| |
| |
|
|
| """ |
| import anthropic |
| |
| client = anthropic.Anthropic() # Uses ANTHROPIC_API_KEY env var |
| |
| def generate_rae_with_claude(problem): |
| response = client.messages.create( |
| model="claude-sonnet-4-20250514", |
| max_tokens=4096, |
| system=RAE_SYSTEM_PROMPT, |
| messages=[{"role": "user", "content": problem["prompt"]}], |
| ) |
| return { |
| "messages": [ |
| {"role": "system", "content": RAE_SYSTEM_PROMPT}, |
| {"role": "user", "content": problem["prompt"]}, |
| {"role": "assistant", "content": response.content[0].text}, |
| ], |
| "metadata": {"domain": problem["domain"], "method": "claude-api"} |
| } |
| |
| # Generate high-quality examples |
| api_examples = [] |
| for i, problem in enumerate(SEED_PROBLEMS): |
| print(f" [{i+1}/{len(SEED_PROBLEMS)}] {problem['prompt'][:50]}...") |
| try: |
| ex = generate_rae_with_claude(problem) |
| api_examples.append(ex) |
| except Exception as e: |
| print(f" Error: {e}") |
| |
| # Overwrite with API-generated data |
| if api_examples: |
| random.shuffle(api_examples) |
| split = int(len(api_examples) * 0.9) |
| with open("data/rae_training_data/train.jsonl", "w") as f: |
| for ex in api_examples[:split]: |
| f.write(json.dumps(ex) + "\\n") |
| with open("data/rae_training_data/validation.jsonl", "w") as f: |
| for ex in api_examples[split:]: |
| f.write(json.dumps(ex) + "\\n") |
| print(f"β Upgraded to {len(api_examples)} Claude-generated examples") |
| """ |
|
|
| |
| |
| |
|
|
import yaml


# AutoTrain SFT configuration, written to rae_autotrain_config.yaml and
# consumed by `autotrain --config ...` in the next cell. QLoRA setup:
# int4-quantized base model + LoRA adapters on all linear layers, merged
# after training and pushed to the Hub.
config = {
    "task": "llm-sft",
    "base_model": BASE_MODEL,
    "project_name": PROJECT_NAME,
    "log": "tensorboard",
    "backend": "local",
    "data": {
        "path": "data/rae_training_data",
        "train_split": "train",
        # Fixed: use the validation.jsonl generated above. The original
        # left this as None, silently discarding the held-out split.
        "valid_split": "validation",
        "chat_template": "tokenizer",
        "column_mapping": {
            "text_column": "messages",
        },
    },
    "params": {
        "block_size": MAX_SEQ_LENGTH,
        "model_max_length": MAX_SEQ_LENGTH,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "lr": LEARNING_RATE,
        "peft": True,
        "quantization": "int4",
        "target_modules": "all-linear",
        "lora_r": LORA_R,
        "lora_alpha": LORA_R * 2,  # common alpha = 2 * rank heuristic
        "lora_dropout": 0.05,
        "padding": "right",
        "optimizer": "paged_adamw_8bit",
        "scheduler": "cosine",
        "gradient_accumulation": GRADIENT_ACCUM,
        "mixed_precision": "bf16",
        "merge_adapter": True,
    },
    "hub": {
        # Empty strings if the env vars were never set; AutoTrain then
        # fails at push time rather than at config time.
        "username": os.environ.get("HF_USERNAME", ""),
        "token": os.environ.get("HF_TOKEN", ""),
        "push_to_hub": True,
    },
}

with open("rae_autotrain_config.yaml", "w") as f:
    yaml.dump(config, f, default_flow_style=False)

# Fixed: mojibake'd "β" -> "✓"; also dropped the pointless f-prefix on
# the placeholder-free first message.
print("✓ Config written: rae_autotrain_config.yaml")
print(f" Base model: {BASE_MODEL}")
print(f" LoRA rank: {LORA_R}")
print(f" Epochs: {EPOCHS}")
|
|
| |
| |
| |
|
|
| |
| |
|
|
| |
| """ |
| import subprocess |
| result = subprocess.run( |
| ["autotrain", "--config", "rae_autotrain_config.yaml"], |
| capture_output=False, |
| ) |
| """ |
|
|
| print("Ready to train! Uncomment the training command above and run.") |
| print(f"Expected time on T4: ~15-30 min for {EPOCHS} epochs") |
|
|
| |
| |
| |
|
|
def evaluate_rae_response(response_text: str) -> dict:
    """Score an RAE-formatted response for structural completeness.

    Extracts the text inside each of the four phase tags and reports:
    how many phases are present, word counts for the saturation and
    abstraction phases, the abstraction/saturation compression ratio,
    and presence flags for the descent and integration phases.
    """
    import re

    def _phase_body(tag: str) -> str:
        # Non-greedy + DOTALL so multi-line phase bodies are captured.
        found = re.search(f"<{tag}>(.*?)</{tag}>", response_text, re.DOTALL)
        return found.group(1).strip() if found else ""

    phase_names = ("SATURATION", "ABSTRACTION", "DESCENT", "INTEGRATION")
    phases = {name: _phase_body(name) for name in phase_names}

    n_present = sum(bool(body) for body in phases.values())
    n_sat = len(phases["SATURATION"].split())
    n_abs = len(phases["ABSTRACTION"].split())

    return {
        "phases_complete": f"{n_present}/4",
        "saturation_words": n_sat,
        "abstraction_words": n_abs,
        # max(..., 1) guards the divide-by-zero when saturation is empty.
        "compression_ratio": round(n_abs / max(n_sat, 1), 2),
        "descent_present": bool(phases["DESCENT"]),
        "integration_present": bool(phases["INTEGRATION"]),
    }
|
|
|
|
| |
| """ |
| from transformers import pipeline |
| |
| # Load trained model |
| model_id = f"{os.environ['HF_USERNAME']}/{PROJECT_NAME}" |
| pipe = pipeline("text-generation", model=model_id, torch_dtype="auto", device_map="auto") |
| |
| test_prompt = "A SaaS company's logo retention is 95% but NRR is 78%. Diagnose." |
| |
| messages = [ |
| {"role": "system", "content": RAE_SYSTEM_PROMPT}, |
| {"role": "user", "content": test_prompt}, |
| ] |
| |
| output = pipe(messages, max_new_tokens=2048, temperature=0.7) |
| response = output[0]["generated_text"][-1]["content"] |
| |
| print("=== RAE Response ===") |
| print(response[:500]) |
| print("\\n=== Evaluation ===") |
| print(evaluate_rae_response(response)) |
| """ |
|
|
| print("\n" + "=" * 60) |
| print(" RAE TRAINING QUICKSTART COMPLETE") |
| print(" 1. Run Cell 7 to start training") |
| print(" 2. Run Cell 8 to evaluate results") |
| print(" The hand was slow so the mind could be fast later.") |
| print("=" * 60) |
|
|