aether-core / aether_train_cpu.py

Upload aether_train_cpu.py

63710ed verified 14 days ago

13.1 kB

	#!/usr/bin/env python3
	"""
	AETHER Training — CPU Compatible.
	Runs on any machine: uses a CUDA GPU when one is available, otherwise falls back to CPU (including Colab CPU runtimes). Slower on CPU, but works everywhere.
	Uses SFT (Supervised Fine-Tuning) instead of GRPO for CPU efficiency.
	"""

	import os
	import json
	import logging
	from typing import List

	import torch
	from datasets import load_dataset, Dataset
	from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
	from trl import SFTTrainer, SFTConfig

	# Configure the root logger to emit INFO-level records, and create the
	# named logger ("AETHER.CPU") that the rest of this script logs through.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger("AETHER.CPU")


	def prepare_aether_dataset():
	    """Build the SFT training corpus as a dataset with a single "text" column.

	    First tries to load "trl-lib/DeepMath-103K" and render every example
	    into an "### Instruction / ### Response" prompt with a templated
	    <think> section. If anything in that path fails (load, formatting,
	    column removal), a warning is logged and a small hand-written
	    reasoning dataset (10 examples repeated 100 times) is returned instead.

	    Returns:
	        A ``datasets.Dataset`` whose only column is "text".
	    """

	    # Try DeepMath first
	    try:
	        ds = load_dataset("trl-lib/DeepMath-103K", split="train")
	        logger.info(f"Loaded DeepMath: {len(ds)} examples")

	        # Convert to text format for SFT
	        def format_example(ex):
	            prompt = ex.get("prompt", "")
	            if isinstance(prompt, list):  # messages format
	                # Keep only the user turns. `or ""` guards against a message
	                # whose content is None, which would make str.join raise and
	                # silently push the whole run onto the fallback dataset.
	                prompt = "\n".join(
	                    (m.get("content") or "") for m in prompt if m.get("role") == "user"
	                )
	            solution = ex.get("solution", "")
	            return {
	                "text": f"### Instruction:\n{prompt}\n\n### Response:\n<think>\nLet me think step by step about this problem.\n\nStep 1: Analyze the given information.\nStep 2: Apply relevant mathematical principles.\nStep 3: Calculate the result carefully.\n\nTherefore, the answer is {solution}.\n</think>\n\nThe answer is {solution}."
	            }

	        ds = ds.map(format_example)
	        ds = ds.remove_columns([c for c in ds.column_names if c != "text"])
	        return ds

	    except Exception as e:
	        # Deliberate best-effort: any failure here (offline machine, schema
	        # change, ...) falls through to the built-in dataset below.
	        logger.warning(f"DeepMath failed: {e}")

	    # Fallback: AETHER reasoning dataset
	    logger.info("Creating AETHER reasoning dataset...")

	    reasoning_examples = [
	        {
	            "text": "### Instruction:\nThink step by step: What is 17 + 25?\n\n### Response:\n<think>\nStep 1: Break down the numbers. 17 + 25.\nStep 2: Add the ones place: 7 + 5 = 12. Write down 2, carry 1.\nStep 3: Add the tens place: 1 + 2 + 1 (carry) = 4.\nStep 4: Combine: 42.\n\nTherefore, 17 + 25 = 42.\n</think>\n\nThe answer is 42."
	        },
	        {
	            "text": "### Instruction:\nPlan and reason: A farmer has 3 fields. Each produces 42 bushels. Total?\n\n### Response:\n<think>\nStep 1: Identify given information. 3 fields, 42 bushels each.\nStep 2: Recognize this as multiplication: 3 × 42.\nStep 3: Calculate: 3 × 40 = 120, 3 × 2 = 6.\nStep 4: Sum: 120 + 6 = 126.\n\nTherefore, total bushels = 126.\n</think>\n\nThe answer is 126 bushels."
	        },
	        {
	            "text": "### Instruction:\nReflect and improve: Your answer was 50. Correct answer is 60. What went wrong?\n\n### Response:\n<think>\nStep 1: Evaluate the error. The difference is 10 (60 - 50 = 10).\nStep 2: Check calculation steps. Likely missed adding a carry or misread a digit.\nStep 3: Identify root cause: Insufficient verification of intermediate steps.\nStep 4: Improvement plan: Always verify by reverse calculation (60 - 10 = 50 confirms error magnitude).\n\nTherefore, I need to improve my verification process.\n</think>\n\nI made a calculation error, likely missing a carry. I'll add verification steps in future."
	        },
	        {
	            "text": "### Instruction:\nWhy does recursive self-improvement require safety constraints?\n\n### Response:\n<think>\nStep 1: Define recursive self-improvement. A system that modifies its own architecture to improve performance.\nStep 2: Consider failure modes. Unconstrained modification could: (a) remove safety checks, (b) optimize for wrong metrics, (c) cause instability.\nStep 3: Analyze historical examples. Unconstrained optimization leads to exploitation of loopholes (reward hacking).\nStep 4: Apply control theory. Any self-modifying system needs invariant constraints to maintain stability.\nStep 5: Propose solution. Sandboxed evaluation, human oversight, and constrained mutation operators.\n\nTherefore, safety constraints are necessary to maintain beneficial behavior during self-improvement.\n</think>\n\nSafety constraints prevent harmful self-modification, maintain stability, and ensure alignment with human values during recursive improvement."
	        },
	        {
	            "text": "### Instruction:\nHow would you build a self-evolving AI system? Break into phases.\n\n### Response:\n<think>\nPhase 1 (Foundation): Establish base architecture with neuro-symbolic integration. Include knowledge graph, neural networks, and symbolic reasoning.\nPhase 2 (Evolution Loop): Implement MAP-Elites archive for diversity preservation. Use Performance-Novelty selection.\nPhase 3 (Hierarchical Control): Deploy macro-policy for blueprint generation and micro-policy for execution. Apply iterative co-evolution.\nPhase 4 (Multi-Agent): Coordinate manager, executor, and developer agents. Enable experience sharing between agents.\nPhase 5 (Safety): Add sandboxed evaluation, architecture validation, and audit logging. Require human oversight for major changes.\nPhase 6 (Meta-Cognition): Implement self-reflection to analyze performance trends and generate evolution directives.\n\nTherefore, a self-evolving AI requires careful balance between capability and safety.\n</think>\n\nA self-evolving AI needs: neuro-symbolic foundation, evolutionary loop, hierarchical control, multi-agent coordination, safety constraints, and meta-cognitive reflection."
	        },
	        {
	            "text": "### Instruction:\nTrade-offs between symbolic and neural reasoning?\n\n### Response:\n<think>\nStep 1: Symbolic reasoning strengths. Interpretable, exact, compositional, supports formal verification.\nStep 2: Symbolic weaknesses. Brittle with noisy data, requires manual rule engineering, limited pattern generalization.\nStep 3: Neural reasoning strengths. Robust to noise, learns from data, excellent pattern recognition, handles unstructured input.\nStep 4: Neural weaknesses. Opaque (black box), no formal guarantees, can hallucinate, requires massive data.\nStep 5: Integration approach. Use neural for perception and pattern learning, symbolic for reasoning and verification.\n\nTherefore, neuro-symbolic integration combines the best of both paradigms.\n</think>\n\nSymbolic = interpretable/exact but brittle. Neural = robust/flexible but opaque. Integration is optimal."
	        },
	        {
	            "text": "### Instruction:\nDesign a multi-agent system with manager and workers.\n\n### Response:\n<think>\nBlueprint:\n1. Manager Agent (Macro-Policy): Decomposes tasks, generates sub-goal blueprints, coordinates workers.\n2. Executor Agent (Micro-Policy): Executes atomic actions conditioned on active sub-goal.\n3. Developer Agent: Creates new tools when capability gaps detected.\n4. Integrator Agent: Merges worker outputs into coherent final response.\n5. Safety Agent: Validates all outputs against constraints before deployment.\n\nCoordination: Manager uses dynamic routing weights to assign tasks. Workers report progress. Experience sharing via group traces.\n\nTraining: Use MLPO — train only the manager with GRPO, peers remain untrained for efficiency.\n</think>\n\nMulti-agent system: Manager (planning), Executor (action), Developer (tools), Integrator (merging), Safety (validation). Train manager only via MLPO."
	        },
	        {
	            "text": "### Instruction:\nEvolve a sorting algorithm iteratively.\n\n### Response:\n<think>\nGeneration 0: Bubble Sort. Simple but O(n²).\n\nGeneration 1 (Mutation): Quick Sort. Divide and conquer. O(n log n) average.\nEvaluation: Better average case but O(n²) worst case. Unstable.\n\nGeneration 2 (Crossover): Merge Sort. Guaranteed O(n log n). Stable.\nEvaluation: Consistent performance. Needs O(n) extra space.\n\nGeneration 3 (Innovation): Timsort. Hybrid of merge + insertion.\nEvaluation: O(n log n) worst, O(n) best. Adaptive to partially sorted data.\n\nTherefore, evolution yields progressively better solutions through mutation, evaluation, and selection.\n</think>\n\nEvolution path: Bubble → Quick → Merge → Timsort. Each generation evaluated for time complexity, stability, and space usage."
	        },
	        {
	            # The conditional-probability bars are plain "|": "\|" is not a
	            # valid escape sequence and would leave a stray backslash in the
	            # training text.
	            "text": "### Instruction:\nPenguins are birds. All birds can fly. What about penguins?\n\n### Response:\n<think>\nStep 1: Parse premises. P(Bird|Penguin) = 1, P(Fly|Bird) = 1.\nStep 2: Apply transitive reasoning. If A→B and B→C, then A→C.\nStep 3: Derive conclusion. Penguin → Bird → Fly, therefore Penguin → Fly.\nStep 4: Check against knowledge. Real-world penguins cannot fly — premises contain false generalization.\nStep 5: Identify failure. The premise \"all birds can fly\" is incorrect (counterexample: penguins, ostriches).\n\nTherefore, symbolic reasoning reveals the inconsistency in the premises. The conclusion is invalid due to false premise.\n</think>\n\nThe conclusion that penguins can fly is invalid because the premise \"all birds can fly\" is false. Penguins are a counterexample."
	        },
	        {
	            "text": "### Instruction:\nEvaluate your own reasoning and identify biases.\n\n### Response:\n<think>\nSelf-Evaluation:\n1. Confirmation bias: Do I favor evidence supporting my initial hypothesis? Check by actively seeking disconfirming evidence.\n2. Anchoring bias: Am I over-relying on the first piece of information? Mitigate by considering multiple starting points.\n3. Availability bias: Am I using easily recalled examples instead of statistical data? Counter with systematic data collection.\n4. Overconfidence: Is my confidence calibrated to actual accuracy? Use prediction markets or track calibration scores.\n5. Framing effects: Does problem phrasing influence my conclusion? Test with multiple equivalent formulations.\n\nImprovement: Apply adversarial reasoning — deliberately argue against my own conclusion.\n\nTherefore, meta-cognitive reflection improves reasoning quality through bias identification.\n</think>\n\nKey biases: confirmation, anchoring, availability, overconfidence, framing. Mitigation: adversarial reasoning, systematic data, calibration tracking."
	        },
	    ]

	    # Replicate 100x for dataset size
	    reasoning_examples = reasoning_examples * 100

	    ds = Dataset.from_list(reasoning_examples)
	    return ds


	def main():
	    """Fine-tune a causal LM with TRL's SFTTrainer on the AETHER dataset.

	    Configuration is read from environment variables:
	        AETHER_MODEL        model id or path (default "Qwen/Qwen2.5-0.5B-Instruct")
	        AETHER_OUTPUT       output directory (default "./aether-output-cpu")
	        AETHER_HUB_ID       Hub repository id (default "camdog920/aether-qwen-0.5b-sft")
	        AETHER_PUSH_TO_HUB  set to 0/false/no/off to skip pushing to the Hub
	                            (default: push, as before)

	    Steps: load model and tokenizer, build the dataset, hold out 10% for
	    evaluation, train, then save model and tokenizer to the output directory.
	    """
	    MODEL_NAME = os.environ.get("AETHER_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
	    OUTPUT_DIR = os.environ.get("AETHER_OUTPUT", "./aether-output-cpu")
	    HUB_MODEL_ID = os.environ.get("AETHER_HUB_ID", "camdog920/aether-qwen-0.5b-sft")
	    # Pushing stays on by default; the switch lets the script run on machines
	    # without Hub credentials.
	    push_to_hub = os.environ.get("AETHER_PUSH_TO_HUB", "1").strip().lower() not in {
	        "0",
	        "false",
	        "no",
	        "off",
	    }

	    logger.info("=" * 60)
	    logger.info("AETHER CPU Training — SFT with Reasoning Dataset")
	    logger.info("=" * 60)
	    logger.info(f"Model: {MODEL_NAME}")
	    logger.info(f"Output: {OUTPUT_DIR}")
	    logger.info(f"Hub: {HUB_MODEL_ID}")

	    use_cuda = torch.cuda.is_available()
	    device = "cuda" if use_cuda else "cpu"
	    logger.info(f"Device: {device}")

	    # Precision policy:
	    #   CPU               -> fp32 weights, no mixed precision
	    #   CUDA with bf16    -> bf16 weights, bf16 training
	    #   CUDA without bf16 -> fp32 weights, fp16 mixed precision
	    # fp16 mixed precision must never be requested on CPU.
	    use_bf16 = use_cuda and torch.cuda.is_bf16_supported()
	    use_fp16 = use_cuda and not use_bf16

	    # Load model (fp32 for CPU, bf16 for CUDA when supported)
	    logger.info("Loading model...")
	    dtype = torch.bfloat16 if use_bf16 else torch.float32
	    # NOTE(review): trust_remote_code=True executes Python shipped with the
	    # model repo, and MODEL_NAME comes from the environment — only point
	    # AETHER_MODEL at repositories you trust.
	    model = AutoModelForCausalLM.from_pretrained(
	        MODEL_NAME,
	        torch_dtype=dtype,
	        device_map="auto" if use_cuda else None,
	        trust_remote_code=True,
	    )
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
	    if tokenizer.pad_token is None:
	        # Reuse EOS as the padding token when the tokenizer defines none.
	        tokenizer.pad_token = tokenizer.eos_token

	    # Load dataset
	    logger.info("Preparing dataset...")
	    dataset = prepare_aether_dataset()
	    # NOTE(review): with the built-in fallback data (10 examples repeated
	    # 100x) this random split puts the same texts in train and eval, so the
	    # eval loss is not a held-out measure there.
	    dataset = dataset.train_test_split(test_size=0.1)
	    train_ds = dataset["train"]
	    eval_ds = dataset["test"]
	    logger.info(f"Train: {len(train_ds)}, Eval: {len(eval_ds)}")

	    # Training args
	    # NOTE(review): `max_seq_length` here and `tokenizer=` below are argument
	    # names from older TRL releases; newer releases are believed to use
	    # `max_length` / `processing_class` — confirm against the installed TRL.
	    training_args = SFTConfig(
	        output_dir=OUTPUT_DIR,
	        num_train_epochs=3,  # More epochs for smaller dataset
	        per_device_train_batch_size=2,
	        per_device_eval_batch_size=2,
	        gradient_accumulation_steps=4,
	        learning_rate=2e-5,
	        logging_steps=10,
	        save_steps=200,
	        eval_strategy="steps",
	        eval_steps=100,
	        max_seq_length=512,
	        bf16=use_bf16,
	        fp16=use_fp16,
	        report_to=[],
	        disable_tqdm=False,
	        logging_first_step=True,
	        push_to_hub=push_to_hub,
	        hub_model_id=HUB_MODEL_ID,
	    )

	    # Trainer
	    logger.info("Initializing SFT Trainer...")
	    trainer = SFTTrainer(
	        model=model,
	        args=training_args,
	        train_dataset=train_ds,
	        eval_dataset=eval_ds,
	        tokenizer=tokenizer,
	    )

	    # Train
	    logger.info("Starting training...")
	    trainer.train()

	    # Save
	    logger.info("Saving model...")
	    trainer.save_model(OUTPUT_DIR)
	    tokenizer.save_pretrained(OUTPUT_DIR)

	    logger.info("=" * 60)
	    logger.info("Training complete!")
	    if push_to_hub:
	        logger.info(f"Model: https://huggingface.co/{HUB_MODEL_ID}")
	    else:
	        # Nothing was uploaded, so point at the local copy instead.
	        logger.info(f"Model: {OUTPUT_DIR}")
	    logger.info("=" * 60)


	# Script entry point: start training only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()