Wave 13: serverless DiLoCo + replaysim normalization + 3 distillation losses + PRIME-RL + Monarch

b266c31 about 1 month ago

2.56 kB

	"""Monarch actor skeletons — DESIGN/SKELETON for v0.

	Per ADR-006, full Monarch integration is deferred to v0.2+. This file
	documents the actor signatures so the framework's recipe matrix is
	complete.

	Importing this module does NOT require monarch to be installed: the
	only monarch imports appear in docstring examples and are never
	executed. Instantiating any actor raises NotImplementedError, which is
	the desired behavior for a recipe document.
	"""
	from __future__ import annotations

	from typing import Any


	class TrainerActor:
	    """Documentation stub for the actor hosting the 3-channel composer trainer.

	    For v0 this class only records the intended actor signature; it cannot
	    be instantiated (the constructor always raises ``NotImplementedError``).

	    Planned implementation (v0.2+), shown for reference only::

	        from monarch import Actor, endpoint

	        class TrainerActor(Actor):
	            @endpoint
	            async def train_outer_step(self, batch_id: int) -> dict:
	                # 1. Pull batch from generator
	                # 2. Run inner H steps with composer compose_loss
	                # 3. Compute pseudo-gradient
	                # 4. Hand to ObjectStoreAllReduce manager
	                # 5. Apply outer SGD step
	                # 6. Return metrics dict
	                ...

	    Attributes:
	        backend: Name of the actor backend (``"monarch"``).
	        role: Role tag of this actor (``"trainer"``).
	    """

	    backend = "monarch"
	    role = "trainer"

	    def __init__(self) -> None:
	        """Refuse instantiation of the v0 skeleton.

	        Raises:
	            NotImplementedError: Always; the implementation is deferred
	                to v0.2 per ADR-006.
	        """
	        raise NotImplementedError(
	            "Monarch trainer actor is a v0 skeleton; implementation "
	            "deferred to v0.2 per ADR-006."
	        )

	    async def train_outer_step(self, batch_id: int) -> dict[str, Any]:
	        """Placeholder for the outer-step training endpoint.

	        Args:
	            batch_id: Identifier of the batch to train on; unused by the stub.

	        Returns:
	            Declared as a ``dict[str, Any]``; the stub never returns.

	        Raises:
	            NotImplementedError: Always, once the coroutine is awaited.
	        """
	        # Carry a message (as __init__ does) so a caller that reaches this
	        # stub through a subclass learns why it failed.
	        raise NotImplementedError(
	            "TrainerActor.train_outer_step is a v0 skeleton; implementation "
	            "deferred to v0.2 per ADR-006."
	        )


	class GeneratorActor:
	    """Documentation stub for the vllm-backed rollout actor.

	    A v0 placeholder: the constructor always raises ``NotImplementedError``,
	    so the class only records the intended endpoint signature.

	    Attributes:
	        backend: Name of the actor backend (``"monarch"``).
	        role: Role tag of this actor (``"generator"``).
	    """

	    backend = "monarch"
	    role = "generator"

	    def __init__(self) -> None:
	        """Refuse instantiation of the v0 skeleton.

	        Raises:
	            NotImplementedError: Always; see ADR-006.
	        """
	        raise NotImplementedError("v0 skeleton — see ADR-006.")

	    async def rollout(self, prompts: list[str]) -> list[str]:
	        """Placeholder for the rollout endpoint.

	        Args:
	            prompts: Prompt strings; unused by the stub.

	        Returns:
	            Declared as a ``list[str]``; the stub never returns.

	        Raises:
	            NotImplementedError: Always, once the coroutine is awaited.
	        """
	        # Same message as the constructor, so both failure paths point
	        # the caller at ADR-006.
	        raise NotImplementedError("v0 skeleton — see ADR-006.")


	class RewarderActor:
	    """Documentation stub for the verifiers-protocol rewarder (RLVR-style RL).

	    A v0 placeholder: the constructor always raises ``NotImplementedError``,
	    so the class only records the intended endpoint signature.

	    Attributes:
	        backend: Name of the actor backend (``"monarch"``).
	        role: Role tag of this actor (``"rewarder"``).
	    """

	    backend = "monarch"
	    role = "rewarder"

	    def __init__(self) -> None:
	        """Refuse instantiation of the v0 skeleton.

	        Raises:
	            NotImplementedError: Always; see ADR-006.
	        """
	        raise NotImplementedError("v0 skeleton — see ADR-006.")

	    async def score(self, completions: list[str]) -> list[float]:
	        """Placeholder for the scoring endpoint.

	        Args:
	            completions: Completion strings; unused by the stub.

	        Returns:
	            Declared as a ``list[float]``; the stub never returns.

	        Raises:
	            NotImplementedError: Always, once the coroutine is awaited.
	        """
	        # Same message as the constructor, so both failure paths point
	        # the caller at ADR-006.
	        raise NotImplementedError("v0 skeleton — see ADR-006.")


	class TeacherPoolActor:
	    """Documentation stub for the channel-3 teacher pool.

	    Per the original design note, the real actor wraps
	    ``composer_replication.teacher_replay``. In v0 the constructor always
	    raises ``NotImplementedError``, so the class only records the intended
	    endpoint signature.

	    Attributes:
	        backend: Name of the actor backend (``"monarch"``).
	        role: Role tag of this actor (``"teacher_pool"``).
	    """

	    backend = "monarch"
	    role = "teacher_pool"

	    def __init__(self) -> None:
	        """Refuse instantiation of the v0 skeleton.

	        Raises:
	            NotImplementedError: Always; see ADR-006.
	        """
	        raise NotImplementedError("v0 skeleton — see ADR-006.")

	    async def replay(self, states: list[dict]) -> list[dict]:
	        """Placeholder for the teacher-replay endpoint.

	        Args:
	            states: List of state dicts; unused by the stub, and their
	                schema is not defined in this file.

	        Returns:
	            Declared as a ``list[dict]``; the stub never returns.

	        Raises:
	            NotImplementedError: Always, once the coroutine is awaited.
	        """
	        # Same message as the constructor, so both failure paths point
	        # the caller at ADR-006.
	        raise NotImplementedError("v0 skeleton — see ADR-006.")


	# Names exported by ``from <module> import *``, in alphabetical order.
	__all__ = ["GeneratorActor", "RewarderActor", "TeacherPoolActor", "TrainerActor"]