AlexWortega
/

moe100m-physics-tinybpe

Mixture of Experts

Model card Files Files and versions

moe100m-physics-tinybpe / physics_serialize.py

AlexWortega's picture

Upload physics_serialize.py with huggingface_hub

3ab4691 verified 3 days ago

history blame contribute delete

1.98 kB

	"""Shared serialization for the tiny-vocab physics MoE.

	Reuses physics_core.fmt_header / fmt_frame, but reduces every frame's
	free-text description to a tiny controlled keyword set so the learned vocab
	stays simulation-only.

	Controlled description set (after the `Frame N:` token):
	- "at rest" <- "All objects are at rest."
	- "in motion" <- "All objects are in motion."
	- "settling" <- "K of N objects are moving." (partial motion)
	Anything else -> dropped (description omitted; frame still emitted).
	"""
	from __future__ import annotations
	import re
	import physics_core as pc

	# Case-insensitive matchers for the three recognised description shapes.
	_AT_REST = re.compile(r"all objects are at rest", re.I)
	_IN_MOTION = re.compile(r"all objects are in motion", re.I)
	_PARTIAL = re.compile(r"\d+\s+of\s+\d+\s+objects are moving", re.I)

	# (pattern, keyword) pairs, tried in order; the first pattern that matches wins.
	_DESC_KEYWORDS = (
	    (_AT_REST, "at rest"),
	    (_IN_MOTION, "in motion"),
	    (_PARTIAL, "settling"),
	)


	def reduce_desc(raw: str) -> str:
	    """Map a frame's free-text description to a controlled keyword (or '').

	    Args:
	        raw: The frame description. Patterns are searched anywhere in the
	            text, case-insensitively. A non-string value (for example
	            ``None`` when the key is present but null) is treated as
	            "no recognised description" instead of raising ``TypeError``.

	    Returns:
	        ``"at rest"``, ``"in motion"`` or ``"settling"`` for the first
	        pattern that matches, otherwise the empty string.
	    """
	    # dict.get(key, "") only falls back when the key is missing, so a
	    # null description can still reach this function.
	    if not isinstance(raw, str):
	        return ""
	    for pattern, keyword in _DESC_KEYWORDS:
	        if pattern.search(raw):
	            return keyword
	    return ""


	def fmt_frame_reduced(fr: dict) -> str:
	    """Format a frame via pc.fmt_frame with its description reduced.

	    The frame's "description" entry (empty string when the key is
	    missing) is passed through reduce_desc, and the result replaces it in
	    a shallow copy of the frame. The caller's dict is not modified.
	    """
	    keyword = reduce_desc(fr.get("description", ""))
	    return pc.fmt_frame(dict(fr, description=keyword))


	def fmt_header_reduced(header: dict) -> str:
	    """Format a header via pc.fmt_header with its description emptied.

	    Only the "description" entry is overwritten (with ""), on a shallow
	    copy, so every other header field reaches pc.fmt_header unchanged and
	    the caller's dict is left untouched. The intent is that the structural
	    lines (Gravity / Timestep / Type / Difficulty / Static / Constraints)
	    survive while the `Scene:` line carries no English prose, keeping the
	    vocab sim-only.
	    """
	    return pc.fmt_header(dict(header, description=""))


	def serialize_scene(header: dict, frames: list) -> str:
	    """Return the full scene text: reduced header, then each reduced frame.

	    The pieces are concatenated with no separator, and no BOS/EOS markers
	    are added here.
	    """
	    pieces = [fmt_header_reduced(header)]
	    pieces.extend(fmt_frame_reduced(frame) for frame in frames)
	    return "".join(pieces)