ZAIDX11 committed on Dec 11, 2025

Commit

6d92a68

verified ·

1 Parent(s): 85a5bf3

Add files using upload-large-folder tool

Browse files

Files changed (20) hide show

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/__init__.py +687 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/fragmented_array.py +661 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/profiler.py +289 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/utils.py +699 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/wgmma.py +518 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/__init__.py +48 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/gpu.py +18 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/ops/__init__.py +19 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/ops/gpu/attention.py +573 -0
external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/tpu.py +53 -0
external/alphageometry/README.md +447 -0
external/alphageometry/lm_inference_test.py +89 -0
external/alphageometry/models.py +178 -0
external/alphageometry/numericals.py +1921 -0
external/alphageometry/numericals_test.py +313 -0
external/alphageometry/pretty.py +216 -0
external/alphageometry/problem.py +1133 -0
external/alphageometry/problem_test.py +61 -0
external/alphageometry/requirements.in +17 -0
external/alphageometry/requirements.txt +0 -0

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/__init__.py ADDED Viewed

	@@ -0,0 +1,687 @@

+from collections.abc import Callable
+# Copyright 2024 The JAX Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import contextlib
+import ctypes
+import dataclasses
+import functools
+import itertools
+import os
+import pathlib
+import subprocess
+import tempfile
+import time
+from typing import Any, Generic, Sequence, TypeVar
+import jax
+from jax._src import config
+from jax._src import core as jax_core
+from jax._src.interpreters import mlir
+from jax._src.lib import xla_client
+from jax._src.lib import mosaic_gpu as mosaic_gpu_lib
+from jaxlib.mlir import ir
+from jaxlib.mlir.dialects import arith
+from jaxlib.mlir.dialects import builtin
+from jaxlib.mlir.dialects import func
+from jaxlib.mlir.dialects import gpu
+from jaxlib.mlir.dialects import llvm
+from jaxlib.mlir.dialects import memref
+from jaxlib.mlir.dialects import nvgpu
+from jaxlib.mlir.dialects import nvvm
+from jaxlib.mlir.passmanager import PassManager
+import numpy as np
+from . import dsl as mgpu
+from . import profiler
+from . import utils
+# mypy: ignore-errors
+# MLIR can't find libdevice unless we point it to the CUDA path
+# TODO(apaszke): Unify with jax._src.lib.cuda_path
+# Default CUDA installation root. An existing CUDA_ROOT environment variable
+# takes precedence; otherwise the default is exported into the environment.
+CUDA_ROOT = "/usr/local/cuda"
+if os.environ.get("CUDA_ROOT") is None:
+  os.environ["CUDA_ROOT"] = CUDA_ROOT
+else:
+  CUDA_ROOT = os.environ["CUDA_ROOT"]
+# Paths of the CUDA binaries, resolved relative to CUDA_ROOT.
+PTXAS_PATH = os.path.join(CUDA_ROOT, "bin/ptxas")
+NVDISASM_PATH = os.path.join(CUDA_ROOT, "bin/nvdisasm")
+# Size and alignment (in bytes) used when a TMA descriptor is placed in the
+# GMEM scratch buffer (see LaunchContext._get_tma_desc).
+TMA_DESCRIPTOR_BYTES = 128
+TMA_DESCRIPTOR_ALIGNMENT = 64
+c = mgpu.c  # This is too common to fully qualify.
+# The runtime library is looked up next to the Mosaic GPU extension module.
+RUNTIME_PATH = pathlib.Path(mosaic_gpu_lib._mosaic_gpu_ext.__file__).parent / "libmosaic_gpu_runtime.so"
+if RUNTIME_PATH.exists():
+  # Set this so that the custom call can find it
+  os.environ["MOSAIC_GPU_RUNTIME_LIB_PATH"] = str(RUNTIME_PATH)
+# JAX primitive used to embed a Mosaic GPU kernel; it always returns a list
+# of results.
+mosaic_gpu_p = jax.core.Primitive("mosaic_gpu_p")
+mosaic_gpu_p.multiple_results = True
+@mosaic_gpu_p.def_abstract_eval
+def _mosaic_gpu_abstract_eval(*_, module, out_types, gmem_scratch_bytes):
+  """Abstract evaluation rule: one ShapedArray per entry of `out_types`.
+  The operands, `module` and `gmem_scratch_bytes` do not affect the result.
+  """
+  del module, gmem_scratch_bytes  # Unused.
+  out_avals = []
+  for out_type in out_types:
+    out_avals.append(jax._src.core.ShapedArray(out_type.shape, out_type.dtype))
+  return out_avals
+# TODO(apaszke): Implement a proper system for managing kernel lifetimes
+kernel_idx = itertools.count()
+def _mosaic_gpu_lowering_rule(ctx, *args, module, out_types, gmem_scratch_bytes):
+  """Lowers `mosaic_gpu_p` to a "mosaic_gpu" custom call.
+  The custom call gets one extra uint8 result of `gmem_scratch_bytes` elements
+  (the scratch space), which is dropped from the returned results. The backend
+  config is an 8-byte little-endian kernel counter followed by the serialized
+  module.
+  """
+  del out_types  # Unused.
+  kernel_id = next(kernel_idx).to_bytes(8, byteorder="little")
+  result_types = [mlir.aval_to_ir_type(aval) for aval in ctx.avals_out]
+  scratch_aval = jax_core.ShapedArray((gmem_scratch_bytes,), np.uint8)
+  result_types.append(mlir.aval_to_ir_type(scratch_aval))
+  serialized_module = module.operation.get_asm(
+      binary=True, enable_debug_info=True
+  )
+  custom_call = mlir.custom_call(
+      "mosaic_gpu",
+      result_types=result_types,
+      operands=args,
+      backend_config=kernel_id + serialized_module,
+  )
+  return custom_call.results[:-1]  # Skip the scratch space.
+mlir.register_lowering(mosaic_gpu_p, _mosaic_gpu_lowering_rule, "cuda")
+@dataclasses.dataclass(frozen=True)
+class MemRefTransform:
+  """Base class for transformations applied to a memref.
+  Subclasses override all three methods, which describe the same
+  transformation at the level of the reference, of an index into it and of
+  a static shape.
+  """
+  def apply(self, ref: ir.Value) -> ir.Value:
+    """Returns the transformed memref."""
+    raise NotImplementedError("Subclasses should override this method")
+  def transform_index(self, idx: Sequence[ir.Value]) -> tuple[ir.Value, ...]:
+    """Maps an index into the original memref to one into the transformed one."""
+    raise NotImplementedError("Subclasses should override this method")
+  def transform_shape(self, shape: Sequence[int]) -> tuple[int, ...]:
+    """Maps a shape of the original memref to the transformed shape."""
+    raise NotImplementedError("Subclasses should override this method")
+@dataclasses.dataclass(frozen=True)
+class TileTransform(MemRefTransform):
+  """Tiles a suffix of memref dimensions.
+  For example, given a memref of shape (5, 128, 128) and a tiling of (64, 32),
+  the shape of the result will be (5, 2, 4, 64, 32). The shape always ends with
+  the tile shape, and the size of tiled dimensions is divided by the tile size.
+  This is especially useful for swizzled WGMMA, which expect tiled layouts in
+  shared memory.
+  An empty tiling leaves indices and shapes unchanged.
+  """
+  tiling: tuple[int, ...]
+  def _first_tiled_dim(self, rank: int) -> int:
+    """Returns the position of the first tiled dimension for the given rank.
+    Raises:
+      ValueError: if the tiling has more dimensions than `rank`.
+    """
+    tiling_rank = len(self.tiling)
+    if tiling_rank > rank:
+      raise ValueError(
+          f"Tiling {self.tiling} has more dimensions than the rank {rank} it"
+          " is applied to"
+      )
+    # Computed explicitly instead of slicing with `-tiling_rank`: for an empty
+    # tiling `seq[:-0]` is empty and would drop every dimension.
+    return rank - tiling_rank
+  def apply(self, ref: ir.Value) -> ir.Value:
+    """Unfolds each tiled dimension and moves the tile dims to the end."""
+    untiled_rank = ir.MemRefType(ref.type).rank
+    first_tiled = self._first_tiled_dim(untiled_rank)
+    tiled_rank = untiled_rank + len(self.tiling)
+    # Unfold from the last dimension backwards so that earlier dimension
+    # numbers stay valid.
+    for t, d in zip(self.tiling[::-1], range(untiled_rank)[::-1]):
+      ref = mgpu.memref_unfold(ref, d, (None, t))
+    # After unfolding, outer and inner (tile) dims are interleaved; gather all
+    # the outer dims first and all the tile dims last.
+    permutation = (
+        *range(first_tiled),
+        *range(first_tiled, tiled_rank, 2),
+        *range(first_tiled + 1, tiled_rank, 2),
+    )
+    return mgpu.memref_transpose(ref, permutation)
+  def transform_index(self, idx: Sequence[ir.Value]) -> tuple[ir.Value, ...]:
+    """Splits each tiled index into a (tile number, offset in tile) pair.
+    Raises:
+      ValueError: if the tiling has more dimensions than `idx`.
+    """
+    index = ir.IndexType.get()
+    first_tiled = self._first_tiled_dim(len(idx))
+    tiled_idx = idx[first_tiled:]
+    return (
+        *idx[:first_tiled],
+        *(
+            arith.divui(i, c(t, index))
+            for i, t in zip(tiled_idx, self.tiling)
+        ),
+        *(
+            arith.remui(i, c(t, index))
+            for i, t in zip(tiled_idx, self.tiling)
+        ),
+    )
+  def transform_shape(self, shape: Sequence[int]) -> tuple[int, ...]:
+    """Returns the tiled shape: untiled prefix, tile counts, then the tiling.
+    Raises:
+      ValueError: if the tiling has more dimensions than `shape`, or if a tiled
+        dimension is not a multiple of its tile size.
+    """
+    first_tiled = self._first_tiled_dim(len(shape))
+    tiled_dims = shape[first_tiled:]
+    # Note that this also checks that tiled dims are not squeezed. Their slice
+    # size would be 1 if so.
+    for size, tile_size in zip(tiled_dims, self.tiling):
+      if size % tile_size:
+        raise ValueError(
+            f"Expected GMEM slice shape {shape} suffix to be a multiple"
+            f" of tiling {self.tiling}"
+        )
+    return (
+        *shape[:first_tiled],
+        *(s // t for s, t in zip(tiled_dims, self.tiling)),
+        *self.tiling,
+    )
+@dataclasses.dataclass(frozen=True)
+class TransposeTransform(MemRefTransform):
+  """Transposes memref dimensions.
+  `permutation[i]` is the source dimension that ends up at position `i`.
+  """
+  permutation: tuple[int, ...]
+  def __post_init__(self):
+    # A duplicate-free tuple is not enough: (0, 2) has no duplicates but is not
+    # a permutation. Require exactly the values 0..rank-1.
+    rank = len(self.permutation)
+    if sorted(self.permutation) != list(range(rank)):
+      raise ValueError(
+          f"Permutation must be a permutation of range({rank}), got"
+          f" {self.permutation}"
+      )
+  def apply(self, ref: ir.Value) -> ir.Value:
+    """Returns `ref` with its dimensions transposed by the permutation."""
+    return mgpu.memref_transpose(ref, self.permutation)
+  def transform_index(self, idx: Sequence[ir.Value]) -> tuple[ir.Value, ...]:
+    """Reorders the index components according to the permutation."""
+    return tuple(idx[p] for p in self.permutation)
+  def transform_shape(self, shape: Sequence[int]) -> tuple[int, ...]:
+    """Reorders the dimension sizes according to the permutation."""
+    return tuple(shape[p] for p in self.permutation)
+OnDeviceProfiler = profiler.OnDeviceProfiler
+@dataclasses.dataclass()
+class LaunchContext:
+  """State used while building the body of a single `gpu.launch` op.
+  Tracks the GMEM scratch allocations requested by the kernel body, the
+  callbacks that initialize them on the host, and a cache of TMA descriptors.
+  """
+  # The launch op whose body is being built.
+  launch_op: gpu.LaunchOp
+  # Pointer to the GMEM scratch buffer; allocations are offsets from it.
+  gmem_scratch_ptr: ir.Value
+  # Optional profiler used by `named_region`.
+  profiler: OnDeviceProfiler | None = None
+  # Number of scratch bytes handed out so far (also the next free offset).
+  next_scratch_offset: int = 0
+  # Callbacks that, given the host scratch pointer, emit the code which
+  # initializes each allocation.
+  host_scratch_init: list[Callable[[ir.Value], None]] = dataclasses.field(
+      default_factory=list, init=False
+  )
+  # Cache of TMA descriptors keyed by (ref, transformed slice shape, swizzle,
+  # GMEM transforms).
+  tma_descriptors: dict[
+      tuple[ir.Value, tuple[int, ...], int | None, tuple[MemRefTransform, ...]],
+      ir.Value,
+  ] = dataclasses.field(default_factory=dict, init=False)
+  @contextlib.contextmanager
+  def named_region(self, *args, **kwargs):
+    """Records the enclosed region with the profiler, if one is attached.
+    All arguments are forwarded to `profiler.record`. Without a profiler this
+    is a no-op context manager.
+    """
+    if self.profiler is not None:
+      with self.profiler.record(*args, **kwargs):
+        yield
+    else:
+      yield
+  def _alloc_scratch(
+      self,
+      size: int,
+      alignment: int | None = None,
+      host_init: Callable[[ir.Value], None] = lambda _: None,
+      device_init: Callable[[ir.Value], Any] = lambda x: x,
+  ) -> ir.Value:
+    """Allocates a GMEM scratch buffer.
+    The buffer is initialized on the host and then copied to GMEM before the
+    kernel launch.
+    Args:
+      size: Number of bytes to reserve.
+      alignment: Required alignment of the allocation offset. Defaults to
+        `size`.
+      host_init: Called with a pointer into the host scratch buffer (at this
+        allocation's offset) to emit the initialization code.
+      device_init: Called with a pointer into the GMEM scratch buffer (at this
+        allocation's offset); its result is returned.
+    Returns:
+      The result of `device_init`.
+    Raises:
+      NotImplementedError: if the current scratch offset is not a multiple of
+        `alignment` (padding is not implemented).
+    """
+    i8 = ir.IntegerType.get_signless(8)
+    ptr_ty = ir.Type.parse("!llvm.ptr")
+    if alignment is None:
+      alignment = size
+    if self.next_scratch_offset % alignment:
+      raise NotImplementedError  # TODO(apaszke): Pad to match alignment
+    alloc_base = self.next_scratch_offset
+    self.next_scratch_offset += size
+    # Deferred: the host scratch pointer only exists once the whole kernel body
+    # has been built and the total scratch size is known.
+    def host_init_wrapped(host_ptr):
+      with ir.InsertionPoint(self.launch_op):
+        host_init(
+            llvm.getelementptr(ptr_ty, host_ptr, [], [alloc_base], i8)
+        )
+    self.host_scratch_init.append(host_init_wrapped)
+    # The device-side pointer is computed at the start of the launch body.
+    with ir.InsertionPoint.at_block_begin(self.launch_op.body.blocks[0]):
+      return device_init(llvm.getelementptr(
+          ptr_ty, self.gmem_scratch_ptr, [], [alloc_base], i8
+      ))
+  def _get_tma_desc(
+      self,
+      ref,
+      gmem_transform: tuple[MemRefTransform, ...],
+      transformed_slice_shape: tuple[int, ...],
+      swizzle: int | None,
+  ):
+    """Returns a TMA descriptor for `ref`, creating and caching it if needed.
+    Descriptors are cached in `tma_descriptors`, so repeated requests with the
+    same ref, slice shape, swizzle and transforms reuse one scratch allocation.
+    """
+    # NOTE(review): `index` is not used in this method.
+    index = ir.IndexType.get()
+    ref_ty = ir.MemRefType(ref.type)
+    tma_desc_key = (ref, transformed_slice_shape, swizzle, gmem_transform)
+    if (tma_desc := self.tma_descriptors.get(tma_desc_key, None)) is None:
+      swizzle_str = f"swizzle_{swizzle}b" if swizzle is not None else "none"
+      default_tensor_map_attrs = dict(
+          swizzle=swizzle_str, l2promo="none", oob="zero", interleave="none"
+      )
+      tensor_map_ty = utils.get_tensormap_descriptor(
+          tensor=(
+              f"memref<{'x'.join(map(str, transformed_slice_shape))}x{ref_ty.element_type}, 3>"
+          ),
+          **default_tensor_map_attrs,
+      )
+      with ir.InsertionPoint(self.launch_op):
+        # Apply the GMEM transforms to the reference before reading its
+        # metadata below.
+        for t in gmem_transform:
+          ref = t.apply(ref)
+        ref_ty = ir.MemRefType(ref.type)
+        i64 = ir.IntegerType.get_signless(64)
+        ptr_ty = ir.Type.parse("!llvm.ptr")
+        def init_tma_desc(host_ptr):
+          # Emits the call that fills in the descriptor at `host_ptr`.
+          _, offset, *sizes_and_strides = memref.extract_strided_metadata(ref)
+          aligned_ptr_idx = memref.extract_aligned_pointer_as_index(ref)
+          as_i64 = lambda i: arith.index_cast(i64, i)
+          alloc_ptr = llvm.inttoptr(ptr_ty, as_i64(aligned_ptr_idx))
+          llvm_dyn = -2147483648  # TODO(apaszke): Improve the MLIR bindings...
+          base_ptr = llvm.getelementptr(
+              ptr_ty, alloc_ptr, [as_i64(offset)], [llvm_dyn], ref_ty.element_type,
+          )
+          rank = ref_ty.rank
+          # The metadata holds `rank` sizes followed by `rank` strides.
+          assert rank * 2 == len(sizes_and_strides)
+          args = [
+              host_ptr,
+              base_ptr,
+              c(utils.bytewidth(ref_ty.element_type), i64),
+              c(rank, i64),
+              utils.pack_array([as_i64(i) for i in sizes_and_strides[:rank]]),
+              utils.pack_array([as_i64(i) for i in sizes_and_strides[rank:]]),
+              c(0 if swizzle is None else swizzle, i64),
+              utils.pack_array([c(v, i64) for v in transformed_slice_shape]),
+          ]
+          func.call([], "mosaic_gpu_init_tma_desc", args)
+        def cast_tma_desc(device_ptr):
+          # TODO(apaszke): Investigate why prefetching can cause launch failures
+          # nvvm.prefetch_tensormap(device_ptr)
+          return builtin.unrealized_conversion_cast(
+              [tensor_map_ty], [device_ptr]
+          )
+        tma_desc = self._alloc_scratch(
+            TMA_DESCRIPTOR_BYTES,
+            alignment=TMA_DESCRIPTOR_ALIGNMENT,
+            host_init=init_tma_desc,
+            device_init=cast_tma_desc,
+        )
+        self.tma_descriptors[tma_desc_key] = tma_desc
+    return tma_desc
+  def async_copy(
+      self,
+      *,
+      src_ref,
+      dst_ref,
+      gmem_slice: Any = (),
+      gmem_transform: MemRefTransform | tuple[MemRefTransform, ...] = (),
+      barrier: mgpu.Barrier | None = None,
+      swizzle: int | None = None,
+      arrive: bool | None = None,
+      uniform: bool = True,
+  ):
+    """Emits an asynchronous TMA copy between GMEM and SMEM.
+    Exactly one of `src_ref` and `dst_ref` must be in SMEM (workgroup address
+    space); the other must have no memory space and is treated as GMEM.
+    Args:
+      src_ref: Source memref.
+      dst_ref: Destination memref.
+      gmem_slice: Indices/slices selecting the part of the GMEM reference to
+        copy.
+      gmem_transform: One transform or a tuple of transforms applied to the
+        GMEM slice indices and shape.
+      barrier: Required for GMEM -> SMEM copies, rejected for SMEM -> GMEM.
+      swizzle: Swizzle passed on to the TMA descriptor.
+      arrive: For GMEM -> SMEM copies, whether to also emit an
+        arrive-expect-tx on the barrier with the slice byte count (defaults to
+        True). Must be left as None for SMEM -> GMEM copies.
+      uniform: If True the copy is emitted under `mgpu.single_thread`,
+        otherwise under a null context.
+    Raises:
+      ValueError: on mismatched element types, unsupported memory space
+        combinations, invalid barrier/arrive arguments, a GMEM reference that
+        is not a kernel argument, or an SMEM shape that differs from the
+        transformed slice shape.
+    """
+    index = ir.IndexType.get()
+    smem = ir.Attribute.parse("#gpu.address_space<workgroup>")
+    src_ref_ty = ir.MemRefType(src_ref.type)
+    dst_ref_ty = ir.MemRefType(dst_ref.type)
+    element_type = src_ref_ty.element_type
+    if element_type != dst_ref_ty.element_type:
+      raise ValueError(
+          f"Expected same element type, got {element_type} and"
+          f" {dst_ref_ty.element_type}"
+      )
+    if not isinstance(gmem_transform, tuple):
+      gmem_transform = (gmem_transform,)
+    # Work out the copy direction from the memory spaces of the two references.
+    if src_ref_ty.memory_space is None and dst_ref_ty.memory_space == smem:
+      gmem_ref, smem_ref = src_ref, dst_ref
+      if barrier is None:
+        raise ValueError("Barriers are required for GMEM -> SMEM copies")
+      if arrive is None:
+        arrive = True  # Arrive by default
+    elif src_ref_ty.memory_space == smem and dst_ref_ty.memory_space is None:
+      gmem_ref, smem_ref = dst_ref, src_ref
+      if barrier is not None:
+        raise ValueError("Barriers are unsupported for SMEM -> GMEM copies")
+      if arrive is not None:
+        raise ValueError("arrive is unsupported for SMEM -> GMEM copies")
+    else:
+      raise ValueError("Only SMEM <-> GMEM copies supported")
+    # TODO(apaszke): This is a very approximate check. Improve it!
+    expected_name = "builtin.unrealized_conversion_cast"
+    if (
+        gmem_ref.owner is None
+        or gmem_ref.owner.opview.OPERATION_NAME != expected_name
+    ):
+      raise ValueError("GMEM reference in async_copy must be a kernel argument")
+    base_indices, slice_shape, is_squeezed = utils.parse_indices(
+        gmem_slice, ir.MemRefType(gmem_ref.type).shape
+    )
+    # Static base indices are turned into index constants.
+    dyn_base_indices = tuple(
+        c(i, index) if not isinstance(i, ir.Value) else i for i in base_indices
+    )
+    slice_shape = tuple(slice_shape)
+    for t in gmem_transform:
+      dyn_base_indices = t.transform_index(dyn_base_indices)
+      slice_shape = t.transform_shape(slice_shape)
+    # Re-insert the squeezed dimensions into the SMEM reference so that its
+    # shape can be compared with the slice shape.
+    for dim, squeezed in enumerate(is_squeezed):
+      if squeezed:
+        smem_ref = mgpu.memref_unsqueeze(smem_ref, dim)
+    smem_ref_ty = ir.MemRefType(smem_ref.type)
+    if slice_shape != tuple(smem_ref_ty.shape):
+      raise ValueError(
+          "Expected the SMEM reference to have the same shape as the tiled"
+          f" slice: {tuple(smem_ref_ty.shape)} != {slice_shape}"
+      )
+    tma_desc = self._get_tma_desc(
+        gmem_ref, gmem_transform, slice_shape, swizzle,
+    )
+    # nvgpu TMA instructions expect reversed indices...
+    # Note: `reversed` returns a one-shot iterator; it is consumed by exactly
+    # one of the two branches below.
+    rev_dyn_based_indices = reversed(dyn_base_indices)
+    uniform_ctx = mgpu.single_thread if uniform else contextlib.nullcontext
+    if gmem_ref is src_ref:
+      # GMEM -> SMEM: load, signalling completion through the barrier.
+      with uniform_ctx():
+        assert barrier is not None  # for pytype
+        barrier_group = barrier.barrier_array.value
+        barrier_idx = barrier.offset
+        if arrive:
+          slice_bytes = c(
+              np.prod(slice_shape) * mgpu.bytewidth(element_type), index
+          )
+          nvgpu.mbarrier_arrive_expect_tx(
+              barrier_group, slice_bytes, barrier_idx
+          )
+        nvgpu.tma_async_load(
+            smem_ref, barrier_group, tma_desc, rev_dyn_based_indices, barrier_idx
+        )
+    else:
+      # SMEM -> GMEM: store and commit the bulk group so that it can be waited
+      # on with `await_async_copy`.
+      with uniform_ctx():
+        nvgpu.tma_async_store(smem_ref, tma_desc, rev_dyn_based_indices)
+        nvvm.cp_async_bulk_commit_group()
+  def await_async_copy(
+      self, allow_groups: int, await_read_only: bool = False
+  ):
+    """Emits a wait on committed bulk copy groups followed by a barrier.
+    Args:
+      allow_groups: Forwarded to `nvvm.cp_async_bulk_wait_group`.
+      await_read_only: Forwarded as the `read` flag of the wait op.
+    """
+    nvvm.cp_async_bulk_wait_group(allow_groups, read=await_read_only)
+    gpu.barrier()  # Groups are supposedly tracked per-thread
+# ShapeTrees currently can not contain unions.
+# Both aliases are untyped pytrees: a ShapeTree has jax.ShapeDtypeStruct-like
+# leaves (anything with `shape` and `dtype`), and a RefTree is the matching
+# tree of memrefs produced by `_construct_smem_reftree`.
+ShapeTree = Any
+RefTree = Any
+T = TypeVar('T')
+@dataclasses.dataclass(frozen=True)
+class Union(Generic[T]):
+  """A group of alternatives given as `members`.
+  When passed as `smem_buffers` to `_launch`, every member is laid out from
+  the start of dynamic shared memory, so the members alias each other.
+  """
+  members: Sequence[T]
+def _count_buffer_bytes(shape_dtype: jax.ShapeDtypeStruct) -> int:
+  """Returns the number of bytes needed to store a buffer of this shape/dtype.
+  The result is always a Python int. `np.prod` of an empty shape is the float
+  1.0, so without the conversion scalar buffers would yield a float size.
+  """
+  num_elements = int(np.prod(shape_dtype.shape))
+  return num_elements * np.dtype(shape_dtype.dtype).itemsize
+def _construct_smem_reftree(
+    dynamic_smem: ir.Value, smem_buffers: ShapeTree) -> RefTree:
+  """Carves typed SMEM views out of the dynamic shared memory buffer.
+  Args:
+    dynamic_smem: The dynamic shared memory memref to create views into.
+    smem_buffers: A pytree whose leaves have `shape` and `dtype` attributes.
+  Returns:
+    A pytree with the same structure as `smem_buffers` whose leaves are memref
+    views, packed back to back starting at byte offset 0.
+  """
+  index = ir.IndexType.get()
+  smem = ir.Attribute.parse("#gpu.address_space<workgroup>")
+  flat_ref_tys, smem_buffer_tree = jax.tree.flatten(smem_buffers)
+  smem_refs = []
+  # Running byte offset of the next buffer inside `dynamic_smem`.
+  dynamic_smem_offset = 0
+  for ref_ty in flat_ref_tys:
+    mlir_dtype = mlir.dtype_to_ir_type(ref_ty.dtype)
+    tile_smem = memref.view(
+        ir.MemRefType.get(ref_ty.shape, mlir_dtype, memory_space=smem),
+        dynamic_smem, c(dynamic_smem_offset, index), [],
+    )
+    dynamic_smem_offset += _count_buffer_bytes(ref_ty)
+    smem_refs.append(tile_smem)
+  return jax.tree.unflatten(smem_buffer_tree, smem_refs)
+# TODO(apaszke): Inline this
+@contextlib.contextmanager
+def _launch(
+    token,
+    grid,
+    block,
+    gmem_scratch_ptr,
+    smem_buffers: ShapeTree | Union[ShapeTree],
+    profiler_spec: profiler.ProfilerSpec | None = None,
+    maybe_prof_buffer: ir.Value | None = None,
+):
+  """Creates a `gpu.launch` op and yields a context for building its body.
+  Args:
+    token: Async token the launch depends on.
+    grid: Grid dimensions.
+    block: Block dimensions.
+    gmem_scratch_ptr: Pointer to the GMEM scratch buffer, stored in the
+      yielded LaunchContext.
+    smem_buffers: Pytree of shape/dtype leaves describing the SMEM buffers, or
+      a Union of such trees whose members all start at offset 0.
+    profiler_spec: Optional profiler specification.
+    maybe_prof_buffer: Profiler output buffer; must be given exactly when
+      `profiler_spec` is given.
+  Yields:
+    A `(LaunchContext, smem_ref_tree)` pair. For a Union input the ref tree is
+    a Union of ref trees.
+  Raises:
+    ValueError: if only one of `profiler_spec` and `maybe_prof_buffer` is
+      given, or if the kernel needs more shared memory than is available.
+  """
+  if (profiler_spec is None) != (maybe_prof_buffer is None):
+    raise ValueError(
+        "profiler_spec and maybe_prof_buffer must either both be specified or"
+        " both be None"
+    )
+  index = ir.IndexType.get()
+  i32 = ir.IntegerType.get_signless(32)
+  i8 = ir.IntegerType.get_signless(8)
+  grid_vals = [c(i, index) for i in grid]
+  block_vals = [c(i, index) for i in block]
+  if isinstance(smem_buffers, Union):
+    # Union members alias each other, so only the largest one counts.
+    smem_disjoint_live_buffers_collections = smem_buffers.members
+    compute_smem_bytes = max(
+        sum(_count_buffer_bytes(l) for l in jax.tree.leaves(s))
+            for s in smem_buffers.members)
+  else:
+    smem_disjoint_live_buffers_collections = [smem_buffers]
+    compute_smem_bytes = sum(
+        _count_buffer_bytes(l) for l in jax.tree.leaves(smem_buffers))
+  smem_bytes = compute_smem_bytes
+  if profiler_spec is not None:
+    smem_bytes += profiler_spec.smem_bytes(block=block)
+  # TODO(cperivol): Query the shared memory size programmatically.
+  max_smem_bytes = 228 * 1024
+  if smem_bytes > max_smem_bytes:
+    # Report the limit that is actually enforced (228 KiB, not 228000).
+    raise ValueError(
+        "Mosaic GPU kernel exceeds available shared memory"
+        f" {smem_bytes=} > {max_smem_bytes}"
+    )
+  launch_op = gpu.LaunchOp(
+      token.type, [token], *grid_vals, *block_vals,
+      dynamicSharedMemorySize=c(smem_bytes, i32))
+  launch_op.body.blocks.append(*([index] * 12))  # Append an empty block
+  smem = ir.Attribute.parse("#gpu.address_space<workgroup>")
+  with ir.InsertionPoint(launch_op.body.blocks[0]):
+    dynamic_smem = gpu.dynamic_shared_memory(
+        ir.MemRefType.get(
+            (ir.ShapedType.get_dynamic_size(),), i8, memory_space=smem
+        )
+    )
+    smem_ref_trees = []
+    for smem_live_buffers_collection in smem_disjoint_live_buffers_collections:
+      smem_ref_tree = _construct_smem_reftree(
+          dynamic_smem, smem_live_buffers_collection)
+      smem_ref_trees.append(smem_ref_tree)
+    if profiler_spec:
+      # The profiler's SMEM lives right after the compute buffers.
+      prof_smem = memref.view(
+          ir.MemRefType.get(
+              (profiler_spec.smem_i32_elements(block=block),),
+              i32, memory_space=smem,
+          ),
+          dynamic_smem, c(compute_smem_bytes, index), [],
+      )
+      prof = profiler.OnDeviceProfiler(
+          profiler_spec, prof_smem, maybe_prof_buffer
+      )
+    else:
+      prof = None
+    if isinstance(smem_buffers, Union):
+      smem_ref_tree: Union[RefTree] = Union(smem_ref_trees)
+    else:
+      smem_ref_tree: RefTree = smem_ref_trees[0] if smem_ref_trees else []
+    yield LaunchContext(launch_op, gmem_scratch_ptr, prof), smem_ref_tree
+    if prof is not None:
+      prof.finalize(grid=grid, block=block)
+    gpu.terminator()
+def _lower_as_gpu_kernel(
+    body,
+    grid: tuple[int, ...],
+    block: tuple[int, ...],
+    in_shapes: tuple[Any, ...],
+    out_shape,
+    smem_scratch_shape: ShapeTree | Union[ShapeTree],
+    prof_spec: profiler.ProfilerSpec | None = None,
+):
+  """Builds the MLIR module containing the host entry point and the kernel.
+  Args:
+    body: Called as `body(launch_ctx, *in_refs, *out_refs, smem_refs)` to emit
+      the kernel body.
+    grid: Grid dimensions.
+    block: Block dimensions.
+    in_shapes: Shape/dtype structs of the inputs.
+    out_shape: A single shape/dtype struct, or a list/tuple of them.
+    smem_scratch_shape: Description of the SMEM scratch buffers.
+    prof_spec: Optional profiler specification. When given, the profiler
+      buffer is appended as an extra output.
+  Returns:
+    A `(module, out_shape, gmem_scratch_bytes, unwrap_output_tuple)` tuple,
+    where `out_shape` is the normalized output tuple (including the profiler
+    buffer, if any) and `unwrap_output_tuple` is True when a single non-tuple
+    `out_shape` was passed in.
+  """
+  ptr_ty = ir.Type.parse("!llvm.ptr")
+  token_ty = ir.Type.parse("!gpu.async.token")
+  i8 = ir.IntegerType.get_signless(8)
+  i64 = ir.IntegerType.get_signless(64)
+  def _shape_to_ref_ty(shape: jax.ShapeDtypeStruct) -> ir.MemRefType:
+    return ir.MemRefType.get(shape.shape, mlir.dtype_to_ir_type(shape.dtype))
+  in_ref_tys = [_shape_to_ref_ty(t) for t in in_shapes]
+  # Normalize `out_shape` to a tuple, remembering whether the caller passed a
+  # single (unwrapped) output.
+  unwrap_output_tuple = False
+  if isinstance(out_shape, list):
+    out_shape = tuple(out_shape)
+  elif not isinstance(out_shape, tuple):
+    out_shape = (out_shape,)
+    unwrap_output_tuple = True
+  out_ref_tys = [_shape_to_ref_ty(t) for t in out_shape]
+  if prof_spec is not None:
+    out_shape = (*out_shape, prof_spec.jax_buffer_type(grid, block))
+    out_ref_tys.append(prof_spec.mlir_buffer_type(grid, block))
+  module = ir.Module.create()
+  with ir.InsertionPoint(module.body):
+    _declare_runtime_functions()
+    gmem_scratch_bytes = 0
+    @func.FuncOp.from_py_func(ptr_ty, ptr_ty)
+    def main(token_ptr, buffers):
+      nonlocal gmem_scratch_bytes
+      token = builtin.unrealized_conversion_cast([token_ty], [token_ptr])
+      arg_refs = []
+      # `i` starts at -1 so that the scratch pointer is read from slot 0 when
+      # there are no input or output buffers.
+      i = -1
+      for i, ref_ty in enumerate([*in_ref_tys, *out_ref_tys]):
+        ptr = llvm.LoadOp(ptr_ty, llvm.GEPOp(ptr_ty, buffers, [], [i], ptr_ty))
+        arg_refs.append(utils.ptr_as_memref(ptr, ir.MemRefType(ref_ty)))
+      # The GMEM scratch pointer is stored right after the last buffer.
+      gmem_scratch_ptr = llvm.LoadOp(
+          ptr_ty, llvm.GEPOp(ptr_ty, buffers, [], [i + 1], ptr_ty)
+      )
+      in_refs = arg_refs[:len(in_ref_tys)]
+      out_refs = arg_refs[len(in_ref_tys):]
+      # The profiler buffer, if any, is the last output and is not passed to
+      # `body`.
+      prof_buffer = out_refs.pop() if prof_spec is not None else None
+      with _launch(
+          token, grid, block, gmem_scratch_ptr, smem_scratch_shape,
+          prof_spec, prof_buffer
+      ) as (launch_ctx, smem_refs):
+        body(launch_ctx, *in_refs, *out_refs, smem_refs)
+        gmem_scratch_bytes = launch_ctx.next_scratch_offset
+      # Allocate and initialize the host buffer right before the launch.
+      # Note that we couldn't do that before, because we had to run the body
+      # to learn what the scratch contains.
+      with ir.InsertionPoint(launch_ctx.launch_op):
+        host_scratch_ptr = llvm.alloca(ptr_ty, c(gmem_scratch_bytes, i64), i8)
+        for init_callback in launch_ctx.host_scratch_init:
+          init_callback(host_scratch_ptr)
+        func.call(
+            [],
+            "mosaic_gpu_memcpy_async_h2d",
+            [
+                gmem_scratch_ptr,
+                host_scratch_ptr,
+                c(gmem_scratch_bytes, i64),
+                token_ptr,
+            ],
+        )
+    main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
+  module.operation.verify()
+  return module, out_shape, gmem_scratch_bytes, unwrap_output_tuple
+def as_gpu_kernel(
+    body,
+    grid: tuple[int, ...],
+    block: tuple[int, ...],
+    in_shape,
+    out_shape,
+    smem_scratch_shape: ShapeTree | Union[ShapeTree],
+    prof_spec: profiler.ProfilerSpec | None = None,
+):
+  """Wraps a Mosaic GPU kernel body into a jitted JAX function.
+  Args:
+    body: Function emitting the kernel body (see `_lower_as_gpu_kernel`).
+    grid: Grid dimensions.
+    block: Block dimensions.
+    in_shape: A single shape/dtype struct, or a list/tuple of them.
+    out_shape: A single shape/dtype struct, or a list/tuple of them.
+    smem_scratch_shape: Description of the SMEM scratch buffers.
+    prof_spec: Optional profiler specification. When given, every call also
+      dumps a trace file into the directory named by the
+      TEST_UNDECLARED_OUTPUTS_DIR environment variable.
+  Returns:
+    A jitted function taking the input arrays. It returns a single array if
+    `out_shape` was a single struct, and a list of arrays otherwise.
+  """
+  if isinstance(in_shape, list):
+    in_shape = tuple(in_shape)
+  elif not isinstance(in_shape, tuple):
+    in_shape = (in_shape,)
+  module, out_shape, gmem_scratch_bytes, unwrap_output_tuple = (
+      _lower_as_gpu_kernel(
+          body, grid, block, in_shape, out_shape, smem_scratch_shape, prof_spec
+      )
+  )
+  expected_arg_treedef = jax.tree.structure(in_shape)
+  def _check_args(*args):
+    # Only the pytree structure of the arguments is validated here.
+    arg_treedef = jax.tree.structure(args)
+    if arg_treedef != expected_arg_treedef:
+      raise ValueError(
+          f"Invalid argument structure: expected {expected_arg_treedef}, got"
+          f" {arg_treedef}, ({args=})"
+      )
+  def bind(*args):
+    return mosaic_gpu_p.bind(
+        *args,
+        out_types=out_shape,
+        module=module,
+        gmem_scratch_bytes=gmem_scratch_bytes,
+    )
+  if prof_spec is not None:
+    @jax.jit
+    def prof_kernel(*args):
+      _check_args(*args)
+      # The profiler buffer is always the last result.
+      *results, prof_buffer = bind(*args)
+      def dump_profile(prof_buffer):
+        out_dir = os.getenv("TEST_UNDECLARED_OUTPUTS_DIR")
+        if out_dir is None:
+          # Fail with a clear message instead of the opaque TypeError that
+          # os.path.join(None, ...) would raise.
+          raise ValueError(
+              "TEST_UNDECLARED_OUTPUTS_DIR must be set to dump the Mosaic GPU"
+              " profile"
+          )
+        out_file = os.path.join(out_dir, f"{time.time_ns()}-trace.json")
+        try:
+          # Mode "x" refuses to overwrite an existing trace.
+          with open(out_file, "x") as f:
+            prof_spec.dump(prof_buffer, f, grid=grid, block=block)
+        except FileExistsError:
+          pass  # TODO: Retry
+      jax.debug.callback(dump_profile, prof_buffer)
+      return results[0] if unwrap_output_tuple else results
+    return prof_kernel
+  else:
+    @jax.jit
+    def kernel(*args):
+      _check_args(*args)
+      results = bind(*args)
+      return results[0] if unwrap_output_tuple else results
+    return kernel
+def _declare_runtime_functions():
+  """Declares the runtime functions that can be used by the generated code."""
+  ptr_ty = ir.Type.parse("!llvm.ptr")
+  i64 = ir.IntegerType.get_signless(64)
+  # (symbol name, operand types); none of the functions return a value.
+  signatures = (
+      (
+          "mosaic_gpu_init_tma_desc",
+          [ptr_ty, ptr_ty, i64, i64, ptr_ty, ptr_ty, i64, ptr_ty],
+      ),
+      ("mosaic_gpu_memcpy_async_h2d", [ptr_ty, ptr_ty, i64, ptr_ty]),
+  )
+  for symbol, operand_tys in signatures:
+    func.FuncOp(
+        symbol, ir.FunctionType.get(operand_tys, []), visibility="private"
+    )

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/fragmented_array.py ADDED Viewed

	@@ -0,0 +1,661 @@

+# Copyright 2024 The JAX Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for code generator."""
+import dataclasses
+import jax
+from jaxlib.mlir import ir
+from jaxlib.mlir.dialects import arith
+from jaxlib.mlir.dialects import gpu
+from jaxlib.mlir.dialects import llvm
+from jaxlib.mlir.dialects import math as mlir_math
+from jaxlib.mlir.dialects import memref
+from jaxlib.mlir.dialects import nvvm
+from jaxlib.mlir.dialects import vector
+import numpy as np
+from . import dsl as mgpu
+from . import utils
+# mypy: ignore-errors
+# Re-exported from utils; used below to divide array elements among threads.
+WARPGROUP_SIZE = utils.WARPGROUP_SIZE
+# Short alias for utils.c, which is called as c(value, type) below
+# (presumably builds an MLIR constant -- confirm in utils).
+c = utils.c
+@dataclasses.dataclass(frozen=True)
+class WGSplatFragLayout:
+  """A fragmented array where all the values are equal represented as a register per thread.
+  FragmentedArrays in this layout are always the result of a
+  splat, each thread in the warpgroup has a single copy of the value,
+  while the FragmentedArray pretends it has whatever shape the user
+  wants. This means we can trivially broadcast, reshape and do
+  elementwise operations with all other layouts.
+  Example:
+  To load a value in
+  ```
+  FragmentedArray.splat(memref.load(ref_1d, [1]), (10,20,2))
+  ```
+  A shape is always provided for sanity check reasons.
+  """
+  shape: tuple[int, ...] = ()
+  def can_broadcast_to(self, shape) -> bool:
+    """Check that the shape can be broadcast.
+    Only dimensions of size 1 can be broadcast. All other dimensions
+    must be the same as the argument shape. Dimensions are matched from
+    the trailing end; leading dimensions of this layout that have no
+    counterpart in `shape` must therefore be of size 1.
+    """
+    # zip() below stops at the shorter shape, so extra leading dimensions of
+    # self.shape have to be checked separately.
+    extra_dims = len(self.shape) - len(shape)
+    if extra_dims > 0 and any(dim != 1 for dim in self.shape[:extra_dims]):
+      return False
+    return all(
+        dim1 == dim2 or dim1 == 1
+        for dim1, dim2 in zip(self.shape[::-1], shape[::-1])
+    )
+# Field-less marker layout; FragmentedArray.__init__ requires the register
+# array for this layout to be 4-dimensional with trailing shape (2, 1).
+@dataclasses.dataclass(frozen=True)
+class WGMMAFragLayout:
+  """[m, n] matrix, where m % 64 == 0 == n % 8."""
+# Field-less marker layout; FragmentedArray.__init__ requires the register
+# array for this layout to be 2-dimensional with a trailing dimension of 2.
+@dataclasses.dataclass(frozen=True)
+class WGMMARowFragLayout:
+  """[m] matrix, where m % 64 == 0."""
+@dataclasses.dataclass(frozen=True)
+class WGStridedFragLayout:
+  """Convert the array to 1D and then shard across threads."""
+  # Logical shape of the array.
+  shape: tuple[int, ...]
+  # Number of elements in each vector handled by a thread.
+  vec_size: int
+  def __post_init__(self):
+    # The element count must split evenly into one vector per thread.
+    if np.prod(self.shape) % (self.vec_size * WARPGROUP_SIZE) != 0:
+      raise ValueError((self, WARPGROUP_SIZE))
+  @classmethod
+  def from_memref_type(cls, memref_ty: ir.Type):
+    """Builds a layout for a memref type, choosing the vector size.
+    The vector size is the smaller of `8 // bytewidth` and the number of
+    elements per thread.
+    Raises:
+      TypeError: if `memref_ty` is not a MemRefType.
+      ValueError: if the element count is not a multiple of WARPGROUP_SIZE.
+    """
+    if not ir.MemRefType.isinstance(memref_ty):
+      raise TypeError(memref_ty)
+    memref_type = ir.MemRefType(memref_ty)
+    bw = mgpu.bytewidth(memref_type.element_type)
+    # The element byte width must evenly divide 8 bytes.
+    assert 8 % bw == 0 and 8 // bw != 0, bw
+    if np.prod(memref_type.shape) % WARPGROUP_SIZE != 0:
+      raise ValueError(
+          "Ref must have a number of elements that is a multiple of"
+          f" {WARPGROUP_SIZE}"
+      )
+    max_vec_size = np.prod(memref_type.shape) // WARPGROUP_SIZE
+    return cls(
+        shape=tuple(memref_type.shape), vec_size=min(8 // bw, max_vec_size)
+    )
+  def thread_vec_idxs(self):
+    """The indexes to be used for vector load/store WGStridedFragLayout.
+    Yields:
+      The indices of the vector that correspond to the current thread.
+    """
+    index = ir.IndexType.get()
+    cardinality = np.prod(self.shape)
+    assert cardinality % (WARPGROUP_SIZE * self.vec_size) == 0
+    # Number of vectors each thread handles.
+    reg_num = cardinality // (WARPGROUP_SIZE * self.vec_size)
+    # Position of the thread within its warpgroup.
+    tidx = arith.remui(gpu.thread_id(gpu.Dimension.x), c(WARPGROUP_SIZE, index))
+    off = arith.muli(tidx, c(self.vec_size, tidx.type))
+    # Consecutive vectors of one thread are WARPGROUP_SIZE * vec_size elements
+    # apart.
+    for i in range(reg_num):
+      yield [arith.addi(off, c(i * WARPGROUP_SIZE * self.vec_size, tidx.type))]
+# Union of all the layout classes defined above.
+FragmentedLayout = WGSplatFragLayout | WGStridedFragLayout | WGMMAFragLayout | WGMMARowFragLayout
+# Shared instances of the two field-less WGMMA layouts.
+WGMMA_LAYOUT = WGMMAFragLayout()
+WGMMA_ROW_LAYOUT = WGMMARowFragLayout()
+@jax.tree_util.register_pytree_node_class
+class FragmentedArray:
+  registers: np.ndarray  # of ir.Value, see checks in init for shapes.
+  layout: FragmentedLayout
+  def __init__(self, *, _registers: np.ndarray, _layout: FragmentedLayout):
+    self.registers = _registers
+    self.layout = _layout
+    match self.layout:
+      # Registers are [m_tiles, n_tiles, 2 rows, 1 cols] in WGMMA layout
+      # Each element is a vector<2xdtype>
+      case WGMMAFragLayout():
+        if self.registers.ndim != 4 or self.registers.shape[2:] != (2, 1):
+          raise ValueError("Invalid register array shape")
+      # Registers are [m_tiles, 2 rows] in WGMMA_ROW layout
+      # Each element is a dtype scalar
+      case WGMMARowFragLayout():
+        if self.registers.ndim != 2 or self.registers.shape[-1] != 2:
+          raise ValueError("Invalid register array shape")
+      # Registers are flat
+      case WGStridedFragLayout(shape):
+        (reg_size,) = ir.VectorType(_registers.flat[0].type).shape
+        if np.prod(shape) != np.prod(_registers.shape) * WARPGROUP_SIZE * reg_size:
+          raise ValueError((reg_size, shape, _registers.shape, WARPGROUP_SIZE), _registers.flat[0].type)
+      # Just a single register
+      case WGSplatFragLayout():
+        if _registers.size != 1:
+          raise ValueError(f"WGStridedFragLayout requires a single value {_registers.shape} ({_registers.size})")
+      case _:
+        raise NotImplementedError
+  @classmethod
+  def load_strided(cls, ref: ir.Value):
+    if not ir.MemRefType.isinstance(ref.type):
+      raise TypeError(ref.type)
+    ref_ty = ir.MemRefType(ref.type)
+    ref_1d = mgpu.memref_fold(ref, 0, len(ref_ty.shape))
+    layout = WGStridedFragLayout.from_memref_type(ref_ty)
+    vec_ty = ir.VectorType.get((layout.vec_size,), ref_ty.element_type)
+    vecs = [vector.load(vec_ty, ref_1d, vec_idx) for vec_idx in layout.thread_vec_idxs()]
+    return cls(_registers=np.array(vecs), _layout=layout)
+  @classmethod
+  def splat(cls, value, shape, layout=None):
+    layout = layout or WGSplatFragLayout(shape)
+    match layout:
+      case WGMMARowFragLayout():
+        if len(shape) != 1:
+          raise ValueError
+        if shape[0] % 64:
+          raise ValueError
+        reg_shape = (shape[0] // 64, 2)
+      case WGMMAFragLayout():
+        if len(shape) != 2:
+          raise ValueError
+        if shape[0] % 64 or shape[1] % 8:
+          raise ValueError
+        reg_shape = (shape[0] // 64, shape[1] // 8, 2, 1)
+        value = vector.splat(ir.VectorType.get((2,), value.type), value)
+      case WGStridedFragLayout(vec_size=vec_size):
+        assert shape == layout.shape
+        elems = np.prod(shape)
+        reg_shape = (elems // (WARPGROUP_SIZE * vec_size),)
+        value = vector.splat(ir.VectorType.get((vec_size,), value.type), value)
+      case WGSplatFragLayout():
+        assert shape == layout.shape
+        reg_shape = ()
+      case _:
+        raise NotImplementedError(layout)
+    return cls(
+        _registers=np.full(reg_shape, value, dtype=object),
+        _layout=layout,
+    )
+  @property
+  def shape(self):
+    """The logical array shape, derived from the layout and registers."""
+    match self.layout:
+      # WGMMA layouts: 64 rows per row tile and 8 columns per column tile.
+      case WGMMAFragLayout():
+        row_tiles, col_tiles = self.registers.shape[:2]
+        return (row_tiles * 64, col_tiles * 8)
+      case WGMMARowFragLayout():
+        row_tiles = self.registers.shape[0]
+        return (row_tiles * 64,)
+      # Strided and splat layouts store their shape explicitly.
+      case WGStridedFragLayout(shape):
+        return shape
+      case WGSplatFragLayout(shape=shape):
+        return shape
+  @property
+  def mlir_dtype(self):
+    """The MLIR element type, read from the type of the first register."""
+    reg_ty = self.registers.flat[0].type
+    match self.layout:
+      # These layouts hold vector registers, so unwrap the element type.
+      case WGMMAFragLayout() | WGStridedFragLayout():
+        return ir.VectorType(reg_ty).element_type
+      # These layouts hold scalar registers.
+      case WGMMARowFragLayout() | WGSplatFragLayout():
+        return reg_ty
+  def _pointwise(self, op, *other):
+    other_arrs = []
+    for o in other:
+      if not isinstance(o, FragmentedArray):
+        if not isinstance(o, ir.Value):
+          raise NotImplementedError(o)
+        o = FragmentedArray.splat(o, shape=self.shape, layout=self.layout)
+      if isinstance(o.layout, WGSplatFragLayout):
+        if not o.layout.can_broadcast_to(self.shape):
+          raise ValueError("Can't broadcast shape.")
+        o = FragmentedArray.splat(o.registers.flat[0], shape=self.shape, layout=self.layout)
+      else:
+        if self.layout != o.layout:
+          raise ValueError("Incompatible FragmentedArray layouts")
+        if self.registers.shape != o.registers.shape:
+          raise ValueError("Incompatible FragmentedArray shapes")
+      other_arrs.append(o)
+    new_regs = np.empty_like(self.registers)
+    for idx, reg in np.ndenumerate(self.registers):
+      new_regs[idx] = op(reg, *(o.registers[idx] for o in other_arrs))
+    return FragmentedArray(_registers=new_regs, _layout=self.layout)
+  def __add__(self, other):
+    if ir.FloatType.isinstance(self.mlir_dtype):
+      return self._pointwise(arith.addf, other)
+    elif ir.IntegerType.isinstance(self.mlir_dtype):
+      return self._pointwise(arith.addi, other)
+    else:
+      raise NotImplementedError(self.mlir_dtype)
+  def __mul__(self, other):
+    if ir.FloatType.isinstance(self.mlir_dtype):
+      return self._pointwise(arith.mulf, other)
+    elif ir.IntegerType.isinstance(self.mlir_dtype):
+      return self._pointwise(arith.muli, other)
+    else:
+      raise NotImplementedError(self.mlir_dtype)
+  def __sub__(self, other):
+    if not ir.FloatType.isinstance(self.mlir_dtype):
+      raise NotImplementedError
+    return self._pointwise(arith.subf, other)
+  def __truediv__(self, other):
+    if not ir.FloatType.isinstance(self.mlir_dtype):
+      raise NotImplementedError
+    return self._pointwise(arith.divf, other)
+  def max(self, other):
+    if not ir.FloatType.isinstance(self.mlir_dtype):
+      raise NotImplementedError
+    return self._pointwise(arith.maximumf, other)
+  def exp(self, approx: bool = False):
+    if not ir.FloatType.isinstance(self.mlir_dtype):
+      raise NotImplementedError
+    def fast_exp(x):
+      f32 = ir.F32Type.get()
+      if self.mlir_dtype != f32:
+        raise NotImplementedError
+      log2e = arith.constant(f32, ir.FloatAttr.get(f32, 1.4426950408889634))
+      if x.type == f32:
+        scaled = arith.mulf(x, log2e)
+        return llvm.inline_asm(
+            f32, [scaled], "ex2.approx.f32 $0,$1;", "=f,f", asm_dialect=0
+        )
+      elif ir.VectorType.isinstance(x.type):
+        index = ir.IndexType.get()
+        result = llvm.mlir_undef(x.type)
+        for i in range(2):
+          v = vector.extractelement(x, position=c(i, index))
+          vr = fast_exp(v)
+          result = vector.insertelement(vr, result, position=c(i, index))
+        return result
+      else:
+        raise NotImplementedError(x.type)
+    return self._pointwise(fast_exp if approx else mlir_math.exp)
+  def rsqrt(self):
+    """Applies ``mlir_math.rsqrt`` to every register of the array."""
+    # NOTE(review): unlike exp(), no float-dtype check is done here.
+    return self._pointwise(mlir_math.rsqrt)
+  def __and__(self, other):
+    if not ir.IntegerType.isinstance(self.mlir_dtype):
+      raise ValueError(
+          "Bitwise operations only defined for integer types, not"
+          f" {self.mlir_dtype}"
+      )
+    return self._pointwise(arith.andi, other)
+  def bitcast(self, elt: ir.Type):
+    reg_type = self.registers.flat[0].type
+    if ir.VectorType.isinstance(reg_type):
+      reg_shape = ir.VectorType(reg_type).shape
+      ty = ir.VectorType.get(reg_shape, elt)
+    else:
+      ty = elt
+    return self._pointwise(lambda x: arith.bitcast(ty, x))
+  def __getitem__(self, idx):
+    """Slices a WGMMA-layout array along tile boundaries.
+
+    Raises:
+      NotImplementedError: If the layout is not WGMMA, if any dimension is
+        squeezed (integer indexing), or if the slice is not aligned to
+        64-row / 8-column tiles.
+    """
+    if self.layout != WGMMA_LAYOUT:
+      raise NotImplementedError("Only WGMMA layouts support slicing")
+    base_idx, slice_shape, is_squeezed = utils.parse_indices(idx, self.shape)
+    if any(is_squeezed):
+      raise NotImplementedError("Only slicing implemented")
+    if (
+        base_idx[0] % 64
+        or slice_shape[0] % 64
+        or base_idx[1] % 8
+        or slice_shape[1] % 8
+    ):
+      raise NotImplementedError("Only tile aligned slicing supported")
+    # Convert element offsets/extents into tile offsets/extents, in place.
+    # NOTE(review): assumes parse_indices returns mutable sequences — confirm.
+    base_idx[0] //= 64
+    slice_shape[0] //= 64
+    base_idx[1] //= 8
+    slice_shape[1] //= 8
+    # Only the two leading (tile) axes are sliced; trailing axes are kept.
+    new_regs = self.registers[
+        base_idx[0] : base_idx[0] + slice_shape[0],
+        base_idx[1] : base_idx[1] + slice_shape[1],
+    ]
+    return FragmentedArray(_registers=new_regs, _layout=self.layout)
+  # TODO(apaszke): Support JAX dtypes here as well?
+  def astype(self, new_dtype: ir.Type):
+    cur_dtype = self.mlir_dtype
+    if cur_dtype == new_dtype:
+      return self
+    from_float = ir.FloatType.isinstance(cur_dtype)
+    to_float = ir.FloatType.isinstance(new_dtype)
+    from_integer = ir.IntegerType.isinstance(cur_dtype)
+    to_integer = ir.IntegerType.isinstance(new_dtype)
+    if from_float and to_float:
+      if ir.FloatType(cur_dtype).width > ir.FloatType(new_dtype).width:
+        convert = arith.truncf
+      else:
+        convert = arith.extf
+    elif from_integer and to_integer:
+      if ir.IntegerType(cur_dtype).width > ir.IntegerType(new_dtype).width:
+        convert = arith.trunci
+      else:
+        convert = arith.extsi
+    elif from_integer and to_float:
+      convert = arith.sitofp
+    elif from_float and to_integer:
+      convert = arith.fptosi
+    new_registers = np.empty_like(self.registers)
+    match self.layout:
+      case WGMMAFragLayout():
+        new_reg_ty = ir.VectorType.get((2,), new_dtype)
+      case WGStridedFragLayout(vec_size=vec_size):
+        new_reg_ty = ir.VectorType.get((vec_size,), new_dtype)
+      case WGMMARowFragLayout() | WGSplatFragLayout():
+        new_reg_ty = new_dtype
+      case _:
+        raise NotImplementedError(f"Unsupported layout {self.layout}")
+    for idx, reg in np.ndenumerate(self.registers):
+      new_registers[idx] = convert(new_reg_ty, reg)
+    return FragmentedArray(_registers=new_registers, _layout=self.layout)
+  def reduce_sum(self, scratch) -> ir.Value:
+    index = ir.IndexType.get()
+    if not isinstance(self.layout, WGStridedFragLayout):
+      raise NotImplementedError(f"Unsupported layout {self.layout}")
+    result = c(0, self.mlir_dtype)
+    for reg in self.registers:
+      result = arith.addf(
+          result,
+          vector.reduction(self.mlir_dtype, vector.CombiningKind.ADD, reg),
+      )
+    scratch_ty = ir.MemRefType(scratch.type)
+    if scratch_ty.element_type != self.mlir_dtype or scratch_ty.shape != [4]:
+      raise ValueError(f"Expected shape={(4,)}, {self.mlir_dtype} (got {scratch_ty})")
+    if ir.FloatType.isinstance(self.mlir_dtype):
+      op = arith.addf
+    elif ir.IntegerType.isinstance(self.mlir_dtype):
+      op = arith.addi
+    else:
+      raise NotImplementedError(self.mlir_dtype)
+    warp_result = utils.warp_tree_reduce(result, op, 32)
+    warp_id = arith.divui(gpu.thread_id(gpu.Dimension.x), c(32, index))
+    memref.store(warp_result, scratch, [warp_id])
+    utils.commit_shared()
+    zero_index = c(0, index)
+    with mgpu.single_thread():
+      scratch_vec = vector.load(
+          ir.VectorType.get((4,), self.mlir_dtype),
+          scratch,
+          [zero_index],
+      )
+      scratch_sum = vector.reduction(
+          self.mlir_dtype, vector.CombiningKind.ADD, scratch_vec
+      )
+      memref.store(scratch_sum, scratch, [zero_index])
+    utils.commit_shared()
+    return memref.load(scratch, [zero_index])
+  def reduce(self, op, axis):
+    """Reduces a WGMMA-layout array along axis 1 with binary ``op``.
+
+    Args:
+      op: Binary callable used both on vector registers and on scalars.
+      axis: Must be 1.
+
+    Returns:
+      A FragmentedArray in WGMMA_ROW_LAYOUT.
+
+    Raises:
+      NotImplementedError: If the layout is not WGMMA or ``axis != 1``.
+    """
+    if self.layout != WGMMA_LAYOUT:
+      raise NotImplementedError(self.layout)
+    if axis != 1:
+      raise NotImplementedError
+    index = ir.IndexType.get()
+    i32 = ir.IntegerType.get_signless(32)
+    # shape[::2] keeps axes 0 and 2: (m_tiles, 2 rows).
+    new_regs = np.empty(self.registers.shape[::2], dtype=object)
+    assert self.registers.shape[-1] == 1
+    for row_tile, row_subtile in np.ndindex(new_regs.shape):
+      # Reduce the registers owned by the current thread over n tiles
+      thread_result_vec = self.registers[row_tile, 0, row_subtile, 0]
+      for n_tile in range(1, self.registers.shape[1]):
+        thread_result_vec = op(
+            thread_result_vec, self.registers[row_tile, n_tile, row_subtile, 0]
+        )
+      # Combine the two lanes of the per-thread vector into a scalar.
+      thread_result = op(
+          vector.extractelement(thread_result_vec, position=c(0, index)),
+          vector.extractelement(thread_result_vec, position=c(1, index)),
+      )
+      # Do a shuffle to reduce in groups of 4 consecutive threads.
+      result = thread_result
+      for i in (1, 2):
+        other_result = nvvm.shfl_sync(
+            result.type,
+            c(0xFFFFFFFF, i32),
+            result,
+            c(i, i32),
+            c(0x1F, i32),
+            nvvm.ShflKind.bfly,
+        )
+        result = op(result, other_result)
+      new_regs[row_tile, row_subtile] = result
+    return FragmentedArray(_registers=new_regs, _layout=WGMMA_ROW_LAYOUT)
+  def broadcast(self, shape):
+    if not isinstance(self.layout, WGSplatFragLayout):
+      raise NotImplementedError(self.layout)
+    if self.shape == shape:
+      return self
+    if not self.layout.can_broadcast_to(shape):
+      raise ValueError(f"Can't broadcast {self.shape} to {shape}")
+    return FragmentedArray(_registers=self.registers, _layout=WGSplatFragLayout(shape))
+  def reshape(self, shape):
+    if self.shape == shape:
+      return self
+    if not isinstance(self.layout, WGSplatFragLayout):
+      raise NotImplementedError(self.layout)
+    if np.prod(shape) != np.prod(self.shape):
+      raise ValueError(f"Can't reshape {self.shape} to {shape}")
+    return FragmentedArray(_registers=self.registers, _layout=WGSplatFragLayout(shape))
+  def broadcast_minor(self, n):
+    """Expands a WGMMA_ROW_LAYOUT array to WGMMA_LAYOUT with ``n`` columns.
+
+    Args:
+      n: Number of columns of the result; must be divisible by 8.
+
+    Raises:
+      NotImplementedError: If the layout is not WGMMA_ROW_LAYOUT.
+      ValueError: If ``n`` is not divisible by 8.
+    """
+    if self.layout != WGMMA_ROW_LAYOUT:
+      raise NotImplementedError
+    num_row_tiles = self.registers.shape[0]
+    num_col_tiles, rem = divmod(n, 8)
+    if rem:
+      raise ValueError("Number of columns must be divisible by 8")
+    new_regs = np.empty((num_row_tiles, num_col_tiles, 2, 1), dtype=object)
+    dtype = self.mlir_dtype
+    for (row_tile, row_subtile), reg in np.ndenumerate(self.registers):
+      # One splat per row register, shared by every column tile.
+      new_regs[row_tile, :, row_subtile, :] = vector.splat(
+          ir.VectorType.get((2,), dtype), reg
+      )
+    return FragmentedArray(_registers=new_regs, _layout=WGMMA_LAYOUT)
+  def store_untiled(self, ref: ir.Value):
+    if not ir.MemRefType.isinstance(ref.type):
+      raise ValueError(ref)
+    match self.layout:
+      case WGMMAFragLayout():
+        self._store_untiled_wgmma(ref)
+      case WGStridedFragLayout():
+        self._store_untiled_wg_strided(ref)
+      case _:
+        raise NotImplementedError(self.layout)
+  def _store_untiled_wg_strided(self, ref: ir.Value):
+    ref_ty = ir.MemRefType(ref.type)
+    ref_shape = tuple(ref_ty.shape)
+    if ref_shape != self.shape:
+      raise ValueError((ref_shape, self.shape))
+    smem_1d = mgpu.memref_fold(ref, 0, len(ref_ty.shape))
+    for idx, reg in zip(self.layout.thread_vec_idxs(), self.registers.flat):
+      vector.store(reg, smem_1d, idx)
+  def _store_untiled_wgmma(self, ref: ir.Value):
+    """Stores accumulator to a 2D memref. Not optimized at the moment.
+
+    Each element is written with an individual scalar ``memref.store``.
+
+    Raises:
+      ValueError: If the memref shape is not ``[m, n]``.
+    """
+    assert self.layout == WGMMA_LAYOUT
+    index = ir.IndexType.get()
+    m, n = self.shape
+    ref_ty = ir.MemRefType(ref.type)
+    if ref_ty.shape != [m, n]:
+      raise ValueError(ref.type, (m, n))
+    # Local helper for index constants; shadows the module-level `c`.
+    def c(x):
+      return arith.ConstantOp(index, ir.IntegerAttr.get(index, x))
+    tidx = arith.remui(gpu.thread_id(gpu.Dimension.x), c(WARPGROUP_SIZE))
+    lane_id = arith.remui(tidx, c(32))  # {0, 1, ..., 31}
+    warp_id = arith.divui(tidx, c(32))  # {0, 1, 2, 3}
+    # Each warp covers 16 rows; each group of 4 lanes shares a row.
+    row_base = arith.addi(
+        arith.divui(lane_id, c(4)), arith.muli(warp_id, c(16))
+    )
+    col_base = arith.muli(arith.remui(lane_id, c(4)), c(2))  # {0, 2, 4, 6}
+    it = np.ndenumerate(self.registers)
+    for (row_tile, col_tile, row_idx, col_zero), elem in it:
+      del col_zero
+      row = arith.addi(row_base, c(row_tile * 64 + row_idx * 8))
+      # Each register is a 2-element vector holding adjacent columns.
+      for col_idx in range(2):
+        value = vector.extractelement(elem, position=c(col_idx))
+        col = arith.addi(col_base, c(col_tile * 8 + col_idx))
+        memref.store(value, ref, [row, col])
+  def store_tiled(self, ref, swizzle: int | None):
+    if self.layout != WGMMA_LAYOUT:
+      raise NotImplementedError
+    dtype = self.mlir_dtype
+    bw = mgpu.bytewidth(dtype)
+    m, n = self.shape
+    assert m % 64 == 0  # This is implied by the layout.
+    cols_per_tile = 128 // bw
+    expected_shape = [m // 64, n // cols_per_tile, 64, cols_per_tile]
+    if ir.MemRefType(ref.type).shape != expected_shape:
+      raise ValueError(ref.type, (m, n))
+    for get, _, idxs in self.transfer_tiled(self.shape, dtype, swizzle):
+      vector.store(get(self.registers), ref, idxs)
+  @classmethod
+  def load_tiled(cls, ref, swizzle: int | None):
+    """Loads a tiled memref into a new WGMMA-layout array.
+
+    Args:
+      ref: 4D memref ``[m_tiles, n_tiles, 64, 128 // bytewidth(dtype)]``.
+      swizzle: Swizzle size forwarded to ``transfer_tiled``.
+
+    Raises:
+      ValueError: If the tile dimensions of ``ref`` are not as expected.
+    """
+    ref_ty = ir.MemRefType(ref.type)
+    dtype = ref_ty.element_type
+    bw = mgpu.bytewidth(dtype)
+    m_tiles, n_tiles, m_tile_size, n_tile_size = ref_ty.shape
+    if m_tile_size != 64 or n_tile_size != (128 // bw):
+      raise ValueError
+    m, n = m_tiles * m_tile_size, n_tiles * n_tile_size
+    assert m % 64 == 0  # This is implied by the layout.
+    # Start from zero-filled registers; they are overwritten by the loads.
+    registers = np.full(
+        (m_tiles, n // 8, 2, 1),
+        vector.splat(ir.VectorType.get((2,), dtype), c(0, dtype)),
+        dtype=object,
+    )
+    for _, update, idxs in cls.transfer_tiled((m, n), dtype, swizzle):
+      update(registers, vector.load(ir.VectorType.get((2,), dtype), ref, idxs))
+    return cls(_registers=registers, _layout=WGMMA_LAYOUT)
+  @staticmethod
+  def transfer_tiled(shape, dtype, swizzle: int | None):
+    """Generates the per-thread transfer plan for tiled loads and stores.
+
+    Args:
+      shape: The ``(m, n)`` logical array shape; ``n`` must divide by 32.
+      dtype: MLIR element type, used to compute the tile width.
+      swizzle: Must be 128.
+
+    Yields:
+      Triples ``(get_register, update_registers, idx)``: a function reading
+      the register to store from a register array, a function writing a
+      loaded vector back into a register array, and the 4D memref indices.
+
+    Raises:
+      NotImplementedError: If ``n % 32 != 0`` or ``swizzle != 128``.
+    """
+    bw = mgpu.bytewidth(dtype)
+    m, n = shape
+    if n % 32 != 0:
+      raise NotImplementedError
+    cols_per_tile = 128 // bw
+    if swizzle != 128:
+      raise NotImplementedError("Only 128B swizzle supported")
+    # Local shorthand for index constants; shadows the module-level `c`.
+    c = arith.ConstantOp.create_index
+    tidx = arith.remui(gpu.thread_id(gpu.Dimension.x), c(WARPGROUP_SIZE))
+    lane_id = arith.remui(tidx, c(32))  # {0, 1, ..., 31}
+    warp_id = arith.divui(tidx, c(32))  # {0, 1, 2, 3}
+    sub_row_base = arith.divui(lane_id, c(4))  # {0, 1, ..., 7}
+    if bw > 2:  # Stagger is only necessary for values larger than 16bit.
+      is_even_row = arith.cmpi(
+          arith.CmpIPredicate.eq, arith.remui(sub_row_base, c(2)), c(0)
+      )
+    else:
+      # We rely on canonicalization to clean up the selects.
+      i1 = ir.IntegerType.get_signless(1)
+      is_even_row = arith.constant(i1, ir.BoolAttr.get(True))
+    row_base = arith.addi(sub_row_base, arith.muli(warp_id, c(16)))
+    col_base = arith.muli(arith.remui(lane_id, c(4)), c(2))  # {0, 2, 4, 6}
+    # The swizzle pattern is constant for a given thread.
+    col_swizzle_bits = arith.muli(sub_row_base, c(16 // bw))
+    for row_group in range(m // 64):
+      for col_group in range(n // cols_per_tile):
+        for row_subidx in range(2):
+          row = arith.addi(row_base, c(row_subidx * 8))
+          for col_subidx in range(cols_per_tile // 8):
+            # We stagger the even and odd rows a little to avoid bank conflicts.
+            # It seems that the STS.64 is 2x faster (and the hardware reports no
+            # conflicts) when the conflicts are split between half-warps, as
+            # opposed to having them within the half-warp. This requires a
+            # little more work for the selects, but is ultimately worth it.
+            col_subidx_even = col_subidx
+            col_subidx_odd = col_subidx ^ 2
+            col_off = arith.select(
+                is_even_row, c(col_subidx_even * 8), c(col_subidx_odd * 8)
+            )
+            col = arith.addi(col_base, col_off)
+            col = arith.xori(col, col_swizzle_bits)
+            reg_idx_even = col_subidx_even + col_group * (cols_per_tile // 8)
+            reg_idx_odd = col_subidx_odd + col_group * (cols_per_tile // 8)
+            even_idx = row_group, reg_idx_even, row_subidx, 0
+            odd_idx = row_group, reg_idx_odd, row_subidx, 0
+            idx = c(row_group), c(col_group), row, col
+            # Indices are bound as defaults so each closure keeps the values
+            # of its own loop iteration.
+            def get_register(regs, even_idx=even_idx, odd_idx=odd_idx):
+              value_even = regs[even_idx]
+              value_odd = regs[odd_idx]
+              return arith.select(is_even_row, value_even, value_odd)
+            def update_registers(regs, new, even_idx=even_idx, odd_idx=odd_idx):
+              regs[even_idx] = arith.select(is_even_row, new, regs[even_idx])
+              regs[odd_idx] = arith.select(is_even_row, regs[odd_idx], new)
+            yield get_register, update_registers, idx
+  def tree_flatten(self):
+    return list(self.registers.flat), (self.layout, self.registers.shape)
+  @classmethod
+  def tree_unflatten(cls, aux, flat_registers):
+    layout, reg_shape = aux
+    registers = np.asarray(flat_registers, dtype=object).reshape(reg_shape)
+    return cls(_registers=registers, _layout=layout)

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/profiler.py ADDED Viewed

	@@ -0,0 +1,289 @@

+# Copyright 2024 The JAX Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import contextlib
+import ctypes
+import functools
+import json
+import math
+import jax
+from jax._src.interpreters import mlir
+from jax._src.lib import mosaic_gpu as mosaic_gpu_lib
+from jax._src.lib import xla_client
+import jax.numpy as jnp
+from jaxlib.mlir import ir
+from jaxlib.mlir.dialects import arith
+from jaxlib.mlir.dialects import gpu
+from jaxlib.mlir.dialects import memref
+from jaxlib.mlir.dialects import scf
+import numpy as np
+from .utils import *  # noqa: F403
+# ruff: noqa: F405
+# mypy: ignore-errors
+# Register the extension's event-recording capsule as a CUDA custom call.
+xla_client.register_custom_call_target(
+    "mosaic_gpu_record_event",
+    mosaic_gpu_lib._mosaic_gpu_ext._record_event_capsule(),
+    platform="CUDA",
+)
+# Primitive whose outputs mirror its inputs; it exists to record an event.
+record_event_p = jax.core.Primitive("record_event")
+record_event_p.multiple_results = True
+@record_event_p.def_abstract_eval
+def _record_event_abstract_eval(*args, event):
+  """Abstract eval: the outputs have exactly the avals of the inputs."""
+  del event  # Unused.
+  return args
+@functools.partial(mlir.register_lowering, record_event_p, platform="cuda")
+def _record_event_lowering_rule(ctx, *args, event):
+  ptr_bytes = ctypes.cast(event, ctypes.c_void_p).value.to_bytes(
+      8, byteorder="little"
+  )  # pytype: disable=attribute-error
+  op = mlir.custom_call(
+      "mosaic_gpu_record_event",
+      result_types=[mlir.aval_to_ir_type(aval) for aval in ctx.avals_out],
+      operands=args,
+      backend_config=ptr_bytes,
+      operand_output_aliases={i: i for i in range(len(args))},
+  )
+  return op.results
+def _record_event(args, event):
+  flat_args, treedef = jax.tree.flatten(args)
+  return jax.tree.unflatten(
+      treedef, record_event_p.bind(*flat_args, event=event)
+  )
+def measure(f, *args):
+  """Runs ``f(*args)`` under jit between two GPU events and times it.
+
+  The function is run once as a warmup and once more for the measurement.
+
+  Returns:
+    A pair ``(results, elapsed)``: the outputs of the second run and the
+    value reported by ``_gpu_event_elapsed`` for the two events.
+  """
+  # TODO(apaszke): Raise if this is called under jit.
+  start_event = mosaic_gpu_lib._mosaic_gpu_ext._gpu_event_create()
+  end_event = mosaic_gpu_lib._mosaic_gpu_ext._gpu_event_create()
+  try:
+    @jax.jit
+    def run(*args):
+      # Record the start event on the inputs and the end event on the outputs.
+      return _record_event(f(*_record_event(args, start_event)), end_event)
+    jax.block_until_ready(run(*args))  # Warmup.
+    results = jax.block_until_ready(run(*args))
+    elapsed = mosaic_gpu_lib._mosaic_gpu_ext._gpu_event_elapsed(
+        start_event, end_event
+    )
+  finally:
+    # Always release the events, even if tracing or execution failed.
+    mosaic_gpu_lib._mosaic_gpu_ext._gpu_event_destroy(start_event)
+    mosaic_gpu_lib._mosaic_gpu_ext._gpu_event_destroy(end_event)
+  return results, elapsed
+class ProfilerSpec:
+  ENTER = 0
+  EXIT = 1 << 31
+  def __init__(self, entries_per_warpgroup: int):
+    self.entries_per_warpgroup = entries_per_warpgroup
+    self.interned_names = {}
+  def _num_warpgroups(
+      self, grid: tuple[int, ...], block: tuple[int, ...]
+  ) -> int:
+    if math.prod(block) % WARPGROUP_SIZE:
+      raise ValueError("Block size is not a multiple of warpgroup size")
+    return math.prod(grid) * math.prod(block) // WARPGROUP_SIZE
+  def mlir_buffer_type(
+      self, grid: tuple[int, ...], block: tuple[int, ...]
+  ) -> ir.Type:
+    return ir.MemRefType.get(
+        (self._num_warpgroups(grid, block) * self.entries_per_warpgroup,),
+        ir.IntegerType.get_signless(32),
+    )
+  def jax_buffer_type(
+      self, grid: tuple[int, ...], block: tuple[int, ...]
+  ) -> ir.Type:
+    return jax.ShapeDtypeStruct(
+        (self._num_warpgroups(grid, block) * self.entries_per_warpgroup,),
+        jnp.uint32,
+    )
+  def smem_i32_elements(self, block: tuple[int, ...]):
+    num_warpgroups = self._num_warpgroups((), block)
+    return int(num_warpgroups * self.entries_per_warpgroup)
+  def smem_bytes(self, block: tuple[int, ...]):
+    bytes_per_entry = 4
+    return self.smem_i32_elements(block) * bytes_per_entry
+  def intern_name(self, name: str) -> int:
+    if name_id := self.interned_names.get(name, None):
+      return name_id
+    name_id = self.interned_names[name] = len(self.interned_names)
+    if name_id & self.EXIT:
+      raise RuntimeError("Allocated too many names")
+    return name_id
+  def dump(self, buffer, f, grid: tuple[int, ...], block: tuple[int, ...]):
+    buffer = np.asarray(buffer)
+    num_blocks = math.prod(grid)
+    warpgroups_per_block = self._num_warpgroups((), block)
+    entries = buffer.reshape(
+        num_blocks, warpgroups_per_block, self.entries_per_warpgroup
+    )
+    start_times = entries[..., :2].astype(np.int64)
+    start_times = (start_times[..., 0] << 32) + start_times[..., 1]
+    start_times -= start_times.min()  # Normalize
+    entries_used = entries[..., 2]
+    if np.any(entries_used > self.entries_per_warpgroup - 2):
+      raise RuntimeError("Insufficient space to capture a full trace")
+    traces = entries[..., 3:]
+    unintern = {v: k for k, v in self.interned_names.items()}
+    events = []
+    for block_idx, wg_idx in np.ndindex(num_blocks, warpgroups_per_block):
+      valid_entries = entries_used[block_idx, wg_idx] - 3
+      local_clock_offset = None
+      assert valid_entries % 2 == 0, valid_entries
+      start_time = start_times[block_idx, wg_idx]
+      block_events = []
+      for i in range(0, valid_entries, 2):
+        tag = traces[block_idx, wg_idx, i]
+        time = traces[block_idx, wg_idx, i + 1]
+        if local_clock_offset is None:
+          local_clock_offset = time
+        time -= local_clock_offset
+        time -= i * 6  # Account for the overhead of profiling.
+        if time < 0:
+          break  # Detect a timer wraparound
+        name_id = tag
+        begin = True
+        if name_id & ProfilerSpec.EXIT:
+          name_id = name_id ^ ProfilerSpec.EXIT
+          begin = False
+        name = unintern[name_id]
+        block_events.append({
+            "name": name,
+            "ph": "B" if begin else "E",
+            "ts": float(start_time + time) / 1e3,
+            "pid": 1 + block_idx,
+            "tid": 1 + wg_idx,
+        })
+      else:  # If we didn't break
+        events.extend(block_events)
+    return json.dump({"displayTimeUnit": "ns", "traceEvents": events}, f)
+class OnDeviceProfiler:
+  def __init__(self, spec: ProfilerSpec, smem_buffer: ir.Value, gmem_buffer: ir.Value):
+    self.spec = spec
+    # self.should_store = gpu.thread_id(gpu.Dimension.x)
+    i32 = ir.IntegerType.get_signless(32)
+    index = ir.IndexType.get()
+    self.entries_per_wg = spec.entries_per_warpgroup
+    wg_idx = warpgroup_idx(sync=False)
+    self.smem_buffer = memref_slice(
+        smem_buffer,
+        ds(
+            arith.index_cast(
+                index, arith.muli(wg_idx, c(self.entries_per_wg, i32))
+            ),
+            self.entries_per_wg,
+        ),
+    )
+    self.gmem_buffer = gmem_buffer
+    # Hopefully mem2reg will remove the allocation.
+    self.offset = memref.alloca(ir.MemRefType.get((), i32), [], [])
+    memref.store(c(0, i32), self.offset, [])
+  @contextlib.contextmanager
+  def record(self, name: str):
+    i32 = ir.IntegerType.get_signless(32)
+    index = ir.IndexType.get()
+    name_id = self.spec.intern_name(name)
+    def store(modifier):
+      cur = arith.index_cast(index, memref.load(self.offset, []))
+      # TODO(apaszke): Clamp indices
+      # bound = arith.subi(self.entries_per_block, c(2, index))
+      # cur = arith.select(
+      #     arith.cmpi(arith.CmpIPredicate.ult, cur, bound), cur, bound
+      # )
+      memref.store(c(modifier | name_id, i32), self.smem_buffer, [cur])
+      memref.store(
+          clock(), self.smem_buffer, [arith.addi(cur, c(1, cur.type))]
+      )
+      memref.store(
+          arith.index_cast(i32, arith.addi(cur, c(2, cur.type))),
+          self.offset,
+          [],
+      )
+    store(ProfilerSpec.ENTER)
+    yield
+    store(ProfilerSpec.EXIT)
+  def finalize(self, grid: tuple[int, ...], block: tuple[int, ...]):
+    index = ir.IndexType.get()
+    i32 = ir.IntegerType.get_signless(32)
+    gpu.barrier()   # Make sure all warpgroups are done.
+    block_idx = c(0, index)
+    for dim in gpu.Dimension:  # pytype: disable=wrong-arg-types
+      block_idx = arith.addi(
+          arith.muli(block_idx, gpu.grid_dim(dim)), gpu.block_id(dim)
+      )
+    wg_idx = warpgroup_idx(sync=False)
+    wg_per_block = math.prod(block) // WARPGROUP_SIZE
+    global_wg_idx = arith.addi(
+        arith.muli(block_idx, c(wg_per_block, index)),
+        arith.index_cast(index, wg_idx),
+    )
+    start_offset = arith.muli(global_wg_idx, c(self.entries_per_wg, index))
+    wg_gmem_buffer = memref.subview(
+        self.gmem_buffer, [start_offset], [self.entries_per_wg], [1],
+        result_type=ir.Type.parse(
+            f"memref<{self.entries_per_wg}xi32, strided<[1], offset: ?>>"
+        ),
+    )
+    thread_in_wg = arith.remui(thread_idx(), c(128, i32))
+    if_first = scf.IfOp(
+        arith.cmpi(arith.CmpIPredicate.eq, thread_in_wg, c(0, i32))
+    )
+    with ir.InsertionPoint(if_first.then_block):
+      # TODO(apaszke): Either use globaltimer or delete
+      # memref.store(globaltimer("high"), block_gmem_buffer, [c(0, index)])
+      # memref.store(globaltimer("low"), block_gmem_buffer, [c(1, index)])
+      memref.store(c(0, i32), wg_gmem_buffer, [c(0, index)])
+      memref.store(c(0, i32), wg_gmem_buffer, [c(1, index)])
+      memref.store(
+          arith.addi(memref.load(self.offset, []), c(3, i32)),
+          wg_gmem_buffer,
+          [c(2, index)],
+      )
+      for_op = scf.ForOp(
+          c(0, index),
+          c(self.entries_per_wg - 3, index),
+          c(1, index),
+      )
+      with ir.InsertionPoint(for_op.body):
+        x = memref.load(self.smem_buffer, [for_op.induction_variable])
+        memref.store(
+            x,
+            wg_gmem_buffer,
+            [arith.addi(for_op.induction_variable, c(3, index))],
+        )
+        scf.yield_([])
+      scf.yield_([])

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/utils.py ADDED Viewed

	@@ -0,0 +1,699 @@

+# Copyright 2024 The JAX Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for code generator."""
+from collections.abc import Iterator
+import contextlib
+import dataclasses
+from typing import Any, Literal, Sequence
+import jax
+from jaxlib.mlir import ir
+from jaxlib.mlir.dialects import arith
+from jaxlib.mlir.dialects import builtin
+from jaxlib.mlir.dialects import gpu
+from jaxlib.mlir.dialects import llvm
+from jaxlib.mlir.dialects import memref
+from jaxlib.mlir.dialects import nvgpu
+from jaxlib.mlir.dialects import nvvm
+from jaxlib.mlir.dialects import scf
+from jaxlib.mlir.dialects import vector
+import numpy as np
+# mypy: ignore-errors
+WARPGROUP_SIZE: int = 128
+DYNAMIC = -9223372036854775808
+# pylint: disable=line-too-long, wildcard-import, missing-function-docstring, bad-continuation, g-bad-todo, protected-access, g-explicit-length-test, missing-class-docstring, g-doc-return-or-yield, g-inconsistent-quotes
+def ptr_as_memref(ptr, memref_ty: ir.MemRefType):
+  if len(memref_ty.shape) == 0:
+    raise NotImplementedError
+  i64 = ir.IntegerType.get_signless(64)
+  rank = len(memref_ty.shape)
+  desc_ty = ir.Type.parse(
+      f"!llvm.struct<(ptr, ptr, i64, array<{rank} x i64>, array<{rank} x i64>)>"
+  )
+  desc = llvm.UndefOp(desc_ty)
+  desc = llvm.InsertValueOp(desc, ptr, [0])  # Allocation
+  desc = llvm.InsertValueOp(desc, ptr, [1])  # Aligned Base
+  desc = llvm.InsertValueOp(
+      desc, llvm.ConstantOp(i64, ir.IntegerAttr.get(i64, 0)), [2]
+  )
+  for i, s in enumerate(memref_ty.shape):
+    desc = llvm.InsertValueOp(
+        desc, llvm.ConstantOp(i64, ir.IntegerAttr.get(i64, s)), [3, i]
+    )
+  for i, s in enumerate(get_contiguous_strides(memref_ty.shape)):
+    desc = llvm.InsertValueOp(
+        desc, llvm.ConstantOp(i64, ir.IntegerAttr.get(i64, s)), [4, i]
+    )
+  return builtin.unrealized_conversion_cast([memref_ty], [desc])
+def pack_array(values):
+  if not values:
+    raise ValueError("Empty array")
+  elem_ty = values[0].type
+  i64 = ir.IntegerType.get_signless(64)
+  ptr_ty = ir.Type.parse("!llvm.ptr")
+  arr_ptr = llvm.alloca(ptr_ty, c(len(values), i64), elem_ty)
+  for i, v in enumerate(values):
+    elem_ptr = llvm.getelementptr(ptr_ty, arr_ptr, [], [i], elem_ty)
+    llvm.store(v, elem_ptr)
+  return arr_ptr
+def get_contiguous_strides(xs):
+  strides_ret = []
+  stride = 1
+  for x in xs[::-1]:
+    strides_ret.append(stride)
+    stride *= x
+  return strides_ret[::-1]
+def c(val: int | float, ty):
+  if ir.IntegerType.isinstance(ty) or ir.IndexType.isinstance(ty):
+    if not isinstance(val, (int, np.integer)):
+      raise TypeError(type(val))
+    attr = ir.IntegerAttr.get(ty, val)
+  elif ir.FloatType.isinstance(ty):
+    attr = ir.FloatAttr.get(ty, val)
+  elif ir.VectorType.isinstance(ty):
+    return vector.splat(ty, c(val, ir.VectorType(ty).element_type))
+  else:
+    raise NotImplementedError(ty)
+  return arith.constant(ty, attr)
+def get_tensormap_descriptor(**attrs):
+  return ir.Type.parse(
+      f"!nvgpu.tensormap.descriptor<{', '.join(k + '=' + v for k, v in attrs.items())}>"
+  )
+def debug_print(fmt, *args, uniform=True):
+  type_formats = []
+  new_args = []
+  for arg in args:
+    ty_format = None
+    if ir.IndexType.isinstance(arg.type):
+      ty_format = "%llu"
+    if ir.IntegerType.isinstance(arg.type):
+      width = ir.IntegerType(arg.type).width
+      if width == 64:
+        ty_format = "%llu"
+      elif width == 1:
+        ty_format = "%llu"
+        arg = arith.extui(ir.IntegerType.get_signless(64), arg)
+    if ir.F32Type.isinstance(arg.type):
+      ty_format = "%f"
+    if ir.F16Type.isinstance(arg.type):
+      ty_format = "%f"
+      arg = arith.extf(ir.F32Type.get(), arg)
+    if ty_format is None:
+      raise NotImplementedError(arg.type)
+    type_formats.append(ty_format)
+    new_args.append(arg)
+  ctx = single_thread if uniform else contextlib.nullcontext
+  with ctx():
+    gpu.printf(fmt.format(*type_formats) + "\n", new_args)
+@dataclasses.dataclass(frozen=True)
+class ForResult:
+  op: scf.ForOp
+  results: tuple[Any, ...]
+  @property
+  def result(self):
+    if len(self.results) != 1:
+      raise ValueError
+    return self.results[0]
+def fori(bound, carrys):
+  """Returns a decorator that builds an `scf.for` loop from the decorated body.
+
+  The loop runs from 0 up to `bound` with step 1. The decorated function is
+  called once, at trace time, as `f(i, carrys)` with the induction variable
+  and the loop-carried values, and must return the updated carries with the
+  same tree structure. The decorator returns a `ForResult` (not a function).
+
+  Args:
+    bound: upper bound passed to `scf.ForOp`.
+    carrys: initial loop-carried values. A list or tuple is treated as a
+      pytree; any other value is treated as a single carry and is passed to,
+      and expected back from, the body unwrapped.
+
+  Raises:
+    ValueError: if the body returns carries with a different tree structure.
+  """
+  unwrap = False
+  if not isinstance(carrys, (list, tuple)):
+    carrys = [carrys]
+    unwrap = True
+  flat_carrys, carry_treedef = jax.tree.flatten(carrys)
+  def wrapper(f):
+    index = ir.IndexType.get()
+    c0 = arith.ConstantOp(index, ir.IntegerAttr.get(index, 0))
+    c1 = arith.ConstantOp(index, ir.IntegerAttr.get(index, 1))
+    for_op = scf.ForOp(c0, bound, c1, flat_carrys)
+    # Everything the body emits goes inside the loop region.
+    with ir.InsertionPoint(for_op.body):
+      i = for_op.induction_variable
+      inner_carrys = jax.tree.unflatten(carry_treedef, for_op.inner_iter_args)
+      if unwrap:
+        [inner_carrys] = inner_carrys
+      new_carrys = f(i, inner_carrys)
+      if unwrap:
+        new_carrys = [new_carrys]
+      new_flat_carrys, new_carry_treedef = jax.tree.flatten(new_carrys)
+      if new_carry_treedef != carry_treedef:
+        raise ValueError(new_carry_treedef, carry_treedef)
+      scf.YieldOp(new_flat_carrys)
+    final_flat_carrys = for_op.results
+    # NOTE: the results are re-packed with the wrapped treedef, so a single
+    # unwrapped carry comes back as a one-element list in `ForResult.results`.
+    return ForResult(
+        for_op, jax.tree.unflatten(carry_treedef, final_flat_carrys)
+    )
+  return wrapper
+def thread_idx():
+  i32 = ir.IntegerType.get_signless(32)
+  as_i32 = lambda x: arith.index_cast(i32, x)
+  tidx = as_i32(gpu.thread_id(gpu.Dimension.x))
+  stride = as_i32(gpu.block_dim(gpu.Dimension.x))
+  for dim in (gpu.Dimension.y, gpu.Dimension.z):
+    tidx = arith.addi(tidx, arith.muli(as_i32(gpu.thread_id(dim)), stride))
+    stride = arith.muli(stride, as_i32(gpu.block_dim(dim)))
+  return tidx
+def _warp_bcast(val, lane_idx=0):
+  i32 = ir.IntegerType.get_signless(32)
+  mask = c(0xFFFFFFFF, i32)
+  return nvvm.shfl_sync(
+      val.type, mask, val, c(lane_idx, i32), c(0x1F, i32), nvvm.ShflKind.idx
+  )
+def warp_idx(sync=True):
+  i32 = ir.IntegerType.get_signless(32)
+  warp_idx = arith.shrui(thread_idx(), c(5, i32))
+  # Performing a warp broadcast improves performance as compiler understands
+  # that the value is uniform across the warp.
+  return _warp_bcast(warp_idx) if sync else warp_idx
+def warpgroup_idx(sync=True):
+  i32 = ir.IntegerType.get_signless(32)
+  wg_idx = arith.shrui(thread_idx(), c(7, i32))
+  # Performing a warp broadcast improves performance as compiler understands
+  # that the value is uniform across the warp.
+  return _warp_bcast(wg_idx) if sync else wg_idx
+# True within `single_thread()` contexts.
+_ONCE_REGION_ACTIVE = False
+@contextlib.contextmanager
+def single_thread():
+  """Runs the context only from a single thread.
+
+  Operations emitted inside the `with` body are placed in the `then` block of
+  an `scf.if` whose condition is "warp index is 0 and this thread was picked
+  by `elect.sync`". Nested uses are no-ops: when already inside such a
+  region the body is emitted in place without a second guard.
+  """
+  global _ONCE_REGION_ACTIVE
+  # Already guarded by an enclosing single_thread(): nothing more to do.
+  if _ONCE_REGION_ACTIVE:
+    yield
+    return
+  warp = warp_idx()
+  first_warp = arith.cmpi(arith.CmpIPredicate.eq, warp, c(0, warp.type))
+  elected = nvvm.elect_sync(ir.IntegerType.get_signless(1))
+  should_run = arith.andi(first_warp, elected)
+  if_op = scf.IfOp(should_run)
+  _ONCE_REGION_ACTIVE = True
+  try:
+    with ir.InsertionPoint(if_op.then_block):
+      yield
+      # Terminate the `then` block once the body has been emitted.
+      scf.YieldOp([])
+  finally:
+    # Reset the flag even if the body raised.
+    _ONCE_REGION_ACTIVE = False
+def clock():
+  i32 = ir.IntegerType.get_signless(32)
+  return llvm.inline_asm(
+      i32, [], "mov.u32  $0,%clock;", "=r", asm_dialect=0, has_side_effects=True
+  )
+def globaltimer(kind: Literal["low", "high"] | None = None):
+  if kind is None:
+    i64 = ir.IntegerType.get_signless(64)
+    return llvm.inline_asm(
+        i64, [], "mov.u32  $0,%globaltimer;",
+        "=l", asm_dialect=0, has_side_effects=True,
+    )
+  i32 = ir.IntegerType.get_signless(32)
+  return llvm.inline_asm(
+      i32, [], f"mov.u32  $0,%globaltimer_{kind[:2]};",
+      "=r", asm_dialect=0, has_side_effects=True,
+  )
+def bytewidth(ty: ir.Type):
+  if ir.IntegerType.isinstance(ty):
+    return ir.IntegerType(ty).width // 8
+  if ir.FloatType.isinstance(ty):
+    return ir.FloatType(ty).width // 8
+  raise NotImplementedError(ty)
+@dataclasses.dataclass(frozen=True)
+class DynamicSlice:
+  """An index that selects `length` elements starting at `base`.
+
+  `parse_indices` uses `base` as the start index and `length` as the static
+  size of the (non-squeezed) dimension.
+  """
+  # Start of the slice: a Python int or an MLIR value.
+  base: ir.Value | int
+  # Number of elements selected.
+  length: int
+# Short alias for `DynamicSlice`.
+ds = DynamicSlice
+def memref_slice(ref: ir.Value, index) -> ir.Value:
+  """Returns a `memref.subview` of `ref` selected by `index`.
+
+  `index` is interpreted by `parse_indices`. Dimensions it reports as
+  squeezed are dropped from the result type; all subview steps are 1.
+  """
+  ref_ty = ir.MemRefType(ref.type)
+  base_indices, slice_shape, is_squeezed = parse_indices(index, ref_ty.shape)
+  memref_strides, offset = ref_ty.get_strides_and_offset()
+  # Fold static start indices into the layout offset. As soon as one start
+  # index is not a Python int the offset of the result becomes dynamic.
+  new_offset = offset
+  for idx, stride in zip(base_indices, memref_strides):
+    if isinstance(idx, int):
+      new_offset += idx * stride
+    else:
+      new_offset = ir.ShapedType.get_dynamic_stride_or_offset()
+      break
+  # Squeezed dimensions disappear from both the strides and the shape.
+  new_strides = [
+      s for s, squeeze in zip(memref_strides, is_squeezed) if not squeeze
+  ]
+  new_shape = [s for s, squeeze in zip(slice_shape, is_squeezed) if not squeeze]
+  new_layout = ir.StridedLayoutAttr.get(new_offset, new_strides)
+  ref_slice = memref.subview(
+      ref, base_indices, slice_shape, [1] * len(ref_ty.shape),
+      result_type=ir.MemRefType.get(
+          new_shape, ref_ty.element_type, new_layout, ref_ty.memory_space
+      ),
+  )
+  return ref_slice
+def _is_contiguous_shape_slice(
+    ref_ty: ir.MemRefType, dim_slice: slice | None = slice(None)
+):
+  # If it's not a strided layout then we are definitely contiguous.
+  if not ir.StridedLayoutAttr.isinstance(ref_ty.layout):
+    return True
+  strides = ir.StridedLayoutAttr(ref_ty.layout).strides[dim_slice]
+  shape = ref_ty.shape[dim_slice]
+  # Check that each dimension fits exactly it the immediately larger stride.
+  ss = sorted(zip(strides, shape), key=lambda x: x[0], reverse=True)
+  for (prev_stride, _), (stride, shape) in zip(ss, ss[1:]):
+    if stride * shape != prev_stride:
+      return False
+  return True
+def memref_fold(ref: ir.Value, dim, fold_rank) -> ir.Value:
+  """Collapses `fold_rank` consecutive dimensions starting at `dim` into one.
+
+  The folded dimension has the product of the folded sizes. Identity (and
+  1D unit-stride) layouts stay identity; strided layouts are supported only
+  when the folded dimensions are contiguous with respect to each other.
+
+  Raises:
+    NotImplementedError: if the layout is strided and the folded dimensions
+      are not contiguous.
+  """
+  ref_ty = ir.MemRefType(ref.type)
+  new_shape = list(ref_ty.shape)
+  new_shape[dim : dim + fold_rank] = [np.prod(new_shape[dim : dim + fold_rank])]
+  identity = ir.AffineMapAttr.get(ir.AffineMap.get_identity(ref_ty.rank))
+  contig_strided_1d = ir.Attribute.parse("strided<[1]>")
+  # Not sure why but MLIR expects the strided 1D layout to disappear in this op.
+  if ref_ty.layout == identity or ref_ty.layout == contig_strided_1d:
+    new_layout = ir.AffineMapAttr.get(
+        ir.AffineMap.get_identity(ref_ty.rank - fold_rank + 1)
+    )
+  elif _is_contiguous_shape_slice(ref_ty, slice(dim, dim + fold_rank)):
+    new_strides, offset = ref_ty.get_strides_and_offset()
+    # The folded dimension keeps the stride of the last dimension it replaces.
+    new_strides[dim : dim + fold_rank] = [new_strides[dim + fold_rank - 1]]
+    new_layout = ir.StridedLayoutAttr.get(offset, new_strides)
+  else:
+    raise NotImplementedError(
+        f"strides={ref_ty.get_strides_and_offset()[0]}, {ref_ty.shape=},"
+        f" {dim=}, {fold_rank=}"
+    )
+  new_ty = ir.MemRefType.get(
+      new_shape, ref_ty.element_type, new_layout, ref_ty.memory_space
+  )
+  # Reassociation: one group per result dimension, listing the source
+  # dimensions that are merged into it.
+  assoc = [[d] for d in range(dim)]
+  assoc.append([dim + i for i in range(fold_rank)])
+  assoc.extend([d] for d in range(dim + fold_rank, ref_ty.rank))
+  assert len(assoc) == new_ty.rank
+  return memref.collapse_shape(new_ty, ref, assoc)
+def memref_unfold(ref: ir.Value, dim, factors) -> ir.Value:
+  """Splits dimension `dim` into `len(factors)` dimensions of the given sizes.
+
+  Args:
+    ref: the memref to reshape.
+    dim: the dimension to split.
+    factors: sizes of the new dimensions, major first. At most one entry may
+      be `None`; it is inferred so that the product equals the original size.
+
+  Raises:
+    ValueError: if more than one factor is `None`, or if the size of `dim` is
+      not divisible by the product of the known factors.
+  """
+  ref_ty = ir.MemRefType(ref.type)
+  new_shape = list(ref_ty.shape)
+  if sum(f is None for f in factors) > 1:
+    raise ValueError("Can only infer one dimension")
+  known_factor_prod = np.prod([f for f in factors if f is not None])
+  if new_shape[dim] % known_factor_prod:
+    raise ValueError("Non-divisible unfold:", new_shape[dim], factors)
+  # Replace the `None` placeholder (if any) by the inferred size.
+  factors = tuple(
+      new_shape[dim] // known_factor_prod if f is None else f for f in factors
+  )
+  new_shape[dim : dim + 1] = factors
+  identity = ir.AffineMapAttr.get(ir.AffineMap.get_identity(ref_ty.rank))
+  if ref_ty.layout == identity:
+    new_layout = ir.AffineMapAttr.get(
+        ir.AffineMap.get_identity(ref_ty.rank + len(factors) - 1)
+    )
+  else:
+    new_strides, offset = ref_ty.get_strides_and_offset()
+    # Build the new strides from the minor-most factor outwards: the last
+    # new dimension keeps the original stride, each earlier one is scaled by
+    # the sizes of the dimensions after it.
+    prev_stride = new_strides[dim]
+    inserted_strides = []
+    for f in reversed(factors):
+      inserted_strides.append(prev_stride)
+      prev_stride *= f
+    new_strides[dim : dim + 1] = reversed(inserted_strides)
+    new_layout = ir.StridedLayoutAttr.get(offset, new_strides)
+  new_ty = ir.MemRefType.get(
+      new_shape, ref_ty.element_type, new_layout, ref_ty.memory_space
+  )
+  # NOTE(review): the `dim == ref_ty.rank` branch looks unreachable, because
+  # `new_shape[dim]` above raises IndexError for that value of `dim`.
+  if dim == ref_ty.rank:
+    assoc = [[d] for d in range(ref_ty.rank)]
+    assoc[-1].extend(range(ref_ty.rank, ref_ty.rank + len(factors) - 1))
+  else:
+    # Reassociation: one group per source dimension, listing the result
+    # dimensions it expands into.
+    assoc = [[d] for d in range(dim)]
+    assoc.append(list(range(dim, dim + len(factors))))
+    assoc.extend([d + len(factors) - 1] for d in range(dim + 1, ref_ty.rank))
+  assert len(assoc) == ref_ty.rank
+  return memref.expand_shape(new_ty, ref, assoc, [], new_ty.shape)
+def memref_unsqueeze(ref: ir.Value, dim) -> ir.Value:
+  """Inserts a singleton dimension.
+
+  The new size-1 dimension ends up at position `dim`. Appending at the end
+  (`dim == rank`) is handled here; any other position is delegated to
+  `memref_unfold(ref, dim, (1, None))`.
+  """
+  ref_ty = ir.MemRefType(ref.type)
+  if dim == ref_ty.rank:
+    new_shape = list(ref_ty.shape)
+    new_shape.append(1)
+    identity = ir.AffineMapAttr.get(ir.AffineMap.get_identity(ref_ty.rank))
+    if ref_ty.layout == identity:
+      new_layout = ir.AffineMapAttr.get(
+          ir.AffineMap.get_identity(ref_ty.rank + 1)
+      )
+    else:
+      new_strides, offset = ref_ty.get_strides_and_offset()
+      # The appended singleton dimension is given a stride of 1.
+      new_strides.append(1)
+      new_layout = ir.StridedLayoutAttr.get(offset, new_strides)
+    new_ty = ir.MemRefType.get(
+        new_shape, ref_ty.element_type, new_layout, ref_ty.memory_space
+    )
+    # The new trailing dimension is grouped with the last source dimension.
+    assoc = [[d] for d in range(ref_ty.rank)]
+    assoc[-1].append(ref_ty.rank)
+    return memref.expand_shape(new_ty, ref, assoc, [], new_ty.shape)
+  else:
+    return memref_unfold(ref, dim, (1, None))
+def memref_transpose(ref: ir.Value, permutation: Sequence[int]) -> ir.Value:
+  ref_ty = ir.MemRefType(ref.type)
+  strides, offset = ref_ty.get_strides_and_offset()
+  new_strides = [strides[p] for p in permutation]
+  new_shape = [ref_ty.shape[p] for p in permutation]
+  new_layout = ir.StridedLayoutAttr.get(offset, new_strides)
+  new_ty = ir.MemRefType.get(
+      new_shape, ref_ty.element_type, new_layout, ref_ty.memory_space
+  )
+  return memref.transpose(
+      new_ty, ref, ir.AffineMap.get_permutation(permutation)
+  )
+def parse_indices(
+    index, shape: tuple[int, ...]
+) -> tuple[list[ir.Value | int], list[int], list[bool]]:
+  if not isinstance(index, tuple):
+    index = (index,)
+  if trailing_dims := len(shape) - len(index):
+    index += (slice(None),) * trailing_dims
+  base_indices = []
+  slice_shape = []
+  is_squeezed = []
+  for idx, bound in zip(index, shape):
+    if isinstance(idx, (ir.Operation, ir.OpView)):
+      idx = idx.result
+    if isinstance(idx, int):
+      base_indices.append(idx)
+      slice_shape.append(1)
+      is_squeezed.append(True)
+    elif isinstance(idx, slice):
+      if idx.step is not None:
+        raise NotImplementedError("Strided slices not implemented")
+      base_indices.append(idx.start or 0)
+      slice_shape.append((idx.stop or bound) - (idx.start or 0))
+      is_squeezed.append(False)
+    elif isinstance(idx, DynamicSlice):
+      base_indices.append(idx.base)
+      slice_shape.append(idx.length)
+      is_squeezed.append(False)
+    elif isinstance(idx, ir.Value):
+      if not ir.IndexType.isinstance(idx.type):
+        raise ValueError("Expected an index-typed index")
+      base_indices.append(idx)
+      slice_shape.append(1)
+      is_squeezed.append(True)
+    else:
+      raise NotImplementedError(type(idx))
+  assert len(base_indices) == len(slice_shape) == len(is_squeezed) == len(shape)
+  return base_indices, slice_shape, is_squeezed
+def commit_shared():
+  """Emits a `gpu.barrier` followed by an async-shared proxy fence.
+
+  NOTE(review): presumably used to make earlier shared-memory writes visible
+  to async-proxy operations -- confirm against the callers.
+  """
+  gpu.barrier()
+  nvvm.fence_proxy(
+      nvvm.ProxyKind.async_shared, space=nvvm.SharedSpace.shared_cta
+  )
+class BarrierArray:
+  def __init__(self, num_barriers: int, arrival_count: int = 1):
+    barrier_group_ty = ir.Type.parse(
+        "!nvgpu.mbarrier.group<memorySpace=#gpu.address_space<workgroup>,"
+        f" num_barriers={num_barriers}>"
+    )
+    self.num_barriers = num_barriers
+    self.value = nvgpu.mbarrier_create(barrier_group_ty)
+    self.num_barriers = num_barriers
+    index = ir.IndexType.get()
+    if num_barriers > 32:
+      raise NotImplementedError("Only up to 32 barriers per group supported")
+    i32 = ir.IntegerType.get_signless(32)
+    self.phases = memref.alloca(ir.MemRefType.get((), i32), [], [])
+    memref.store(c(0, i32), self.phases, [])
+    with single_thread():
+      for i in range(num_barriers):
+        nvgpu.mbarrier_init(self.value, c(arrival_count, index), c(i, index))
+    gpu.barrier()
+  def __iter__(self) -> Iterator["Barrier"]:
+    for offset in range(self.num_barriers):
+      yield self[offset]
+  def __getitem__(self, offset: ir.Value | int):
+    if isinstance(offset, int):
+      offset = c(offset, ir.IndexType.get())
+    return Barrier(self, offset)
+@dataclasses.dataclass(frozen=True)
+class Barrier:
+  """A single barrier of a `BarrierArray`, identified by its offset."""
+  barrier_array: BarrierArray
+  offset: ir.Value
+  def wait_parity(self, parity):
+    """Emits a try-wait on this barrier for the given phase parity."""
+    index = ir.IndexType.get()
+    # NOTE(review): 10000000 is presumably the tick budget of the try-wait --
+    # confirm against the nvgpu.mbarrier.try_wait.parity op definition.
+    nvgpu.mbarrier_try_wait_parity(
+        self.barrier_array.value, parity, c(10000000, index), self.offset,
+    )
+  def wait(self):
+    """Waits on the tracked parity of this barrier and then flips it.
+
+    The parity is the bit at position `offset` of the array's phase bitmask;
+    the bit is toggled in the stored mask before the wait is emitted.
+    """
+    i32 = ir.IntegerType.get_signless(32)
+    parities = memref.load(self.barrier_array.phases, [])
+    offset_i32 = arith.index_castui(i32, self.offset)
+    bitmask = arith.shli(c(1, i32), offset_i32)
+    parity = arith.cmpi(
+        arith.CmpIPredicate.ne, arith.andi(parities, bitmask), c(0, i32)
+    )
+    new_parities = arith.xori(parities, bitmask)
+    memref.store(new_parities, self.barrier_array.phases, [])
+    self.wait_parity(parity)
+  def arrive(self):
+    """Emits an arrive on this barrier; the returned token is discarded."""
+    token_ty = ir.Type.parse("!nvgpu.mbarrier.token")
+    nvgpu.mbarrier_arrive(token_ty, self.barrier_array.value, self.offset)
+class Partition:
+  source_bounds: tuple[int, ...]
+  target_bounds: tuple[int, ...]
+  partition: tuple[int | None, ...]
+  base_offset: tuple[ir.Value, ...] | None
+  def __init__(
+      self,
+      elements: tuple[int, ...],
+      *,
+      partition: tuple[int | None, ...],
+      base_offset: tuple[ir.Value, ...] | None = None,
+      num_chunks: tuple[int, ...] | None = None,
+      chunk_size: tuple[int, ...] | None = None,
+  ):
+    self.target_bounds = elements
+    self.partition = partition
+    self.base_offset = base_offset
+    if len(self.target_bounds) != len(self.partition):
+      raise ValueError
+    if num_chunks is None == chunk_size is None:
+      raise ValueError(
+          "Exactly one of num_chunks and chunk_size must be specified"
+      )
+    if num_chunks is not None:
+      self.source_bounds = num_chunks
+    else:
+      if len(chunk_size) != len(self.target_bounds):
+        raise ValueError
+      source_bounds = []
+      for els, chunk in zip(elements, chunk_size):
+        if els % chunk:
+          raise ValueError("Non-divisible partition", elements, chunk_size)
+        source_bounds.append(els // chunk)
+      self.source_bounds = tuple(source_bounds)
+    seen_dims = set()
+    for p in self.partition:
+      if p is None:
+        continue
+      if not (0 <= p < len(self.source_bounds)):
+        raise ValueError
+      if p in seen_dims:
+        raise ValueError
+      seen_dims.add(p)
+    for tb, p in zip(self.target_bounds, self.partition):
+      if p is not None and tb % self.source_bounds[p]:
+        raise ValueError("Non-divisible partitioning")
+  @property
+  def num_chunks(self) -> tuple[int, ...]:
+    return self.source_bounds
+  @property
+  def target_block_shape(self):
+    return tuple(tb if p is None else tb // self.source_bounds[p]
+                 for tb, p in zip(self.target_bounds, self.partition))
+  def get_base(self, *source_coords: ir.Value | int) -> list[ir.Value]:
+    coords = []
+    index = ir.IndexType.get()
+    for i, (tbs, p) in enumerate(zip(self.target_block_shape, self.partition)):
+      if p is None:
+        dim_base = c(0, index)
+      else:
+        dim_base = arith.muli(c(tbs, index), source_coords[p])
+      if self.base_offset is not None:
+        dim_base = arith.addi(self.base_offset[i], dim_base)
+      coords.append(dim_base)
+    return coords
+class Partition1D:
+  partition: Partition
+  def __init__(
+      self,
+      elements: int,
+      *,
+      base_offset: ir.Value | None = None,
+      num_chunks: int | None = None,
+      chunk_size: int | None = None,
+  ):
+    self.base_offset = base_offset
+    if num_chunks is None == chunk_size is None:
+      raise ValueError(
+          "Exactly one of num_chunks and chunk_size must be specified"
+      )
+    common_kwargs = dict(elements=(elements,), partition=(0,))
+    if base_offset is not None:
+      common_kwargs["base_offset"] = (base_offset,)
+    if num_chunks is not None:
+      self.partition = Partition(num_chunks=(num_chunks,), **common_kwargs)
+    else:
+      self.partition = Partition(chunk_size=(chunk_size,), **common_kwargs)
+  @property
+  def num_chunks(self) -> int:
+    return self.partition.source_bounds[0]
+  def get_base(self, source_coords: ir.Value) -> ir.Value:
+    return self.partition.get_base(source_coords)[0]
+  def refine(
+      self,
+      *,
+      chunk: ir.Value | None = None,
+      num_chunks: int | None = None,
+      chunk_size: int | None = None,
+  ):
+    return Partition1D(
+        self.partition.target_block_shape[0],
+        num_chunks=num_chunks,
+        chunk_size=chunk_size,
+        base_offset=self.get_base(chunk) if chunk is not None else None,
+    )
+def tile_shape(shape, tiling):
+  if len(tiling) > len(shape):
+    raise ValueError
+  if not tiling:
+    return shape
+  tiling_rank = len(tiling)
+  for s, t in zip(shape[-tiling_rank:], tiling):
+    if s % t:
+      raise ValueError("Non-divisible tiling:", shape, tiling)
+  return (
+      *shape[:-tiling_rank],
+      *(s // t for s, t in zip(shape[-tiling_rank:], tiling)),
+      *tiling,
+  )
+def warp_tree_reduce(value, op, group_size):
+  """Reduce a value across the warpgroup."""
+  assert 32 % group_size == 0 and group_size <= 32
+  i32 = ir.IntegerType.get_signless(32)
+  result = value
+  iters = np.log2(group_size)
+  if not iters.is_integer():
+    raise ValueError(f"Warp reduction group size should be a power of 2 (got {group_size})")
+  iters = int(iters)
+  for i in range(iters):
+    other_result = nvvm.shfl_sync(
+        result.type,
+        c(0xFFFFFFFF, i32),
+        result,
+        c(1 << i, i32),
+        c(0x1F, i32),
+        nvvm.ShflKind.bfly,
+    )
+    result = op(result, other_result)
+  return result

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/mosaic/gpu/wgmma.py ADDED Viewed

	@@ -0,0 +1,518 @@

+# Copyright 2024 The JAX Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import dataclasses
+import enum
+import functools
+import itertools
+import jax
+from jaxlib.mlir import ir
+from jaxlib.mlir.dialects import arith
+from jaxlib.mlir.dialects import builtin
+from jaxlib.mlir.dialects import llvm
+from jaxlib.mlir.dialects import nvvm
+from jaxlib.mlir.dialects import vector
+import numpy as np
+from . import dsl as mgpu
+# mypy: ignore-errors
+# Module-local shorthands for helpers accessed through `mgpu`.
+c = mgpu.c
+bytewidth = mgpu.bytewidth
+@jax.tree_util.register_pytree_node_class
+@dataclasses.dataclass
+class WGMMAAccumulator:
+  """A FragmentedArray that has is synchronized with the async proxy.
+  This implies that it requires no additional synchronization when passed in
+  as a WGMMA accumulator. In particular, when created from a
+  FragmentedArray, the necessary synchronization is inserted at construction.
+  """
+  value: mgpu.FragmentedArray
+  def __init__(self, *, _value: mgpu.FragmentedArray, _sync: bool = True):
+    if _value.layout != mgpu.WGMMA_LAYOUT:
+      raise ValueError("Only WGMMA layouts supported in WGMMAAccumulator")
+    self.value = _value
+    if _sync:
+      self._value = wgmma_fence(_value)
+  @classmethod
+  def zero(cls, m, n, dtype=None):
+    if m % 64 or n % 8:
+      raise ValueError
+    f32 = ir.F32Type.get()
+    if dtype is None:
+      dtype = f32
+    zero = arith.constant(dtype, ir.FloatAttr.get(dtype, 0.0))
+    return cls(
+        _value=mgpu.FragmentedArray.splat(zero, (m, n), mgpu.WGMMA_LAYOUT)
+    )
+  @classmethod
+  def from_registers(cls, registers):
+    return cls(_value=registers)
+  def tree_flatten(self):
+    return (self.value,), ()
+  @classmethod
+  def tree_unflatten(cls, aux, value):
+    del aux
+    return cls(_value=value[0], _sync=False)
+def wgmma_encode(x: int):
+  result = (x & 0x3FFFF) >> 4
+  if result << 4 != x:
+    raise ValueError("Cannot encode value in a WGMMA descriptor")
+  return result
+def llvm_mul(x, y):
+  return llvm.mul(x, y, overflow_flags=llvm.IntegerOverflowFlags.none)
+def llvm_add(x, y):
+  return llvm.add(x, y, overflow_flags=llvm.IntegerOverflowFlags.none)
+def get_memref_base(memref_arg, memory_space=None):
+  i64 = ir.IntegerType.get_signless(64)
+  memref_ty = ir.MemRefType(memref_arg.type)
+  if len(memref_ty.shape) == 0:
+    raise NotImplementedError
+  elem_bytewidth = bytewidth(memref_ty.element_type)
+  rank = len(memref_ty.shape)
+  # TODO: Read out memory space from memref
+  space = "" if memory_space is None else "<" + str(memory_space) + ">"
+  ptr_ty = ir.Type.parse("!llvm.ptr" + space)
+  desc_ty = ir.Type.parse(
+      f"!llvm.struct<({ptr_ty}, {ptr_ty}, i64, array<{rank} x i64>,"
+      f" array<{rank} x i64>)>"
+  )
+  desc = builtin.UnrealizedConversionCastOp([desc_ty], [memref_arg])
+  aligned_ptr = llvm.extractvalue(ptr_ty, desc, [1])
+  offset_elems = llvm.extractvalue(i64, desc, [2])
+  offset_bytes = llvm_mul(offset_elems, c(elem_bytewidth, i64))
+  return llvm.inttoptr(
+      ptr_ty, llvm_add(llvm.ptrtoint(i64, aligned_ptr), offset_bytes)
+  )
+def create_descriptor(
+    memref_arg,
+    leading_byte_offset: int,
+    stride_byte_offset: int,
+    swizzle: int | None,
+    memory_space: int | None = None,
+    nvgpu_type=None,
+):
+  """Builds a 64-bit WGMMA matrix descriptor for `memref_arg`.
+
+  The descriptor combines, from the low bits upwards: the encoded base
+  address, the encoded leading byte offset (shifted by 16), the encoded
+  stride byte offset (shifted by 32) and the swizzle mode (shifted by 62).
+
+  Args:
+    memref_arg: the memref whose base address is described.
+    leading_byte_offset: must be encodable by `wgmma_encode`.
+    stride_byte_offset: must be encodable by `wgmma_encode`.
+    swizzle: `None` (no swizzle) or 128.
+    memory_space: forwarded to `get_memref_base`.
+    nvgpu_type: if given, the descriptor is cast to this type.
+
+  Raises:
+    NotImplementedError: for any other `swizzle` value.
+    ValueError: if an offset cannot be encoded.
+  """
+  i64 = ir.IntegerType.get_signless(64)
+  ptr_val = llvm.ptrtoint(i64, get_memref_base(memref_arg, memory_space))
+  if swizzle is None:
+    swizzle_encoding = 0
+  elif swizzle == 128:
+    swizzle_encoding = 1
+  else:
+    raise NotImplementedError(swizzle)
+  # Same encoding as `wgmma_encode`, but applied to a runtime value: keep the
+  # low 18 bits of the address and drop the lowest 4.
+  encoded_base_addr = llvm.LShrOp(
+      llvm.AndOp(ptr_val, c(0x3FFFF, i64)), c(4, i64)
+  )
+  desc_const = (
+      (wgmma_encode(leading_byte_offset) << 16)
+      | (wgmma_encode(stride_byte_offset) << 32)
+      |
+      # We ignore the offset
+      (swizzle_encoding << 62)
+  )
+  desc = llvm.OrOp(encoded_base_addr, c(desc_const, i64))
+  if nvgpu_type is not None:
+    desc = builtin.UnrealizedConversionCastOp([nvgpu_type], [desc])
+  return desc.result
+def _unpack_i32(vec_ty, r):
+  i32 = ir.IntegerType.get_signless(32)
+  return vector.bitcast(
+      vec_ty, vector.splat(ir.VectorType.get((1,), i32), r)
+  )
+def _supported_wgmma_types(dtype, abtype) -> bool:
+  input_types_are = lambda ty: ty.isinstance(abtype)
+  if ir.F32Type.isinstance(dtype):
+    return any(input_types_are(ty) for ty in (ir.FloatTF32Type, ir.BF16Type, ir.F16Type))
+  elif ir.F16Type.isinstance(dtype):
+    return input_types_are(ir.F16Type)
+  else:
+    return False
+def wgmma_m64k128B(
+    acc: np.ndarray,  # of register Values
+    a,
+    b_descriptor: ir.Value,
+    a_transpose: bool | None,
+    b_transpose: bool,
+    a_k_stride: int | None,
+    b_k_stride: int,
+    n: int,
+    element_type: ir.Type,
+):
+  """Emits four chained `wgmma.mma_async` instructions and returns the new accumulator.
+
+  Each instruction is an inline-asm call that consumes `32 // bytewidth`
+  elements along K; between calls the A operand and the B descriptor are
+  advanced. The accumulator registers are threaded through all four calls.
+
+  Args:
+    acc: accumulator registers; must have shape `(1, _, 2, 1)`.
+    a: either a `mgpu.FragmentedArray` (f16/bf16, WGMMA layout, shape
+      `(64, 64)`) held in registers, or a descriptor value.
+    b_descriptor: descriptor of the B operand.
+    a_transpose: must be `None` when A is in registers and a bool otherwise.
+    b_transpose: whether B is transposed.
+    a_k_stride: must be `None` when A is in registers, otherwise a multiple
+      of 16; the A descriptor advances by `a_k_stride >> 4` per instruction.
+    b_k_stride: a multiple of 16; the B descriptor advances by
+      `b_k_stride >> 4` per instruction.
+    n: the N extent of the instruction; a multiple of
+      `128 // bytewidth(element_type)`.
+    element_type: element type of the A and B operands.
+
+  Returns:
+    The updated accumulator registers, in the same shape as `acc`.
+
+  Raises:
+    ValueError: if any of the constraints above is violated or the type
+      combination is unsupported.
+  """
+  out_ty = ir.VectorType(acc.flat[0].type).element_type
+  if not _supported_wgmma_types(out_ty, element_type):
+    raise ValueError(f"Usupported wgmma types {(out_ty, element_type)=}")
+  f16 = ir.F16Type.get()
+  i32 = ir.IntegerType.get_signless(32)
+  i64 = ir.IntegerType.get_signless(64)
+  index = ir.IndexType.get()
+  if b_k_stride % 16:
+    raise ValueError
+  if n % (128 // bytewidth(element_type)):
+    raise ValueError
+  # Only 16-bit types support transposes
+  supports_transpose = bytewidth(element_type) == 2
+  if not supports_transpose and (a_transpose or b_transpose):
+    raise ValueError("Only f16 WGMMA supports transposes")
+  if a_in_regs := isinstance(a, mgpu.FragmentedArray):
+    if a.mlir_dtype != ir.F16Type.get() and a.mlir_dtype != ir.BF16Type.get():
+      raise ValueError(f"Unsupported A register array dtype: {a.mlir_dtype}")
+    if a.layout != mgpu.WGMMA_LAYOUT or a.shape != (64, 64):
+      raise ValueError("Unsupported A register array layout")
+    if a_k_stride is not None or a_transpose is not None:
+      raise ValueError("Unsupported WGMMA features with A in registers")
+  else:
+    if a_k_stride is None or a_k_stride % 16:
+      raise ValueError
+    if a_transpose is None:
+      raise ValueError
+  # Flatten the accumulator into scalar asm operands. f32 accumulators pass
+  # every vector element separately; f16 accumulators pass each register as
+  # one i32.
+  if ir.F32Type.isinstance(out_ty):
+    num_acc_regs = n // 2
+    out_ty_field = out_ty
+    acc_regs = [  # pylint: disable=g-complex-comprehension
+        vector.extractelement(reg, position=c(pos, index))
+        for reg in acc.flat
+        for pos in range(2)
+    ]
+    to_acc_vec_regs = functools.partial(_as_fragmented_reg_ndarray, dtype=out_ty, shape=acc.shape)
+    acc_constraint = "f"
+  elif ir.F16Type.isinstance(out_ty):
+    num_acc_regs = n // 4
+    out_ty_field = i32
+    acc_regs = [_as_i32_reg(reg) for reg in acc.flat]
+    vec_ty = ir.VectorType(acc.flat[0].type)
+    to_acc_vec_regs = lambda regs : np.array([_unpack_i32(vec_ty, reg) for reg in regs]).reshape(acc.shape)
+    acc_constraint = "r"
+  else:
+    raise ValueError(f"WGMMA instruciton only supports f32 and f16 out (got {out_ty})")
+  # Immediate operands after the predicate: two scales, plus the transpose
+  # flags when the element type supports them.
+  num_imm_regs = 4 if supports_transpose else 2
+  if a_in_regs:
+    a_reg_constraints = ["r"] * 4  # 4x f16x2 registers
+    num_imm_regs -= 1  # transpose not supported for a in registers
+  else:
+    a_reg_constraints = ["l"]  # descriptor
+  # Reference for i/o aliasing: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
+  # Seems like it's not actually documented in LLVM IR docs.
+  reg_constraints_list = (
+      [f"={acc_constraint}"] * num_acc_regs  # accumulator registers
+      + [str(i) for i in range(num_acc_regs)]  # we alias outputs as inputs, too.
+      + a_reg_constraints  # a descriptor / registers
+      + ["l"] * 1  # b descriptor
+      + ["n"] * (1 + num_imm_regs)  # literal constants
+  )
+  reg_constraints = ",".join(reg_constraints_list)
+  # Hand out `$N` operand placeholders in the same order as the constraints.
+  reg_count = itertools.count()
+  def take_regs(n):
+    return (f"${i}" for i in itertools.islice(reg_count, n))
+  acc_reg_vector = "{" + ",".join(take_regs(num_acc_regs)) + "}"
+  for _ in take_regs(num_acc_regs):  # Ignore next entries: aliasing.
+    pass
+  if a_in_regs:
+    a_regs = "{" + ",".join(take_regs(len(a_reg_constraints))) + "}"
+  else:
+    a_regs, = take_regs(1)
+  b_desc_reg, use_out_reg = take_regs(2)
+  imm_regs = ", ".join(take_regs(num_imm_regs))  # Immediate regs (scale, ...).
+  # Every constraint must have been matched with exactly one placeholder.
+  assert next(reg_count) == len(reg_constraints_list)
+  el_ty = element_type
+  k_instr = 32 // bytewidth(element_type)
+  wgmma_instr = (
+      f"wgmma.mma_async.sync.aligned.m64n{n}k{k_instr}.{out_ty}.{el_ty}.{el_ty} "
+      f"{acc_reg_vector}, {a_regs}, {b_desc_reg}, p, {imm_regs};"
+  )
+  # The `use_out` immediate is turned into the predicate `p` of the instruction.
+  ptx = f"{{ .reg .pred p; setp.ne.b32 p, {use_out_reg}, 0; {wgmma_instr} }}\n"
+  def lc(x):
+    return llvm.ConstantOp(i32, ir.IntegerAttr.get(i32, x)).result
+  use_out = scale_a = scale_b = lc(1)
+  imms = [use_out, scale_a, scale_b]
+  if supports_transpose and a_transpose is not None:
+    imms += [lc(int(a_transpose)), lc(int(b_transpose))]
+  elif supports_transpose:
+    imms += [lc(int(b_transpose))]
+  if acc.ndim != 4 or acc.shape[0] != 1 or acc.shape[2:] != (2, 1):
+    raise ValueError(acc.shape)
+  acc_struct_type = ir.Type.parse(
+      f"!llvm.struct<({','.join(str(out_ty_field) for _ in acc_regs)})>"
+  )
+  for i in range(4):
+    # Slice out the relevant part of A or advance the A descriptor.
+    if a_in_regs:
+      a_slice = a[:, (i * 16) : ((i + 1) * 16)]
+      a_args = [_as_i32_reg(v) for v in a_slice.registers.flat]
+    else:
+      if i > 0:
+        a = llvm_add(
+            a,
+            llvm.ConstantOp(i64, ir.IntegerAttr.get(i64, a_k_stride >> 4)),
+        )
+      a_args = [a]
+    # Advance the B descriptor.
+    if i > 0:
+      b_descriptor = llvm_add(
+          b_descriptor,
+          llvm.ConstantOp(i64, ir.IntegerAttr.get(i64, b_k_stride >> 4)),
+      )
+    assert len(a_args) == len(a_reg_constraints)
+    acc_struct = llvm.inline_asm(
+        acc_struct_type,
+        [*acc_regs, *a_args, b_descriptor, *imms],
+        ptx,
+        reg_constraints,
+        asm_dialect=0,
+        has_side_effects=True,
+    )
+    # The outputs of this instruction are the accumulator inputs of the next.
+    acc_regs = [
+        llvm.extractvalue(out_ty_field, acc_struct, [i]) for i in range(len(acc_regs))
+    ]
+  return to_acc_vec_regs(acc_regs)
+class WGMMALayout(enum.Enum):
+  """Element order of a WGMMA operand.
+  Values of this enum are accepted by `wgmma` as `a_order` and `b_order`,
+  where the order is documented as applying only within each tile.
+  """
+  ROW_MAJOR = enum.auto()
+  COL_MAJOR = enum.auto()
+# TODO(apaszke): Remove WGMMALayout. Make input shapes logical and infer
+# transpositions from memref strides.
+def wgmma(
+    acc: WGMMAAccumulator,
+    a,
+    b,
+    *,
+    # Order only applies within each tile!
+    a_order: WGMMALayout | None = None,
+    b_order: WGMMALayout = WGMMALayout.ROW_MAJOR,
+):
+  """Emits WGMMA instructions accumulating the tiled product of a and b.
+  Every (M group, K group) pair of tiles is handed to `wgmma_m64k128B`, which
+  updates the accumulator registers belonging to that M group.
+  Args:
+    acc: Accumulator to start from. Its `value.registers` are copied before
+      being updated, and they are indexed by M group along the leading axis.
+    a: Left operand. Either an `mgpu.FragmentedArray` (A in registers) of
+      shape (M, K), with M a multiple of 64, K a multiple of `kn_tile` and an
+      f16/bf16 element type, or a memref-typed value of shape
+      [groups_m, groups_k, 64, kn_tile] that is addressed through a
+      descriptor.
+    b: Right operand, a memref-typed value of shape
+      [groups_k, groups_n, kn_tile, kn_tile].
+    a_order: Order of A within each tile. Only accepted for the memref form
+      of `a`, where None is treated as ROW_MAJOR.
+    b_order: Order of B within each tile.
+  Returns:
+    A new `WGMMAAccumulator` wrapping the updated registers in
+    `mgpu.WGMMA_LAYOUT`, constructed with `_sync=False`.
+  Raises:
+    ValueError: If an element type is unsupported, the element types of A and
+      B differ, a shape or stride does not match the tiling above, or
+      `a_order` is given while A is in registers.
+  """
+  # A is either a FragmentedArray held in registers or a memref.
+  if a_in_regs := isinstance(a, mgpu.FragmentedArray):
+    a_element_type = a.mlir_dtype
+    a_shape = a.shape
+  else:
+    a_ty = ir.MemRefType(a.type)
+    a_element_type = a_ty.element_type
+    a_shape = a_ty.shape
+  b_ty = ir.MemRefType(b.type)
+  supported_types = {ir.F16Type.get(), ir.BF16Type.get(), ir.F32Type.get()}
+  if a_element_type not in supported_types:
+    raise ValueError(a_element_type)
+  if b_ty.element_type not in supported_types:
+    raise ValueError(b_ty.element_type)
+  # Both operands must share one element type.
+  if (element_type := a_element_type) != b_ty.element_type:
+    raise ValueError
+  element_bytewidth = bytewidth(element_type)
+  # Number of elements that fit in 128 bytes; tiles are this wide in K and N.
+  kn_tile = 128 // element_bytewidth
+  groups_k, groups_n = b_ty.shape[:2]
+  if b_ty.shape[2:] != [kn_tile, kn_tile]:
+    raise ValueError(b_ty.shape)
+  if a_in_regs:
+    # Only 16-bit float types are accepted for a register-resident A.
+    if a_element_type != ir.F16Type.get() and a_element_type != ir.BF16Type.get():
+      raise ValueError(a_element_type)
+    if a_shape[0] % 64 or a_shape[1] % kn_tile:
+      raise ValueError(a_shape)
+    if a_shape[1] // kn_tile != groups_k:
+      raise ValueError(a_shape[1] // kn_tile, groups_k)
+    groups_m = a_shape[0] // 64
+    if a_order is not None:
+      raise ValueError(
+          "a_order can only be specified when A is in shared memory"
+      )
+  else:
+    # The memref form of A is already tiled: [groups_m, groups_k, 64, kn_tile].
+    groups_m = a_shape[0]
+    if a_shape[1] != groups_k:
+      raise ValueError(a_shape[1], groups_k)
+    if a_shape[2:] != [64, kn_tile]:
+      raise ValueError(a_shape)
+    if a_order is None:
+      a_order = WGMMALayout.ROW_MAJOR
+  row_major = WGMMALayout.ROW_MAJOR
+  col_major = WGMMALayout.COL_MAJOR
+  # Keyword arguments for create_descriptor; only the leading byte offset
+  # depends on the requested order.
+  a_desc_fields = dict(
+      leading_byte_offset=((1 if a_order == row_major else 512) << 4),
+      stride_byte_offset=(64 << 4),
+      swizzle=128,
+      memory_space=3,
+  )
+  b_desc_fields = dict(
+      leading_byte_offset=((512 if b_order == row_major else 1) << 4),
+      stride_byte_offset=(64 << 4),
+      swizzle=128,
+      memory_space=3,
+  )
+  # Keyword arguments forwarded unchanged to every wgmma_m64k128B call below.
+  wgmma_params = dict(
+      a_transpose=a_order == col_major,
+      b_transpose=b_order == row_major,
+      a_k_stride=(2 if a_order == row_major else 128) * 16,
+      b_k_stride=(128 if b_order == row_major else 2) * 16,
+      n=(groups_n * kn_tile),
+      # f32 operands are passed on with a tf32 element type.
+      element_type=ir.FloatTF32Type.get()
+      if ir.F32Type.isinstance(element_type)
+      else element_type,
+  )
+  if a_in_regs:
+    # A register-resident A has no descriptor stride or transpose setting.
+    wgmma_params["a_k_stride"] = wgmma_params["a_transpose"] = None
+  if a_in_regs:
+    a = wgmma_fence(a)  # Make sure the registers are ready.
+    a_m_byte_stride = a_k_byte_stride = a_desc_base = None  # Silence pytype.
+  else:
+    a_desc_base = create_descriptor(a, **a_desc_fields)
+    a_strides, _ = ir.MemRefType(a.type).get_strides_and_offset()
+    a_byte_strides = [s * element_bytewidth for s in a_strides]
+    a_m_byte_stride, a_k_byte_stride = a_byte_strides[:2]
+    # Inside a tile, rows must be 128 bytes apart and elements contiguous.
+    if a_byte_strides[2:] != [128, element_bytewidth]:
+      raise ValueError(a_byte_strides)
+  b_desc_base = create_descriptor(b, **b_desc_fields)
+  b_strides, _ = b_ty.get_strides_and_offset()
+  b_byte_strides = [s * element_bytewidth for s in b_strides]
+  b_k_byte_stride = b_byte_strides[0]
+  # N groups must be 128 * kn_tile bytes apart, with the same in-tile strides
+  # as required for A.
+  if b_byte_strides[1:] != [128 * kn_tile, 128, element_bytewidth]:
+    raise ValueError(b_byte_strides)
+  i64 = ir.IntegerType.get_signless(64)
+  # Work on a copy so the registers held by `acc` are left untouched.
+  new_acc_regs = acc.value.registers.copy()
+  for mi in range(groups_m):
+    for ki in range(groups_k):
+      if a_in_regs:
+        # Slice the (mi, ki) tile out of the register array.
+        a_mk = a[mi * 64 : (mi + 1) * 64, ki * kn_tile : (ki + 1) * kn_tile]
+      else:
+        # Offset the base descriptor to the (mi, ki) tile.
+        a_mk = llvm_add(
+            a_desc_base,
+            c(wgmma_encode(mi * a_m_byte_stride + ki * a_k_byte_stride), i64),
+        )
+      b_k = llvm_add(b_desc_base, c(wgmma_encode(ki * b_k_byte_stride), i64))
+      # Each K group accumulates into the registers of M group `mi`.
+      new_acc_regs[mi : mi + 1] = wgmma_m64k128B(
+          new_acc_regs[mi : mi + 1], a_mk, b_k, **wgmma_params
+      )
+  return WGMMAAccumulator(
+      _value=mgpu.FragmentedArray(
+          _registers=new_acc_regs, _layout=mgpu.WGMMA_LAYOUT
+      ),
+      _sync=False,
+  )
+def wgmma_fence(array: mgpu.FragmentedArray):
+  """Fences the array construction from WGMMA instructions.
+  This is a little workaround to force LLVM to initialize the PTX registers
+  before the wgmma.fence.sync.aligned instruction. Otherwise, LLVM treats
+  in-register computation as pure and can move it after the fence, which is
+  explicitly disallowed by the PTX programming model.
+  Args:
+    array: Array whose registers are 2-element vectors of f32, f16 or bf16.
+  Returns:
+    A new `mgpu.FragmentedArray` with the same layout and register shape,
+    whose registers are the outputs of the inline assembly.
+  Raises:
+    NotImplementedError: If the element type is not f32, f16 or bf16.
+  """
+  i32 = ir.IntegerType.get_signless(32)
+  index = ir.IndexType.get()
+  dtype = array.mlir_dtype
+  src_vec_ty = ir.VectorType(array.registers.flat[0].type)
+  assert src_vec_ty.shape == [2]
+  if dtype == ir.F32Type.get():
+    # Split every 2-element vector into two scalar f32 operands.
+    regs = [  # pylint: disable=g-complex-comprehension
+        vector.extractelement(reg, position=c(pos, index))
+        for reg in array.registers.flat
+        for pos in range(2)
+    ]
+    reg_dtype = dtype
+    # Outputs are listed first, so operand $i is an output and
+    # $(len(regs) + i) is the matching input.
+    reg_constraints_list = ["=f"] * len(regs) + ["f"] * len(regs)
+    ptx_lines = [f"mov.f32 ${i}, ${len(regs)+i}" for i in range(len(regs))]
+  elif dtype == ir.F16Type.get() or dtype == ir.BF16Type.get():
+    # Each 2-element 16-bit vector is passed as a single i32 operand.
+    regs = [_as_i32_reg(reg) for reg in array.registers.flat]
+    reg_dtype = i32
+    reg_constraints_list = ["=r"] * len(regs) + ["r"] * len(regs)
+    ptx_lines = [f"mov.b32 ${i}, ${len(regs)+i}" for i in range(len(regs))]
+  else:
+    raise NotImplementedError(dtype)
+  reg_constraints = ",".join(reg_constraints_list)
+  # Copy over the registers. ptxas should be able to remove the moves.
+  ptx_lines.append("wgmma.fence.sync.aligned")
+  ptx = ";\n".join(ptx_lines) + ";\n"
+  dtype_str = str(reg_dtype)
+  # The inline asm returns one struct with a field per output register.
+  struct_ty = ir.Type.parse(
+      f"!llvm.struct<({','.join(dtype_str for _ in regs)})>"
+  )
+  acc_struct = llvm.inline_asm(
+      struct_ty, regs, ptx, reg_constraints,
+      asm_dialect=0, has_side_effects=True,
+  )
+  regs = [
+      llvm.extractvalue(reg_dtype, acc_struct, [i]) for i in range(len(regs))
+  ]
+  # Rebuild registers with the original vector type and ndarray shape.
+  if dtype == ir.F32Type.get():
+    registers = _as_fragmented_reg_ndarray(
+          regs, array.mlir_dtype, array.registers.shape
+    )
+  elif dtype == ir.F16Type.get() or dtype == ir.BF16Type.get():
+    regs = [_unpack_i32(src_vec_ty, r) for r in regs]
+    registers = np.asarray(regs, dtype=object).reshape(array.registers.shape)
+  else:
+    raise NotImplementedError(dtype)
+  return mgpu.FragmentedArray(_registers=registers, _layout=array.layout)
+def _as_fragmented_reg_ndarray(flat_regs, dtype: ir.Type, shape: tuple[int, ...]):
+  """Packs consecutive scalar pairs into 2-element vectors.
+  Elements 2*i and 2*i + 1 of `flat_regs` become lanes 0 and 1 of the i-th
+  vector. The vectors are returned as an object ndarray reshaped to `shape`.
+  """
+  packed = []
+  for lanes in zip(flat_regs[::2], flat_regs[1::2]):
+    pair_vec = llvm.mlir_undef(ir.VectorType.get((2,), dtype))
+    # Fill lane 0, then lane 1.
+    for lane, scalar in enumerate(lanes):
+      pair_vec = llvm.insertelement(pair_vec, scalar, position=_lc(lane))
+    packed.append(pair_vec)
+  as_objects = np.asarray(packed, dtype=object)
+  return as_objects.reshape(shape)
+def _as_i32_reg(v):
+  """Bitcasts the vector `v` to a one-element i32 vector and returns that element."""
+  one_i32_ty = ir.VectorType.get((1,), ir.IntegerType.get_signless(32))
+  as_i32_vec = vector.bitcast(one_i32_ty, v)
+  return llvm.extractelement(as_i32_vec, _lc(0))
+def _lc(x):
+  """Creates an LLVM i32 constant with value `x` and returns its result."""
+  i32_ty = ir.IntegerType.get_signless(32)
+  value_attr = ir.IntegerAttr.get(i32_ty, x)
+  const_op = llvm.ConstantOp(i32_ty, value_attr)
+  return const_op.result

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/__init__.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# Copyright 2023 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for pallas, a JAX extension for custom kernels."""
+from jax._src import pallas
+from jax._src.pallas.core import BlockSpec
+from jax._src.pallas.core import no_block_spec
+from jax._src.pallas.core import Unblocked
+from jax._src.pallas.core import unblocked
+from jax._src.pallas.pallas_call import pallas_call
+from jax._src.pallas.pallas_call import pallas_call_p
+from jax._src.pallas.primitives import atomic_add
+from jax._src.pallas.primitives import atomic_and
+from jax._src.pallas.primitives import atomic_cas
+from jax._src.pallas.primitives import atomic_max
+from jax._src.pallas.primitives import atomic_min
+from jax._src.pallas.primitives import atomic_or
+from jax._src.pallas.primitives import atomic_xchg
+from jax._src.pallas.primitives import atomic_xor
+from jax._src.pallas.primitives import debug_print
+from jax._src.pallas.primitives import dot
+from jax._src.pallas.primitives import load
+from jax._src.pallas.primitives import max_contiguous
+from jax._src.pallas.primitives import multiple_of
+from jax._src.pallas.primitives import num_programs
+from jax._src.pallas.primitives import program_id
+from jax._src.pallas.primitives import store
+from jax._src.pallas.primitives import swap
+from jax._src.pallas.utils import cdiv
+from jax._src.pallas.utils import next_power_of_2
+from jax._src.pallas.utils import strides_from_shape
+from jax._src.pallas.utils import when
+from jax._src.state.indexing import ds
+from jax._src.state.indexing import dslice
+from jax._src.state.indexing import Slice
+from jax._src.state.primitives import broadcast_to

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/gpu.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# Copyright 2023 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Triton-specific Pallas APIs."""
+from jax._src.pallas.triton.primitives import approx_tanh
+from jax._src.pallas.triton.primitives import elementwise_inline_asm

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/ops/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+# Copyright 2023 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# All files within ops should be treated as user code.
+import os
+import jax._src.source_info_util
+# Register this package's directory with source_info_util; going by the
+# comment above, this is what marks files under ops as user code.
+jax._src.source_info_util.register_inclusion(os.path.dirname(__file__))
+# Remove the helper names again so they are not left as module attributes.
+del os, jax

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/ops/gpu/attention.py ADDED Viewed

	@@ -0,0 +1,573 @@

+# Copyright 2023 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module containing fused attention forward and backward pass."""
+from __future__ import annotations
+import functools
+from typing import Any, Optional
+import jax
+from jax import lax
+from jax.experimental import pallas as pl
+import jax.numpy as jnp
+import numpy as np
+DEFAULT_MASK_VALUE = -0.7 * float(np.finfo(np.dtype("float32")).max)
+def mha_forward_kernel(
+    q_ref,
+    k_ref,
+    v_ref,  # Input arrays
+    segment_ids_ref: jax.Array | None,  # segment_id arrays
+    o_ref: Any,  # Output
+    *residual_refs: Any,  # Residual outputs
+    num_heads: int,
+    sm_scale: float,
+    causal: bool,
+    block_q: int,
+    block_d: int,
+    block_k: int,
+):
+  """Pallas kernel computing attention for one block of query rows.
+  The block of `block_q` query rows is selected by `pl.program_id(0)`. The
+  kernel loops over key/value blocks of `block_k` rows and keeps a running
+  maximum, running normalizer and normalized output.
+  Args:
+    q_ref: Queries, indexed as [row, feature]; dimension 0 is the sequence
+      length.
+    k_ref: Keys, indexed the same way.
+    v_ref: Values, indexed the same way.
+    segment_ids_ref: None, or a 1D ref of segment ids used to mask out pairs
+      of rows whose ids differ.
+    o_ref: Output ref; receives the result cast to its dtype.
+    *residual_refs: Either empty or `(l_ref, m_ref)`, which receive the final
+      normalizer and running maximum of each query row.
+    num_heads: Not used by the active code (only referenced in the
+      commented-out workaround below).
+    sm_scale: Factor applied to the q.k^T scores when it differs from 1.
+    causal: Whether keys at positions after the query row are masked out.
+    block_q: Number of query rows handled per program.
+    block_d: Feature width of the accumulator and of the loaded value tile.
+    block_k: Number of key/value rows handled per loop iteration.
+  """
+  seq_len = q_ref.shape[0]
+  start_q = pl.program_id(0)
+  # m_i and l_i (see FlashAttention paper) are updated during the k,v loop.
+  m_i = jnp.zeros(block_q, dtype=jnp.float32) - float('inf')
+  l_i = jnp.zeros(block_q, dtype=jnp.float32)
+  # o is the buffer where we accumulate the output on sram.
+  o = jnp.zeros((block_q, block_d), dtype=jnp.float32)
+  # Load q: it will stay in L1 throughout. Indices form a matrix because we
+  # read, compute, and write all in 2d chunks. 1 element ~= 1 CUDA thread index.
+  # q tile has shape [block_q, block_d], block_d == head_dim.
+  curr_q_slice = pl.dslice(start_q * block_q, block_q)
+  q = pl.load(q_ref, (curr_q_slice, pl.dslice(None)))
+  q_segment_ids = (
+      None
+      if segment_ids_ref is None
+      else pl.load(segment_ids_ref, (curr_q_slice,))
+  )
+  # In FlashAttention algorithm 1 there are 2 loops: slow over tiles of kv
+  # (size Bc == block_k here), and fast over blocks of q (size Br == block_q
+  # here). Here we only loop over blocks of kv to process entire seq_len, the
+  # loop over blocks of q is carried out by the grid.
+  def body(start_k, carry):
+    # carry is (normalized output, running max, running normalizer).
+    o_prev, m_prev, l_prev = carry
+    curr_k_slice = pl.dslice(start_k * block_k, block_k)
+    k = pl.load(k_ref, (curr_k_slice, slice(None)))
+    kv_segment_ids = (
+        None
+        if segment_ids_ref is None
+        else pl.load(segment_ids_ref, (curr_k_slice,))
+    )
+    qk = pl.dot(q, k.T)   # [block_q, block_k]
+    if sm_scale != 1.:
+      qk *= sm_scale  # [block_q, block_k]
+    # Avoids Triton crash.
+    # if num_heads > 2:
+    #   qk = qk.astype(q_ref.dtype)
+    #   qk = qk.astype(jnp.float32)
+    if causal or segment_ids_ref is not None:
+      mask = None
+      if segment_ids_ref is not None:
+        mask = segment_mask(q_segment_ids, kv_segment_ids)
+      if causal:
+        # Absolute row positions of this q block and this kv block.
+        span_q = start_q * block_q + jnp.arange(block_q)
+        span_k = start_k * block_k + jnp.arange(block_k)
+        causal_mask = span_q[:, None] >= span_k[None, :]
+        mask = (
+            causal_mask if mask is None else jnp.logical_and(mask, causal_mask)
+        )
+      # Apply mask to qk.
+      qk = jnp.where(mask, qk, DEFAULT_MASK_VALUE)
+    m_curr = qk.max(axis=-1)
+    m_next = jnp.maximum(m_prev, m_curr)
+    # Rescale the old normalizer to the new running maximum.
+    correction = jnp.exp(m_prev - m_next)
+    l_prev_corr = correction * l_prev
+    s_curr = jnp.exp(
+        qk - m_next[:, None]
+    )  # Use m_next instead of m_curr to avoid a correction on l_curr
+    l_curr = s_curr.sum(axis=-1)
+    l_next = l_prev_corr + l_curr
+    # The output is kept normalized after every step: new weights are divided
+    # by l_next and the previous output is rescaled by l_prev_corr / l_next.
+    l_next_rcp = 1. / l_next
+    s_curr = s_curr * l_next_rcp[:, None]
+    o_prev_corr = (l_prev_corr * l_next_rcp)[:, None] * o_prev
+    v = pl.load(v_ref, (curr_k_slice, pl.dslice(block_d)))
+    o_curr = pl.dot(s_curr.astype(v.dtype), v)
+    o_next = o_prev_corr + o_curr
+    return o_next, m_next, l_next
+  if causal:
+    # Only kv blocks that start before the end of this q block are visited.
+    # Ceildiv (`pl.cdiv` and `//` do not work due to type of start_q)
+    upper_bound = lax.div(block_q * (start_q + 1) + block_k - 1, block_k)
+  else:
+    upper_bound = pl.cdiv(seq_len, block_k)  # type: ignore
+  o, m_i, l_i = lax.fori_loop(0, upper_bound, body, (o, m_i, l_i))
+  if residual_refs:
+    # Save the softmax statistics when residual outputs were requested.
+    l_ref, m_ref = residual_refs
+    pl.store(l_ref, (curr_q_slice,), l_i)
+    pl.store(m_ref, (curr_q_slice,), m_i)
+  # Write output to dram.
+  o = o.astype(o_ref.dtype)
+  pl.store(o_ref, (curr_q_slice, pl.dslice(None)), o)
+def segment_mask(
+    q_segment_ids: jax.Array,
+    kv_segment_ids: jax.Array,
+):
+  """Returns a boolean mask that is True where segment ids are equal.
+  The query ids get a new trailing axis; the key/value ids get a new leading
+  axis when they are 1D and a new axis at position 1 otherwise. The two are
+  then compared elementwise with broadcasting.
+  """
+  # Queries as a column: [B, T, 1] or [T, 1].
+  q_column = jnp.expand_dims(q_segment_ids, axis=-1)
+  # Keys/values as a row: [1, S] for 1D ids, otherwise [B, 1, S].
+  kv_axis = 0 if kv_segment_ids.ndim == 1 else 1
+  kv_row = jnp.expand_dims(kv_segment_ids, axis=kv_axis)
+  same_segment = jnp.equal(q_column, kv_row)
+  return same_segment.astype(jnp.bool_)
+@functools.partial(
+    jax.custom_vjp, nondiff_argnums=[4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+)
+@functools.partial(
+    jax.jit,
+    static_argnames=[
+        "sm_scale",
+        "causal",
+        "block_q",
+        "block_k",
+        "backward_pass_impl",
+        "num_warps",
+        "num_stages",
+        "grid",
+        "interpret",
+        "debug",
+    ],
+)
+def mha(
+    q,
+    k,
+    v,
+    segment_ids: jnp.ndarray | None,
+    sm_scale: float = 1.0,
+    causal: bool = False,
+    block_q: int = 128,
+    block_k: int = 128,
+    backward_pass_impl: str = "triton",
+    num_warps: int | None = None,
+    num_stages: int = 2,
+    grid: tuple[int, ...] | None = None,
+    interpret: bool = False,
+    debug: bool = False,
+):
+  """Multi-head attention forward pass via `mha_forward_kernel`.
+  Args:
+    q: Queries of shape (batch_size, seq_len, num_heads, head_dim).
+    k: Keys, blocked the same way as `q`.
+    v: Values, blocked the same way as `q`.
+    segment_ids: None, or per-batch segment ids blocked as (seq_len,).
+    sm_scale: Score scale forwarded to the kernel.
+    causal: Whether to apply causal masking.
+    block_q: Query block size; clamped to seq_len.
+    block_k: Key/value block size; clamped to seq_len.
+    backward_pass_impl: Ignored in the forward pass.
+    num_warps: Triton num_warps; None picks 4 if head_dim <= 64, else 8.
+    num_stages: Triton num_stages.
+    grid: Launch grid; None picks (cdiv(seq_len, block_q), batch_size,
+      num_heads).
+    interpret: Forwarded to `pl.pallas_call`.
+    debug: Forwarded to `pl.pallas_call`.
+  Returns:
+    An array with the shape and dtype of `q`.
+  """
+  del backward_pass_impl  # Not used by the forward pass.
+  batch_size, seq_len, num_heads, head_dim = q.shape
+  q_block = min(block_q, seq_len)
+  kv_block = min(block_k, seq_len)
+  # Heuristics for launch parameters the caller left unspecified.
+  if grid is None:
+    launch_grid = (pl.cdiv(seq_len, q_block), batch_size, num_heads)
+  else:
+    launch_grid = grid
+  if num_warps is None:
+    warps = 4 if head_dim <= 64 else 8
+  else:
+    warps = num_warps
+  def tensor_spec():
+    # One full (seq_len, head_dim) slab per (batch, head) pair.
+    return pl.BlockSpec(
+        lambda _, b, h: (b, 0, h, 0), (None, seq_len, None, head_dim)
+    )
+  if segment_ids is None:
+    segment_spec = None
+  else:
+    segment_spec = pl.BlockSpec(lambda _, b, h: (b, 0), (None, seq_len))
+  forward_kernel = functools.partial(
+      mha_forward_kernel,
+      num_heads=num_heads,
+      sm_scale=sm_scale,
+      causal=causal,
+      block_q=q_block,
+      block_k=kv_block,
+      block_d=head_dim,
+  )
+  launch = pl.pallas_call(
+      forward_kernel,
+      grid=launch_grid,
+      in_specs=[tensor_spec(), tensor_spec(), tensor_spec(), segment_spec],
+      out_specs=tensor_spec(),
+      out_shape=jax.ShapeDtypeStruct(shape=q.shape, dtype=q.dtype),
+      compiler_params=dict(
+          triton=dict(num_warps=warps, num_stages=num_stages)
+      ),
+      debug=debug,
+      interpret=interpret,
+      name="mha_forward",
+  )
+  return launch(q, k, v, segment_ids)
+def _mha_forward(
+    q,
+    k,
+    v,
+    segment_ids: jax.Array | None,
+    sm_scale: float,
+    causal: bool,
+    block_q: int,
+    block_k: int,
+    backward_pass_impl: str,
+    num_warps: int | None,
+    num_stages: int,
+    grid: Any,
+    interpret: bool,
+    debug: bool,
+):
+  """Forward rule registered for the custom VJP of `mha`.
+  Runs `mha_forward_kernel` with two extra float32 outputs `l` and `m` of
+  shape (batch_size, num_heads, seq_len).
+  Returns:
+    `(out, residuals)` where residuals is `(q, k, v, segment_ids, out, l, m)`.
+  """
+  del backward_pass_impl  # Not used by the forward pass.
+  batch_size, seq_len, num_heads, head_dim = q.shape
+  q_block = min(block_q, seq_len)
+  kv_block = min(block_k, seq_len)
+  # Heuristics for launch parameters the caller left unspecified.
+  if grid is None:
+    launch_grid = (pl.cdiv(seq_len, q_block), batch_size, num_heads)
+  else:
+    launch_grid = grid
+  if num_warps is None:
+    warps = 4 if head_dim <= 64 else 8
+  else:
+    warps = num_warps
+  def tensor_spec():
+    # One full (seq_len, head_dim) slab per (batch, head) pair.
+    return pl.BlockSpec(
+        lambda _, b, h: (b, 0, h, 0), (None, seq_len, None, head_dim)
+    )
+  def row_stat_spec():
+    # One (seq_len,) vector per (batch, head) pair.
+    return pl.BlockSpec(lambda _, b, h: (b, h, 0), (None, None, seq_len))
+  if segment_ids is None:
+    segment_spec = None
+  else:
+    segment_spec = pl.BlockSpec(lambda _, b, h: (b, 0), (None, seq_len))
+  row_stat_shape = (batch_size, num_heads, seq_len)
+  result_shapes = [
+      jax.ShapeDtypeStruct(shape=q.shape, dtype=q.dtype),  # out
+      jax.ShapeDtypeStruct(shape=row_stat_shape, dtype=jnp.float32),  # l
+      jax.ShapeDtypeStruct(shape=row_stat_shape, dtype=jnp.float32),  # m
+  ]
+  forward_kernel = functools.partial(
+      mha_forward_kernel,
+      num_heads=num_heads,
+      sm_scale=sm_scale,
+      causal=causal,
+      block_q=q_block,
+      block_k=kv_block,
+      block_d=head_dim,
+  )
+  launch = pl.pallas_call(
+      forward_kernel,
+      grid=launch_grid,
+      in_specs=[tensor_spec(), tensor_spec(), tensor_spec(), segment_spec],
+      out_specs=[tensor_spec(), row_stat_spec(), row_stat_spec()],
+      out_shape=result_shapes,
+      compiler_params=dict(
+          triton=dict(num_warps=warps, num_stages=num_stages)
+      ),
+      debug=debug,
+      interpret=interpret,
+      name="mha_forward",
+  )
+  out, l, m = launch(q, k, v, segment_ids)
+  return out, (q, k, v, segment_ids, out, l, m)
+def _preprocess_backward_kernel(out_ref, dout_ref, l_ref,
+                                new_dout_ref, delta_ref, *,
+                                block_q: int):
+  """Scales one block of dout by 1 / l and computes its per-row delta.
+  For the `block_q` rows selected by `pl.program_id(0)`, writes dout / l to
+  `new_dout_ref` and sum(out * (dout / l), axis=1) to `delta_ref`. The
+  arithmetic is done in float32 and cast to each output ref's dtype.
+  """
+  rows = pl.ds(pl.program_id(0) * block_q, block_q)
+  tile_idx = (rows, slice(None))
+  vec_idx = (rows,)
+  # Load everything as float32.
+  out_tile = pl.load(out_ref, tile_idx).astype(jnp.float32)
+  dout_tile = pl.load(dout_ref, tile_idx).astype(jnp.float32)
+  normalizer = pl.load(l_ref, vec_idx).astype(jnp.float32)
+  # Compute the scaled gradient and its row-wise inner product with out.
+  dout_scaled = dout_tile / normalizer[:, None]
+  row_delta = jnp.sum(out_tile * dout_scaled, axis=1)
+  # Write both results back.
+  pl.store(new_dout_ref, tile_idx, dout_scaled.astype(new_dout_ref.dtype))
+  pl.store(delta_ref, vec_idx, row_delta.astype(delta_ref.dtype))
+@jax.named_scope("preprocess_backward")
+def _preprocess_backward(out, do, l, block_q: int,
+                         debug: bool, interpret: bool):
+  """Launches `_preprocess_backward_kernel` over all query blocks.
+  Returns:
+    `(do_scaled, delta)`, with the shape and dtype of `do` and `l`
+    respectively.
+  """
+  batch_size, seq_len, num_heads, head_dim = out.shape
+  def tensor_spec():
+    # One full (seq_len, head_dim) slab per (batch, head) pair.
+    return pl.BlockSpec(
+        lambda _, b, h: (b, 0, h, 0), (None, seq_len, None, head_dim)
+    )
+  def row_stat_spec():
+    # One (seq_len,) vector per (batch, head) pair.
+    return pl.BlockSpec(lambda _, b, h: (b, h, 0), (None, None, seq_len))
+  result_shapes = [
+      jax.ShapeDtypeStruct(do.shape, do.dtype),
+      jax.ShapeDtypeStruct(l.shape, l.dtype),
+  ]
+  launch = pl.pallas_call(
+      functools.partial(_preprocess_backward_kernel, block_q=block_q),
+      grid=(pl.cdiv(seq_len, block_q), batch_size, num_heads),
+      in_specs=[tensor_spec(), tensor_spec(), row_stat_spec()],
+      out_specs=[tensor_spec(), row_stat_spec()],
+      out_shape=result_shapes,
+      compiler_params=dict(
+          triton=dict(num_warps=4, num_stages=3)
+      ),
+      debug=debug,
+      interpret=interpret,
+      name="mha_preprocess_backward",
+  )
+  do_scaled, delta = launch(out, do, l)
+  return do_scaled, delta
+def mha_backward_kernel(
+    # Inputs
+    q_ref,
+    k_ref,
+    v_ref,
+    segment_ids_ref: jax.Array | None,
+    out_ref,
+    do_scaled_ref,
+    l_ref,
+    m_ref,
+    delta_ref,
+    _,
+    # Outputs
+    dq_ref,
+    dk_ref,
+    dv_ref,
+    *,
+    sm_scale: float,
+    causal: bool,
+    block_q: int,
+    block_d: int,
+    block_k: int,
+):
+  """Pallas kernel computing dq, dk and dv for attention.
+  The outer loop walks key/value blocks of `block_k` rows and the inner loop
+  walks query blocks of `block_q` rows. dk and dv are accumulated per
+  key/value block and stored once; dq is accumulated in place in `dq_ref`
+  across outer iterations.
+  Args:
+    q_ref: Queries, indexed as [row, feature].
+    k_ref: Keys, indexed the same way.
+    v_ref: Values, indexed the same way.
+    segment_ids_ref: None, or a 1D ref of segment ids used for masking.
+    out_ref: Unused.
+    do_scaled_ref: Output gradient as loaded by the inner loop; the name
+      suggests it has already been scaled by the caller.
+    l_ref: Unused.
+    m_ref: Per-row values subtracted from the scores before exponentiation.
+    delta_ref: Per-row values subtracted when forming dp.
+    _: Ignored tenth input. In the `_mha_backward` launch this position holds
+      the zero-initialized dq passed via `input_output_aliases`.
+    dq_ref: Output ref for dq; read, incremented and written back.
+    dk_ref: Output ref for dk.
+    dv_ref: Output ref for dv.
+    sm_scale: Factor applied to the scores and to ds when it differs from 1.
+    causal: Whether keys after the query position are masked out.
+    block_q: Number of query rows per inner iteration.
+    block_d: Feature width of the dk/dv accumulators.
+    block_k: Number of key/value rows per outer iteration.
+  """
+  del out_ref, l_ref  # Not needed
+  seq_len = q_ref.shape[0]
+  def outer_loop(start_k, _):
+    # Fresh float32 accumulators for this key/value block.
+    dv = jnp.zeros([block_k, block_d], dtype=jnp.float32)
+    dk = jnp.zeros([block_k, block_d], dtype=jnp.float32)
+    k = pl.load(k_ref, (pl.ds(start_k * block_k, block_k), slice(None)))
+    v = pl.load(v_ref, (pl.ds(start_k * block_k, block_k), slice(None)))
+    span_k = start_k * block_k + jnp.arange(block_k)
+    kv_segment_ids = (
+        None
+        if segment_ids_ref is None
+        else pl.load(segment_ids_ref, (pl.ds(start_k * block_k, block_k),))
+    )
+    def inner_loop(start_q, carry):
+      dv, dk = carry
+      q = pl.load(q_ref, (pl.ds(start_q * block_q, block_q), slice(None)))
+      qk = pl.dot(q, k.T)
+      # Round-trip the scores through the input dtype before using float32.
+      qk = qk.astype(q_ref.dtype)
+      qk = qk.astype(jnp.float32)
+      if sm_scale != 1.0:
+        qk *= sm_scale
+      q_segment_ids = (
+          None
+          if segment_ids_ref is None
+          else pl.load(segment_ids_ref, (pl.ds(start_q * block_q, block_q),))
+      )
+      if causal or segment_ids_ref is not None:
+        mask = None
+        if segment_ids_ref is not None:
+          mask = segment_mask(q_segment_ids, kv_segment_ids)
+        if causal:
+          span_q = start_q * block_q + jnp.arange(block_q)
+          causal_mask = span_q[:, None] >= span_k[None, :]
+          mask = (
+              causal_mask
+              if mask is None
+              else jnp.logical_and(mask, causal_mask)
+          )
+        qk = jnp.where(mask, qk, DEFAULT_MASK_VALUE)
+      # Recompute the exponentiated scores from the stored per-row m.
+      m = pl.load(m_ref, (pl.ds(start_q * block_q, block_q),))
+      p = jnp.exp(qk - m[:, None])
+      do = pl.load(do_scaled_ref, (pl.ds(start_q * block_q, block_q), slice(None)))
+      dv = dv + pl.dot(p.astype(do.dtype).T, do)
+      di = pl.load(delta_ref, (pl.ds(start_q * block_q, block_q),))
+      # dp = do @ v.T - delta, with delta broadcast along the key axis.
+      dp = jnp.zeros((block_q, block_k), dtype=jnp.float32) - di[:, None]
+      dp = dp + pl.dot(do, v.T)
+      ds = p * dp
+      if sm_scale != 1.0:
+        ds = ds * sm_scale
+      dk = dk + pl.dot(ds.astype(q_ref.dtype).T, q)
+      # dq is accumulated directly in the output ref across kv blocks.
+      dq = pl.load(dq_ref, (pl.ds(start_q * block_q, block_q),
+                            slice(None)), eviction_policy="evict_last")
+      dq = dq + pl.dot(ds.astype(k.dtype), k).astype(dq.dtype)
+      pl.store(dq_ref, (pl.ds(start_q * block_q, block_q),
+                        slice(None)), dq, eviction_policy="evict_last")
+      return dv, dk
+    if causal:
+      # Start at the q block containing the first row of this kv block;
+      # earlier q blocks are skipped.
+      lower_bound = lax.div(start_k * block_k, block_q)
+    else:
+      lower_bound = 0
+    dv, dk = lax.fori_loop(lower_bound, pl.cdiv(seq_len, block_q), inner_loop,
+                           (dv, dk))
+    pl.store(dv_ref, (pl.ds(start_k * block_k, block_k),
+                      slice(None)), dv.astype(dv_ref.dtype))
+    pl.store(dk_ref, (pl.ds(start_k * block_k, block_k),
+                      slice(None)), dk.astype(dk_ref.dtype))
+  lax.fori_loop(0, pl.cdiv(seq_len, block_k), outer_loop, None)
+def _mha_backward(sm_scale: float, causal: bool, block_q: int, block_k: int,
+                  backward_pass_impl: str, num_warps: int | None,
+                  num_stages: int, grid: Any, interpret: bool,
+                  debug: bool, res, do):
+  """Backward rule registered for the custom VJP of `mha`.
+  With `backward_pass_impl == "xla"` the gradient comes from differentiating
+  `mha_reference`; with `"triton"` it comes from `mha_backward_kernel`.
+  Args:
+    sm_scale: Score scale used in the forward pass.
+    causal: Whether the forward pass was causal.
+    block_q: Query block size; clamped to seq_len.
+    block_k: Key/value block size; clamped to seq_len.
+    backward_pass_impl: "xla" or "triton".
+    num_warps: Ignored.
+    num_stages: Ignored.
+    grid: Ignored.
+    interpret: Forwarded to `pl.pallas_call`.
+    debug: Forwarded to `pl.pallas_call`.
+    res: Residuals `(q, k, v, segment_ids, out, l, m)` from `_mha_forward`.
+    do: Gradient with respect to the forward output.
+  Returns:
+    Gradients for `(q, k, v, segment_ids)`; on the triton path the last entry
+    is None.
+  Raises:
+    ValueError: If `backward_pass_impl` is neither "xla" nor "triton".
+  """
+  del num_warps, num_stages, grid  # The backward launch sets its own values.
+  q, k, v, segment_ids, out, l, m = res
+  if backward_pass_impl == "xla":
+    # Differentiate the reference implementation instead of running a kernel.
+    reference = functools.partial(
+        mha_reference, sm_scale=sm_scale, causal=causal
+    )
+    _, reference_vjp = jax.vjp(reference, q, k, v, segment_ids)
+    return reference_vjp(do)
+  if backward_pass_impl != "triton":
+    raise ValueError(f"Invalid backward pass implementation: {backward_pass_impl}")
+  batch_size, seq_len, num_heads, head_dim = q.shape
+  q_block = min(block_q, seq_len)
+  kv_block = min(block_k, seq_len)
+  do_scaled, delta = _preprocess_backward(out, do, l, q_block, debug, interpret)
+  # The kernel accumulates into dq, so it has to start from zeros.
+  dq_init = jnp.zeros(q.shape, jnp.float32)
+  def tensor_spec():
+    # One full (seq_len, head_dim) slab per (batch, head) pair.
+    return pl.BlockSpec(
+        lambda b, h: (b, 0, h, 0), (None, seq_len, None, head_dim)
+    )
+  def row_stat_spec():
+    # One (seq_len,) vector per (batch, head) pair.
+    return pl.BlockSpec(lambda b, h: (b, h, 0), (None, None, seq_len))
+  # dq_init is aliased to output 0. Its input index is 8 rather than 9 when
+  # segment_ids is None, presumably because None is not counted as an input.
+  if segment_ids is None:
+    segment_spec = None
+    dq_alias = {8: 0}
+  else:
+    segment_spec = pl.BlockSpec(lambda b, h: (b, 0), (None, seq_len))
+    dq_alias = {9: 0}
+  operand_specs = [
+      tensor_spec(),  # q
+      tensor_spec(),  # k
+      tensor_spec(),  # v
+      segment_spec,  # segment_ids
+      tensor_spec(),  # out
+      tensor_spec(),  # do_scaled
+      row_stat_spec(),  # l
+      row_stat_spec(),  # m
+      row_stat_spec(),  # delta
+      tensor_spec(),  # dq_init
+  ]
+  result_shapes = [
+      jax.ShapeDtypeStruct(dq_init.shape, dq_init.dtype),
+      jax.ShapeDtypeStruct(k.shape, k.dtype),
+      jax.ShapeDtypeStruct(v.shape, v.dtype),
+  ]
+  backward_kernel = functools.partial(
+      mha_backward_kernel,
+      sm_scale=sm_scale,
+      causal=causal,
+      block_q=q_block,
+      block_d=head_dim,
+      block_k=kv_block,
+  )
+  # TODO(sharadmv): figure out why num_warps=8 doesn't work!
+  # NOTE(review): the value used below is nevertheless 8; confirm whether the
+  # TODO is stale.
+  backward_warps = 8
+  launch = pl.pallas_call(
+      backward_kernel,
+      grid=(batch_size, num_heads),
+      in_specs=operand_specs,
+      out_specs=[tensor_spec(), tensor_spec(), tensor_spec()],
+      out_shape=result_shapes,
+      input_output_aliases=dq_alias,
+      compiler_params=dict(
+          triton=dict(num_warps=backward_warps, num_stages=1)
+      ),
+      debug=debug,
+      interpret=interpret,
+      name="mha_backward",
+  )
+  dq, dk, dv = launch(
+      q, k, v, segment_ids, out, do_scaled, l, m, delta, dq_init
+  )
+  return dq.astype(q.dtype), dk, dv, None
+mha.defvjp(_mha_forward, _mha_backward)
+@functools.partial(jax.jit, static_argnames=['sm_scale', 'causal'])
+def mha_reference(
+    q,
+    k,
+    v,
+    segment_ids: jnp.ndarray | None,
+    sm_scale=1.0,
+    causal: bool = False,
+):
+  """Plain jax.numpy multi-head attention.
+  Args:
+    q: Queries laid out as (batch, q_seq_len, heads, channels).
+    k: Keys laid out as (batch, kv_seq_len, heads, channels).
+    v: Values laid out like `k`.
+    segment_ids: None, or segment ids; positions with different ids do not
+      attend to each other.
+    sm_scale: Factor applied to the logits before the softmax.
+    causal: Whether to keep only the lower triangle of the logits.
+  Returns:
+    The attention output laid out like `q`.
+  """
+  scores = jnp.einsum('bqhc,bkhc->bhqk', q, k).astype(jnp.float32)
+  # Gather the active masks, each broadcast to the shape of the scores.
+  masks = []
+  if segment_ids is not None:
+    same_segment = segment_mask(segment_ids, segment_ids)
+    masks.append(
+        jnp.broadcast_to(jnp.expand_dims(same_segment, 1), scores.shape)
+    )
+  if causal:
+    lower_triangle = jnp.tril(
+        jnp.ones((1, 1, q.shape[1], k.shape[1]), dtype=bool)
+    )
+    masks.append(jnp.broadcast_to(lower_triangle, scores.shape))
+  keep = None
+  for candidate in masks:
+    keep = candidate if keep is None else jnp.logical_and(keep, candidate)
+  if keep is not None:
+    scores = jnp.where(keep, scores, float("-inf"))
+  probs = jax.nn.softmax(scores * sm_scale).astype(q.dtype)
+  return jnp.einsum('bhqk,bkhc->bqhc', probs, v)

external/alphageometry/.venv-ag/Lib/site-packages/jax/experimental/pallas/tpu.py ADDED Viewed

	@@ -0,0 +1,53 @@

+# Copyright 2023 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Mosaic-specific Pallas APIs."""
+from jax._src.pallas.mosaic import core
+from jax._src.pallas.mosaic.core import dma_semaphore
+from jax._src.pallas.mosaic.core import PrefetchScalarGridSpec
+from jax._src.pallas.mosaic.core import semaphore
+from jax._src.pallas.mosaic.core import SemaphoreType
+from jax._src.pallas.mosaic.core import TPUMemorySpace
+from jax._src.pallas.mosaic.kernel_regeneration_util import encode_kernel_regeneration_metadata
+from jax._src.pallas.mosaic.kernel_regeneration_util import extract_kernel_regeneration_metadata
+from jax._src.pallas.mosaic.lowering import LoweringException
+from jax._src.pallas.mosaic.pipeline import BufferedRef
+from jax._src.pallas.mosaic.pipeline import emit_pipeline
+from jax._src.pallas.mosaic.pipeline import emit_pipeline_with_allocations
+from jax._src.pallas.mosaic.pipeline import get_pipeline_schedule
+from jax._src.pallas.mosaic.pipeline import make_pipeline_allocations
+from jax._src.pallas.mosaic.primitives import async_copy
+from jax._src.pallas.mosaic.primitives import async_remote_copy
+from jax._src.pallas.mosaic.primitives import bitcast
+from jax._src.pallas.mosaic.primitives import delay
+from jax._src.pallas.mosaic.primitives import device_id
+from jax._src.pallas.mosaic.primitives import DeviceIdType
+from jax._src.pallas.mosaic.primitives import get_barrier_semaphore
+from jax._src.pallas.mosaic.primitives import make_async_copy
+from jax._src.pallas.mosaic.primitives import make_async_remote_copy
+from jax._src.pallas.mosaic.primitives import repeat
+from jax._src.pallas.mosaic.primitives import roll
+from jax._src.pallas.mosaic.primitives import run_scoped
+from jax._src.pallas.mosaic.primitives import semaphore_read
+from jax._src.pallas.mosaic.primitives import semaphore_signal
+from jax._src.pallas.mosaic.primitives import semaphore_wait
+from jax._src.pallas.mosaic.primitives import prng_seed
+from jax._src.pallas.mosaic.primitives import prng_random_bits
+from jax._src.tpu_custom_call import CostEstimate
+# Module-level shorthands for the individual `TPUMemorySpace` members.
+ANY = TPUMemorySpace.ANY
+CMEM = TPUMemorySpace.CMEM
+SMEM = TPUMemorySpace.SMEM
+VMEM = TPUMemorySpace.VMEM

external/alphageometry/README.md ADDED Viewed

	@@ -0,0 +1,447 @@

+# Solving Olympiad Geometry without Human Demonstrations
+This repository contains the code necessary to
+reproduce DDAR and AlphaGeometry,
+the two geometry theorem provers
+introduced in the [Nature 2024](https://www.nature.com/articles/s41586-023-06747-5) paper:
+*<center>"Solving Olympiad Geometry without Human Demonstrations".</center>*
+</br>
+<center>
+<img alt="fig1" width="800px" src="fig1.svg">
+</center>
+## Dependencies
+For the instructions presented below,
+we use Python 3.10.9, and dependencies with their exact
+version numbers listed in `requirements.txt`.
+Our code depends on `meliad`, which is
+not a registered package with `pip`. See instructions below
+for how to manually install `meliad`.
+Note that one can still run the DDAR solver
+without the `meliad` and `sentencepiece` dependencies.
+## Run the instructions
+All instructions in this `README.md` can be run in one go by:
+```
+bash run.sh
+```
+Below, we explain these instructions step-by-step.
+## Install dependencies, download weights and vocabulary.
+Installation is done in a virtual environment:
+```
+virtualenv -p python3 .
+source ./bin/activate
+pip install --require-hashes -r requirements.txt
+```
+Download weights and vocabulary:
+```
+bash download.sh
+DATA=ag_ckpt_vocab
+```
+Finally, install `meliad` separately as it is not
+registered with `pip`:
+```
+MELIAD_PATH=meliad_lib/meliad
+mkdir -p $MELIAD_PATH
+git clone https://github.com/google-research/meliad $MELIAD_PATH
+export PYTHONPATH=$PYTHONPATH:$MELIAD_PATH
+```
+## Set up common flags
+Before running the python scripts,
+let us first prepare some commonly used flags.
+The symbolic engine needs definitions and deduction rules to operate.
+These definitions and rules are provided in two text files
+`defs.txt` and `rules.txt`.
+```shell
+DDAR_ARGS=(
+  --defs_file=$(pwd)/defs.txt \
+  --rules_file=$(pwd)/rules.txt \
+);
+```
+Next, we define the flags relevant to the proof search.
+To reproduce the simple examples below,
+we use lightweight values for the proof search parameters:
+```shell
+BATCH_SIZE=2
+BEAM_SIZE=2
+DEPTH=2
+SEARCH_ARGS=(
+  --beam_size=$BEAM_SIZE
+  --search_depth=$DEPTH
+)
+```
+NOTE: The results in our paper can be obtained by setting
+`BATCH_SIZE=32`, `BEAM_SIZE=512`, `DEPTH=16`
+as described in the Methods section.
+To stay under IMO time limits, 4 V100-GPUs and 250 CPU workers
+are needed as shown in Extended Data - Figure 1.
+Note that we also strip away other memory/speed optimizations
+due to internal dependencies and to promote code clarity.
+Assume the downloaded checkpoint and vocabulary are placed in `DATA`,
+and the installed `meliad` source code is at `MELIAD_PATH`.
+We make use of the `gin` library to manage model configurations,
+following `meliad` conventions. We now define the flags relevant to the
+language model:
+```shell
+LM_ARGS=(
+  --ckpt_path=$DATA \
+  --vocab_path=$DATA/geometry.757.model
+  --gin_search_paths=$MELIAD_PATH/transformer/configs,$(pwd) \
+  --gin_file=base_htrans.gin \
+  --gin_file=size/medium_150M.gin \
+  --gin_file=options/positions_t5.gin \
+  --gin_file=options/lr_cosine_decay.gin \
+  --gin_file=options/seq_1024_nocache.gin \
+  --gin_file=geometry_150M_generate.gin \
+  --gin_param=DecoderOnlyLanguageModelGenerate.output_token_losses=True \
+  --gin_param=TransformerTaskConfig.batch_size=$BATCH_SIZE \
+  --gin_param=TransformerTaskConfig.sequence_length=128 \
+  --gin_param=Trainer.restore_state_variables=False
+);
+```
+TIP: Note that you can still run the DDAR solver
+without defining `SEARCH_ARGS` and `LM_ARGS`.
+In that case, simply disable the import of the `lm_inference` module
+inside `alphageometry.py`.
+## Run DDAR
+The script loads a problem by reading a list of problems
+from a text file and solves the specific problem in the list according
+to its name. We pass these two pieces of information through the flags
+`--problems_file` and `--problem_name`.
+We use `--mode=ddar` to indicate that we want to use the DDAR solver.
+Below we show this solver solving IMO 2000 P1:
+```shell
+python -m alphageometry \
+--alsologtostderr \
+--problems_file=$(pwd)/imo_ag_30.txt \
+--problem_name=translated_imo_2000_p1 \
+--mode=ddar \
+"${DDAR_ARGS[@]}"
+```
+Expect the following output:
+```shell
+graph.py:468] translated_imo_2000_p1
+graph.py:469] a b = segment a b; g1 = on_tline g1 a a b; g2 = on_tline g2 b b a; m = on_circle m g1 a, on_circle m g2 b; n = on_circle n g1 a, on_circle n g2 b; c = on_pline c m a b, on_circle c g1 a; d = on_pline d m a b, on_circle d g2 b; e = on_line e a c, on_line e b d; p = on_line p a n, on_line p c d; q = on_line q b n, on_line q c d ? cong e p e q
+ddar.py:41] Depth 1/1000 time = 1.7772269248962402
+ddar.py:41] Depth 2/1000 time = 5.63526177406311
+ddar.py:41] Depth 3/1000 time = 6.883412837982178
+ddar.py:41] Depth 4/1000 time = 10.275688409805298
+ddar.py:41] Depth 5/1000 time = 12.048273086547852
+alphageometry.py:190]
+==========================
+ * From theorem premises:
+A B G1 G2 M N C D E P Q : Points
+AG_1 ⟂ AB [00]
+BA ⟂ G_2B [01]
+G_2M = G_2B [02]
+G_1M = G_1A [03]
+...
+[log omitted]
+...
+036. ∠QEB = ∠(QP-EA) [46] & ∠(BE-QP) = ∠AEP [55] ⇒  ∠EQP = ∠QPE [56]
+037. ∠PQE = ∠EPQ [56] ⇒  EP = EQ
+==========================
+```
+The output first includes a list of relevant premises that it uses,
+and then proof steps that gradually build up the proof.
+All predicates are numbered to track how they are derived
+from the premises, and to show that the proof is fully justified.
+TIP: Additionally passing the flag `--out_file=path/to/output/text/file.txt`
+will write the proof to a text file.
+Running on all problems in `imo_ag_30.txt` will yield solutions to
+14 of them, as reported in Table 1 in our paper.
+## Run AlphaGeometry:
+As a simple example, we load `--problem_name=orthocenter`
+from `--problems_file=examples.txt`.
+This time, we pass `--mode=alphageometry` to use the AlphaGeometry solver
+and pass the `SEARCH_ARGS` and `LM_ARGS` flags.
+```shell
+python -m alphageometry \
+--alsologtostderr \
+--problems_file=$(pwd)/examples.txt \
+--problem_name=orthocenter \
+--mode=alphageometry \
+"${DDAR_ARGS[@]}" \
+"${SEARCH_ARGS[@]}" \
+"${LM_ARGS[@]}"
+```
+Expect the following output:
+```shell
+...
+[log omitted]
+...
+training_loop.py:725] Total parameters: 152072288
+training_loop.py:739] Total state size: 0
+training_loop.py:492] Training loop: creating task for mode beam_search
+graph.py:468] orthocenter
+graph.py:469] a b c = triangle a b c; d = on_tline d b a c, on_tline d c a b ? perp a d b c
+ddar.py:41] Depth 1/1000 time = 0.009987592697143555 branch = 4
+ddar.py:41] Depth 2/1000 time = 0.00672602653503418 branch = 0
+alphageometry.py:221] DD+AR failed to solve the problem.
+alphageometry.py:457] Depth 0. There are 1 nodes to expand:
+alphageometry.py:460] {S} a : ; b : ; c : ; d : T a b c d 00 T a c b d 01 ? T a d b c {F1} x00
+alphageometry.py:465] Decoding from {S} a : ; b : ; c : ; d : T a b c d 00 T a c b d 01 ? T a d b c {F1} x00
+...
+[log omitted]
+...
+alphageometry.py:470] LM output (score=-1.102287): "e : C a c e 02 C b d e 03 ;"
+alphageometry.py:471] Translation: "e = on_line e a c, on_line e b d"
+alphageometry.py:480] Solving: "a b c = triangle a b c; d = on_tline d b a c, on_tline d c a b; e = on_line e a c, on_line e b d ? perp a d b c"
+graph.py:468]
+graph.py:469] a b c = triangle a b c; d = on_tline d b a c, on_tline d c a b; e = on_line e a c, on_line e b d ? perp a d b c
+ddar.py:41] Depth 1/1000 time = 0.021120786666870117
+ddar.py:41] Depth 2/1000 time = 0.033370018005371094
+ddar.py:41] Depth 3/1000 time = 0.04297471046447754
+alphageometry.py:140]
+==========================
+ * From theorem premises:
+A B C D : Points
+BD ⟂ AC [00]
+CD ⟂ AB [01]
+ * Auxiliary Constructions:
+E : Points
+E,B,D are collinear [02]
+E,C,A are collinear [03]
+ * Proof steps:
+001. E,B,D are collinear [02] & E,C,A are collinear [03] & BD ⟂ AC [00] ⇒  ∠BEA = ∠CED [04]
+002. E,B,D are collinear [02] & E,C,A are collinear [03] & BD ⟂ AC [00] ⇒  ∠BEC = ∠AED [05]
+003. A,E,C are collinear [03] & E,B,D are collinear [02] & AC ⟂ BD [00] ⇒  EC ⟂ EB [06]
+004. EC ⟂ EB [06] & CD ⟂ AB [01] ⇒  ∠(EC-BA) = ∠(EB-CD) [07]
+005. E,C,A are collinear [03] & E,B,D are collinear [02] & ∠(EC-BA) = ∠(EB-CD) [07] ⇒  ∠BAE = ∠CDE [08]
+006. ∠BEA = ∠CED [04] & ∠BAE = ∠CDE [08] (Similar Triangles)⇒  EB:EC = EA:ED [09]
+007. EB:EC = EA:ED [09] & ∠BEC = ∠AED [05] (Similar Triangles)⇒  ∠BCE = ∠ADE [10]
+008. EB:EC = EA:ED [09] & ∠BEC = ∠AED [05] (Similar Triangles)⇒  ∠EBC = ∠EAD [11]
+009. ∠BCE = ∠ADE [10] & E,C,A are collinear [03] & E,B,D are collinear [02] & ∠EBC = ∠EAD [11] ⇒  AD ⟂ BC
+==========================
+alphageometry.py:505] Solved.
+```
+NOTE: Point `H` is automatically renamed to `D`,
+as the LM is trained on synthetic problems
+where the points are named alphabetically, and so it expects
+the same during test time.
+NOTE: In this implementation of AlphaGeometry,
+we removed all optimizations that are dependent on
+internal infrastructure, e.g.,
+parallelized model inference on multiple GPUs,
+parallelized DDAR on multiple CPUs,
+parallel execution of LM and DDAR,
+shared pool of CPU workers across different problems, etc.
+We also removed some memory/speed optimizations and code
+abstractions in favor of code clarity.
+As can be seen in the output, initially DDAR failed to solve the problem.
+The LM proposes two auxiliary constructions (because `BATCH_SIZE=2`):
+* `e = eqdistance e c a b, eqdistance e b a c`, i.e.,
+construct `E` as the intersection of circle (center=C, radius=AB) and
+circle (center=B, radius=AC). This construction has a score of `-1.186`.
+* `e = on_line e a c, on_line e b d`, i.e.,
+`E` is the intersection of `AC` and `BD`.
+This construction has a higher score (`-1.102287`) than the previous.
+Since the second construction has a higher score, DDAR attempted the second
+construction first and found the solution right away.
+The proof search therefore terminates and there is no second iteration.
+## Results
+Before attempting to reproduce the AlphaGeometry numbers in our paper,
+please make sure to pass all tests in the prepared test suite:
+```
+bash run_tests.sh
+```
+NOTE: [Issues#14](https://github.com/google-deepmind/alphageometry/issues/14) reports that although the top beam decodes are still the same, the LM is not giving the same score for different users.
+Then, pass the corresponding values for `--problems_file` (column)
+and `--mode` (row), and
+iterate on all problems to obtain the following results:
+<center>
+<b>Number of solved problems:</b>
+|          | `imo_ag_30.txt`  | `jgex_ag_231.txt` |
+|----------|------------------|-------------------|
+| `ddar`   | 14               | 198               |
+| `alphageometry`     | 25               | 228               |
+</center>
+## Source code description
+Files in this repository include python modules/scripts to run the solvers and
+resource files necessary for the script to execute. Each of them is listed
+below with its description.
+| File name              | Description                                                                        |
+|------------------------|------------------------------------------------------------------------------------|
+| `geometry.py`          | Implements nodes (Point, Line, Circle, etc) in the proof state graph.              |
+| `numericals.py`        | Implements the numerical engine in the dynamic geometry environment.               |
+| `graph_utils.py`       | Implements utilities for the proof state graph.                                    |
+| `graph.py`             | Implements the proof state graph.                                                  |
+| `problem.py`           | Implements the classes that represent the problem premises, conclusion, DAG nodes. |
+| `dd.py`                | Implements DD and its traceback.                                                   |
+| `ar.py`                | Implements AR and its traceback.                                                   |
+| `trace_back.py`        | Implements the recursive traceback and dependency difference algorithm.            |
+| `ddar.py`              | Implements the combination DD+AR.                                                  |
+| `beam_search.py`       | Implements beam decoding of a language model in JAX.                               |
+| `models.py`            | Implements the transformer model.                                                  |
+| `transformer_layer.py` | Implements the transformer layer.                                                  |
+| `decoder_stack.py`     | Implements the transformer decoder stack.                                          |
+| `lm_inference.py`      | Implements an interface to a trained LM to perform decoding.                       |
+| `alphageometry.py`                | Main script that loads problems, calls DD+AR or AlphaGeometry solver, and prints solutions.   |
+| `pretty.py`            | Pretty formatting the solutions output by solvers.                                 |
+| `*_test.py`            | Tests for the corresponding module.                                                |
+| `download.sh`          | Script to download the LM checkpoint and vocabulary.                               |
+| `run.sh`               | Script to execute instructions in README.                                          |
+| `run_tests.sh`         | Script to execute the test suite.                                                  |
+Resource files:
+| Resource file name     | Description                                                                        |
+|------------------------|------------------------------------------------------------------------------------|
+| `defs.txt`             | Definitions of different geometric construction actions.                           |
+| `rules.txt`            | Deduction rules for DD.                                                            |
+| `geometry_150M_generate.gin`| Gin config of the LM implemented in meliad.                                   |
+| `imo_ag_30.txt`        | Problems in IMO-AG-30.                                                             |
+| `jgex_ag_231.txt`      | Problems in JGEX-AG-231.                                                           |
+## Citing this work
+```bibtex
+@Article{AlphaGeometryTrinh2024,
+  author  = {Trinh, Trieu and Wu, Yuhuai and Le, Quoc and He, He and Luong, Thang},
+  journal = {Nature},
+  title   = {Solving Olympiad Geometry without Human Demonstrations},
+  year    = {2024},
+  doi     = {10.1038/s41586-023-06747-5}
+}
+```
+## Acknowledgements
+This research is a collaboration between the Google Brain team
+(now Google DeepMind) and
+the Computer Science Department of New York University.
+We thank Rif A. Saurous, Denny Zhou, Christian Szegedy, Delesley Hutchins,
+Thomas Kipf, Hieu Pham, Petar Veličković, Debidatta Dwibedi,
+Kyunghyun Cho, Lerrel Pinto, Alfredo Canziani,
+Thomas Wies, He He’s research group,
+Evan Chen (the USA’s IMO team coach),
+Mirek Olsak, Patrik Bak,
+and all three of Nature's referees for their help and support.
+The code of AlphaGeometry communicates with and/or references the following
+separate libraries and packages:
+*   [Abseil](https://github.com/abseil/abseil-py)
+*   [JAX](https://github.com/google/jax/)
+*   [matplotlib](https://matplotlib.org/)
+*   [NumPy](https://numpy.org)
+*   [SciPy](https://scipy.org)
+*   [TensorFlow](https://github.com/tensorflow/tensorflow)
+*   [Meliad](https://github.com/google-research/meliad)
+*   [Flax](https://github.com/google/flax)
+*   [Gin](https://github.com/google/gin-config)
+*   [T5](https://github.com/google-research/text-to-text-transfer-transformer)
+*   [SentencePiece](https://github.com/google/sentencepiece)
+We thank all their contributors and maintainers!
+## Disclaimer
+This is not an officially supported Google product.
+This research code is provided "as-is" to the broader research community.
+Google does not promise to maintain or otherwise support this code in any way.
+## Code License
+Copyright 2023 DeepMind Technologies Limited
+All software is licensed under the Apache License, Version 2.0 (Apache 2.0);
+you may not use this file except in compliance with the Apache 2.0 license.
+You may obtain a copy of the Apache 2.0 license at:
+https://www.apache.org/licenses/LICENSE-2.0
+All other materials are licensed under the Creative Commons Attribution 4.0
+International License (CC-BY). You may obtain a copy of the CC-BY license at:
+https://creativecommons.org/licenses/by/4.0/legalcode
+Unless required by applicable law or agreed to in writing, all software and
+materials distributed here under the Apache 2.0 or CC-BY licenses are
+distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied. See the licenses for the specific language governing
+permissions and limitations under those licenses.
+## Model Parameters License
+The AlphaGeometry checkpoints and vocabulary are made available
+under the terms of the Creative Commons Attribution 4.0
+International (CC BY 4.0) license.
+You can find details at:
+https://creativecommons.org/licenses/by/4.0/legalcode

external/alphageometry/lm_inference_test.py ADDED Viewed

	@@ -0,0 +1,89 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for lm_inference.py."""
+import os
+import unittest
+from absl import flags
+from absl.testing import absltest
+import lm_inference as lm
+# Command-line flags locating the external assets the test needs: the
+# directory holding the checkpoint and vocabulary, and the meliad checkout.
+# Both default to the empty string.
+_DATA_PATH = flags.DEFINE_string('data_path', '', 'path to ckpt and vocab.')
+_MELIAD_PATH = flags.DEFINE_string(
+    'meliad_path', '', 'path to meliad repository.'
+)  # pylint: disable=line-too-long
+class LmInferenceTest(unittest.TestCase):
+  """Checks beam decoding of the loaded language model on one fixed prompt."""
+  @classmethod
+  def setUpClass(cls):
+    """Parses the gin configuration and loads the model once for all tests."""
+    super().setUpClass()
+    gin_file = [
+        'base_htrans.gin',
+        'size/medium_150M.gin',
+        'options/positions_t5.gin',
+        'options/lr_cosine_decay.gin',
+        'options/seq_1024_nocache.gin',
+        'geometry_150M_generate.gin',
+    ]
+    gin_param = [
+        'DecoderOnlyLanguageModelGenerate.output_token_losses=True',
+        'TransformerTaskConfig.batch_size=2',
+        'TransformerTaskConfig.sequence_length=128',
+        'Trainer.restore_state_variables=False',
+    ]
+    # Gin files are looked up under meliad's `transformer/configs` directory
+    # and in the current working directory.
+    gin_search_paths = [
+        os.path.join(_MELIAD_PATH.value, 'transformer/configs'),
+        os.getcwd(),
+    ]
+    vocab_path = os.path.join(_DATA_PATH.value, 'geometry.757.model')
+    lm.parse_gin_configuration(gin_file, gin_param, gin_paths=gin_search_paths)
+    # Stored on the class so that every test method reuses the same instance.
+    cls.loaded_lm = lm.LanguageModelInference(
+        vocab_path, _DATA_PATH.value, mode='beam_search'
+    )
+  def test_lm_decode(self):
+    """The decoded strings for the prompt equal the two expected sequences."""
+    outputs = LmInferenceTest.loaded_lm.beam_decode(
+        '{S} a : ; b : ; c : ; d : T a b c d 00 T a c b d 01 ? T a d b c'
+        ' {F1} x00',
+        eos_tokens=[';'],
+    )
+    self.assertEqual(
+        outputs['seqs_str'],
+        ['e : D a b c e 02 D a c b e 03 ;', 'e : C a c e 02 C b d e 03 ;'],
+    )
+  def test_lm_score_may_fail_numerically_for_external_meliad(self):
+    """The beam scores for the same prompt equal the expected values.
+    The scores are compared with `assertEqual`, i.e. exact float equality, so
+    (as the test name warns) any numerical difference makes this test fail.
+    """
+    outputs = LmInferenceTest.loaded_lm.beam_decode(
+        '{S} a : ; b : ; c : ; d : T a b c d 00 T a c b d 01 ? T a d b c'
+        ' {F1} x00',
+        eos_tokens=[';'],
+    )
+    self.assertEqual(
+        outputs['scores'],
+        [-1.18607294559478759765625, -1.10228693485260009765625],
+    )
+# Run the tests through absltest's entry point (presumably so that the absl
+# flags defined above are parsed before `setUpClass` reads them — confirm).
+if __name__ == '__main__':
+  absltest.main()

external/alphageometry/models.py ADDED Viewed

	@@ -0,0 +1,178 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Transformer language model generate mode."""
+from typing import Any, Tuple
+import beam_search
+import decoder_stack
+import gin
+import jax
+import jax.numpy as jnp
+from transformer import models
+@gin.configurable
+class DecoderOnlyLanguageModelGenerate(models.DecoderOnlyLanguageModel):
+  """Decoder only language modeling in inference mode."""
+  decoder_factory = decoder_stack.DecoderStackGenerate
+  # Both must be supplied through gin; they size the fake decoder state below.
+  num_heads: int = gin.REQUIRED
+  head_size: int = gin.REQUIRED
+  def get_fake_input(self) -> dict[str, Any]:
+    """Extends the parent's fake input with the keys used for generation.
+    Returns:
+      The parent's fake input dict, updated with 'dstate', 'eos', 'mask',
+      'length' and 'temperature' entries.
+    """
+    fake_input_dict = super().get_fake_input()
+    b = self.task_config.batch_size
+    n = self.num_heads
+    h = self.head_size
+    # The sizes 2048, 1024 and the repeat count 12 are hard-coded; presumably
+    # they mirror the configured model (cache length, vocabulary/position
+    # size, number of layers) — TODO confirm against the gin config.
+    fake_input_dict.update({
+        'dstate': tuple(
+            # `[d] * 12` repeats one dict object, so all 12 entries alias the
+            # same dict and arrays.
+            [{
+                'current_index': jnp.array([0] * b, dtype=jnp.int32),
+                'keys': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
+                'values': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
+                'recurrent_kvq': None,
+                'relative_position_bias': jnp.zeros(
+                    (b, n, 1, 1024), dtype=jnp.bfloat16
+                ),
+            }]
+            * 12
+        ),
+        'eos': jnp.zeros([1024], dtype=jnp.bfloat16),
+        'mask': jnp.ones([1024], dtype=jnp.bfloat16),
+        'length': 1,
+        'temperature': 1.0,
+    })
+    return fake_input_dict
+  def __call__(self, inputs: ...) -> tuple[Any, dict[str, Any]]:
+    """Builds decoder state for the prompt and runs beam search from it.
+    Args:
+      inputs: dict read here for 'targets', 'start_of_sequence', 'length',
+        'dstate', 'eos' and 'mask'. Note that `inputs['targets']` is
+        reassigned in this dict with its last column dropped.
+    Returns:
+      A `(0.0, outputs)` pair where `outputs` holds 'finished_seqs',
+      'finished_scores' and the final 'dstate' from beam search.
+    Raises:
+      ValueError: if `self.mode` is neither 'init' nor 'beam_search', or if
+        `self.decoder.supports_generate()` is truthy.
+    """
+    # Make sure this code is not used on untested cases.
+    if self.mode not in ['init', 'beam_search']:
+      raise ValueError(f'{type(self)} cannot do mode {self.mode}')
+    # NOTE(review): this rejects decoders whose `supports_generate()` is
+    # truthy; confirm that polarity is what the error message intends.
+    if self.decoder.supports_generate():
+      raise ValueError(f'{type(self)}.decoder cannot supports_generate()')
+    # The result of this call is discarded; presumably it is made for its
+    # effect on the decoder (e.g. creating variables) — TODO confirm.
+    self.decoder(
+        input_tokens=inputs['targets'][:, 0:1],
+        target_tokens=None,
+        start_of_sequence=inputs['start_of_sequence'],
+    )
+    b = inputs['targets'].shape[0]
+    no_start_of_seq = jnp.array([False] * b, dtype=jnp.bool_)
+    # This fn is used in both beam_search or topk_sampling.
+    def tokens_to_logits_fn(
+        input_token: jnp.ndarray, dstate: tuple[dict[str, jnp.ndarray], ...]
+    ) -> tuple[jnp.ndarray, tuple[dict[str, jnp.ndarray], ...]]:
+      # One decoder step; only the logits of the last position are returned.
+      (logits, dstate, _) = self.decoder(
+          input_tokens=input_token,
+          target_tokens=None,
+          start_of_sequence=no_start_of_seq,
+          decoder_state=dstate,
+      )
+      return logits[:, -1, :], dstate
+    # Column `length - 1` of the targets. This must be taken before
+    # `inputs['targets']` is truncated below.
+    last_token = jax.lax.dynamic_slice_in_dim(
+        inputs['targets'], inputs['length'] - 1, 1, axis=1
+    )
+    # last token is used to seed beam_search
+    inputs['targets'] = inputs['targets'][:, 0:-1]
+    # At a sequence start, build the state by running `generate` over the
+    # (truncated) targets; otherwise reuse the state supplied by the caller.
+    dstate = jax.lax.cond(
+        inputs['start_of_sequence'][0],
+        lambda: self.generate(inputs)[0],
+        lambda: inputs['dstate'],
+    )
+    # Then we run beam search, init with last_token & dstate.
+    finished_seqs, finished_scores, dstate = beam_search.beam_search_flat(
+        last_token,
+        dstate,
+        tokens_to_logits_fn,
+        max_decode_len=512,
+        eos=inputs['eos'].reshape((1, 1, -1)),
+        mask=inputs['mask'].reshape((1, 1, -1)),
+    )
+    # The first element is a constant 0.0 (no loss is computed here).
+    return 0.0, {
+        'finished_seqs': finished_seqs,
+        'finished_scores': finished_scores,
+        'dstate': dstate,
+    }
+  # NOTE(review): the return annotation below writes
+  # `dict[str, jnp.ndarray, ...]`, i.e. three arguments to `dict[...]`;
+  # `dict[str, jnp.ndarray]` was presumably intended.
+  def generate(
+      self, inputs: ...
+  ) -> tuple[tuple[dict[str, jnp.ndarray, ...], ...], jnp.ndarray]:
+    """Runs the decoder over the given tokens to build up decoder state.
+    The token fed at each step is read from `inputs['targets']`; it is not
+    sampled from the logits.
+    Args:
+      inputs: the same as argument to `__call__`.
+    Returns:
+      A `(dstate, logits)` pair: the decoder state after the loop, and the
+      temperature-scaled logits of the last loop iteration (the all-zero
+      `dummy_logits` if the loop body never ran).
+    """
+    input_tokens = inputs['targets']  # [b,seq_len]
+    start_of_sequence = inputs['start_of_sequence']  # [b]
+    # Targets are the inputs shifted left by one, zero-padded at the end.
+    target_tokens = jnp.pad(input_tokens[:, 1:], [(0, 0), (0, 1)])
+    batch_size = target_tokens.shape[0]
+    # Assuming all sequences start at the same time.
+    start0 = inputs['start_of_sequence'][0]
+    # Fresh decoder state at a sequence start, caller-provided state otherwise.
+    dstate = jax.lax.cond(
+        start0,
+        lambda: self.decoder.init_decoder_state_vanilla(  # pylint: disable=g-long-lambda
+            1024, start_of_sequence
+        ),
+        lambda: inputs['dstate'],
+    )
+    first_token = input_tokens[:, 0:1]
+    no_start_of_seq = jnp.array([False] * batch_size, dtype=jnp.bool_)
+    # Temperature defaults to 1 when the caller does not provide one.
+    temperature = 1
+    if 'temperature' in inputs:
+      temperature = inputs['temperature']
+    num_steps = inputs['length']
+    # In beam_search mode stop one step early: `__call__` hands the last
+    # prompt token to beam search itself.
+    if self.mode == 'beam_search':
+      num_steps -= 1
+    def cond_fn(scan_state) -> jnp.bool_:
+      _, _, i, _ = scan_state
+      return i < num_steps
+    def loop_fn(scan_state: Any) -> Tuple[Any, Any, Any, Any]:
+      (dstate, input_token, i, _) = scan_state
+      (logits, dstate, _) = self.decoder(
+          input_tokens=input_token,
+          target_tokens=None,
+          start_of_sequence=no_start_of_seq,
+          decoder_state=dstate,
+      )
+      logits = logits / temperature
+      # The next input is the known token at position i of `target_tokens`.
+      output_token = jax.lax.dynamic_slice_in_dim(target_tokens, i, 1, axis=1)
+      return (dstate, output_token, i + 1, logits)
+    # Scan over the sequence length.
+    # `dummy_logits` fills the logits slot of the loop carry before the first
+    # step; its last dimension is hard-coded to 1024 (presumably the
+    # vocabulary size — TODO confirm).
+    dummy_logits = jnp.zeros((batch_size, 1, 1024))
+    initial_scan_state = (dstate, first_token, 0, dummy_logits)
+    dstate, _, _, logits = jax.lax.while_loop(
+        cond_fn, loop_fn, initial_scan_state
+    )
+    return dstate, logits

external/alphageometry/numericals.py ADDED Viewed

	@@ -0,0 +1,1921 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Numerical representation of geometry."""
+from __future__ import annotations
+import math
+from typing import Any, Optional, Union
+import geometry as gm
+import matplotlib
+from matplotlib import pyplot as plt
+import matplotlib.colors as mcolors
+import numpy as np
+from numpy.random import uniform as unif  # pylint: disable=g-importing-member
+# NOTE(review): the 'TkAgg' backend is selected as a side effect of importing
+# this module — confirm this is wanted on machines without Tk / a display.
+matplotlib.use('TkAgg')
+# Distance threshold below which `Point.is_same` treats two points as equal.
+ATOM = 1e-12
+# Some variables are there for better code reading.
+# pylint: disable=unused-assignment
+# pylint: disable=unused-argument
+# pylint: disable=unused-variable
+# Naming in geometry is a little different
+# we stick to geometry naming to better read the code.
+# pylint: disable=invalid-name
+class Point:
+  """Numerical point."""
+  def __init__(self, x, y):
+    self.x = x
+    self.y = y
+  # Ordering is lexicographic on (x, y).
+  def __lt__(self, other: Point) -> bool:
+    return (self.x, self.y) < (other.x, other.y)
+  def __gt__(self, other: Point) -> bool:
+    return (self.x, self.y) > (other.x, other.y)
+  # Arithmetic treats the point as a 2D vector: component-wise +/- with
+  # another point, and scaling/division by a scalar.
+  def __add__(self, p: Point) -> Point:
+    return Point(self.x + p.x, self.y + p.y)
+  def __sub__(self, p: Point) -> Point:
+    return Point(self.x - p.x, self.y - p.y)
+  def __mul__(self, f: float) -> Point:
+    return Point(self.x * f, self.y * f)
+  def __rmul__(self, f: float) -> Point:
+    return self * f
+  def __truediv__(self, f: float) -> Point:
+    return Point(self.x / f, self.y / f)
+  def __floordiv__(self, f: float) -> Point:
+    # NOTE(review): `int()` truncates toward zero, so for negative quotients
+    # this differs from a true floor division.
+    div = self / f  # true div
+    return Point(int(div.x), int(div.y))
+  def __str__(self) -> str:
+    return 'P({},{})'.format(self.x, self.y)
+  def close(self, point: Point, tol: float = 1e-12) -> bool:
+    """Returns True if both coordinates differ from `point`'s by less than tol."""
+    return abs(self.x - point.x) < tol and abs(self.y - point.y) < tol
+  def midpoint(self, p: Point) -> Point:
+    """Returns the point halfway between this point and `p`."""
+    return Point(0.5 * (self.x + p.x), 0.5 * (self.y + p.y))
+  def distance(self, p: Union[Point, Line, Circle]) -> float:
+    """Returns the distance from this point to a point, line or circle.
+    For a `Line` the computation is delegated to `Line.distance`; for a
+    `Circle` it is the absolute difference between the radius and the
+    distance to the center; otherwise the Euclidean distance to `p`.
+    """
+    if isinstance(p, Line):
+      return p.distance(self)
+    if isinstance(p, Circle):
+      return abs(p.radius - self.distance(p.center))
+    dx = self.x - p.x
+    dy = self.y - p.y
+    return np.sqrt(dx * dx + dy * dy)
+  def distance2(self, p: Point) -> float:
+    """Returns the squared Euclidean distance to the point `p`."""
+    # NOTE(review): for a `Line` argument the value of `Line.distance` is
+    # returned as-is; unless that method already returns a squared value,
+    # this branch is not squared like the point branch — confirm.
+    if isinstance(p, Line):
+      return p.distance(self)
+    dx = self.x - p.x
+    dy = self.y - p.y
+    return dx * dx + dy * dy
+  def rotatea(self, ang: float) -> Point:
+    """Rotates the point about the origin by the angle `ang` (via np.sin/cos)."""
+    sinb, cosb = np.sin(ang), np.cos(ang)
+    return self.rotate(sinb, cosb)
+  def rotate(self, sinb: float, cosb: float) -> Point:
+    """Applies the rotation matrix [[cosb, -sinb], [sinb, cosb]] to (x, y)."""
+    x, y = self.x, self.y
+    return Point(x * cosb - y * sinb, x * sinb + y * cosb)
+  def flip(self) -> Point:
+    """Returns the point with its x coordinate negated."""
+    return Point(-self.x, self.y)
+  def perpendicular_line(self, line: Line) -> Line:
+    """Delegates to `line.perpendicular_line(self)`."""
+    return line.perpendicular_line(self)
+  def foot(self, line: Line) -> Point:
+    """Returns the foot of this point on a line or circle.
+    Args:
+      line: a `Line` or, despite the annotation, a `Circle`.
+    Returns:
+      For a `Line`, its intersection with `line.perpendicular_line(self)`.
+      For a `Circle`, the center moved by `radius` in the direction of this
+      point.
+    Raises:
+      ValueError: if `line` is neither a `Line` nor a `Circle`.
+    """
+    if isinstance(line, Line):
+      l = line.perpendicular_line(self)
+      return line_line_intersection(l, line)
+    elif isinstance(line, Circle):
+      c, r = line.center, line.radius
+      # Divides by the distance to the center, so this point must not
+      # coincide with the center.
+      return c + (self - c) * r / self.distance(c)
+    raise ValueError('Dropping foot to weird type {}'.format(type(line)))
+  def parallel_line(self, line: Line) -> Line:
+    """Delegates to `line.parallel_line(self)`."""
+    return line.parallel_line(self)
+  def norm(self) -> float:
+    """Returns the Euclidean length of (x, y)."""
+    return np.sqrt(self.x**2 + self.y**2)
+  def cos(self, other: Point) -> float:
+    """Returns the dot product of the two points divided by both norms."""
+    x, y = self.x, self.y
+    a, b = other.x, other.y
+    return (x * a + y * b) / self.norm() / other.norm()
+  def dot(self, other: Point) -> float:
+    """Returns the dot product of the two points taken as vectors."""
+    return self.x * other.x + self.y * other.y
+  def sign(self, line: Line) -> int:
+    """Delegates to `line.sign(self)`."""
+    return line.sign(self)
+  def is_same(self, other: Point) -> bool:
+    """Returns True if the distance to `other` is at most `ATOM`."""
+    return self.distance(other) <= ATOM
+class Line:
+  """Numerical line."""
+  def __init__(
+      self,
+      p1: Point = None,
+      p2: Point = None,
+      coefficients: tuple[int, int, int] = None,
+  ):
+    if p1 is None and p2 is None and coefficients is None:
+      self.coefficients = None, None, None
+      return
+    a, b, c = coefficients or (
+        p1.y - p2.y,
+        p2.x - p1.x,
+        p1.x * p2.y - p2.x * p1.y,
+    )
+    # Make sure a is always positive (or always negative for that matter)
+    # With a == 0, Assuming a = +epsilon > 0
+    # Then b such that ax + by = 0 with y>0 should be negative.
+    if a < 0.0 or a == 0.0 and b > 0.0:
+      a, b, c = -a, -b, -c
+    self.coefficients = a, b, c
+  def parallel_line(self, p: Point) -> Line:
+    a, b, _ = self.coefficients
+    return Line(coefficients=(a, b, -a * p.x - b * p.y))  # pylint: disable=invalid-unary-operand-type
+  def perpendicular_line(self, p: Point) -> Line:
+    a, b, _ = self.coefficients
+    return Line(p, p + Point(a, b))
+  def greater_than(self, other: Line) -> bool:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    # b/a > y/x
+    return b * x > a * y
+  def __gt__(self, other: Line) -> bool:
+    return self.greater_than(other)
+  def __lt__(self, other: Line) -> bool:
+    return other.greater_than(self)
+  def same(self, other: Line) -> bool:
+    a, b, c = self.coefficients
+    x, y, z = other.coefficients
+    return close_enough(a * y, b * x) and close_enough(b * z, c * y)
+  def equal(self, other: Line) -> bool:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    # b/a == y/x
+    return b * x == a * y
+  def less_than(self, other: Line) -> bool:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    # b/a > y/x
+    return b * x < a * y
+  def intersect(self, obj: Union[Line, Circle]) -> tuple[Point, ...]:
+    if isinstance(obj, Line):
+      return line_line_intersection(self, obj)
+    if isinstance(obj, Circle):
+      return line_circle_intersection(self, obj)
+  def distance(self, p: Point) -> float:
+    a, b, c = self.coefficients
+    return abs(self(p.x, p.y)) / math.sqrt(a * a + b * b)
+  def __call__(self, x: Point, y: Point = None) -> float:
+    if isinstance(x, Point) and y is None:
+      return self(x.x, x.y)
+    a, b, c = self.coefficients
+    return x * a + y * b + c
+  def is_parallel(self, other: Line) -> bool:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    return abs(a * y - b * x) < ATOM
+  def is_perp(self, other: Line) -> bool:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    return abs(a * x + b * y) < ATOM
+  def cross(self, other: Line) -> float:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    return a * y - b * x
+  def dot(self, other: Line) -> float:
+    a, b, _ = self.coefficients
+    x, y, _ = other.coefficients
+    return a * x + b * y
+  def point_at(self, x: float = None, y: float = None) -> Optional[Point]:
+    """Get a point on line closest to (x, y)."""
+    a, b, c = self.coefficients
+    # ax + by + c = 0
+    if x is None and y is not None:
+      if a != 0:
+        return Point((-c - b * y) / a, y)  # pylint: disable=invalid-unary-operand-type
+      else:
+        return None
+    elif x is not None and y is None:
+      if b != 0:
+        return Point(x, (-c - a * x) / b)  # pylint: disable=invalid-unary-operand-type
+      else:
+        return None
+    elif x is not None and y is not None:
+      if a * x + b * y + c == 0.0:
+        return Point(x, y)
+    return None
+  def diff_side(self, p1: Point, p2: Point) -> Optional[bool]:
+    d1 = self(p1.x, p1.y)
+    d2 = self(p2.x, p2.y)
+    if d1 == 0 or d2 == 0:
+      return None
+    return d1 * d2 < 0
+  def same_side(self, p1: Point, p2: Point) -> Optional[bool]:
+    d1 = self(p1.x, p1.y)
+    d2 = self(p2.x, p2.y)
+    if d1 == 0 or d2 == 0:
+      return None
+    return d1 * d2 > 0
+  def sign(self, point: Point) -> int:
+    s = self(point.x, point.y)
+    if s > 0:
+      return 1
+    elif s < 0:
+      return -1
+    return 0
+  def is_same(self, other: Line) -> bool:
+    a, b, c = self.coefficients
+    x, y, z = other.coefficients
+    return abs(a * y - b * x) <= ATOM and abs(b * z - c * y) <= ATOM
+  def sample_within(self, points: list[Point], n: int = 5) -> list[Point]:
+    """Sample a point within the boundary of points."""
+    center = sum(points, Point(0.0, 0.0)) * (1.0 / len(points))
+    radius = max([p.distance(center) for p in points])
+    if close_enough(center.distance(self), radius):
+      center = center.foot(self)
+    a, b = line_circle_intersection(self, Circle(center.foot(self), radius))
+    result = None
+    best = -1.0
+    for _ in range(n):
+      rand = unif(0.0, 1.0)
+      x = a + (b - a) * rand
+      mind = min([x.distance(p) for p in points])
+      if mind > best:
+        best = mind
+        result = x
+    return [result]
+class InvalidLineIntersectError(Exception):
+  pass
+class HalfLine(Line):
+  """Numerical ray."""
+  def __init__(self, tail: Point, head: Point):  # pylint: disable=super-init-not-called
+    self.line = Line(tail, head)
+    self.coefficients = self.line.coefficients
+    self.tail = tail
+    self.head = head
+  def intersect(self, obj: Union[Line, HalfLine, Circle, HoleCircle]) -> Point:
+    if isinstance(obj, (HalfLine, Line)):
+      return line_line_intersection(self.line, obj)
+    exclude = [self.tail]
+    if isinstance(obj, HoleCircle):
+      exclude += [obj.hole]
+    a, b = line_circle_intersection(self.line, obj)
+    if any([a.close(x) for x in exclude]):
+      return b
+    if any([b.close(x) for x in exclude]):
+      return a
+    v = self.head - self.tail
+    va = a - self.tail
+    vb = b - self.tail
+    if v.dot(va) > 0:
+      return a
+    if v.dot(vb) > 0:
+      return b
+    raise InvalidLineIntersectError()
+  def sample_within(self, points: list[Point], n: int = 5) -> list[Point]:
+    center = sum(points, Point(0.0, 0.0)) * (1.0 / len(points))
+    radius = max([p.distance(center) for p in points])
+    if close_enough(center.distance(self.line), radius):
+      center = center.foot(self)
+    a, b = line_circle_intersection(self, Circle(center.foot(self), radius))
+    if (a - self.tail).dot(self.head - self.tail) > 0:
+      a, b = self.tail, a
+    else:
+      a, b = self.tail, b  # pylint: disable=self-assigning-variable
+    result = None
+    best = -1.0
+    for _ in range(n):
+      x = a + (b - a) * unif(0.0, 1.0)
+      mind = min([x.distance(p) for p in points])
+      if mind > best:
+        best = mind
+        result = x
+    return [result]
+def _perpendicular_bisector(p1: Point, p2: Point) -> Line:
+  midpoint = (p1 + p2) * 0.5
+  return Line(midpoint, midpoint + Point(p2.y - p1.y, p1.x - p2.x))
+def same_sign(
+    a: Point, b: Point, c: Point, d: Point, e: Point, f: Point
+) -> bool:
+  a, b, c, d, e, f = map(lambda p: p.sym, [a, b, c, d, e, f])
+  ab, cb = a - b, c - b
+  de, fe = d - e, f - e
+  return (ab.x * cb.y - ab.y * cb.x) * (de.x * fe.y - de.y * fe.x) > 0
+class Circle:
+  """Numerical circle."""
+  def __init__(
+      self,
+      center: Optional[Point] = None,
+      radius: Optional[float] = None,
+      p1: Optional[Point] = None,
+      p2: Optional[Point] = None,
+      p3: Optional[Point] = None,
+  ):
+    if not center:
+      if not (p1 and p2 and p3):
+        self.center = self.radius = self.r2 = None
+        return
+        # raise ValueError('Circle without center need p1 p2 p3')
+      l12 = _perpendicular_bisector(p1, p2)
+      l23 = _perpendicular_bisector(p2, p3)
+      center = line_line_intersection(l12, l23)
+    self.center = center
+    self.a, self.b = center.x, center.y
+    if not radius:
+      if not (p1 or p2 or p3):
+        raise ValueError('Circle needs radius or p1 or p2 or p3')
+      p = p1 or p2 or p3
+      self.r2 = (self.a - p.x) ** 2 + (self.b - p.y) ** 2
+      self.radius = math.sqrt(self.r2)
+    else:
+      self.radius = radius
+      self.r2 = radius * radius
+  def intersect(self, obj: Union[Line, Circle]) -> tuple[Point, ...]:
+    if isinstance(obj, Line):
+      return obj.intersect(self)
+    if isinstance(obj, Circle):
+      return circle_circle_intersection(self, obj)
+  def sample_within(self, points: list[Point], n: int = 5) -> list[Point]:
+    """Sample a point within the boundary of points."""
+    result = None
+    best = -1.0
+    for _ in range(n):
+      ang = unif(0.0, 2.0) * np.pi
+      x = self.center + Point(np.cos(ang), np.sin(ang)) * self.radius
+      mind = min([x.distance(p) for p in points])
+      if mind > best:
+        best = mind
+        result = x
+    return [result]
+class HoleCircle(Circle):
+  """Numerical circle with a missing point."""
+  def __init__(self, center: Point, radius: float, hole: Point):
+    super().__init__(center, radius)
+    self.hole = hole
+  def intersect(self, obj: Union[Line, HalfLine, Circle, HoleCircle]) -> Point:
+    if isinstance(obj, Line):
+      a, b = line_circle_intersection(obj, self)
+      if a.close(self.hole):
+        return b
+      return a
+    if isinstance(obj, HalfLine):
+      return obj.intersect(self)
+    if isinstance(obj, Circle):
+      a, b = circle_circle_intersection(obj, self)
+      if a.close(self.hole):
+        return b
+      return a
+    if isinstance(obj, HoleCircle):
+      a, b = circle_circle_intersection(obj, self)
+      if a.close(self.hole) or a.close(obj.hole):
+        return b
+      return a
+def solve_quad(a: float, b: float, c: float) -> tuple[float, float]:
+  """Solve a x^2 + bx + c = 0."""
+  a = 2 * a
+  d = b * b - 2 * a * c
+  if d < 0:
+    return None  # the caller should expect this result.
+  y = math.sqrt(d)
+  return (-b - y) / a, (-b + y) / a
+def circle_circle_intersection(c1: Circle, c2: Circle) -> tuple[Point, Point]:
+  """Returns a pair of Points as intersections of c1 and c2."""
+  # circle 1: (x0, y0), radius r0
+  # circle 2: (x1, y1), radius r1
+  x0, y0, r0 = c1.a, c1.b, c1.radius
+  x1, y1, r1 = c2.a, c2.b, c2.radius
+  d = math.sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)
+  if d == 0:
+    raise InvalidQuadSolveError()
+  a = (r0**2 - r1**2 + d**2) / (2 * d)
+  h = r0**2 - a**2
+  if h < 0:
+    raise InvalidQuadSolveError()
+  h = np.sqrt(h)
+  x2 = x0 + a * (x1 - x0) / d
+  y2 = y0 + a * (y1 - y0) / d
+  x3 = x2 + h * (y1 - y0) / d
+  y3 = y2 - h * (x1 - x0) / d
+  x4 = x2 - h * (y1 - y0) / d
+  y4 = y2 + h * (x1 - x0) / d
+  return Point(x3, y3), Point(x4, y4)
+class InvalidQuadSolveError(Exception):
+  pass
+def line_circle_intersection(line: Line, circle: Circle) -> tuple[Point, Point]:
+  """Returns a pair of points as intersections of line and circle."""
+  a, b, c = line.coefficients
+  r = float(circle.radius)
+  center = circle.center
+  p, q = center.x, center.y
+  if b == 0:
+    x = -c / a
+    x_p = x - p
+    x_p2 = x_p * x_p
+    y = solve_quad(1, -2 * q, q * q + x_p2 - r * r)
+    if y is None:
+      raise InvalidQuadSolveError()
+    y1, y2 = y
+    return (Point(x, y1), Point(x, y2))
+  if a == 0:
+    y = -c / b
+    y_q = y - q
+    y_q2 = y_q * y_q
+    x = solve_quad(1, -2 * p, p * p + y_q2 - r * r)
+    if x is None:
+      raise InvalidQuadSolveError()
+    x1, x2 = x
+    return (Point(x1, y), Point(x2, y))
+  c_ap = c + a * p
+  a2 = a * a
+  y = solve_quad(
+      a2 + b * b, 2 * (b * c_ap - a2 * q), c_ap * c_ap + a2 * (q * q - r * r)
+  )
+  if y is None:
+    raise InvalidQuadSolveError()
+  y1, y2 = y
+  return Point(-(b * y1 + c) / a, y1), Point(-(b * y2 + c) / a, y2)
+def _check_between(a: Point, b: Point, c: Point) -> bool:
+  """Whether a is between b & c."""
+  return (a - b).dot(c - b) > 0 and (a - c).dot(b - c) > 0
+def circle_segment_intersect(
+    circle: Circle, p1: Point, p2: Point
+) -> list[Point]:
+  l = Line(p1, p2)
+  px, py = line_circle_intersection(l, circle)
+  result = []
+  if _check_between(px, p1, p2):
+    result.append(px)
+  if _check_between(py, p1, p2):
+    result.append(py)
+  return result
+def line_segment_intersection(l: Line, A: Point, B: Point) -> Point:  # pylint: disable=invalid-name
+  a, b, c = l.coefficients
+  x1, y1, x2, y2 = A.x, A.y, B.x, B.y
+  dx, dy = x2 - x1, y2 - y1
+  alpha = (-c - a * x1 - b * y1) / (a * dx + b * dy)
+  return Point(x1 + alpha * dx, y1 + alpha * dy)
+def line_line_intersection(l1: Line, l2: Line) -> Point:
+  a1, b1, c1 = l1.coefficients
+  a2, b2, c2 = l2.coefficients
+  # a1x + b1y + c1 = 0
+  # a2x + b2y + c2 = 0
+  d = a1 * b2 - a2 * b1
+  if d == 0:
+    raise InvalidLineIntersectError
+  return Point((c2 * b1 - c1 * b2) / d, (c1 * a2 - c2 * a1) / d)
+def check_too_close(
+    newpoints: list[Point], points: list[Point], tol: int = 0.1
+) -> bool:
+  if not points:
+    return False
+  avg = sum(points, Point(0.0, 0.0)) * 1.0 / len(points)
+  mindist = min([p.distance(avg) for p in points])
+  for p0 in newpoints:
+    for p1 in points:
+      if p0.distance(p1) < tol * mindist:
+        return True
+  return False
+def check_too_far(
+    newpoints: list[Point], points: list[Point], tol: int = 4
+) -> bool:
+  if len(points) < 2:
+    return False
+  avg = sum(points, Point(0.0, 0.0)) * 1.0 / len(points)
+  maxdist = max([p.distance(avg) for p in points])
+  for p in newpoints:
+    if p.distance(avg) > maxdist * tol:
+      return True
+  return False
+def check_aconst(args: list[Point]) -> bool:
+  a, b, c, d, num, den = args
+  d = d + a - c
+  ang = ang_between(a, b, d)
+  if ang < 0:
+    ang += np.pi
+  return close_enough(ang, num * np.pi / den)
+def check(name: str, args: list[Union[gm.Point, Point]]) -> bool:
+  """Numerical check."""
+  if name == 'eqangle6':
+    name = 'eqangle'
+  elif name == 'eqratio6':
+    name = 'eqratio'
+  elif name in ['simtri2', 'simtri*']:
+    name = 'simtri'
+  elif name in ['contri2', 'contri*']:
+    name = 'contri'
+  elif name == 'para':
+    name = 'para_or_coll'
+  elif name == 'on_line':
+    name = 'coll'
+  elif name in ['rcompute', 'acompute']:
+    return True
+  elif name in ['fixl', 'fixc', 'fixb', 'fixt', 'fixp']:
+    return True
+  fn_name = 'check_' + name
+  if fn_name not in globals():
+    return None
+  fun = globals()['check_' + name]
+  args = [p.num if isinstance(p, gm.Point) else p for p in args]
+  return fun(args)
+def check_circle(points: list[Point]) -> bool:
+  if len(points) != 4:
+    return False
+  o, a, b, c = points
+  oa, ob, oc = o.distance(a), o.distance(b), o.distance(c)
+  return close_enough(oa, ob) and close_enough(ob, oc)
+def check_coll(points: list[Point]) -> bool:
+  a, b = points[:2]
+  l = Line(a, b)
+  for p in points[2:]:
+    if abs(l(p.x, p.y)) > ATOM:
+      return False
+  return True
+def check_ncoll(points: list[Point]) -> bool:
+  return not check_coll(points)
+def check_sameside(points: list[Point]) -> bool:
+  b, a, c, y, x, z = points
+  # whether b is to the same side of a & c as y is to x & z
+  ba = b - a
+  bc = b - c
+  yx = y - x
+  yz = y - z
+  return ba.dot(bc) * yx.dot(yz) > 0
+def check_para_or_coll(points: list[Point]) -> bool:
+  return check_para(points) or check_coll(points)
+def check_para(points: list[Point]) -> bool:
+  a, b, c, d = points
+  ab = Line(a, b)
+  cd = Line(c, d)
+  if ab.same(cd):
+    return False
+  return ab.is_parallel(cd)
+def check_perp(points: list[Point]) -> bool:
+  a, b, c, d = points
+  ab = Line(a, b)
+  cd = Line(c, d)
+  return ab.is_perp(cd)
+def check_cyclic(points: list[Point]) -> bool:
+  points = list(set(points))
+  (a, b, c), *ps = points
+  circle = Circle(p1=a, p2=b, p3=c)
+  for d in ps:
+    if not close_enough(d.distance(circle.center), circle.radius):
+      return False
+  return True
+def bring_together(
+    a: Point, b: Point, c: Point, d: Point
+) -> tuple[Point, Point, Point, Point]:
+  ab = Line(a, b)
+  cd = Line(c, d)
+  x = line_line_intersection(ab, cd)
+  unit = Circle(center=x, radius=1.0)
+  y, _ = line_circle_intersection(ab, unit)
+  z, _ = line_circle_intersection(cd, unit)
+  return x, y, x, z
+def same_clock(
+    a: Point, b: Point, c: Point, d: Point, e: Point, f: Point
+) -> bool:
+  ba = b - a
+  cb = c - b
+  ed = e - d
+  fe = f - e
+  return (ba.x * cb.y - ba.y * cb.x) * (ed.x * fe.y - ed.y * fe.x) > 0
+def check_const_angle(points: list[Point]) -> bool:
+  """Check if the angle is equal to the given constant."""
+  a, b, c, d, m, n = points
+  a, b, c, d = bring_together(a, b, c, d)
+  ba = b - a
+  dc = d - c
+  a3 = np.arctan2(ba.y, ba.x)
+  a4 = np.arctan2(dc.y, dc.x)
+  y = a3 - a4
+  return close_enough(m / n % 1, y / np.pi % 1)
+def check_eqangle(points: list[Point]) -> bool:
+  """Check if 8 points make 2 equal angles.
+
+  `points` is (a, b, c, d, e, f, g, h); the angle between lines ab and cd is
+  compared with the angle between lines ef and gh.
+  """
+  a, b, c, d, e, f, g, h = points
+  ab = Line(a, b)
+  cd = Line(c, d)
+  ef = Line(e, f)
+  gh = Line(g, h)
+  # If one pair of lines is parallel, the angles are equal only when the other
+  # pair is parallel as well.
+  if ab.is_parallel(cd):
+    return ef.is_parallel(gh)
+  if ef.is_parallel(gh):
+    return ab.is_parallel(cd)
+  # Replace each pair of lines by unit segments from their intersection point.
+  a, b, c, d = bring_together(a, b, c, d)
+  e, f, g, h = bring_together(e, f, g, h)
+  ba = b - a
+  dc = d - c
+  fe = f - e
+  hg = h - g
+  # Compare orientations via the signs of the two cross products; when they
+  # differ, reverse ba so both angles are measured in the same direction.
+  sameclock = (ba.x * dc.y - ba.y * dc.x) * (fe.x * hg.y - fe.y * hg.x) > 0
+  if not sameclock:
+    ba = ba * -1.0
+  # x: directed angle from hg to fe; y: directed angle from dc to ba.
+  a1 = np.arctan2(fe.y, fe.x)
+  a2 = np.arctan2(hg.y, hg.x)
+  x = a1 - a2
+  a3 = np.arctan2(ba.y, ba.x)
+  a4 = np.arctan2(dc.y, dc.x)
+  y = a3 - a4
+  # Equal angles differ by a multiple of 2*pi; after the modulo the difference
+  # sits near either end of [0, 2*pi).
+  xy = (x - y) % (2 * np.pi)
+  return close_enough(xy, 0, tol=1e-11) or close_enough(
+      xy, 2 * np.pi, tol=1e-11
+  )
+def check_eqratio(points: list[Point]) -> bool:
+  a, b, c, d, e, f, g, h = points
+  ab = a.distance(b)
+  cd = c.distance(d)
+  ef = e.distance(f)
+  gh = g.distance(h)
+  return close_enough(ab * gh, cd * ef)
+def check_cong(points: list[Point]) -> bool:
+  a, b, c, d = points
+  return close_enough(a.distance(b), c.distance(d))
+def check_midp(points: list[Point]) -> bool:
+  a, b, c = points
+  return check_coll(points) and close_enough(a.distance(b), a.distance(c))
+def check_simtri(points: list[Point]) -> bool:
+  """Check if 6 points make a pair of similar triangles."""
+  a, b, c, x, y, z = points
+  ab = a.distance(b)
+  bc = b.distance(c)
+  ca = c.distance(a)
+  xy = x.distance(y)
+  yz = y.distance(z)
+  zx = z.distance(x)
+  tol = 1e-9
+  return close_enough(ab * yz, bc * xy, tol) and close_enough(
+      bc * zx, ca * yz, tol
+  )
+def check_contri(points: list[Point]) -> bool:
+  a, b, c, x, y, z = points
+  ab = a.distance(b)
+  bc = b.distance(c)
+  ca = c.distance(a)
+  xy = x.distance(y)
+  yz = y.distance(z)
+  zx = z.distance(x)
+  tol = 1e-9
+  return (
+      close_enough(ab, xy, tol)
+      and close_enough(bc, yz, tol)
+      and close_enough(ca, zx, tol)
+  )
+def check_ratio(points: list[Point]) -> bool:
+  a, b, c, d, m, n = points
+  ab = a.distance(b)
+  cd = c.distance(d)
+  return close_enough(ab * n, cd * m)
+def draw_angle(
+    ax: matplotlib.axes.Axes,
+    head: Point,
+    p1: Point,
+    p2: Point,
+    color: Any = 'red',
+    alpha: float = 0.5,
+    frac: float = 1.0,
+) -> None:
+  """Draw an angle on plt ax.
+
+  The angle has its vertex at `head` and sides toward `p1` and `p2`; it is
+  rendered as a wedge patch whose radius is randomised slightly and scaled by
+  `frac` and by the size of the angle.
+  """
+  d1 = p1 - head
+  d2 = p2 - head
+  # Directions of both sides, in degrees within [0, 360).
+  a1 = np.arctan2(float(d1.y), float(d1.x))
+  a2 = np.arctan2(float(d2.y), float(d2.x))
+  a1, a2 = a1 * 180 / np.pi, a2 * 180 / np.pi
+  a1, a2 = a1 % 360, a2 % 360
+  # Order the two directions, then swap again if the span exceeds 180 degrees
+  # so that the wedge covers the smaller of the two angles.
+  if a1 > a2:
+    a1, a2 = a2, a1
+  if a2 - a1 > 180:
+    a1, a2 = a2, a1
+  # d is the angular span from a1 to a2, unwrapped across 360 if needed.
+  b1, b2 = a1, a2
+  if b1 > b2:
+    b2 += 360
+  d = b2 - b1
+  # if d >= 90:
+  #   return
+  # Smaller angles get a larger wedge radius; the factor is clamped to
+  # the range [0.4, 2.0].
+  scale = min(2.0, 90 / d)
+  scale = max(scale, 0.4)
+  fov = matplotlib.patches.Wedge(
+      (float(head.x), float(head.y)),
+      unif(0.075, 0.125) * scale * frac,
+      a1,
+      a2,
+      color=color,
+      alpha=alpha,
+  )
+  ax.add_artist(fov)
+def naming_position(
+    ax: matplotlib.axes.Axes, p: Point, lines: list[Line], circles: list[Circle]
+) -> tuple[float, float]:
+  """Figure out a good naming position on the drawing.
+
+  A small circle of radius 0.08 is placed around `p`; the label goes in the
+  middle of the widest angular gap between the places where the given lines
+  and circles cross that small circle.
+
+  NOTE(review): the early return below yields a list while the final return
+  yields a tuple - confirm callers accept both.
+  """
+  _ = ax  # unused
+  r = 0.08
+  c = Circle(center=p, radius=r)
+  # Collect every crossing of the small circle with drawn lines and circles.
+  avoid = []
+  # Each entry of `lines` is unpacked as a pair of segment endpoints.
+  for p1, p2 in lines:
+    try:
+      avoid.extend(circle_segment_intersect(c, p1, p2))
+    except InvalidQuadSolveError:
+      continue
+  for x in circles:
+    try:
+      avoid.extend(circle_circle_intersection(c, x))
+    except InvalidQuadSolveError:
+      continue
+  # Nothing nearby: put the label just up and to the right of the point.
+  if not avoid:
+    return [p.x + 0.01, p.y + 0.01]
+  # Sort the crossing directions, close the loop by repeating the first one
+  # plus 2*pi, then turn them into (gap size, gap start) pairs.
+  angs = sorted([ang_of(p, a) for a in avoid])
+  angs += [angs[0] + 2 * np.pi]
+  angs = [(angs[i + 1] - a, a) for i, a in enumerate(angs[:-1])]
+  # Aim at the middle of the largest gap.
+  d, a = max(angs)
+  ang = a + d / 2
+  name_pos = p + Point(np.cos(ang), np.sin(ang)) * r
+  # Shift the position down and to the left by r / 1.5 on each axis.
+  x, y = (name_pos.x - r / 1.5, name_pos.y - r / 1.5)
+  return x, y
+def draw_point(
+    ax: matplotlib.axes.Axes,
+    p: Point,
+    name: str,
+    lines: list[Line],
+    circles: list[Circle],
+    color: Any = 'white',
+    size: float = 15,
+) -> None:
+  """draw a point."""
+  ax.scatter(p.x, p.y, color=color, s=size)
+  if color == 'white':
+    color = 'lightgreen'
+  else:
+    color = 'grey'
+  name = name.upper()
+  if len(name) > 1:
+    name = name[0] + '_' + name[1:]
+  ax.annotate(
+      name, naming_position(ax, p, lines, circles), color=color, fontsize=15
+  )
+def _draw_line(
+    ax: matplotlib.axes.Axes,
+    p1: Point,
+    p2: Point,
+    color: Any = 'white',
+    lw: float = 1.2,
+    alpha: float = 0.8,
+) -> None:
+  """Draw a line in matplotlib."""
+  ls = '-'
+  if color == '--':
+    color = 'black'
+    ls = '--'
+  lx, ly = (p1.x, p2.x), (p1.y, p2.y)
+  ax.plot(lx, ly, color=color, lw=lw, alpha=alpha, ls=ls)
+def draw_line(
+    ax: matplotlib.axes.Axes, line: Line, color: Any = 'white'
+) -> tuple[Point, Point]:
+  """Draw a line."""
+  points = line.neighbors(gm.Point)
+  if len(points) <= 1:
+    return
+  points = [p.num for p in points]
+  p1, p2 = points[:2]
+  pmin, pmax = (p1, 0.0), (p2, (p2 - p1).dot(p2 - p1))
+  for p in points[2:]:
+    v = (p - p1).dot(p2 - p1)
+    if v < pmin[1]:
+      pmin = p, v
+    if v > pmax[1]:
+      pmax = p, v
+  p1, p2 = pmin[0], pmax[0]
+  _draw_line(ax, p1, p2, color=color)
+  return p1, p2
+def _draw_circle(
+    ax: matplotlib.axes.Axes, c: Circle, color: Any = 'cyan', lw: float = 1.2
+) -> None:
+  ls = '-'
+  if color == '--':
+    color = 'black'
+    ls = '--'
+  ax.add_patch(
+      plt.Circle(
+          (c.center.x, c.center.y),
+          c.radius,
+          color=color,
+          alpha=0.8,
+          fill=False,
+          lw=lw,
+          ls=ls,
+      )
+  )
+def draw_circle(
+    ax: matplotlib.axes.Axes, circle: Circle, color: Any = 'cyan'
+) -> Circle:
+  """Draw a circle."""
+  if circle.num is not None:
+    circle = circle.num
+  else:
+    points = circle.neighbors(gm.Point)
+    if len(points) <= 2:
+      return
+    points = [p.num for p in points]
+    p1, p2, p3 = points[:3]
+    circle = Circle(p1=p1, p2=p2, p3=p3)
+  _draw_circle(ax, circle, color)
+  return circle
+def mark_segment(
+    ax: matplotlib.axes.Axes, p1: Point, p2: Point, color: Any, alpha: float
+) -> None:
+  _ = alpha
+  x, y = (p1.x + p2.x) / 2, (p1.y + p2.y) / 2
+  ax.scatter(x, y, color=color, alpha=1.0, marker='o', s=50)
+def highlight_angle(
+    ax: matplotlib.axes.Axes,
+    a: Point,
+    b: Point,
+    c: Point,
+    d: Point,
+    color: Any,
+    alpha: float,
+) -> None:
+  """Highlight an angle between ab and cd with (color, alpha)."""
+  try:
+    a, b, c, d = bring_together(a, b, c, d)
+  except:  # pylint: disable=bare-except
+    return
+  draw_angle(ax, a, b, d, color=color, alpha=alpha, frac=1.0)
+def highlight(
+    ax: matplotlib.axes.Axes,
+    name: str,
+    args: list[gm.Point],
+    lcolor: Any,
+    color1: Any,
+    color2: Any,
+) -> None:
+  """Draw highlights.
+
+  Emphasises the objects involved in predicate `name` applied to `args`.
+  `color1` and `color2` colour the first and second object of the predicate;
+  `lcolor` is used only for the sides of the angles in 'eqangle'. Predicate
+  names not listed below draw nothing.
+  """
+  # Symbolic points are replaced by their numerical counterparts.
+  args = list(map(lambda x: x.num if isinstance(x, gm.Point) else x, args))
+  if name == 'cyclic':
+    # The circle through the first three points.
+    a, b, c, d = args
+    _draw_circle(ax, Circle(p1=a, p2=b, p3=c), color=color1, lw=2.0)
+  if name == 'coll':
+    # The segment between the two extreme points in Point ordering.
+    a, b, c = args
+    a, b = max(a, b, c), min(a, b, c)
+    _draw_line(ax, a, b, color=color1, lw=2.0)
+  if name == 'para':
+    a, b, c, d = args
+    _draw_line(ax, a, b, color=color1, lw=2.0)
+    _draw_line(ax, c, d, color=color2, lw=2.0)
+  if name == 'eqangle':
+    a, b, c, d, e, f, g, h = args
+    # First angle: vertex x is where ab meets cd. On each line, keep the
+    # endpoint farther from the vertex.
+    x = line_line_intersection(Line(a, b), Line(c, d))
+    if b.distance(x) > a.distance(x):
+      a, b = b, a
+    if d.distance(x) > c.distance(x):
+      c, d = d, c
+    # From here on: a is the vertex, b and d are the ends of the two sides.
+    a, b, d = x, a, c
+    # Second angle: same treatment with vertex y on lines ef and gh.
+    y = line_line_intersection(Line(e, f), Line(g, h))
+    if f.distance(y) > e.distance(y):
+      e, f = f, e
+    if h.distance(y) > g.distance(y):
+      g, h = h, g
+    e, f, h = y, e, g
+    _draw_line(ax, a, b, color=lcolor, lw=2.0)
+    _draw_line(ax, a, d, color=lcolor, lw=2.0)
+    _draw_line(ax, e, f, color=lcolor, lw=2.0)
+    _draw_line(ax, e, h, color=lcolor, lw=2.0)
+    # '--' is a line-style marker, not a colour; angles fall back to red.
+    if color1 == '--':
+      color1 = 'red'
+    draw_angle(ax, a, b, d, color=color1, alpha=0.5)
+    if color2 == '--':
+      color2 = 'red'
+    draw_angle(ax, e, f, h, color=color2, alpha=0.5)
+  if name == 'perp':
+    # Both lines share color1.
+    a, b, c, d = args
+    _draw_line(ax, a, b, color=color1, lw=2.0)
+    _draw_line(ax, c, d, color=color1, lw=2.0)
+  if name == 'ratio':
+    # m and n (the ratio constants) are unpacked but not drawn.
+    a, b, c, d, m, n = args
+    _draw_line(ax, a, b, color=color1, lw=2.0)
+    _draw_line(ax, c, d, color=color2, lw=2.0)
+  if name == 'cong':
+    a, b, c, d = args
+    _draw_line(ax, a, b, color=color1, lw=2.0)
+    _draw_line(ax, c, d, color=color2, lw=2.0)
+  if name == 'midp':
+    # The two halves am and bm of the segment.
+    m, a, b = args
+    _draw_line(ax, a, m, color=color1, lw=2.0, alpha=0.5)
+    _draw_line(ax, b, m, color=color2, lw=2.0, alpha=0.5)
+  if name == 'eqratio':
+    # Segments ab and mn use color1; cd and pq use color2.
+    a, b, c, d, m, n, p, q = args
+    _draw_line(ax, a, b, color=color1, lw=2.0, alpha=0.5)
+    _draw_line(ax, c, d, color=color2, lw=2.0, alpha=0.5)
+    _draw_line(ax, m, n, color=color1, lw=2.0, alpha=0.5)
+    _draw_line(ax, p, q, color=color2, lw=2.0, alpha=0.5)
+HCOLORS = None
+def _draw(
+    ax: matplotlib.axes.Axes,
+    points: list[gm.Point],
+    lines: list[gm.Line],
+    circles: list[gm.Circle],
+    goal: Any,
+    equals: list[tuple[Any, Any]],
+    highlights: list[tuple[str, list[gm.Point]]],
+):
+  """Draw everything."""
+  colors = ['red', 'green', 'blue', 'orange', 'magenta', 'purple']
+  pcolor = 'black'
+  lcolor = 'black'
+  ccolor = 'grey'
+  if get_theme() == 'dark':
+    pcolor, lcolor, ccolor = 'white', 'white', 'cyan'
+  elif get_theme() == 'light':
+    pcolor, lcolor, ccolor = 'black', 'black', 'blue'
+  elif get_theme() == 'grey':
+    pcolor, lcolor, ccolor = 'black', 'black', 'grey'
+    colors = ['grey']
+  line_boundaries = []
+  for l in lines:
+    p1, p2 = draw_line(ax, l, color=lcolor)
+    line_boundaries.append((p1, p2))
+  circles = [draw_circle(ax, c, color=ccolor) for c in circles]
+  for p in points:
+    draw_point(ax, p.num, p.name, line_boundaries, circles, color=pcolor)
+  if equals:
+    for i, segs in enumerate(equals['segments']):
+      color = colors[i % len(colors)]
+      for a, b in segs:
+        mark_segment(ax, a, b, color, 0.5)
+    for i, angs in enumerate(equals['angles']):
+      color = colors[i % len(colors)]
+      for a, b, c, d in angs:
+        highlight_angle(ax, a, b, c, d, color, 0.5)
+  if highlights:
+    global HCOLORS
+    if HCOLORS is None:
+      HCOLORS = [k for k in mcolors.TABLEAU_COLORS.keys() if 'red' not in k]
+    for i, (name, args) in enumerate(highlights):
+      color_i = HCOLORS[i % len(HCOLORS)]
+      highlight(ax, name, args, 'black', color_i, color_i)
+  if goal:
+    name, args = goal
+    lcolor = color1 = color2 = 'red'
+    highlight(ax, name, args, lcolor, color1, color2)
+THEME = 'dark'
+def set_theme(theme) -> None:
+  global THEME
+  THEME = theme
+def get_theme() -> str:
+  return THEME
+def draw(
+    points: list[gm.Point],
+    lines: list[gm.Line],
+    circles: list[gm.Circle],
+    segments: list[gm.Segment],
+    goal: Any = None,
+    highlights: list[tuple[str, list[gm.Point]]] = None,
+    equals: list[tuple[Any, Any]] = None,
+    block: bool = True,
+    save_to: str = None,
+    theme: str = 'dark',
+) -> None:
+  """Draw everything on the same canvas."""
+  plt.close()
+  imsize = 512 / 100
+  fig, ax = plt.subplots(figsize=(imsize, imsize), dpi=100)
+  set_theme(theme)
+  if get_theme() == 'dark':
+    ax.set_facecolor((0.0, 0.0, 0.0))
+  else:
+    ax.set_facecolor((1.0, 1.0, 1.0))
+  _draw(ax, points, lines, circles, goal, equals, highlights)
+  plt.axis('equal')
+  fig.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
+  if points:
+    xmin = min([p.num.x for p in points])
+    xmax = max([p.num.x for p in points])
+    ymin = min([p.num.y for p in points])
+    ymax = max([p.num.y for p in points])
+    plt.margins((xmax - xmin) * 0.1, (ymax - ymin) * 0.1)
+  plt.show(block=block)
+def close_enough(a: float, b: float, tol: float = 1e-12) -> bool:
+  return abs(a - b) < tol
+def assert_close_enough(a: float, b: float, tol: float = 1e-12) -> None:
+  assert close_enough(a, b, tol), f'|{a}-{b}| = {abs(a-b)} >= {tol}'
+def ang_of(tail: Point, head: Point) -> float:
+  vector = head - tail
+  arctan = np.arctan2(vector.y, vector.x) % (2 * np.pi)
+  return arctan
+def ang_between(tail: Point, head1: Point, head2: Point) -> float:
+  ang1 = ang_of(tail, head1)
+  ang2 = ang_of(tail, head2)
+  diff = ang1 - ang2
+  # return diff % (2*np.pi)
+  if diff > np.pi:
+    return diff - 2 * np.pi
+  if diff < -np.pi:
+    return 2 * np.pi + diff
+  return diff
+def head_from(tail: Point, ang: float, length: float = 1) -> Point:
+  vector = Point(np.cos(ang) * length, np.sin(ang) * length)
+  return tail + vector
+def random_points(n: int = 3) -> list[Point]:
+  return [Point(unif(-1, 1), unif(-1, 1)) for _ in range(n)]
+def random_rfss(*points: list[Point]) -> list[Point]:
+  """Random rotate-flip-scale-shift a point cloud."""
+  # center point cloud.
+  average = sum(points, Point(0.0, 0.0)) * (1.0 / len(points))
+  points = [p - average for p in points]
+  # rotate
+  ang = unif(0.0, 2 * np.pi)
+  sin, cos = np.sin(ang), np.cos(ang)
+  # scale and shift
+  scale = unif(0.5, 2.0)
+  shift = Point(unif(-1, 1), unif(-1, 1))
+  points = [p.rotate(sin, cos) * scale + shift for p in points]
+  # randomly flip
+  if np.random.rand() < 0.5:
+    points = [p.flip() for p in points]
+  return points
+def reduce(
+    objs: list[Union[Point, Line, Circle, HalfLine, HoleCircle]],
+    existing_points: list[Point],
+) -> list[Point]:
+  """Reduce intersecting objects into one point of intersections.
+
+  Args:
+    objs: either only points (returned unchanged), a single object (a
+      sample from its sample_within is returned), or two objects that are
+      intersected with each other.
+    existing_points: points already present; with two intersection
+      candidates, a candidate close to one of these is rejected in favour
+      of the other.
+
+  Returns:
+    A list of points as described above.
+
+  Raises:
+    ValueError: if objs has more than two entries and is not all points.
+  """
+  if all(isinstance(o, Point) for o in objs):
+    return objs
+  if len(objs) == 1:
+    return objs[0].sample_within(existing_points)
+  if len(objs) != 2:
+    raise ValueError(f'Cannot reduce {objs}')
+
+  first, second = objs
+  hits = first.intersect(second)
+  if isinstance(hits, Point):
+    return [hits]
+
+  p, q = hits
+  # Prefer the candidate that does not coincide with an existing point.
+  if any([p.close(x) for x in existing_points]):
+    return [q]
+  if any([q.close(x) for x in existing_points]):
+    return [p]
+  # Neither candidate is taken: pick one at random.
+  return [np.random.choice([p, q])]
+def sketch(
+    name: str, args: list[Union[Point, gm.Point]]
+) -> list[Union[Point, Line, Circle, HalfLine, HoleCircle]]:
+  """Call the module-level function sketch_<name> and return a list.
+
+  Any gm.Point in args is replaced by its .num attribute before the call.
+  A tuple or list result is converted to a list; any other result is
+  wrapped in a one-element list.
+
+  Raises:
+    KeyError: if no global named 'sketch_' + name exists.
+  """
+  sketcher = globals()['sketch_' + name]
+  numeric_args = []
+  for arg in args:
+    numeric_args.append(arg.num if isinstance(arg, gm.Point) else arg)
+  result = sketcher(numeric_args)
+  # out can be one or multiple {Point/Line/HalfLine}
+  return list(result) if isinstance(result, (tuple, list)) else [result]
+def sketch_on_opline(args: tuple[gm.Point, ...]) -> HalfLine:
+  """Return HalfLine(a, a + a - b), i.e. from a through b mirrored about a."""
+  a, b = args
+  mirrored = a + a - b
+  return HalfLine(a, mirrored)
+def sketch_on_hline(args: tuple[gm.Point, ...]) -> HalfLine:
+  """Return the HalfLine built from the two given points, in order."""
+  tail, head = args
+  return HalfLine(tail, head)
+def sketch_ieq_triangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a triangle on the fixed base (0, 0)-(1, 0); args is ignored.
+
+  The third vertex is the first intersection of the circle around a
+  through b with the circle around b through a.
+  """
+  a = Point(0.0, 0.0)
+  b = Point(1.0, 0.0)
+  around_a = Circle(a, p1=b)
+  around_b = Circle(b, p1=a)
+  c, _ = around_a.intersect(around_b)
+  return a, b, c
+def sketch_incenter2(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Return (x, y, z, i) for triangle a, b, c.
+
+  i is where the bisectors at a and at b meet; x, y, z are the feet of i
+  on lines bc, ca and ab respectively.
+  """
+  a, b, c = args
+  bisector_at_a = sketch_bisect([b, a, c])
+  bisector_at_b = sketch_bisect([a, b, c])
+  i = line_line_intersection(bisector_at_a, bisector_at_b)
+  x, y, z = [i.foot(Line(p, q)) for p, q in ((b, c), (c, a), (a, b))]
+  return x, y, z, i
+def sketch_excenter2(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Return (x, y, z, i) for triangle a, b, c.
+
+  i is where the bisector at a meets the line from sketch_exbisect at b;
+  x, y, z are the feet of i on lines bc, ca and ab respectively.
+  """
+  a, b, c = args
+  bisector_at_a = sketch_bisect([b, a, c])
+  exbisector_at_b = sketch_exbisect([a, b, c])
+  i = line_line_intersection(bisector_at_a, exbisector_at_b)
+  x, y, z = [i.foot(Line(p, q)) for p, q in ((b, c), (c, a), (a, b))]
+  return x, y, z, i
+def sketch_centroid(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Return the midpoints of bc, ca, ab and the meeting point of two medians."""
+  a, b, c = args
+  mid_bc = (b + c) * 0.5
+  mid_ca = (c + a) * 0.5
+  mid_ab = (a + b) * 0.5
+  median_a = Line(a, mid_bc)
+  median_b = Line(b, mid_ca)
+  centroid = line_line_intersection(median_a, median_b)
+  return mid_bc, mid_ca, mid_ab, centroid
+def sketch_ninepoints(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Return the midpoints of bc, ca, ab and the center of the circle through them."""
+  a, b, c = args
+  mid_bc = (b + c) * 0.5
+  mid_ca = (c + a) * 0.5
+  mid_ab = (a + b) * 0.5
+  through_midpoints = Circle(p1=mid_bc, p2=mid_ca, p3=mid_ab)
+  return mid_bc, mid_ca, mid_ab, through_midpoints.center
+def sketch_2l1c(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a circle touching two lines and another circle.
+
+  With args = (a, b, c, p): the two lines are ca and cb, and the other
+  circle is centered at p and passes through a. Returns the feet of the
+  new center x on ac and bc, the point g on the given circle, and x.
+  """
+  a, b, c, p = args
+  line_bc, line_ac = Line(b, c), Line(a, c)
+  given = Circle(p, p1=a)
+
+  # On the perpendicular from p to bc, take the circle point chosen by the
+  # side test against a.
+  d, d_other = line_circle_intersection(p.perpendicular_line(line_bc), given)
+  d = d_other if line_bc.diff_side(d_other, a) else d
+  # Same on the perpendicular from p to ac, with the side test against b.
+  e, e_other = line_circle_intersection(p.perpendicular_line(line_ac), given)
+  e = e_other if line_ac.diff_side(e_other, b) else e
+
+  # Lines through d and e perpendicular to pd and pe meet at f.
+  through_d = d.perpendicular_line(Line(p, d))
+  through_e = e.perpendicular_line(Line(p, e))
+  f = line_line_intersection(through_d, through_e)
+
+  g, g_other = line_circle_intersection(Line(c, f), given)
+  g = g_other if line_bc.same_side(g_other, a) else g
+
+  # m is the midpoint of the unit steps from c towards a and towards b,
+  # so line cm bisects the angle at c.
+  unit_b = c + (b - c) / b.distance(c)
+  unit_a = c + (a - c) / a.distance(c)
+  m = (unit_a + unit_b) * 0.5
+  x = line_line_intersection(Line(c, m), Line(p, g))
+  return x.foot(line_ac), x.foot(line_bc), g, x
+def sketch_3peq(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch x, y, z with z the midpoint of xy.
+
+  For triangle a, b, c: z is a random point along b + t * (c - b) with t
+  drawn from [-0.5, 1.5], x is taken on line ab, and y = 2 * z - x is the
+  mirror image of x about z.
+
+  Returns:
+    The tuple (x, y, z).
+  """
+  a, b, c = args
+  # Only lines ab and ca are needed; the unused Line(b, c) was dropped.
+  ab, ca = Line(a, b), Line(c, a)
+  z = b + (c - b) * np.random.uniform(-0.5, 1.5)
+  # Mirror c about z and draw the parallel to ca through it; where that
+  # parallel meets ab is x.
+  z_ = z * 2 - c
+  l = z_.parallel_line(ca)
+  x = line_line_intersection(l, ab)
+  y = z * 2 - x
+  return x, y, z
+def try_to_sketch_intersect(
+    name1: str,
+    args1: list[Union[gm.Point, Point]],
+    name2: str,
+    args2: list[Union[gm.Point, Point]],
+    existing_points: list[Point],
+) -> Optional[Point]:
+  """Try to sketch an intersection between two objects.
+
+  Args:
+    name1: sketch name of the first object (see sketch()).
+    args1: arguments for the first sketch.
+    name2: sketch name of the second object.
+    args2: arguments for the second sketch.
+    existing_points: points used by check_too_close / check_too_far to
+      accept or reject an intersection candidate.
+
+  Returns:
+    The intersection point when it is unique; otherwise the first of the
+    two candidates that is neither too close nor too far; None when the
+    intersection fails or both candidates are rejected.
+  """
+  obj1 = sketch(name1, args1)[0]
+  obj2 = sketch(name2, args2)[0]
+  if isinstance(obj1, Line) and isinstance(obj2, Line):
+    fn = line_line_intersection
+  elif isinstance(obj1, Circle) and isinstance(obj2, Circle):
+    fn = circle_circle_intersection
+  else:
+    fn = line_circle_intersection
+    # line_circle_intersection takes the line first.
+    if isinstance(obj2, Line) and isinstance(obj1, Circle):
+      obj1, obj2 = obj2, obj1
+  try:
+    x = fn(obj1, obj2)
+  except Exception:  # pylint: disable=broad-except
+    # Best effort: any failure to intersect means "no point". Catching
+    # Exception rather than everything lets KeyboardInterrupt and
+    # SystemExit propagate.
+    return None
+  if isinstance(x, Point):
+    return x
+  x1, x2 = x
+  close1 = check_too_close([x1], existing_points)
+  far1 = check_too_far([x1], existing_points)
+  if not close1 and not far1:
+    return x1
+  close2 = check_too_close([x2], existing_points)
+  far2 = check_too_far([x2], existing_points)
+  if not close2 and not far2:
+    return x2
+  return None
+def sketch_acircle(args: tuple[gm.Point, ...]) -> Circle:
+  """Return the circle through d, f and e.
+
+  e is where the two sketch_aline half lines, one starting at d and one
+  starting at f, meet.
+  """
+  a, b, c, d, f = args
+  from_d = sketch_aline([c, a, b, f, d])
+  from_f = sketch_aline([a, c, b, d, f])
+  e = line_line_intersection(from_d, from_f)
+  return Circle(p1=d, p2=e, p3=f)
+def sketch_aline(args: tuple[gm.Point, ...]) -> HalfLine:
+  """Sketch the construction aline.
+
+  With args = (A, B, C, D, E), returns the half line from E through a
+  point X at unit distance, where the direction of EX is
+  dir(ED) + dir(BC) - dir(BA).
+  """
+  A, B, C, D, E = args  # pylint: disable=invalid-name
+
+  def direction(head, tail):
+    # arctan2 of the vector head - tail after dividing by its length.
+    vec = head - tail
+    dist = head.distance(tail)
+    return np.arctan2(vec.y / dist, vec.x / dist)
+
+  ang_ab = direction(A, B)
+  ang_bc = direction(C, B)
+  ang_de = direction(D, E)
+  ang_ex = ang_de + ang_bc - ang_ab
+  X = E + Point(np.cos(ang_ex), np.sin(ang_ex))  # pylint: disable=invalid-name
+  return HalfLine(E, X)
+def sketch_amirror(args: tuple[gm.Point, ...]) -> HalfLine:
+  """Sketch the angle mirror.
+
+  With args = (A, B, C), returns the half line from B through a point X
+  at unit distance whose direction is 2 * dir(BC) - dir(BA).
+  """
+  A, B, C = args  # pylint: disable=invalid-name
+
+  def direction(head):
+    # arctan2 of the vector head - B after dividing by its length.
+    vec = head - B
+    dist = head.distance(B)
+    return np.arctan2(vec.y / dist, vec.x / dist)
+
+  ang_ab = direction(A)
+  ang_bc = direction(C)
+  ang_bx = 2 * ang_bc - ang_ab
+  X = B + Point(np.cos(ang_bx), np.sin(ang_bx))  # pylint: disable=invalid-name
+  return HalfLine(B, X)
+def sketch_bisect(args: tuple[gm.Point, ...]) -> Line:
+  """Return the line through b bisecting angle abc."""
+  a, b, c = args
+  len_ab = a.distance(b)
+  len_bc = b.distance(c)
+  # x lies along b -> c at the same distance from b as a does, so the
+  # midpoint of a and x is on the bisector.
+  x = b + (c - b) * (len_ab / len_bc)
+  midpoint = (a + x) * 0.5
+  return Line(b, midpoint)
+def sketch_exbisect(args: tuple[gm.Point, ...]) -> Line:
+  """Return sketch_bisect(args).perpendicular_line(b) for args = (a, b, c)."""
+  _, b, _ = args
+  bisector = sketch_bisect(args)
+  return bisector.perpendicular_line(b)
+def sketch_bline(args: tuple[gm.Point, ...]) -> Line:
+  """Return the perpendicular to line ab through the midpoint of a and b."""
+  a, b = args
+  midpoint = (a + b) * 0.5
+  segment_line = Line(a, b)
+  return midpoint.perpendicular_line(segment_line)
+def sketch_dia(args: tuple[gm.Point, ...]) -> Circle:
+  """Return the circle centered at the midpoint of a and b, through a."""
+  a, b = args
+  center = (a + b) * 0.5
+  return Circle(center, p1=a)
+def sketch_tangent(args: tuple[gm.Point, ...]) -> tuple[Point, Point]:
+  """Intersect the circle around o through b with the circle on diameter ao.
+
+  args = (a, o, b). The result is whatever circle_circle_intersection
+  returns for those two circles.
+  """
+  a, o, b = args
+  on_diameter_ao = sketch_dia([a, o])
+  around_o = Circle(o, p1=b)
+  return circle_circle_intersection(around_o, on_diameter_ao)
+def sketch_circle(args: tuple[gm.Point, ...]) -> Circle:
+  """Return the circle centered at a with radius |bc|."""
+  a, b, c = args
+  radius = b.distance(c)
+  return Circle(center=a, radius=radius)
+def sketch_cc_tangent(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch tangents to two circles.
+
+  args = (o, a, w, b): one circle is centered at o with radius |oa|, the
+  other at w with radius |wb|. Returns (x, y, z, t); per the construction
+  x and z are taken on the circle around o, y and t on the side of w.
+  """
+  o, a, w, b = args
+  ra, rb = o.distance(a), w.distance(b)
+  ow = Line(o, w)
+
+  if close_enough(ra, rb):
+    # Equal radii: take the two points of circle o on the perpendicular
+    # to ow through o and translate them by w - o.
+    perpendicular = ow.perpendicular_line(o)
+    x, z = line_circle_intersection(perpendicular, Circle(o, ra))
+    return x, x + w - o, z, z + w - o
+
+  # Work with the larger circle first; undo the swap on the way out.
+  swapped = rb > ra
+  if swapped:
+    o, a, w, b = w, b, o, a
+    ra, rb = rb, ra
+  larger = Circle(o, ra)
+  # q lies on line ow at distance ratio ra / (ra - rb) from o.
+  q = o + (w - o) * ra / (ra - rb)
+  x, z = circle_circle_intersection(sketch_dia([o, q]), larger)
+  y = w.foot(Line(x, q))
+  t = w.foot(Line(z, q))
+  return (y, x, t, z) if swapped else (x, y, z, t)
+def sketch_hcircle(args: tuple[gm.Point, ...]) -> HoleCircle:
+  """Return a HoleCircle centered at a with radius |ab| and hole b."""
+  a, b = args
+  radius = a.distance(b)
+  return HoleCircle(center=a, radius=radius, hole=b)
+def sketch_e5128(args: tuple[gm.Point, ...]) -> tuple[Point, Point]:
+  """Return (e, g) for args = (a, b, c, d).
+
+  g is the midpoint of a and b. e is the intersection of line dg with the
+  circle around c through b that is farther from d (the first one on a
+  tie).
+  """
+  a, b, c, d = args
+  # The unused Line(a, d) from the original was dropped.
+  g = (a + b) * 0.5
+  de = Line(d, g)
+  e, f = line_circle_intersection(de, Circle(c, p1=b))
+  if e.distance(d) < f.distance(d):
+    e = f
+  return e, g
+def sketch_eq_quadrangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch quadrangle with two equal opposite sides.
+
+  args is ignored. Sides ad and bc share one random length; the result is
+  passed through random_rfss before being returned as (a, b, c, d).
+  """
+  a = Point(0.0, 0.0)
+  b = Point(1.0, 0.0)
+  side = np.random.uniform(0.5, 2.0)
+  ang_ad = np.random.uniform(np.pi / 3, np.pi * 2 / 3)
+  d = head_from(a, ang_ad, side)
+  # c is placed from b at a small fraction of the direction angle b -> d.
+  ang_bd = ang_of(b, d)
+  ang_bc = np.random.uniform(ang_bd / 10, ang_bd / 9)
+  c = head_from(b, ang_bc, side)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_eq_trapezoid(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a trapezoid symmetric about x = 0.5; args is ignored.
+
+  Base ab is (0, 0)-(1, 0); the opposite side dc has random length and
+  height and is centered over ab. The result goes through random_rfss.
+  """
+  a = Point(0.0, 0.0)
+  b = Point(1.0, 0.0)
+  top_length = unif(0.5, 2.0)
+  height = unif(0.5, 2.0)
+  half = top_length / 2.0
+  c = Point(0.5 + half, height)
+  d = Point(0.5 - half, height)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_eqangle2(args: tuple[gm.Point, ...]) -> Point:
+  """Sketch the def eqangle2.
+
+  For args = (a, b, c), picks a random length be, places e along b -> c
+  and y along b -> a from it, and returns the intersection of lines cy
+  and ae.
+  """
+  a, b, c = args
+  # The unused point d = 2 * c - b from the original was dropped.
+  ba = b.distance(a)
+  bc = b.distance(c)
+  l = ba * ba / bc
+  # be is drawn either below the smaller or above the larger of l and bc.
+  if unif(0.0, 1.0) < 0.5:
+    be = min(l, bc)
+    be = unif(be * 0.1, be * 0.9)
+  else:
+    be = max(l, bc)
+    be = unif(be * 1.1, be * 1.5)
+  e = b + (c - b) * (be / bc)
+  y = b + (a - b) * (be / l)
+  return line_line_intersection(Line(c, y), Line(a, e))
+def sketch_eqangle3(args: tuple[gm.Point, ...]) -> Circle:
+  """Return the circle through a, b and a helper point x.
+
+  args = (a, b, d, e, f). x is placed from a in direction
+  ang_of(a, b) + ang_between(e, d, f) at distance |de| / |ef| * |ab|.
+  """
+  a, b, d, e, f = args
+  len_de = d.distance(e)
+  len_ef = e.distance(f)
+  len_ab = b.distance(a)
+  direction = ang_of(a, b) + ang_between(e, d, f)
+  x = head_from(a, direction, length=len_de / len_ef * len_ab)
+  return Circle(p1=a, p2=b, p3=x)
+def sketch_eqdia_quadrangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch quadrangle with two equal diagonals.
+
+  args is ignored. Diagonals ac and bd both have length 1; bd is rotated
+  by a random angle in [-pi/4, pi/4] before random_rfss is applied.
+  """
+  m = unif(0.3, 0.7)
+  n = unif(0.3, 0.7)
+  a, c = Point(-m, 0.0), Point(1 - m, 0.0)
+  b, d = Point(0.0, -n), Point(0.0, 1 - n)
+  tilt = unif(-0.25 * np.pi, 0.25 * np.pi)
+  sin, cos = np.sin(tilt), np.cos(tilt)
+  b, d = b.rotate(sin, cos), d.rotate(sin, cos)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_free(args: tuple[gm.Point, ...]) -> Point:
+  """Return one point from random_points; args is ignored."""
+  points = random_points(1)
+  return points[0]
+def sketch_isos(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a triangle with ab = ac; args is ignored.
+
+  Base bc is centered on the origin with apex a above it, both sized at
+  random in [0.5, 1.5], then transformed with random_rfss.
+  """
+  base = unif(0.5, 1.5)
+  height = unif(0.5, 1.5)
+  half_base = base / 2
+  apex = Point(0.0, height)
+  left = Point(-half_base, 0.0)
+  right = Point(half_base, 0.0)
+  return tuple(random_rfss(apex, left, right))
+def sketch_line(args: tuple[gm.Point, ...]) -> Line:
+  """Return the Line built from the two given points."""
+  first, second = args
+  return Line(first, second)
+def sketch_cyclic(args: tuple[gm.Point, ...]) -> Circle:
+  """Return the Circle built from the three given points."""
+  first, second, third = args
+  return Circle(p1=first, p2=second, p3=third)
+def sketch_hline(args: tuple[gm.Point, ...]) -> HalfLine:
+  """Return the HalfLine built from the two given points, in order."""
+  tail, head = args
+  return HalfLine(tail, head)
+def sketch_midp(args: tuple[gm.Point, ...]) -> Point:
+  """Return the midpoint of the two given points."""
+  a, b = args
+  total = a + b
+  return total * 0.5
+def sketch_pentagon(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch five points on the unit circle; args is ignored.
+
+  Starting at angle 0, each further vertex advances by a randomly
+  perturbed equal share of the angle still left to 2*pi. The points are
+  then transformed with random_rfss.
+  """
+  vertices = [Point(1.0, 0.0)]
+  ang = 0.0
+  for remaining in range(5, 1, -1):
+    ang += (2 * np.pi - ang) / remaining * unif(0.5, 1.5)
+    vertices.append(Point(np.cos(ang), np.sin(ang)))
+  return tuple(random_rfss(*vertices))
+def sketch_pline(args: tuple[gm.Point, ...]) -> Line:
+  """Return the line through a parallel to line bc."""
+  a, b, c = args
+  reference = Line(b, c)
+  return a.parallel_line(reference)
+def sketch_pmirror(args: tuple[gm.Point, ...]) -> Point:
+  """Return 2 * b - a, the mirror image of a about b."""
+  a, b = args
+  doubled = b * 2
+  return doubled - a
+def sketch_quadrangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a random quadrangle.
+
+  args is ignored. a and c sit on the x axis either side of the origin,
+  b and d on the y axis below and above it; bd is then rotated by a
+  random angle in [-pi/4, pi/4] and random_rfss is applied.
+  """
+  m = unif(0.3, 0.7)
+  # This draw is never used, but it is kept so that the sequence of
+  # random numbers consumed stays the same.
+  _ = unif(0.3, 0.7)
+  a, c = Point(-m, 0.0), Point(1 - m, 0.0)
+  b = Point(0.0, -unif(0.25, 0.75))
+  d = Point(0.0, unif(0.25, 0.75))
+  tilt = unif(-0.25 * np.pi, 0.25 * np.pi)
+  sin, cos = np.sin(tilt), np.cos(tilt)
+  b, d = b.rotate(sin, cos), d.rotate(sin, cos)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_r_trapezoid(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a trapezoid with ad perpendicular to ab and dc; args is ignored.
+
+  a and d are fixed on the y axis; b and c get random x in [0.5, 1.5] at
+  heights 1 and 0. The result goes through random_rfss.
+  """
+  a = Point(0.0, 1.0)
+  d = Point(0.0, 0.0)
+  top_x = unif(0.5, 1.5)
+  bottom_x = unif(0.5, 1.5)
+  b = Point(top_x, 1.0)
+  c = Point(bottom_x, 0.0)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_r_triangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a triangle with a right angle at a; args is ignored.
+
+  The legs ab and ac lie on the axes with random lengths in [0.5, 2.0],
+  then random_rfss is applied.
+  """
+  a = Point(0.0, 0.0)
+  leg_b = unif(0.5, 2.0)
+  leg_c = unif(0.5, 2.0)
+  b = Point(0.0, leg_b)
+  c = Point(leg_c, 0.0)
+  return tuple(random_rfss(a, b, c))
+def sketch_rectangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a rectangle with height 1 and random width; args is ignored."""
+  width = unif(0.5, 2.0)
+  a = Point(0.0, 0.0)
+  b = Point(0.0, 1.0)
+  c = Point(width, 1.0)
+  d = Point(width, 0.0)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_reflect(args: tuple[gm.Point, ...]) -> Point:
+  """Return the mirror image of a about its foot on line bc."""
+  a, b, c = args
+  foot = a.foot(Line(b, c))
+  return foot * 2 - a
+def sketch_risos(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a right isosceles triangle with the right angle at a.
+
+  args is ignored. Starts from (0, 0), (0, 1), (1, 0) and applies
+  random_rfss.
+  """
+  corner = Point(0.0, 0.0)
+  up = Point(0.0, 1.0)
+  right = Point(1.0, 0.0)
+  return tuple(random_rfss(corner, up, right))
+def sketch_rotaten90(args: tuple[gm.Point, ...]) -> Point:
+  """Return a plus the vector b - a rotated by the angle -pi/2."""
+  a, b = args
+  ang = -np.pi / 2
+  sin, cos = np.sin(ang), np.cos(ang)
+  offset = (b - a).rotate(sin, cos)
+  return a + offset
+def sketch_rotatep90(args: tuple[gm.Point, ...]) -> Point:
+  """Return a plus the vector b - a rotated by the angle +pi/2."""
+  a, b = args
+  ang = np.pi / 2
+  sin, cos = np.sin(ang), np.cos(ang)
+  offset = (b - a).rotate(sin, cos)
+  return a + offset
+def sketch_s_angle(args: tuple[gm.Point, ...]) -> HalfLine:
+  """Return the half line from b through a rotated about b by y degrees.
+
+  args = (a, b, y), where y is a number in degrees; it is converted to
+  radians before being passed to rotatea.
+  """
+  a, b, y = args
+  radians = y / 180 * np.pi
+  x = b + (a - b).rotatea(radians)
+  return HalfLine(b, x)
+def sketch_segment(args: tuple[gm.Point, ...]) -> tuple[Point, Point]:
+  """Return two points from random_points; args is ignored."""
+  first, second = random_points(2)
+  return (first, second)
+def sketch_shift(args: tuple[gm.Point, ...]) -> Point:
+  """Return c translated by the vector b - a."""
+  a, b, c = args
+  translation = b - a
+  return c + translation
+def sketch_square(args: tuple[gm.Point, ...]) -> tuple[Point, Point]:
+  """Return the two remaining corners (c, d) of a square on side ab.
+
+  c is b plus (a - b) turned by -pi/2; d is a plus (b - a) turned by
+  +pi/2, both via rotatea.
+  """
+  a, b = args
+  quarter_turn = np.pi / 2
+  c = b + (a - b).rotatea(-quarter_turn)
+  d = a + (b - a).rotatea(quarter_turn)
+  return c, d
+def sketch_isquare(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a square: the unit square passed through random_rfss.
+
+  args is ignored.
+  """
+  corners = (
+      Point(0.0, 0.0),
+      Point(1.0, 0.0),
+      Point(1.0, 1.0),
+      Point(0.0, 1.0),
+  )
+  a, b, c, d = random_rfss(*corners)
+  return a, b, c, d
+def sketch_tline(args: tuple[gm.Point, ...]) -> Line:
+  """Return the line through a perpendicular to line bc."""
+  a, b, c = args
+  reference = Line(b, c)
+  return a.perpendicular_line(reference)
+def sketch_trapezoid(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a trapezoid with ab parallel to dc; args is ignored.
+
+  dc is the fixed segment (0, 0)-(1, 0); ab has random length and sits at
+  a random height with a's x in [0.2, 0.5]. random_rfss is applied last.
+  """
+  d = Point(0.0, 0.0)
+  c = Point(1.0, 0.0)
+  top_length = unif(0.5, 2.0)
+  height = unif(0.5, 2.0)
+  a = Point(unif(0.2, 0.5), height)
+  b = Point(a.x + top_length, height)
+  return tuple(random_rfss(a, b, c, d))
+def sketch_triangle(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a triangle on the fixed base (0, 0)-(1, 0); args is ignored.
+
+  c is placed from a at a random distance in [0.5, 2.0] and a random
+  direction in [0.2*pi, 0.8*pi].
+  """
+  a = Point(0.0, 0.0)
+  b = Point(1.0, 0.0)
+  len_ac = unif(0.5, 2.0)
+  direction = unif(0.2, 0.8) * np.pi
+  return a, b, head_from(a, direction, len_ac)
+def sketch_triangle12(args: tuple[gm.Point, ...]) -> tuple[Point, ...]:
+  """Sketch a triangle with |ab| = 1 and |ac| = 2 before rescaling.
+
+  args is ignored. a is the first intersection of the circle of radius 1
+  around b with the circle of radius 2 around c; random_rfss then
+  transforms all three points.
+  """
+  b = Point(0.0, 0.0)
+  c = Point(unif(1.5, 2.5), 0.0)
+  around_b = Circle(b, 1.0)
+  around_c = Circle(c, 2.0)
+  a, _ = circle_circle_intersection(around_b, around_c)
+  return tuple(random_rfss(a, b, c))
+def sketch_trisect(args: tuple[gm.Point, ...]) -> tuple[Point, Point]:
+  """Sketch two trisectors of an angle.
+
+  For args = (a, b, c), returns the points where the two trisectors of
+  the angle at b meet line ac.
+  """
+  a, b, c = args
+  low = ang_of(b, a)
+  high = ang_of(b, c)
+  # `flipped` is True when exactly one of the two reorderings below
+  # happened; the returned pair is then reversed.
+  flipped = False
+  if low > high:
+    low, high = high, low
+    flipped = not flipped
+  if high - low > np.pi:
+    # Use the other arc by wrapping the smaller angle past 2*pi.
+    low, high = high, low + 2 * np.pi
+    flipped = not flipped
+
+  angx = low + (high - low) / 3
+  angy = high - (high - low) / 3
+  toward_x = b + Point(np.cos(angx), np.sin(angx))
+  toward_y = b + Point(np.cos(angy), np.sin(angy))
+
+  ac = Line(a, c)
+  x = line_line_intersection(Line(b, toward_x), ac)
+  y = line_line_intersection(Line(b, toward_y), ac)
+  return (y, x) if flipped else (x, y)
+def sketch_trisegment(args: tuple[gm.Point, ...]) -> tuple[Point, Point]:
+  """Return the points one third and two thirds of the way from a to b."""
+  a, b = args
+  one_third = a + (b - a) * (1.0 / 3)
+  two_thirds = a + (b - a) * (2.0 / 3)
+  return one_third, two_thirds

external/alphageometry/numericals_test.py ADDED Viewed

	@@ -0,0 +1,313 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit testing for the geometry numericals code."""
+import unittest
+from absl.testing import absltest
+import numericals as nm
+# Short local aliases for the names from numericals used by the tests below.
+np = nm.np
+unif = nm.unif
+Point = nm.Point
+Line = nm.Line
+Circle = nm.Circle
+HalfLine = nm.HalfLine
+line_circle_intersection = nm.line_circle_intersection
+line_line_intersection = nm.line_line_intersection
+check_coll = nm.check_coll
+check_eqangle = nm.check_eqangle
+random_points = nm.random_points
+ang_between = nm.ang_between
+head_from = nm.head_from
+class NumericalTest(unittest.TestCase):
+  """Checks the geometric properties of the nm.sketch_* constructions.
+
+  Most tests draw random inputs, so each run exercises a different
+  configuration. All checks use unittest assertion methods so they are
+  not stripped when Python runs with -O.
+  """
+  def test_sketch_ieq_triangle(self):
+    a, b, c = nm.sketch_ieq_triangle([])
+    self.assertAlmostEqual(a.distance(b), b.distance(c))
+    self.assertAlmostEqual(c.distance(a), b.distance(c))
+  def test_sketch_2l1c(self):
+    p = nm.Point(0.0, 0.0)
+    pi = np.pi
+    anga = unif(-0.4 * pi, 0.4 * pi)
+    a = Point(np.cos(anga), np.sin(anga))
+    angb = unif(0.6 * pi, 1.4 * pi)
+    b = Point(np.cos(angb), np.sin(angb))
+    angc = unif(anga + 0.05 * pi, angb - 0.05 * pi)
+    c = Point(np.cos(angc), np.sin(angc)) * unif(0.2, 0.8)
+    x, y, z, i = nm.sketch_2l1c([a, b, c, p])
+    self.assertTrue(check_coll([x, c, a]))
+    self.assertTrue(check_coll([y, c, b]))
+    self.assertAlmostEqual(z.distance(p), 1.0)
+    self.assertTrue(check_coll([p, i, z]))
+    self.assertTrue(Line(i, x).is_perp(Line(c, a)))
+    self.assertTrue(Line(i, y).is_perp(Line(c, b)))
+    self.assertAlmostEqual(i.distance(x), i.distance(y))
+    self.assertAlmostEqual(i.distance(x), i.distance(z))
+  def test_sketch_3peq(self):
+    a, b, c = random_points(3)
+    x, y, z = nm.sketch_3peq([a, b, c])
+    self.assertTrue(check_coll([a, b, x]))
+    self.assertTrue(check_coll([a, c, y]))
+    self.assertTrue(check_coll([b, c, z]))
+    self.assertTrue(check_coll([x, y, z]))
+    self.assertAlmostEqual(z.distance(x), z.distance(y))
+  def test_sketch_aline(self):
+    a, b, c, d, e = random_points(5)
+    ex = nm.sketch_aline([a, b, c, d, e])
+    self.assertIsInstance(ex, HalfLine)
+    self.assertEqual(ex.tail, e)
+    x = ex.head
+    self.assertAlmostEqual(ang_between(b, a, c), ang_between(e, d, x))
+  def test_sketch_amirror(self):
+    a, b, c = random_points(3)
+    bx = nm.sketch_amirror([a, b, c])
+    self.assertIsInstance(bx, HalfLine)
+    self.assertEqual(bx.tail, b)
+    x = bx.head
+    ang1 = ang_between(b, a, c)
+    ang2 = ang_between(b, c, x)
+    self.assertAlmostEqual(ang1, ang2)
+  def test_sketch_bisect(self):
+    a, b, c = random_points(3)
+    line = nm.sketch_bisect([a, b, c])
+    self.assertAlmostEqual(b.distance(line), 0.0)
+    l = a.perpendicular_line(line)
+    x = line_line_intersection(l, Line(b, c))
+    self.assertAlmostEqual(a.distance(line), x.distance(line))
+    d, _ = line_circle_intersection(line, Circle(b, radius=1))
+    ang1 = ang_between(b, a, d)
+    ang2 = ang_between(b, d, c)
+    self.assertAlmostEqual(ang1, ang2)
+  def test_sketch_bline(self):
+    a, b = random_points(2)
+    l = nm.sketch_bline([a, b])
+    self.assertTrue(Line(a, b).is_perp(l))
+    self.assertAlmostEqual(a.distance(l), b.distance(l))
+  def test_sketch_cc_tangent(self):
+    o = Point(0.0, 0.0)
+    w = Point(1.0, 0.0)
+    ra = unif(0.0, 0.6)
+    rb = unif(0.4, 1.0)
+    a = unif(0.0, np.pi)
+    b = unif(0.0, np.pi)
+    a = o + ra * Point(np.cos(a), np.sin(a))
+    b = w + rb * Point(np.sin(b), np.cos(b))
+    x, y, z, t = nm.sketch_cc_tangent([o, a, w, b])
+    xy = Line(x, y)
+    zt = Line(z, t)
+    self.assertAlmostEqual(o.distance(xy), o.distance(a))
+    self.assertAlmostEqual(o.distance(zt), o.distance(a))
+    self.assertAlmostEqual(w.distance(xy), w.distance(b))
+    self.assertAlmostEqual(w.distance(zt), w.distance(b))
+  def test_sketch_circle(self):
+    a, b, c = random_points(3)
+    circle = nm.sketch_circle([a, b, c])
+    self.assertAlmostEqual(circle.center.distance(a), 0.0)
+    self.assertAlmostEqual(circle.radius, b.distance(c))
+  def test_sketch_e5128(self):
+    b = Point(0.0, 0.0)
+    c = Point(0.0, 1.0)
+    ang = unif(-np.pi / 2, 3 * np.pi / 2)
+    d = head_from(c, ang, 1.0)
+    a = Point(unif(0.5, 2.0), 0.0)
+    e, g = nm.sketch_e5128([a, b, c, d])
+    ang1 = ang_between(a, b, d)
+    ang2 = ang_between(e, a, g)
+    self.assertAlmostEqual(ang1, ang2)
+  def test_sketch_eq_quadrangle(self):
+    a, b, c, d = nm.sketch_eq_quadrangle([])
+    self.assertAlmostEqual(a.distance(d), c.distance(b))
+    ac = Line(a, c)
+    self.assertTrue(ac.diff_side(b, d), (ac(b), ac(d)))
+    bd = Line(b, d)
+    self.assertTrue(bd.diff_side(a, c), (bd(a), bd(c)))
+  def test_sketch_eq_trapezoid(self):
+    a, b, c, d = nm.sketch_eq_trapezoid([])
+    self.assertTrue(Line(a, b).is_parallel(Line(c, d)))
+    self.assertAlmostEqual(a.distance(d), b.distance(c))
+  def test_sketch_eqangle3(self):
+    points = random_points(5)
+    x = nm.sketch_eqangle3(points).sample_within(points)[0]
+    a, b, d, e, f = points
+    self.assertTrue(check_eqangle([x, a, x, b, d, e, d, f]))
+  def test_sketch_eqangle2(self):
+    a, b, c = random_points(3)
+    x = nm.sketch_eqangle2([a, b, c])
+    ang1 = ang_between(a, b, x)
+    ang2 = ang_between(c, x, b)
+    self.assertAlmostEqual(ang1, ang2)
+  def test_sketch_edia_quadrangle(self):
+    a, b, c, d = nm.sketch_eqdia_quadrangle([])
+    self.assertTrue(Line(a, c).diff_side(b, d))
+    self.assertTrue(Line(b, d).diff_side(a, c))
+    self.assertAlmostEqual(a.distance(c), b.distance(d))
+  def test_sketch_isos(self):
+    a, b, c = nm.sketch_isos([])
+    self.assertAlmostEqual(a.distance(b), a.distance(c))
+    self.assertAlmostEqual(ang_between(b, a, c), ang_between(c, b, a))
+  def test_sketch_quadrange(self):
+    a, b, c, d = nm.sketch_quadrangle([])
+    self.assertTrue(Line(a, c).diff_side(b, d))
+    self.assertTrue(Line(b, d).diff_side(a, c))
+  def test_sketch_r_trapezoid(self):
+    a, b, c, d = nm.sketch_r_trapezoid([])
+    self.assertTrue(Line(a, b).is_perp(Line(a, d)))
+    self.assertTrue(Line(a, b).is_parallel(Line(c, d)))
+    self.assertTrue(Line(a, c).diff_side(b, d))
+    self.assertTrue(Line(b, d).diff_side(a, c))
+  def test_sketch_r_triangle(self):
+    a, b, c = nm.sketch_r_triangle([])
+    self.assertTrue(Line(a, b).is_perp(Line(a, c)))
+  def test_sketch_rectangle(self):
+    a, b, c, d = nm.sketch_rectangle([])
+    self.assertTrue(Line(a, b).is_perp(Line(b, c)))
+    self.assertTrue(Line(b, c).is_perp(Line(c, d)))
+    self.assertTrue(Line(c, d).is_perp(Line(d, a)))
+  def test_sketch_reflect(self):
+    a, b, c = random_points(3)
+    x = nm.sketch_reflect([a, b, c])
+    self.assertTrue(Line(a, x).is_perp(Line(b, c)))
+    self.assertAlmostEqual(x.distance(Line(b, c)), a.distance(Line(b, c)))
+  def test_sketch_risos(self):
+    a, b, c = nm.sketch_risos([])
+    self.assertAlmostEqual(a.distance(b), a.distance(c))
+    self.assertTrue(Line(a, b).is_perp(Line(a, c)))
+  def test_sketch_rotaten90(self):
+    a, b = random_points(2)
+    x = nm.sketch_rotaten90([a, b])
+    self.assertAlmostEqual(a.distance(x), a.distance(b))
+    self.assertTrue(Line(a, x).is_perp(Line(a, b)))
+    d = Point(0.0, 0.0)
+    e = Point(0.0, 1.0)
+    f = Point(1.0, 0.0)
+    self.assertAlmostEqual(ang_between(d, e, f), ang_between(a, b, x))
+  def test_sketch_rotatep90(self):
+    a, b = random_points(2)
+    x = nm.sketch_rotatep90([a, b])
+    self.assertAlmostEqual(a.distance(x), a.distance(b))
+    self.assertTrue(Line(a, x).is_perp(Line(a, b)))
+    d = Point(0.0, 0.0)
+    e = Point(0.0, 1.0)
+    f = Point(1.0, 0.0)
+    self.assertAlmostEqual(ang_between(d, f, e), ang_between(a, b, x))
+  def test_sketch_s_angle(self):
+    a, b = random_points(2)
+    y = unif(0.0, np.pi)
+    # sketch_s_angle takes the angle in degrees.
+    bx = nm.sketch_s_angle([a, b, y / np.pi * 180])
+    self.assertIsInstance(bx, HalfLine)
+    self.assertEqual(bx.tail, b)
+    x = bx.head
+    d = Point(1.0, 0.0)
+    e = Point(0.0, 0.0)
+    f = Point(np.cos(y), np.sin(y))
+    self.assertAlmostEqual(ang_between(e, d, f), ang_between(b, a, x))
+  def test_sketch_shift(self):
+    a, b, c = random_points(3)
+    x = nm.sketch_shift([a, b, c])
+    self.assertTrue((b - a).close(x - c))
+  def test_sketch_square(self):
+    a, b = random_points(2)
+    c, d = nm.sketch_square([a, b])
+    self.assertTrue(Line(a, b).is_perp(Line(b, c)))
+    self.assertTrue(Line(b, c).is_perp(Line(c, d)))
+    self.assertTrue(Line(c, d).is_perp(Line(d, a)))
+    self.assertAlmostEqual(a.distance(b), b.distance(c))
+  def test_sketch_isquare(self):
+    a, b, c, d = nm.sketch_isquare([])
+    self.assertTrue(Line(a, b).is_perp(Line(b, c)))
+    self.assertTrue(Line(b, c).is_perp(Line(c, d)))
+    self.assertTrue(Line(c, d).is_perp(Line(d, a)))
+    self.assertAlmostEqual(a.distance(b), b.distance(c))
+  def test_sketch_trapezoid(self):
+    a, b, c, d = nm.sketch_trapezoid([])
+    self.assertTrue(Line(a, b).is_parallel(Line(c, d)))
+    self.assertTrue(Line(a, c).diff_side(b, d))
+    self.assertTrue(Line(b, d).diff_side(a, c))
+  def test_sketch_triangle(self):
+    a, b, c = nm.sketch_triangle([])
+    self.assertFalse(check_coll([a, b, c]))
+  def test_sketch_triangle12(self):
+    a, b, c = nm.sketch_triangle12([])
+    self.assertAlmostEqual(a.distance(b) * 2, a.distance(c))
+  def test_sketch_trisect(self):
+    a, b, c = random_points(3)
+    x, y = nm.sketch_trisect([a, b, c])
+    self.assertAlmostEqual(ang_between(b, a, x), ang_between(b, x, y))
+    self.assertAlmostEqual(ang_between(b, x, y), ang_between(b, y, c))
+    self.assertAlmostEqual(ang_between(b, a, x) * 3, ang_between(b, a, c))
+  def test_sketch_trisegment(self):
+    a, b = random_points(2)
+    x, y = nm.sketch_trisegment([a, b])
+    self.assertAlmostEqual(
+        a.distance(x) + x.distance(y) + y.distance(b), a.distance(b)
+    )
+    self.assertAlmostEqual(a.distance(x), x.distance(y))
+    self.assertAlmostEqual(x.distance(y), y.distance(b))
+# Run the tests through absltest when this file is executed as a script.
+if __name__ == '__main__':
+  absltest.main()

external/alphageometry/pretty.py ADDED Viewed

	@@ -0,0 +1,216 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for string manipulation in the DSL."""
+# Maps each one-character symbol to its predicate name.
+# Note that both '/' and '%' map to 'eqratio', so the reverse mapping built
+# in map_symbol_inv() resolves 'eqratio' to '%' (the later entry wins).
+MAP_SYMBOL = {
+    'T': 'perp',
+    'P': 'para',
+    'D': 'cong',
+    'S': 'simtri',
+    'I': 'circle',
+    'M': 'midp',
+    'O': 'cyclic',
+    'C': 'coll',
+    '^': 'eqangle',
+    '/': 'eqratio',
+    '%': 'eqratio',
+    '=': 'contri',
+    'X': 'collx',
+    'A': 'acompute',
+    'R': 'rcompute',
+    'Q': 'fixc',
+    'E': 'fixl',
+    'V': 'fixb',
+    'H': 'fixt',
+    'Z': 'fixp',
+    'Y': 'ind',
+}
+def map_symbol(c: str) -> str:
+  """Return the predicate name that MAP_SYMBOL holds for symbol c.
+
+  Raises:
+    KeyError: if c is not a key of MAP_SYMBOL.
+  """
+  name = MAP_SYMBOL[c]
+  return name
+def map_symbol_inv(c: str) -> str:
+  """Return the symbol whose MAP_SYMBOL entry is the predicate name c.
+
+  When several symbols share a name, the one listed last in MAP_SYMBOL is
+  returned.
+
+  Raises:
+    KeyError: if no symbol maps to c.
+  """
+  # Later pairs overwrite earlier ones, so the last symbol wins.
+  inverse = dict(zip(MAP_SYMBOL.values(), MAP_SYMBOL.keys()))
+  return inverse[c]
+def _gcd(x: int, y: int) -> int:
+  """Return a greatest common divisor of x and y by Euclid's algorithm.
+
+  The sign of the result follows Python's % operator, so it can be
+  negative for negative inputs (unlike math.gcd).
+  """
+  while y != 0:
+    remainder = x % y
+    x = y
+    y = remainder
+  return x
+def simplify(n: int, d: int) -> tuple[int, int]:
+  """Return (n, d) with both floor-divided by _gcd(n, d).
+
+  Raises:
+    ZeroDivisionError: if n and d are both 0.
+  """
+  divisor = _gcd(n, d)
+  reduced_n = n // divisor
+  reduced_d = d // divisor
+  return (reduced_n, reduced_d)
+def pretty2r(a: str, b: str, c: str, d: str) -> str:
+  """Format the pairs (a, b) and (c, d) with a shared name moved first.
+
+  If b also appears in the second pair, a and b are swapped; if the
+  (possibly swapped) first name equals d, c and d are swapped. The four
+  names are then joined with single spaces.
+  """
+  first, second = (b, a) if b in (c, d) else (a, b)
+  third, fourth = (d, c) if first == d else (c, d)
+  return f'{first} {second} {third} {fourth}'
+def pretty2a(a: str, b: str, c: str, d: str) -> str:
+  """Format the pairs (a, b) and (c, d) with a shared name moved first.
+
+  If b also appears in the second pair, a and b are swapped; if the
+  (possibly swapped) first name equals d, c and d are swapped. The four
+  names are then joined with single spaces.
+  """
+  first, second = (b, a) if b in (c, d) else (a, b)
+  third, fourth = (d, c) if first == d else (c, d)
+  return f'{first} {second} {third} {fourth}'
+def pretty_angle(a: str, b: str, c: str, d: str) -> str:
+  """Format the angle between lines ab and cd.
+
+  The names are reordered as in pretty2a. When both lines then start with
+  the same point the three-letter form with that point in the middle is
+  used; otherwise the result has the form '(ab-cd)'. Both forms are
+  prefixed with the angle sign U+2220.
+  """
+  first, second = (b, a) if b in (c, d) else (a, b)
+  third, fourth = (d, c) if first == d else (c, d)
+  if first == third:
+    return f'\u2220{second}{first}{fourth}'
+  return f'\u2220({first}{second}-{third}{fourth})'
+def pretty_nl(name: str, args: list[str]) -> str:
+  """Natural lang formatting a predicate.
+
+  Args:
+    name: predicate name, or for some predicates its one-character symbol
+      (e.g. 'coll' or 'C').
+    args: the predicate arguments; how many are expected depends on name.
+
+  Returns:
+    A human-readable description of the predicate, or None when name is
+    not one of the predicates handled here.
+
+  Raises:
+    ValueError: if args cannot be unpacked into the number of values the
+      predicate expects.
+    IndexError: for 'eqratio' when fewer than eight args are given.
+  """
+  if name == 'aconst':
+    a, b, c, d, y = args
+    return f'{pretty_angle(a, b, c, d)} = {y}'
+  if name == 'rconst':
+    a, b, c, d, y = args
+    return f'{a}{b}:{c}{d} = {y}'
+  if name == 'acompute':
+    a, b, c, d = args
+    return f'{pretty_angle(a, b, c, d)}'
+  if name in ['coll', 'C']:
+    return '' + ','.join(args) + ' are collinear'
+  if name == 'collx':
+    # Drop duplicates but keep first-seen order; going through a set made
+    # the order of the names vary from run to run.
+    return '' + ','.join(dict.fromkeys(args)) + ' are collinear'
+  if name in ['cyclic', 'O']:
+    return '' + ','.join(args) + ' are concyclic'
+  if name in ['midp', 'midpoint', 'M']:
+    x, a, b = args
+    return f'{x} is midpoint of {a}{b}'
+  if name in ['eqangle', 'eqangle6', '^']:
+    a, b, c, d, e, f, g, h = args
+    return f'{pretty_angle(a, b, c, d)} = {pretty_angle(e, f, g, h)}'
+  if name in ['eqratio', 'eqratio6', '/']:
+    return '{}{}:{}{} = {}{}:{}{}'.format(*args)
+  if name == 'eqratio3':
+    # The last two arguments share one name; the second overwrites the first.
+    a, b, c, d, o, o = args  # pylint: disable=redeclared-assigned-name
+    return f'S {o} {a} {b} {o} {c} {d}'
+  if name in ['cong', 'D']:
+    a, b, c, d = args
+    return f'{a}{b} = {c}{d}'
+  if name in ['perp', 'T']:
+    if len(args) == 2:  # this is algebraic derivation.
+      ab, cd = args  # ab = 'd( ... )'
+      return f'{ab} \u27c2 {cd}'
+    a, b, c, d = args
+    return f'{a}{b} \u27c2 {c}{d}'
+  if name in ['para', 'P']:
+    if len(args) == 2:  # this is algebraic derivation.
+      ab, cd = args  # ab = 'd( ... )'
+      return f'{ab} \u2225 {cd}'
+    a, b, c, d = args
+    return f'{a}{b} \u2225 {c}{d}'
+  if name in ['simtri2', 'simtri', 'simtri*']:
+    a, b, c, x, y, z = args
+    return f'\u0394{a}{b}{c} is similar to \u0394{x}{y}{z}'
+  if name in ['contri2', 'contri', 'contri*']:
+    a, b, c, x, y, z = args
+    return f'\u0394{a}{b}{c} is congruent to \u0394{x}{y}{z}'
+  if name in ['circle', 'I']:
+    o, a, b, c = args
+    return f'{o} is the circumcenter of \\Delta {a}{b}{c}'
+  if name == 'foot':
+    a, b, c, d = args
+    return f'{a} is the foot of {b} on {c}{d}'
+  # Unknown predicate: callers get None, as before.
+  return None
+def pretty(txt: str) -> str:
+  """Pretty formatting a predicate string.
+
+  Args:
+    txt: a predicate as a space-separated string 'name arg1 arg2 ...'; an
+      already split sequence of tokens is accepted as well.
+
+  Returns:
+    The predicate with its name replaced by a short symbol where one is
+    defined here, followed by its arguments. Predicates that are not
+    recognised are returned as their tokens joined by spaces.
+
+  Raises:
+    ValueError: if a recognised predicate has the wrong number of
+      arguments.
+  """
+  tokens = txt.split(' ') if isinstance(txt, str) else txt
+  name, *args = tokens
+
+  # Predicates that only swap the name for a symbol.
+  for plain_name, symbol in (
+      ('ind', 'Y'),
+      ('coll', 'C'),
+      ('collx', 'X'),
+      ('cyclic', 'O'),
+  ):
+    if name == plain_name:
+      return symbol + ' ' + ' '.join(args)
+  if name in ['fixc', 'fixl', 'fixb', 'fixt', 'fixp']:
+    return map_symbol_inv(name) + ' ' + ' '.join(args)
+
+  # Same, but these must have exactly four arguments.
+  if name == 'acompute' or name == 'rcompute':
+    _, _, _, _ = args
+    symbol = 'A' if name == 'acompute' else 'R'
+    return symbol + ' ' + ' '.join(args)
+
+  if name == 'aconst':
+    a, b, c, d, y = args
+    return f'^ {pretty2a(a, b, c, d)} {y}'
+  if name == 'rconst':
+    a, b, c, d, y = args
+    return f'/ {pretty2r(a, b, c, d)} {y}'
+  if name == 'eqangle':
+    a, b, c, d, e, f, g, h = args
+    return f'^ {pretty2a(a, b, c, d)} {pretty2a(e, f, g, h)}'
+  if name == 'eqratio':
+    a, b, c, d, e, f, g, h = args
+    return f'/ {pretty2r(a, b, c, d)} {pretty2r(e, f, g, h)}'
+  if name == 'eqratio3':
+    # The last two arguments share one name; the second overwrites the first.
+    a, b, c, d, o, o = args  # pylint: disable=redeclared-assigned-name
+    return f'S {o} {a} {b} {o} {c} {d}'
+
+  if name in ['midp', 'midpoint']:
+    x, a, b = args
+    return f'M {x} {a} {b}'
+
+  if name == 'perp' or name == 'para':
+    symbol = 'T' if name == 'perp' else 'P'
+    if len(args) == 2:  # this is algebraic derivation.
+      ab, cd = args  # ab = 'd( ... )'
+      return f'{symbol} {ab} {cd}'
+    a, b, c, d = args
+    return f'{symbol} {a} {b} {c} {d}'
+
+  # Predicates with exactly four point arguments.
+  for four_name, symbol in (('cong', 'D'), ('circle', 'I'), ('foot', 'F')):
+    if name == four_name:
+      a, b, c, d = args
+      return f'{symbol} {a} {b} {c} {d}'
+
+  # Triangle pairs: six point arguments.
+  if name in ['simtri2', 'simtri', 'simtri*']:
+    a, b, c, x, y, z = args
+    return f'S {a} {b} {c} {x} {y} {z}'
+  if name in ['contri2', 'contri', 'contri*']:
+    a, b, c, x, y, z = args
+    return f'= {a} {b} {c} {x} {y} {z}'
+
+  return ' '.join(tokens)

external/alphageometry/problem.py ADDED Viewed

	@@ -0,0 +1,1133 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implements objects to represent problems, theorems, proofs, traceback."""
+from __future__ import annotations
+from collections import defaultdict  # pylint: disable=g-importing-member
+from typing import Any
+import geometry as gm
+import pretty as pt
+# pylint: disable=protected-access
+# pylint: disable=unused-variable
+# pylint: disable=unused-argument
+# pylint: disable=unused-assignment
+def reshape(l: list[Any], n: int = 1) -> list[list[Any]]:
+  assert len(l) % n == 0
+  columns = [[] for i in range(n)]
+  for i, x in enumerate(l):
+    columns[i % n].append(x)
+  return zip(*columns)
+def isint(x: str) -> bool:
+  try:
+    int(x)
+    return True
+  except:  # pylint: disable=bare-except
+    return False
+class Construction:
+  """One predicate."""
+  @classmethod
+  def from_txt(cls, data: str) -> Construction:
+    data = data.split(' ')
+    return Construction(data[0], data[1:])
+  def __init__(self, name: str, args: list[str]):
+    self.name = name
+    self.args = args
+  def translate(self, mapping: dict[str, str]) -> Construction:
+    args = [a if isint(a) else mapping[a] for a in self.args]
+    return Construction(self.name, args)
+  def txt(self) -> str:
+    return ' '.join([self.name] + list(self.args))
+class Clause:
+  """One construction (>= 1 predicate)."""
+  @classmethod
+  def from_txt(cls, data: str) -> Clause:
+    if data == ' =':
+      return Clause([], [])
+    points, constructions = data.split(' = ')
+    return Clause(
+        points.split(' '),
+        [Construction.from_txt(c) for c in constructions.split(', ')],
+    )
+  def __init__(self, points: list[str], constructions: list[Construction]):
+    self.points = []
+    self.nums = []
+    for p in points:
+      num = None
+      if isinstance(p, str) and '@' in p:
+        p, num = p.split('@')
+        x, y = num.split('_')
+        num = float(x), float(y)
+      self.points.append(p)
+      self.nums.append(num)
+    self.constructions = constructions
+  def translate(self, mapping: dict[str, str]) -> Clause:
+    points0 = []
+    for p in self.points:
+      pcount = len(mapping) + 1
+      name = chr(96 + pcount)
+      if name > 'z':  # pcount = 26 -> name = 'z'
+        name = chr(97 + (pcount - 1) % 26) + str((pcount - 1) // 26)
+      p0 = mapping.get(p, name)
+      mapping[p] = p0
+      points0.append(p0)
+    return Clause(points0, [c.translate(mapping) for c in self.constructions])
+  def add(self, name: str, args: list[str]) -> None:
+    self.constructions.append(Construction(name, args))
+  def txt(self) -> str:
+    return (
+        ' '.join(self.points)
+        + ' = '
+        + ', '.join(c.txt() for c in self.constructions)
+    )
+def _gcd(x: int, y: int) -> int:
+  while y:
+    x, y = y, x % y
+  return x
+def simplify(n: int, d: int) -> tuple[int, int]:
+  g = _gcd(n, d)
+  return (n // g, d // g)
+def compare_fn(dep: Dependency) -> tuple[Dependency, str]:
+  return (dep, pt.pretty(dep))
+def sort_deps(deps: list[Dependency]) -> list[Dependency]:
+  return sorted(deps, key=compare_fn)
+class Problem:
+  """Describe one problem to solve."""
+  @classmethod
+  def from_txt_file(
+      cls, fname: str, to_dict: bool = False, translate: bool = True
+  ):
+    """Load a problem from a text file."""
+    with open(fname, 'r') as f:
+      lines = f.read().split('\n')
+    lines = [l for l in lines if l]
+    data = [
+        cls.from_txt(url + '\n' + problem, translate)
+        for (url, problem) in reshape(lines, 2)
+    ]
+    if to_dict:
+      return cls.to_dict(data)
+    return data
+  @classmethod
+  def from_txt(cls, data: str, translate: bool = True) -> Problem:
+    """Load a problem from a str object."""
+    url = ''
+    if '\n' in data:
+      url, data = data.split('\n')
+    if ' ? ' in data:
+      clauses, goal = data.split(' ? ')
+      goal = Construction.from_txt(goal)
+    else:
+      clauses, goal = data, None
+    clauses = clauses.split('; ')
+    problem = Problem(
+        url=url, clauses=[Clause.from_txt(c) for c in clauses], goal=goal
+    )
+    if translate:
+      return problem.translate()
+    return problem
+  @classmethod
+  def to_dict(cls, data: list[Problem]) -> dict[str, Problem]:
+    return {p.url: p for p in data}
+  def __init__(self, url: str, clauses: list[Clause], goal: Construction):
+    self.url = url
+    self.clauses = clauses
+    self.goal = goal
+  def copy(self) -> Problem:
+    return Problem(self.url, list(self.clauses), self.goal)
+  def translate(self) -> Problem:  # to single-char point names
+    """Translate point names into alphabetical."""
+    mapping = {}
+    clauses = []
+    for clause in self.clauses:
+      clauses.append(clause.translate(mapping))
+    if self.goal:
+      goal = self.goal.translate(mapping)
+    else:
+      goal = self.goal
+    p = Problem(self.url, clauses, goal)
+    p.mapping = mapping
+    return p
+  def txt(self) -> str:
+    return (
+        '; '.join([c.txt() for c in self.clauses]) + ' ? ' + self.goal.txt()
+        if self.goal
+        else ''
+    )
+  def setup_str_from_problem(self, definitions: list[Definition]) -> str:
+    """Construct the <theorem_premises> string from Problem object."""
+    ref = 0
+    string = []
+    for clause in self.clauses:
+      group = {}
+      p2deps = defaultdict(list)
+      for c in clause.constructions:
+        cdef = definitions[c.name]
+        if len(c.args) != len(cdef.construction.args):
+          assert len(c.args) + len(clause.points) == len(cdef.construction.args)
+          c.args = clause.points + c.args
+        mapping = dict(zip(cdef.construction.args, c.args))
+        for points, bs in cdef.basics:
+          points = tuple([mapping[x] for x in points])
+          for p in points:
+            group[p] = points
+          for b in bs:
+            args = [mapping[a] for a in b.args]
+            name = b.name
+            if b.name in ['s_angle', 'aconst']:
+              x, y, z, v = args
+              name = 'aconst'
+              v = int(v)
+              if v < 0:
+                v = -v
+                x, z = z, x
+              m, n = simplify(int(v), 180)
+              args = [y, z, y, x, f'{m}pi/{n}']
+            p2deps[points].append(hashed_txt(name, args))
+      for k, v in p2deps.items():
+        p2deps[k] = sort_deps(v)
+      points = clause.points
+      while points:
+        p = points[0]
+        gr = group[p]
+        points = [x for x in points if x not in gr]
+        deps_str = []
+        for dep in p2deps[gr]:
+          ref_str = '{:02}'.format(ref)
+          dep_str = pt.pretty(dep)
+          if dep[0] == 'aconst':
+            m, n = map(int, dep[-1].split('pi/'))
+            mn = f'{m}. pi / {n}.'
+            dep_str = ' '.join(dep_str.split()[:-1] + [mn])
+          deps_str.append(dep_str + ' ' + ref_str)
+          ref += 1
+        string.append(' '.join(gr) + ' : ' + ' '.join(deps_str))
+    string = '{S} ' + ' ; '.join([s.strip() for s in string])
+    goal = self.goal
+    string += ' ? ' + pt.pretty([goal.name] + goal.args)
+    return string
+def parse_rely(s: str) -> dict[str, str]:
+  result = {}
+  if not s:
+    return result
+  s = [x.strip() for x in s.split(',')]
+  for x in s:
+    a, b = x.split(':')
+    a, b = a.strip().split(), b.strip().split()
+    result.update({m: b for m in a})
+  return result
+class Definition:
+  """Definitions of construction statements."""
+  @classmethod
+  def from_txt_file(cls, fname: str, to_dict: bool = False) -> Definition:
+    with open(fname, 'r') as f:
+      lines = f.read()
+    return cls.from_string(lines, to_dict)
+  @classmethod
+  def from_string(cls, string: str, to_dict: bool = False) -> Definition:
+    lines = string.split('\n')
+    data = [cls.from_txt('\n'.join(group)) for group in reshape(lines, 6)]
+    if to_dict:
+      return cls.to_dict(data)
+    return data
+  @classmethod
+  def to_dict(cls, data: list[Definition]) -> dict[str, Definition]:
+    return {d.construction.name: d for d in data}
+  @classmethod
+  def from_txt(cls, data: str) -> Definition:
+    """Load definitions from a str object."""
+    construction, rely, deps, basics, numerics, _ = data.split('\n')
+    basics = [] if not basics else [b.strip() for b in basics.split(';')]
+    levels = []
+    for bs in basics:
+      if ':' in bs:
+        points, bs = bs.split(':')
+        points = points.strip().split()
+      else:
+        points = []
+      if bs.strip():
+        bs = [Construction.from_txt(b.strip()) for b in bs.strip().split(',')]
+      else:
+        bs = []
+      levels.append((points, bs))
+    numerics = [] if not numerics else numerics.split(', ')
+    return Definition(
+        construction=Construction.from_txt(construction),
+        rely=parse_rely(rely),
+        deps=Clause.from_txt(deps),
+        basics=levels,
+        numerics=[Construction.from_txt(c) for c in numerics],
+    )
+  def __init__(
+      self,
+      construction: Construction,
+      rely: dict[str, str],
+      deps: Clause,
+      basics: list[tuple[list[str], list[Construction]]],
+      numerics: list[Construction],
+  ):
+    self.construction = construction
+    self.rely = rely
+    self.deps = deps
+    self.basics = basics
+    self.numerics = numerics
+    args = set()
+    for num in numerics:
+      args.update(num.args)
+    self.points = []
+    self.args = []
+    for p in self.construction.args:
+      if p in args:
+        self.args.append(p)
+      else:
+        self.points.append(p)
+class Theorem:
+  """Deduction rule."""
+  @classmethod
+  def from_txt_file(cls, fname: str, to_dict: bool = False) -> Theorem:
+    with open(fname, 'r') as f:
+      theorems = f.read()
+    return cls.from_string(theorems, to_dict)
+  @classmethod
+  def from_string(cls, string: str, to_dict: bool = False) -> Theorem:
+    """Load deduction rule from a str object."""
+    theorems = string.split('\n')
+    theorems = [l for l in theorems if l and not l.startswith('#')]
+    theorems = [cls.from_txt(l) for l in theorems]
+    for i, th in enumerate(theorems):
+      th.rule_name = 'r{:02}'.format(i)
+    if to_dict:
+      result = {}
+      for t in theorems:
+        if t.name in result:
+          t.name += '_'
+        result[t.rule_name] = t
+      return result
+    return theorems
+  @classmethod
+  def from_txt(cls, data: str) -> Theorem:
+    premises, conclusion = data.split(' => ')
+    premises = premises.split(', ')
+    conclusion = conclusion.split(', ')
+    return Theorem(
+        premise=[Construction.from_txt(p) for p in premises],
+        conclusion=[Construction.from_txt(c) for c in conclusion],
+    )
+  def __init__(
+      self, premise: list[Construction], conclusion: list[Construction]
+  ):
+    if len(conclusion) != 1:
+      raise ValueError('Cannot have more than one conclusion')
+    self.name = '_'.join([p.name for p in premise + conclusion])
+    self.premise = premise
+    self.conclusion = conclusion
+    self.is_arg_reduce = False
+    assert len(self.conclusion) == 1
+    con = self.conclusion[0]
+    if con.name in [
+        'eqratio3',
+        'midp',
+        'contri',
+        'simtri',
+        'contri2',
+        'simtri2',
+        'simtri*',
+        'contri*',
+    ]:
+      return
+    prem_args = set(sum([p.args for p in self.premise], []))
+    con_args = set(con.args)
+    if len(prem_args) <= len(con_args):
+      self.is_arg_reduce = True
+  def txt(self) -> str:
+    premise_txt = ', '.join([clause.txt() for clause in self.premise])
+    conclusion_txt = ', '.join([clause.txt() for clause in self.conclusion])
+    return f'{premise_txt} => {conclusion_txt}'
+  def conclusion_name_args(
+      self, mapping: dict[str, gm.Point]
+  ) -> tuple[str, list[gm.Point]]:
+    mapping = {arg: p for arg, p in mapping.items() if isinstance(arg, str)}
+    c = self.conclusion[0]
+    args = [mapping[a] for a in c.args]
+    return c.name, args
+# Explains why the ratio d1:d2 equals the ratio d3:d4.
+# Return value, as visible in the body:
+#   * None when no candidate pair has a non-None gm.why_equal() result;
+#   * `why0` itself when d1..d4 equal the `_l` members of the chosen pair;
+#   * otherwise a list with an 'eqratio' Dependency (only if why0 is
+#     non-empty) followed by one 'cong' Dependency for each input whose
+#     backtrack list is non-empty.
+def why_eqratio(
+    d1: gm.Direction,
+    d2: gm.Direction,
+    d3: gm.Direction,
+    d4: gm.Direction,
+    level: int,
+) -> list[Dependency]:
+  """Why two ratios are equal, returns a Dependency objects."""
+  all12 = list(gm.all_ratios(d1, d2, level))
+  all34 = list(gm.all_ratios(d3, d4, level))
+  # Try every (d1, d2) candidate against every (d3, d4) candidate and keep
+  # the combination whose concatenated explanation is the shortest.
+  min_why = None
+  for ang12, d1s, d2s in all12:
+    for ang34, d3s, d4s in all34:
+      why0 = gm.why_equal(ang12, ang34, level)
+      if why0 is None:
+        continue
+      d1_, d2_ = ang12._l
+      d3_, d4_ = ang34._l
+      # Explanations linking each input to the matching member of the pair.
+      why1 = gm.bfs_backtrack(d1, [d1_], d1s)
+      why2 = gm.bfs_backtrack(d2, [d2_], d2s)
+      why3 = gm.bfs_backtrack(d3, [d3_], d3s)
+      why4 = gm.bfs_backtrack(d4, [d4_], d4s)
+      why = why0 + why1 + why2 + why3 + why4
+      if min_why is None or len(why) < len(min_why[0]):
+        min_why = why, ang12, ang34, why0, why1, why2, why3, why4
+  if min_why is None:
+    return None
+  _, ang12, ang34, why0, why1, why2, why3, why4 = min_why
+  d1_, d2_ = ang12._l
+  d3_, d4_ = ang34._l
+  # The inputs already are the members of the chosen pair: why0 is enough.
+  if d1 == d1_ and d2 == d2_ and d3 == d3_ and d4 == d4_:
+    return why0
+  (a_, b_), (c_, d_) = d1_._obj.points, d2_._obj.points
+  (e_, f_), (g_, h_) = d3_._obj.points, d4_._obj.points
+  deps = []
+  if why0:
+    # Intermediate fact stated on the point pairs of the chosen members.
+    dep = Dependency('eqratio', [a_, b_, c_, d_, e_, f_, g_, h_], '', level)
+    dep.why = why0
+    deps.append(dep)
+  (a, b), (c, d) = d1._obj.points, d2._obj.points
+  (e, f), (g, h) = d3._obj.points, d4._obj.points
+  # One 'cong' per input that needed a non-empty backtrack, relating the
+  # input's point pair to the chosen member's point pair.
+  for why, (x, y), (x_, y_) in zip(
+      [why1, why2, why3, why4],
+      [(a, b), (c, d), (e, f), (g, h)],
+      [(a_, b_), (c_, d_), (e_, f_), (g_, h_)],
+  ):
+    if why:
+      dep = Dependency('cong', [x, y, x_, y_], '', level)
+      dep.why = why
+      deps.append(dep)
+  return deps
+# Explains why the angle (d1, d2) equals the angle (d3, d4).
+# Return value, as visible in the body:
+#   * None when no candidate pair has a non-None gm.why_equal() result;
+#   * otherwise a 2-tuple ((d1_, d2_, d3_, d4_), why) where the first item
+#     holds the `_d` members of the chosen angle pair and `why` is either
+#     why0 (inputs equal those members) or a list of Dependency objects.
+# The `verbose` parameter is not read anywhere in the body.
+def why_eqangle(
+    d1: gm.Direction,
+    d2: gm.Direction,
+    d3: gm.Direction,
+    d4: gm.Direction,
+    level: int,
+    verbose: bool = False,
+) -> list[Dependency]:
+  """Why two angles are equal, returns a Dependency objects."""
+  all12 = list(gm.all_angles(d1, d2, level))
+  all34 = list(gm.all_angles(d3, d4, level))
+  # Try every (d1, d2) candidate against every (d3, d4) candidate and keep
+  # the combination whose concatenated explanation is the shortest.
+  min_why = None
+  for ang12, d1s, d2s in all12:
+    for ang34, d3s, d4s in all34:
+      why0 = gm.why_equal(ang12, ang34, level)
+      if why0 is None:
+        continue
+      d1_, d2_ = ang12._d
+      d3_, d4_ = ang34._d
+      # Explanations linking each input to the matching member of the pair.
+      why1 = gm.bfs_backtrack(d1, [d1_], d1s)
+      why2 = gm.bfs_backtrack(d2, [d2_], d2s)
+      why3 = gm.bfs_backtrack(d3, [d3_], d3s)
+      why4 = gm.bfs_backtrack(d4, [d4_], d4s)
+      why = why0 + why1 + why2 + why3 + why4
+      if min_why is None or len(why) < len(min_why[0]):
+        min_why = why, ang12, ang34, why0, why1, why2, why3, why4
+  if min_why is None:
+    return None
+  _, ang12, ang34, why0, why1, why2, why3, why4 = min_why
+  # why0 from the tuple is recomputed here for the chosen pair.
+  why0 = gm.why_equal(ang12, ang34, level)
+  d1_, d2_ = ang12._d
+  d3_, d4_ = ang34._d
+  # The inputs already are the members of the chosen pair: why0 is enough.
+  if d1 == d1_ and d2 == d2_ and d3 == d3_ and d4 == d4_:
+    return (d1_, d2_, d3_, d4_), why0
+  (a_, b_), (c_, d_) = d1_._obj.points, d2_._obj.points
+  (e_, f_), (g_, h_) = d3_._obj.points, d4_._obj.points
+  deps = []
+  if why0:
+    # Intermediate fact stated on the point pairs of the chosen members.
+    # Unlike why_eqratio, these dependencies are created with level None.
+    dep = Dependency('eqangle', [a_, b_, c_, d_, e_, f_, g_, h_], '', None)
+    dep.why = why0
+    deps.append(dep)
+  (a, b), (c, d) = d1._obj.points, d2._obj.points
+  (e, f), (g, h) = d3._obj.points, d4._obj.points
+  for why, d_xy, (x, y), d_xy_, (x_, y_) in zip(
+      [why1, why2, why3, why4],
+      [d1, d2, d3, d4],
+      [(a, b), (c, d), (e, f), (g, h)],
+      [d1_, d2_, d3_, d4_],
+      [(a_, b_), (c_, d_), (e_, f_), (g_, h_)],
+  ):
+    xy, xy_ = d_xy._obj, d_xy_._obj
+    if why:
+      # Same underlying object on both sides -> 'collx'; otherwise 'para'.
+      if xy == xy_:
+        name = 'collx'
+      else:
+        name = 'para'
+      dep = Dependency(name, [x_, y_, x, y], '', None)
+      dep.why = why
+      deps.append(dep)
+  return (d1_, d2_, d3_, d4_), deps
+# Rule name that Dependency.populate() asserts on before copying a dependency.
+CONSTRUCTION_RULE = 'c0'
+class EmptyDependency:
+  """Empty dependency predicate ready to get filled up."""
+  def __init__(self, level: int, rule_name: str):
+    self.level = level
+    self.rule_name = rule_name or ''
+    self.empty = True
+    self.why = []
+    self.trace = None
+  def populate(self, name: str, args: list[gm.Point]) -> Dependency:
+    dep = Dependency(name, args, self.rule_name, self.level)
+    dep.trace2 = self.trace
+    dep.why = list(self.why)
+    return dep
+  def copy(self) -> EmptyDependency:
+    other = EmptyDependency(self.level, self.rule_name)
+    other.why = list(self.why)
+    return other
+  def extend(
+      self,
+      g: Any,
+      name0: str,
+      args0: list[gm.Point],
+      name: str,
+      args: list[gm.Point],
+  ) -> EmptyDependency:
+    """Extend the dependency list by (name, args)."""
+    dep0 = self.populate(name0, args0)
+    deps = EmptyDependency(level=self.level, rule_name=None)
+    dep = Dependency(name, args, None, deps.level)
+    deps.why = [dep0, dep.why_me_or_cache(g, None)]
+    return deps
+  def extend_many(
+      self,
+      g: Any,
+      name0: str,
+      args0: list[gm.Point],
+      name_args: list[tuple[str, list[gm.Point]]],
+  ) -> EmptyDependency:
+    """Extend the dependency list by many name_args."""
+    if not name_args:
+      return self
+    dep0 = self.populate(name0, args0)
+    deps = EmptyDependency(level=self.level, rule_name=None)
+    deps.why = [dep0]
+    for name, args in name_args:
+      dep = Dependency(name, args, None, deps.level)
+      deps.why += [dep.why_me_or_cache(g, None)]
+    return deps
+def maybe_make_equal_pairs(
+    a: gm.Point,
+    b: gm.Point,
+    c: gm.Point,
+    d: gm.Point,
+    m: gm.Point,
+    n: gm.Point,
+    p: gm.Point,
+    q: gm.Point,
+    ab: gm.Line,
+    mn: gm.Line,
+    g: Any,
+    level: int,
+) -> list[Dependency]:
+  """Make a-b:c-d==m-n:p-q in case a-b==m-n or c-d==p-q."""
+  if ab != mn:
+    return
+  why = []
+  eqname = 'para' if isinstance(ab, gm.Line) else 'cong'
+  colls = [a, b, m, n]
+  if len(set(colls)) > 2 and eqname == 'para':
+    dep = Dependency('collx', colls, None, level)
+    dep.why_me(g, level)
+    why += [dep]
+  dep = Dependency(eqname, [c, d, p, q], None, level)
+  dep.why_me(g, level)
+  why += [dep]
+  return why
+# A Construction (name + args) extended with the rule that produced it, a
+# level, and `why`: the list of dependencies that justify it.
+class Dependency(Construction):
+  """Dependency is a predicate that other predicates depend on."""
+  def __init__(
+      self, name: str, args: list[gm.Point], rule_name: str, level: int
+  ):
+    super().__init__(name, args)
+    # A None rule name is normalized to the empty string.
+    self.rule_name = rule_name or ''
+    self.level = level
+    # Dependencies that justify this one; filled in by why_me().
+    self.why = []
+    self._stat = None
+    self.trace = None
+  # Depth-first search through `why` for a dependency whose hash equals
+  # `dep_hashed`. Nested matches are returned before the direct one; the
+  # method falls through (returning None) when nothing matches.
+  def _find(self, dep_hashed: tuple[str, ...]) -> Dependency:
+    for w in self.why:
+      f = w._find(dep_hashed)
+      if f:
+        return f
+      if w.hashed() == dep_hashed:
+        return w
+  # Returns the nested dependency that has the same hash as self, if there
+  # is one, and self otherwise.
+  def remove_loop(self) -> Dependency:
+    f = self._find(self.hashed())
+    if f:
+      return f
+    return self
+  # Copy with the same name/args/rule/level/trace and a shallow copy of why.
+  def copy(self) -> Dependency:
+    dep = Dependency(self.name, self.args, self.rule_name, self.level)
+    dep.trace = self.trace
+    dep.why = list(self.why)
+    return dep
+  # Returns the entry cached in g.cache under this hash when there is one;
+  # otherwise computes self.why via why_me() and returns self.
+  def why_me_or_cache(self, g: Any, level: int) -> Dependency:
+    if self.hashed() in g.cache:
+      return g.cache[self.hashed()]
+    self.why_me(g, level)
+    return self
+  # NOTE(review): the `name` and `args` parameters are not used; the copy
+  # keeps self.name and self.args. Only valid for CONSTRUCTION_RULE deps.
+  def populate(self, name: str, args: list[gm.Point]) -> Dependency:
+    assert self.rule_name == CONSTRUCTION_RULE, self.rule_name
+    dep = Dependency(self.name, self.args, self.rule_name, self.level)
+    dep.why = list(self.why)
+    return dep
+  # Fills self.why (and sometimes self.rule_name / self.name) in place,
+  # dispatching on self.name. `g` must provide the members used below:
+  # cache, _get_line, _get_segment, why_equal, check_coll, is_equal,
+  # get_line_thru_pair_why, check_para_or_coll, check_cong.
+  # Raises ValueError for a predicate name that no branch handles.
+  def why_me(self, g: Any, level: int) -> None:
+    """Figure out the dependencies predicates of self."""
+    name, args = self.name, self.args
+    hashed_me = hashed(name, args)
+    # Reuse the cached explanation when this predicate is already known.
+    if hashed_me in g.cache:
+      dep = g.cache[hashed_me]
+      self.why = dep.why
+      self.rule_name = dep.rule_name
+      return
+    if self.name == 'para':
+      a, b, c, d = self.args
+      # Both pairs are the same two points: nothing to explain.
+      if {a, b} == {c, d}:
+        self.why = []
+        return
+      ab = g._get_line(a, b)
+      cd = g._get_line(c, d)
+      if ab == cd:
+        # NOTE(review): this inner check repeats the one that already
+        # returned above, so it cannot be true here.
+        if {a, b} == {c, d}:
+          self.why = []
+          self.rule_name = ''
+          return
+        # Same line object: explain through collinearity of all the points.
+        dep = Dependency('coll', list({a, b, c, d}), 't??', None)
+        self.why = [dep.why_me_or_cache(g, level)]
+        return
+      # When a pair differs from the points stored on its line, first relate
+      # the pair to those stored points with a 'collx' dependency.
+      for (x, y), xy in zip([(a, b), (c, d)], [ab, cd]):
+        x_, y_ = xy.points
+        if {x, y} == {x_, y_}:
+          continue
+        d = Dependency('collx', [x, y, x_, y_], None, level)
+        self.why += [d.why_me_or_cache(g, level)]
+      whypara = g.why_equal(ab, cd, None)
+      self.why += whypara
+    elif self.name == 'midp':
+      # Midpoint = collinearity of m, a, b plus equality of segments ma, mb.
+      m, a, b = self.args
+      ma = g._get_segment(m, a)
+      mb = g._get_segment(m, b)
+      dep = Dependency('coll', [m, a, b], None, None).why_me_or_cache(g, None)
+      self.why = [dep] + g.why_equal(ma, mb, level)
+    elif self.name == 'perp':
+      a, b, c, d = self.args
+      ab = g._get_line(a, b)
+      cd = g._get_line(c, d)
+      # Same 'collx' bridging as in the 'para' branch.
+      for (x, y), xy in zip([(a, b), (c, d)], [ab, cd]):
+        x_, y_ = xy.points
+        if {x, y} == {x_, y_}:
+          continue
+        d = Dependency('collx', [x, y, x_, y_], None, level)
+        self.why += [d.why_me_or_cache(g, level)]
+      # Perpendicularity is explained as angle(ab, cd) == angle(cd, ab).
+      # NOTE(review): unpacking fails if why_eqangle returns None here.
+      _, why = why_eqangle(ab._val, cd._val, cd._val, ab._val, level)
+      a, b = ab.points
+      c, d = cd.points
+      # If the lines' stored points hash differently from self, wrap the
+      # explanation in an intermediate dependency stated on those points.
+      if hashed(self.name, [a, b, c, d]) != self.hashed():
+        d = Dependency(self.name, [a, b, c, d], None, level)
+        d.why = why
+        why = [d]
+      self.why += why
+    elif self.name == 'cong':
+      a, b, c, d = self.args
+      ab = g._get_segment(a, b)
+      cd = g._get_segment(c, d)
+      self.why = g.why_equal(ab, cd, level)
+    elif self.name == 'coll':
+      _, why = gm.line_of_and_why(self.args, level)
+      self.why = why
+    elif self.name == 'collx':
+      # 'collx' is resolved either as 'coll' (points collinear) or, failing
+      # that, by renaming self to 'para' and explaining that instead.
+      if g.check_coll(self.args):
+        args = list(set(self.args))
+        hashed_me = hashed('coll', args)
+        if hashed_me in g.cache:
+          dep = g.cache[hashed_me]
+          self.why = [dep]
+          self.rule_name = ''
+          return
+        _, self.why = gm.line_of_and_why(args, level)
+      else:
+        self.name = 'para'
+        self.why_me(g, level)
+    elif self.name == 'cyclic':
+      _, why = gm.circle_of_and_why(self.args, level)
+      self.why = why
+    elif self.name == 'circle':
+      # Center o: explained by oa == ob and oa == oc.
+      o, a, b, c = self.args
+      oa = g._get_segment(o, a)
+      ob = g._get_segment(o, b)
+      oc = g._get_segment(o, c)
+      self.why = g.why_equal(oa, ob, level) + g.why_equal(oa, oc, level)
+    elif self.name in ['eqangle', 'eqangle6']:
+      a, b, c, d, m, n, p, q = self.args
+      ab, why1 = g.get_line_thru_pair_why(a, b)
+      cd, why2 = g.get_line_thru_pair_why(c, d)
+      mn, why3 = g.get_line_thru_pair_why(m, n)
+      pq, why4 = g.get_line_thru_pair_why(p, q)
+      if ab is None or cd is None or mn is None or pq is None:
+        # Some line is missing: fall back to a single 'para' between the
+        # remaining pairs when two of the pairs are the same two points.
+        # The checks are independent `if`s, so a later match overwrites
+        # self.why.
+        # NOTE(review): `d` is rebound to a Dependency inside these blocks,
+        # so later reads of `d` in this block see that object rather than
+        # the fourth point - confirm this is intended.
+        if {a, b} == {m, n}:
+          d = Dependency('para', [c, d, p, q], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        if {a, b} == {c, d}:
+          d = Dependency('para', [p, q, m, n], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        if {c, d} == {p, q}:
+          d = Dependency('para', [a, b, m, n], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        if {p, q} == {m, n}:
+          d = Dependency('para', [a, b, c, d], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        return
+      # Relate each pair to the points stored on its line via 'collx', using
+      # the explanation returned by get_line_thru_pair_why.
+      for (x, y), xy, whyxy in zip(
+          [(a, b), (c, d), (m, n), (p, q)],
+          [ab, cd, mn, pq],
+          [why1, why2, why3, why4],
+      ):
+        x_, y_ = xy.points
+        if {x, y} == {x_, y_}:
+          continue
+        d = Dependency('collx', [x, y, x_, y_], None, level)
+        d.why = whyxy
+        self.why += [d]
+      # From here on the eight names refer to the lines' stored points.
+      a, b = ab.points
+      c, d = cd.points
+      m, n = mn.points
+      p, q = pq.points
+      # True when the predicate on the stored points hashes differently
+      # from self.
+      diff = hashed(self.name, [a, b, c, d, m, n, p, q]) != self.hashed()
+      whyeqangle = None
+      if ab._val and cd._val and mn._val and pq._val:
+        whyeqangle = why_eqangle(ab._val, cd._val, mn._val, pq._val, level)
+      if whyeqangle:
+        (dab, dcd, dmn, dpq), whyeqangle = whyeqangle
+        if diff:
+          # Wrap the explanation in an 'eqangle' on the stored points.
+          d = Dependency('eqangle', [a, b, c, d, m, n, p, q], None, level)
+          d.why = whyeqangle
+          whyeqangle = [d]
+        self.why += whyeqangle
+      else:
+        # No angle-based explanation: try explanations based on identical or
+        # equal lines, in this fixed order.
+        if (ab == cd and mn == pq) or (ab == mn and cd == pq):
+          self.why += []
+        elif ab == mn:
+          self.why += maybe_make_equal_pairs(
+              a, b, c, d, m, n, p, q, ab, mn, g, level
+          )
+        elif cd == pq:
+          self.why += maybe_make_equal_pairs(
+              c, d, a, b, p, q, m, n, cd, pq, g, level
+          )
+        elif ab == cd:
+          self.why += maybe_make_equal_pairs(
+              a, b, m, n, c, d, p, q, ab, cd, g, level
+          )
+        elif mn == pq:
+          self.why += maybe_make_equal_pairs(
+              m, n, a, b, p, q, c, d, mn, pq, g, level
+          )
+        elif g.is_equal(ab, mn) or g.is_equal(cd, pq):
+          dep1 = Dependency('para', [a, b, m, n], None, level)
+          dep1.why_me(g, level)
+          dep2 = Dependency('para', [c, d, p, q], None, level)
+          dep2.why_me(g, level)
+          self.why += [dep1, dep2]
+        elif g.is_equal(ab, cd) or g.is_equal(mn, pq):
+          dep1 = Dependency('para', [a, b, c, d], None, level)
+          dep1.why_me(g, level)
+          dep2 = Dependency('para', [m, n, p, q], None, level)
+          dep2.why_me(g, level)
+          self.why += [dep1, dep2]
+        elif ab._val and cd._val and mn._val and pq._val:
+          # NOTE(review): this stores why_eqangle's raw result (a tuple or
+          # None per its return statements) in self.why, replacing the
+          # collx entries gathered above - confirm this is intended.
+          self.why = why_eqangle(ab._val, cd._val, mn._val, pq._val, level)
+    elif self.name in ['eqratio', 'eqratio6']:
+      a, b, c, d, m, n, p, q = self.args
+      ab = g._get_segment(a, b)
+      cd = g._get_segment(c, d)
+      mn = g._get_segment(m, n)
+      pq = g._get_segment(p, q)
+      if ab is None or cd is None or mn is None or pq is None:
+        # Same fallback as for eqangle, with 'cong' instead of 'para'.
+        # NOTE(review): `d` is rebound to a Dependency here as well.
+        if {a, b} == {m, n}:
+          d = Dependency('cong', [c, d, p, q], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        if {a, b} == {c, d}:
+          d = Dependency('cong', [p, q, m, n], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        if {c, d} == {p, q}:
+          d = Dependency('cong', [a, b, m, n], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        if {p, q} == {m, n}:
+          d = Dependency('cong', [a, b, c, d], None, level)
+          self.why = [d.why_me_or_cache(g, level)]
+        return
+      if ab._val and cd._val and mn._val and pq._val:
+        self.why = why_eqratio(ab._val, cd._val, mn._val, pq._val, level)
+      # why_eqratio returns None when it finds no explanation; only then are
+      # the segment-based fallbacks below tried.
+      if self.why is None:
+        self.why = []
+        if (ab == cd and mn == pq) or (ab == mn and cd == pq):
+          self.why = []
+        elif ab == mn:
+          self.why += maybe_make_equal_pairs(
+              a, b, c, d, m, n, p, q, ab, mn, g, level
+          )
+        elif cd == pq:
+          self.why += maybe_make_equal_pairs(
+              c, d, a, b, p, q, m, n, cd, pq, g, level
+          )
+        elif ab == cd:
+          self.why += maybe_make_equal_pairs(
+              a, b, m, n, c, d, p, q, ab, cd, g, level
+          )
+        elif mn == pq:
+          self.why += maybe_make_equal_pairs(
+              m, n, a, b, p, q, c, d, mn, pq, g, level
+          )
+        elif g.is_equal(ab, mn) or g.is_equal(cd, pq):
+          dep1 = Dependency('cong', [a, b, m, n], None, level)
+          dep1.why_me(g, level)
+          dep2 = Dependency('cong', [c, d, p, q], None, level)
+          dep2.why_me(g, level)
+          self.why += [dep1, dep2]
+        elif g.is_equal(ab, cd) or g.is_equal(mn, pq):
+          dep1 = Dependency('cong', [a, b, c, d], None, level)
+          dep1.why_me(g, level)
+          dep2 = Dependency('cong', [m, n, p, q], None, level)
+          dep2.why_me(g, level)
+          self.why += [dep1, dep2]
+        elif ab._val and cd._val and mn._val and pq._val:
+          # NOTE(review): calls why_eqangle (not why_eqratio) in the ratio
+          # branch and stores its raw result - confirm this is intended.
+          self.why = why_eqangle(ab._val, cd._val, mn._val, pq._val, level)
+    elif self.name in ['diff', 'npara', 'nperp', 'ncoll', 'sameside']:
+      # These predicates get an empty explanation.
+      self.why = []
+    elif self.name == 'simtri':
+      # Explained by two equal angle pairs; rule name fixed to 'r34'.
+      a, b, c, x, y, z = self.args
+      dep1 = Dependency('eqangle', [a, b, a, c, x, y, x, z], '', level)
+      dep1.why_me(g, level)
+      dep2 = Dependency('eqangle', [b, a, b, c, y, x, y, z], '', level)
+      dep2.why_me(g, level)
+      self.rule_name = 'r34'
+      self.why = [dep1, dep2]
+    elif self.name == 'contri':
+      # Explained by three equal side pairs; rule name fixed to 'r32'.
+      a, b, c, x, y, z = self.args
+      dep1 = Dependency('cong', [a, b, x, y], '', level)
+      dep1.why_me(g, level)
+      dep2 = Dependency('cong', [b, c, y, z], '', level)
+      dep2.why_me(g, level)
+      dep3 = Dependency('cong', [c, a, z, x], '', level)
+      dep3.why_me(g, level)
+      self.rule_name = 'r32'
+      self.why = [dep1, dep2, dep3]
+    elif self.name == 'ind':
+      pass
+    elif self.name == 'aconst':
+      a, b, c, d, ang0 = self.args
+      measure = ang0._val
+      # Look for another angle attached to the same value whose two lines
+      # are parallel or collinear with (a, b) and (c, d); the first such
+      # angle provides the explanation.
+      for ang in measure.neighbors(gm.Angle):
+        if ang == ang0:
+          continue
+        d1, d2 = ang._d
+        l1, l2 = d1._obj, d2._obj
+        # d1 is reused below as the name of a point of l2.
+        (a1, b1), (c1, d1) = l1.points, l2.points
+        if not g.check_para_or_coll([a, b, a1, b1]) or not g.check_para_or_coll(
+            [c, d, c1, d1]
+        ):
+          continue
+        self.why = []
+        for args in [(a, b, a1, b1), (c, d, c1, d1)]:
+          if g.check_coll(args):
+            # Collinear with only two distinct points needs no explanation.
+            if len(set(args)) > 2:
+              dep = Dependency('coll', args, None, None)
+              self.why.append(dep.why_me_or_cache(g, level))
+          else:
+            dep = Dependency('para', args, None, None)
+            self.why.append(dep.why_me_or_cache(g, level))
+        # NOTE(review): gm.why_equal is called with two arguments here but
+        # with three (including level) in why_eqangle - confirm.
+        self.why += gm.why_equal(ang, ang0)
+        break
+    elif self.name == 'rconst':
+      a, b, c, d, rat0 = self.args
+      val = rat0._val
+      # Same idea as 'aconst', using ratios and segment congruence.
+      for rat in val.neighbors(gm.Ratio):
+        if rat == rat0:
+          continue
+        l1, l2 = rat._l
+        s1, s2 = l1._obj, l2._obj
+        (a1, b1), (c1, d1) = list(s1.points), list(s2.points)
+        if not g.check_cong([a, b, a1, b1]) or not g.check_cong([c, d, c1, d1]):
+          continue
+        self.why = []
+        for args in [(a, b, a1, b1), (c, d, c1, d1)]:
+          if len(set(args)) > 2:
+            dep = Dependency('cong', args, None, None)
+            self.why.append(dep.why_me_or_cache(g, level))
+        self.why += gm.why_equal(rat, rat0)
+        break
+    else:
+      raise ValueError('Not recognize', self.name)
+  # Hash of this predicate's name and args; see the module-level hashed().
+  def hashed(self, rename: bool = False) -> tuple[str, ...]:
+    return hashed(self.name, self.args, rename=rename)
+def hashed(
+    name: str, args: list[gm.Point], rename: bool = False
+) -> tuple[str, ...]:
+  """Turn `args` into strings and hash them together with `name`.
+
+  Args:
+    name: Predicate name; 's_angle' gets special treatment for its last arg.
+    args: Objects exposing `name` and `new_name`. For 's_angle' the last item
+      is converted with `str()` instead.
+    rename: When true, `new_name` is used in place of `name`.
+
+  Returns:
+    The tuple produced by `hashed_txt` for the stringified arguments.
+  """
+  def label(point):
+    return point.new_name if rename else point.name
+  if name != 's_angle':
+    return hashed_txt(name, [label(p) for p in args])
+  # The trailing 's_angle' argument is not read through name/new_name.
+  labels = [label(p) for p in args[:-1]]
+  labels.append(str(args[-1]))
+  return hashed_txt(name, labels)
+def hashed_txt(name: str, args: list[str]) -> tuple[str, ...]:
+  """Return a canonical tuple for `name` and `args`.
+
+  Argument orderings that a predicate treats as interchangeable are mapped to
+  the same tuple, so the result is unique up to those permutations.
+
+  Args:
+    name: Predicate name selecting the canonicalization rule.
+    args: Argument strings of the predicate.
+
+  Returns:
+    A tuple starting with the predicate name ('eqangle6' and 'eqratio6' are
+    reported as 'eqangle' and 'eqratio') followed by the reordered args.
+
+  Raises:
+    ValueError: If `name` is not one of the handled predicates, or if `args`
+      does not have the number of items the predicate unpacks.
+  """
+  if name in ('const', 'aconst', 'rconst'):
+    # Two unordered pairs followed by a value that stays in last position.
+    p, q, r, s, value = args
+    return (name, *sorted([p, q]), *sorted([r, s]), value)
+  if name in ('npara', 'nperp', 'para', 'cong', 'perp', 'collx'):
+    # Two unordered pairs, and the pairs themselves are unordered.
+    p, q, r, s = args
+    first = tuple(sorted([p, q]))
+    second = tuple(sorted([r, s]))
+    first, second = sorted([first, second])
+    return (name, *first, *second)
+  if name in ('midp', 'midpoint'):
+    # The first arg keeps its place; the other two are unordered.
+    mid, p, q = args
+    return (name, mid, *sorted([p, q]))
+  if name in ('coll', 'cyclic', 'ncoll', 'diff', 'triangle'):
+    # Fully unordered; duplicates are dropped.
+    return (name, *sorted(set(args)))
+  if name == 'circle':
+    # The first arg keeps its place; the remaining three are unordered.
+    center, p, q, r = args
+    return (name, center, *sorted([p, q, r]))
+  if name in ('eqangle', 'eqratio', 'eqangle6', 'eqratio6'):
+    a, b, c, d, e, f, g, h = args
+    ab, cd, ef, gh = (
+        tuple(sorted(pair)) for pair in ((a, b), (c, d), (e, f), (g, h))
+    )
+    # Exchange (ab, ef) with (cd, gh) when the former sorts later.
+    if sorted(ab + ef) > sorted(cd + gh):
+      ab, ef, cd, gh = cd, gh, ab, ef
+    # Exchange the two halves when the first half sorts later.
+    if ab + cd > ef + gh:
+      ab, cd, ef, gh = ef, gh, ab, cd
+    canonical = {'eqangle6': 'eqangle', 'eqratio6': 'eqratio'}.get(name, name)
+    return (canonical, *ab, *cd, *ef, *gh)
+  if name in ('contri', 'simtri', 'simtri2', 'contri2', 'contri*', 'simtri*'):
+    a, b, c, x, y, z = args
+    # Order the matched pairs (a, x), (b, y), (c, z), then the two triples.
+    matched = sorted(zip((a, b, c), (x, y, z)), key=sorted)
+    first, second = sorted(zip(*matched), key=sorted)
+    return (name, *first, *second)
+  if name in ('eqratio3',):
+    # Only the last of the two trailing args is kept; it is emitted twice.
+    a, b, c, d, _, o = args
+    columns = sorted([(a, c), (b, d)], key=sorted)
+    first, second = sorted(zip(*columns), key=sorted)
+    return (name, *first, *second, o, o)
+  if name in ('sameside', 's_angle'):
+    # Argument order is significant here, so nothing is reordered.
+    return (name, *args)
+  raise ValueError(f'Not recognize {name} to hash.')

external/alphageometry/problem_test.py ADDED Viewed

	@@ -0,0 +1,61 @@

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for problem.py."""
+import unittest
+from absl.testing import absltest
+import problem as pr
+class ProblemTest(unittest.TestCase):
+  """Checks the setup string generated for an orthocenter problem."""
+  @classmethod
+  def setUpClass(cls):
+    super().setUpClass()
+    # Parsed once and shared by every test through the class attribute.
+    cls.defs = pr.Definition.from_txt_file('defs.txt', to_dict=True)
+  def test_orthocenter_no_translate(self):
+    # Parse the text into a pr.Problem object, keeping the point names as-is.
+    problem = pr.Problem.from_txt(
+        'a b c = triangle a b c; h = on_tline h b a c, on_tline h c a b ? perp a h b c',  # pylint: disable=line-too-long
+        translate=False,
+    )
+    # The setup string is what gets fed into the LM; it translates the
+    # constructive statement into the constrained form.
+    self.assertEqual(
+        problem.setup_str_from_problem(ProblemTest.defs),
+        '{S} a : ; b : ; c : ; h : T a b c h 00 T a c b h 01 ? T a h b c',
+    )
+  def test_orthocenter_translate(self):
+    # Parse the text into a pr.Problem object; with translate=True the point
+    # h is renamed to d to match the training data distribution.
+    problem = pr.Problem.from_txt(
+        'a b c = triangle a b c; h = on_tline h b a c, on_tline h c a b ? perp a h b c',  # pylint: disable=line-too-long
+        translate=True,
+    )
+    # The setup string is what gets fed into the LM; it translates the
+    # constructive statement into the constrained form.
+    self.assertEqual(
+        problem.setup_str_from_problem(ProblemTest.defs),
+        '{S} a : ; b : ; c : ; d : T a b c d 00 T a c b d 01 ? T a d b c',
+    )
+# Run the tests through absltest when this file is executed as a script.
+if __name__ == '__main__':
+  absltest.main()

external/alphageometry/requirements.in ADDED Viewed

	@@ -0,0 +1,17 @@

+tensorflow==2.13.0
+numpy==1.23.5
+scipy==1.10.0
+matplotlib==3.7.0
+gdown==4.7.1
+jax==0.4.6
+jaxlib==0.4.6
+flax==0.5.3
+gin-config==0.5.0
+gin==0.1.6
+t5==0.9.4
+sentencepiece==0.1.99
+absl-py==1.4.0
+clu==0.0.7
+optax==0.1.7
+seqio==0.0.18
+tensorflow-datasets==4.9.3

external/alphageometry/requirements.txt ADDED Viewed

The diff for this file is too large to render. See raw diff