bbkdevops's picture
download
raw
2.16 kB
"""Runtime mode selection for TinyMind Omega++."""
from __future__ import annotations
from enum import Enum
import json
from pathlib import Path
from .config import OmegaConfig
class RuntimeMode(str, Enum):
    """Execution modes the runtime can be resolved to.

    Members are also ``str`` instances, so each member compares equal to its
    plain string value (e.g. ``RuntimeMode.AUTO == "auto"``).
    """

    # Plain BF16 execution.
    BF16_QUALITY = "bf16_quality"
    # INT4 sparse fast path.
    INT4_SPARSE_FAST = "int4_sparse_fast"
    # INT6 bridge IMMA fast path.
    INT6_BRIDGE_IMMA_FAST = "int6_bridge_imma_fast"
    # Let the resolver choose a mode from what is available.
    AUTO = "auto"
def int6_bridge_available(
    report_path: str | Path = "reports/int6_bridge_imma_eval/int6_bridge_imma_eval_report.json",
    min_hardware_imma_tops: float = 100.0,
) -> bool:
    """Return whether the evaluation report qualifies the INT6 bridge path.

    The JSON report at *report_path* qualifies only when all of the following
    hold:

    * ``claim_gate.fused_two_pass_kernel_measured`` is literally ``true``;
    * ``claim_gate.imma_sp_sass_observed`` is literally ``true``;
    * ``metrics.avg_hardware_imma_tops`` (default ``0.0`` when absent),
      converted with ``float()``, is at least *min_hardware_imma_tops*.

    This is a best-effort probe: a missing, unreadable, undecodable or
    structurally malformed report yields ``False`` instead of raising.

    Args:
        report_path: Location of the JSON report file.
        min_hardware_imma_tops: Minimum accepted value for the
            ``avg_hardware_imma_tops`` metric.

    Returns:
        ``True`` if the report passes every check, ``False`` otherwise.
    """
    try:
        # "utf-8-sig" tolerates a leading BOM in the report file.
        raw_text = Path(report_path).read_text(encoding="utf-8-sig")
        report = json.loads(raw_text)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return False

    # Valid JSON is not necessarily an object; anything else cannot qualify.
    if not isinstance(report, dict):
        return False

    gate = report.get("claim_gate", {})
    metrics = report.get("metrics", {})
    if not isinstance(gate, dict) or not isinstance(metrics, dict):
        return False

    # Require the literal JSON `true`; truthy stand-ins (1, "yes") do not count.
    if gate.get("fused_two_pass_kernel_measured") is not True:
        return False
    if gate.get("imma_sp_sass_observed") is not True:
        return False

    try:
        measured_tops = float(metrics.get("avg_hardware_imma_tops", 0.0))
    except (TypeError, ValueError, OverflowError):
        # null, non-numeric strings, nested containers, or huge integers.
        return False
    return measured_tops >= min_hardware_imma_tops
def resolve_runtime_mode(
    cfg: OmegaConfig,
    cuda_available: bool,
    sparse_artifact_available: bool,
    int6_bridge_artifact_available: bool = False,
    requested: str | None = None,
) -> RuntimeMode:
    """Pick the runtime mode that can actually run given what is available.

    The desired mode is *requested* when it is truthy, otherwise
    ``cfg.precision_mode``. Resolution then degrades gracefully:

    * ``"int6_bridge_imma_fast"`` or ``"auto"``: INT6 bridge if CUDA and the
      INT6 bridge artifact are available, else INT4 sparse if CUDA and the
      sparse artifact are available, else BF16.
    * ``"int4_sparse_fast"``: INT4 sparse if CUDA and the sparse artifact are
      available, else BF16.
    * anything else (including ``"bf16_quality"``): BF16.

    Args:
        cfg: Configuration supplying the default ``precision_mode``.
        cuda_available: Whether CUDA can be used.
        sparse_artifact_available: Whether the INT4 sparse artifact exists.
        int6_bridge_artifact_available: Whether the INT6 bridge artifact exists.
        requested: Optional override for the configured precision mode.

    Returns:
        The resolved ``RuntimeMode``.
    """
    wanted = requested or cfg.precision_mode

    # Both fast paths need CUDA plus their own artifact.
    int6_usable = cuda_available and int6_bridge_artifact_available
    sparse_usable = cuda_available and sparse_artifact_available

    # An explicit INT6 request and AUTO share the same preference order.
    prefers_int6 = wanted in (
        RuntimeMode.INT6_BRIDGE_IMMA_FAST.value,
        RuntimeMode.AUTO.value,
    )
    if prefers_int6:
        if int6_usable:
            return RuntimeMode.INT6_BRIDGE_IMMA_FAST
        return RuntimeMode.INT4_SPARSE_FAST if sparse_usable else RuntimeMode.BF16_QUALITY

    if wanted == RuntimeMode.INT4_SPARSE_FAST.value and sparse_usable:
        return RuntimeMode.INT4_SPARSE_FAST

    # Explicit BF16, unknown modes, and unmet fast-path requirements end here.
    return RuntimeMode.BF16_QUALITY

Xet Storage Details

Size:
2.16 kB
·
Xet hash:
7d486156df410a26a68053d221d2475c526aa288d07fafa538907f34f2721c24

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.