Buckets:
| """Runtime mode selection for TinyMind Omega++.""" | |
| from __future__ import annotations | |
| from enum import Enum | |
| import json | |
| from pathlib import Path | |
| from .config import OmegaConfig | |
class RuntimeMode(str, Enum):
    """Names of the runtime execution modes.

    Each member is also a ``str``, so it compares equal to its plain string
    value (for example ``RuntimeMode.AUTO == "auto"``) and can be built from
    that value with ``RuntimeMode("auto")``.
    """

    # Concrete modes that ``resolve_runtime_mode`` can return.
    BF16_QUALITY = "bf16_quality"
    INT4_SPARSE_FAST = "int4_sparse_fast"
    INT6_BRIDGE_IMMA_FAST = "int6_bridge_imma_fast"

    # Placeholder request: ``resolve_runtime_mode`` maps it to one of the
    # concrete modes above and never returns it.
    AUTO = "auto"
def int6_bridge_available(
    report_path: str | Path = "reports/int6_bridge_imma_eval/int6_bridge_imma_eval_report.json",
    min_hardware_imma_tops: float = 100.0,
) -> bool:
    """Report whether the INT6 bridge evaluation report passes its gate.

    The report is a JSON file read as UTF-8 (a leading BOM is tolerated).
    The function returns ``True`` only when all of the following hold:

    * ``claim_gate.fused_two_pass_kernel_measured`` is exactly ``true``;
    * ``claim_gate.imma_sp_sass_observed`` is exactly ``true``;
    * ``metrics.avg_hardware_imma_tops`` converts to a float that is at
      least ``min_hardware_imma_tops``.

    This is a best-effort probe: a missing, unreadable, undecodable or
    malformed report yields ``False`` instead of raising.

    Args:
        report_path: Location of the JSON evaluation report.
        min_hardware_imma_tops: Minimum accepted value for
            ``metrics.avg_hardware_imma_tops``.

    Returns:
        ``True`` if the report exists and satisfies every condition above,
        otherwise ``False``.
    """
    try:
        raw_text = Path(report_path).read_text(encoding="utf-8-sig")
        report = json.loads(raw_text)
    except (OSError, ValueError):
        # OSError: missing or unreadable file (or a directory).
        # ValueError: covers both json.JSONDecodeError and UnicodeDecodeError.
        return False

    # Valid JSON is not necessarily an object; anything else has no gate.
    if not isinstance(report, dict):
        return False

    gate = report.get("claim_gate", {})
    metrics = report.get("metrics", {})
    if not isinstance(gate, dict) or not isinstance(metrics, dict):
        return False

    try:
        measured_tops = float(metrics.get("avg_hardware_imma_tops", 0.0))
    except (TypeError, ValueError, OverflowError):
        # null, non-numeric strings, nested containers, or integers too
        # large for a float cannot clear the threshold.
        return False

    # ``is True`` is deliberate: truthy stand-ins such as 1 or "yes" do not
    # count as a passed gate.
    return (
        gate.get("fused_two_pass_kernel_measured") is True
        and gate.get("imma_sp_sass_observed") is True
        and measured_tops >= min_hardware_imma_tops
    )
def resolve_runtime_mode(
    cfg: OmegaConfig,
    cuda_available: bool,
    sparse_artifact_available: bool,
    int6_bridge_artifact_available: bool = False,
    requested: str | None = None,
) -> RuntimeMode:
    """Pick the concrete runtime mode to run with.

    The wanted mode is ``requested`` when it is truthy, otherwise
    ``cfg.precision_mode``. Both fast modes need CUDA plus their artifact:

    * ``"int6_bridge_imma_fast"`` and ``"auto"`` try INT6 bridge first, then
      INT4 sparse, then BF16.
    * ``"int4_sparse_fast"`` tries INT4 sparse, then BF16.
    * Any other value (including ``"bf16_quality"``) resolves to BF16.

    Args:
        cfg: Configuration supplying the default ``precision_mode``.
        cuda_available: Whether CUDA can be used.
        sparse_artifact_available: Whether the INT4 sparse artifact exists.
        int6_bridge_artifact_available: Whether the INT6 bridge artifact
            exists.
        requested: Optional mode string overriding ``cfg.precision_mode``.

    Returns:
        A concrete ``RuntimeMode``; ``RuntimeMode.AUTO`` is never returned.
    """
    wanted = requested if requested else cfg.precision_mode

    # INT6 and AUTO share the same fallback ladder; an INT4 request joins
    # that ladder one rung lower.
    int6_allowed = wanted in (
        RuntimeMode.INT6_BRIDGE_IMMA_FAST.value,
        RuntimeMode.AUTO.value,
    )
    int4_allowed = int6_allowed or wanted == RuntimeMode.INT4_SPARSE_FAST.value

    # Every fast mode requires CUDA, so without it only BF16 remains.
    if not cuda_available:
        return RuntimeMode.BF16_QUALITY
    if int6_allowed and int6_bridge_artifact_available:
        return RuntimeMode.INT6_BRIDGE_IMMA_FAST
    if int4_allowed and sparse_artifact_available:
        return RuntimeMode.INT4_SPARSE_FAST
    return RuntimeMode.BF16_QUALITY
Xet Storage Details
- Size:
- 2.16 kB
- Xet hash:
- 7d486156df410a26a68053d221d2475c526aa288d07fafa538907f34f2721c24
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.