from __future__ import annotations import json from pathlib import Path _BASE_PATH = Path(__file__).parent / "data" / "throughputs.json" class LatencyEstimator: __DEFAULT_TPS = 80.0 def __init__(self, file_path: str | Path) -> None: with open(file_path, "r") as fd: data = json.load(fd) self.__throughputs = {} for el in data["models"]: self.__throughputs[(el["provider"], el["name"])] = el["throughput"] def get_throughput(self, provider: str, model_name: str) -> float: return float(self.__throughputs.get((provider, model_name), self.__DEFAULT_TPS)) def estimate(self, provider: str, model_name: str, output_tokens: int, throughput: float | None = None) -> float: if throughput is None: throughput = self.__throughputs.get((provider, model_name), self.__DEFAULT_TPS) return float(output_tokens / throughput) latency_estimator = LatencyEstimator(file_path=_BASE_PATH)