Spaces:
Sleeping
Sleeping
| from dataclasses import dataclass | |
| import httpx | |
| class RuntimeSnapshot: | |
| prompt_tps: float | None | |
| decode_tps: float | None | |
| requests_processing: int | None | |
| requests_deferred: int | None | |
| active_slots: int | |
| slot_context_tokens: tuple[int, ...] | |
| slot_context_size: int | None | |
| def parse_prometheus_value(text: str, name: str) -> float | None: | |
| prefix = f"{name} " | |
| for line in text.splitlines(): | |
| if line.startswith(prefix): | |
| return float(line.removeprefix(prefix)) | |
| return None | |
def fetch_runtime_snapshot(
    client: httpx.Client,
    metrics_url: str,
    slots_url: str,
) -> RuntimeSnapshot:
    """Query the metrics and slots endpoints and summarize them.

    Both URLs are fetched with *client*, metrics first. ``raise_for_status()``
    is called on each response before any payload is read, so an error status
    surfaces as an exception rather than a partial snapshot.

    Args:
        client: HTTP client used for both GET requests.
        metrics_url: URL returning Prometheus-style text metrics.
        slots_url: URL returning a JSON collection of slot objects.
            # NOTE(review): assumed to be a list of dicts - confirm.

    Returns:
        A ``RuntimeSnapshot`` built from the two payloads.
    """
    metrics_reply = client.get(metrics_url)
    metrics_reply.raise_for_status()
    slots_reply = client.get(slots_url)
    slots_reply.raise_for_status()

    exposition = metrics_reply.text
    all_slots = slots_reply.json()

    # Only slots whose flag is literally True count as active.
    busy_slots = []
    for entry in all_slots:
        if entry.get("is_processing") is True:
            busy_slots.append(entry)

    processing_raw = parse_prometheus_value(
        exposition, "llamacpp:requests_processing"
    )
    deferred_raw = parse_prometheus_value(
        exposition, "llamacpp:requests_deferred"
    )
    prompt_rate = parse_prometheus_value(
        exposition, "llamacpp:prompt_tokens_seconds"
    )
    decode_rate = parse_prometheus_value(
        exposition, "llamacpp:predicted_tokens_seconds"
    )

    # Metrics are parsed as floats; the request counters are exposed as ints.
    processing_count = None if processing_raw is None else int(processing_raw)
    deferred_count = None if deferred_raw is None else int(deferred_raw)

    prompt_token_counts = []
    for entry in busy_slots:
        if entry.get("n_prompt_tokens") is not None:
            prompt_token_counts.append(int(entry["n_prompt_tokens"]))

    # The context size is read from the first slot only.
    context_size = None
    if all_slots and all_slots[0].get("n_ctx") is not None:
        context_size = int(all_slots[0]["n_ctx"])

    return RuntimeSnapshot(
        prompt_tps=prompt_rate,
        decode_tps=decode_rate,
        requests_processing=processing_count,
        requests_deferred=deferred_count,
        active_slots=len(busy_slots),
        slot_context_tokens=tuple(prompt_token_counts),
        slot_context_size=context_size,
    )