| """ | |
| Shared OpenAI-compatible runtime helpers for LLM-backed benchmark features. | |
| """ | |
| from __future__ import annotations | |
| import asyncio | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass | |
| from typing import Any | |
| from openai import OpenAI | |
| DEFAULT_API_BASE_URL = "https://router.huggingface.co/v1" | |
| DEFAULT_MODEL_NAME = "Qwen/Qwen2.5-72B-Instruct" | |


@dataclass
class JsonCallResult:
    """Parsed JSON payload plus token usage reported by the provider."""

    data: dict[str, Any]
    prompt_tokens: int | None
    completion_tokens: int | None


def model_name() -> str:
    return os.getenv("MODEL_NAME", DEFAULT_MODEL_NAME)


def resolve_llm_credentials() -> tuple[str | None, str | None, str | None]:
    """Return (api_base_url, api_key, auth_mode), preferring API_KEY over the legacy HF_TOKEN."""
    api_base_url = os.getenv("API_BASE_URL", DEFAULT_API_BASE_URL)
    api_key = os.getenv("API_KEY")
    legacy_token = os.getenv("HF_TOKEN")
    if api_key:
        return api_base_url, api_key, "proxy"
    if legacy_token:
        return api_base_url, legacy_token, "legacy"
    return None, None, None


def llm_configured() -> bool:
    _base_url, api_key, _auth_mode = resolve_llm_credentials()
    return bool(api_key)


def _extract_json_object(text: str) -> dict[str, Any]:
    """Parse a JSON object from model output, tolerating surrounding prose or code fences."""
    payload = text.strip()
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span, e.g. when the model wraps
        # the object in markdown fences or explanatory text.
        match = re.search(r"\{.*\}", payload, re.DOTALL)
        if not match:
            raise
        return json.loads(match.group(0))


async def call_json(
    *,
    system_prompt: str,
    user_payload: dict[str, Any] | list[Any] | str,
    temperature: float = 0.0,
    max_output_tokens: int = 400,
) -> JsonCallResult:
    api_base_url, client_api_key, _auth_mode = resolve_llm_credentials()
    if not api_base_url or not client_api_key:
        raise RuntimeError("llm_credentials_missing")
    client = OpenAI(base_url=api_base_url, api_key=client_api_key)
    user_content = user_payload if isinstance(user_payload, str) else json.dumps(user_payload, ensure_ascii=True)

    def _call():
        return client.chat.completions.create(
            model=model_name(),
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            response_format={"type": "json_object"},
            temperature=temperature,
            max_tokens=max_output_tokens,
        )

    # The OpenAI client is synchronous; run the blocking call in a worker
    # thread so the event loop stays responsive.
    response = await asyncio.to_thread(_call)
    content = response.choices[0].message.content or "{}"
    usage = getattr(response, "usage", None)
    return JsonCallResult(
        data=_extract_json_object(content),
        prompt_tokens=getattr(usage, "prompt_tokens", None),
        completion_tokens=getattr(usage, "completion_tokens", None),
    )


async def estimate_tokens(text: str) -> int:
    cleaned = text.strip()
    if not cleaned:
        return 0
    if not llm_configured():
        # Offline fallback: a rough ~4-characters-per-token heuristic.
        return max(1, len(cleaned) // 4)
    result = await call_json(
        system_prompt=(
            "You are TOKEN_ESTIMATOR. Estimate how many model tokens the provided text would use "
            "for the current chat model. Return JSON with exactly one integer field: "
            '{"token_count": 123}'
        ),
        user_payload={"text": cleaned},
        temperature=0.0,
        max_output_tokens=32,
    )
    token_count = int(result.data.get("token_count", 0))
    return max(1, token_count)
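

# Minimal usage sketch (illustrative, not part of the deployed module): with no
# API_KEY or HF_TOKEN exported, estimate_tokens falls back to the character
# heuristic, so this also runs offline. The sample string is an assumption.
if __name__ == "__main__":
    sample = "The quick brown fox jumps over the lazy dog."
    print("llm_configured:", llm_configured())
    print("estimated tokens:", asyncio.run(estimate_tokens(sample)))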