ai / _shared_logic.py
celik-muhammed's picture
Upload _shared_logic.py
377a993 verified
# _shared_logic.py v6.0.0
#
# Single source of truth for shared constants, pure helper functions, and
# type aliases used by the deployed proxy (_hf_spaces_proxy/app.py) and the
# local development proxy (dev_proxy.py).
#
# Import discipline
# -----------------
# Only the Python standard library is imported here. httpx, fastapi, and
# torch are NOT imported so this module can be sourced by stdlib-only tools
# (dev_proxy) and tested in isolation without any network or GPU environment.
#
# Routing paths (v6.0.0)
# ----------------------
# Three ordered routing paths β€” each with its own configurable read timeout:
#
# Path 1 β€” BACKEND_URL set (explicit override)
# Forward to BACKEND_URL. HF_TOKEN injected when also set.
# Read timeout: proxy_timeout kwarg (env: PROXY_TIMEOUT, default 600 s).
#
# Path 2 β€” Model namespace in HF_SPACES_MODEL_NAMESPACES
# Model owner (e.g. "scikit-plots") matches a custom namespace.
# Forward to HF_SPACES_MODEL_URL (the ai-model HF Space, CPU inference).
# These models have no HF Inference Provider β†’ direct HF API returns 404/503.
# Read timeout: path2_read_timeout kwarg (env: PATH2_TIMEOUT, default 600 s).
# CPU inference on a 7B model takes 4-5 minutes; 600 s gives safe headroom.
#
# Path 3 β€” Standard HF Inference API (default)
# Model has a registered HF Inference Provider (openai/*, Qwen/*, etc.).
# Forward to HF_BASE/{model}/v1/chat/completions with HF_TOKEN.
# Read timeout: path3_read_timeout kwarg (env: PATH3_TIMEOUT, default 120 s).
# HF Serverless API (GPU-backed) normally responds within 30-90 s.
#
# Breaking changes v4.0.0 β†’ v5.0.0
# ----------------------------------
# + DEFAULT_PROXY_TIMEOUT raised from 120 s to 600 s.
# Root cause: 120 s was shorter than the 4-5 min CPU inference on the
# ai-model HF Space, causing every request to return a network error.
# + DEFAULT_PATH2_READ_TIMEOUT added (600 s) β€” ai-model space per-path timeout.
# + DEFAULT_PATH3_READ_TIMEOUT added (120 s) β€” HF API per-path timeout.
# + _resolve_upstream_url now accepts path2_read_timeout, path3_read_timeout,
# and proxy_timeout keyword-only parameters.
# + _resolve_upstream_url return type changed from tuple[str, dict] to
# tuple[str, dict, float] β€” the third element is the per-path read timeout.
# Callers must unpack all three values.
# + load_proxy_env extended with path2_read_timeout and path3_read_timeout.
#
# Breaking changes v5.0.0 β†’ v6.0.0
# ----------------------------------
# + DEFAULT_HF_BASE changed from ``https://api-inference.huggingface.co/models``
# to ``https://router.huggingface.co``.
# Root cause: api-inference.huggingface.co was DNS-unresolvable ([Errno -5]
# EAI_NODATA / EAI_NONAME) from within HF Docker Spaces.
# router.huggingface.co is the current HF Inference Providers endpoint and
# resolves correctly in all deployment environments.
# Callers who hard-code ``HF_BASE`` to the old hostname must migrate to
# the new router URL.
#
# SPDX-License-Identifier: BSD-3-Clause
# Authors: The scikit-plots developers
"""
Shared utilities for the sphinx-ai-assistant proxy solutions.
This module provides pure, stateless helper functions and typed constants
that are common to all server-side proxy implementations. It has **no**
runtime dependencies beyond the Python standard library.
Public API:
PROXY_VERSION : str
Proxy release version string.
DEFAULT_HF_BASE : str
HuggingFace Serverless Inference API base URL.
DEFAULT_MODEL : str
Fallback model ID when the request body omits ``model``.
DEFAULT_PROXY_TIMEOUT : int
Global upstream read timeout in seconds (Path 1 / backward-compat).
DEFAULT_PATH2_READ_TIMEOUT : float
Per-path read timeout for Path 2 (ai-model space, CPU inference).
DEFAULT_PATH3_READ_TIMEOUT : float
Per-path read timeout for Path 3 (HF Serverless Inference API).
DEFAULT_MAX_BODY_BYTES : int
Maximum accepted request body size.
DEFAULT_HF_SPACES_MODEL_URL : str
Default URL for the custom ai-model HF Space (Path 2).
DEFAULT_HF_SPACES_MODEL_NAMESPACES : tuple[str, ...]
Default model owner namespaces routed to the model Space (Path 2).
_safe_int : callable
Parse an integer environment variable with a safe fallback.
_parse_model : callable
Extract the ``model`` field from a raw JSON request body.
_is_custom_model_namespace : callable
Return True when a model's owner namespace is in the custom list.
_build_cors_headers : callable
Return the CORS response-header mapping.
_token_log_fragment : callable
Produce a safely-truncated token string for log output.
_resolve_upstream_url : callable
Centralised three-path routing: choose upstream URL, auth headers,
and per-path read timeout.
_validate_env : callable
Fail-fast startup check with actionable error messages.
load_proxy_env : callable
Read all proxy-relevant environment variables into a typed dict.
Notes
-----
**Developer note** β€” All functions are pure (no side effects, no I/O).
Tests can import this module without a running event loop or any network.
The proxy (FastAPI / asyncio) and dev_proxy (stdlib HTTPServer) both import
from here so that routing and CORS logic are *never* duplicated.
**Breaking change v5.0.0** β€” ``_resolve_upstream_url`` now returns a
3-tuple ``(url, headers, read_timeout_s: float)`` instead of the previous
2-tuple ``(url, headers)``. All callers must unpack the third element or
the per-path timeout falls through to the old flat-timeout behaviour.
**Breaking change v6.0.0** β€” :data:`DEFAULT_HF_BASE` migrated from
``https://api-inference.huggingface.co/models`` to
``https://router.huggingface.co``. The old hostname was DNS-unresolvable
([Errno -5] EAI_NONAME) from within HF Docker Spaces. Deployments that
override ``HF_BASE`` to the legacy hostname must update their configuration.
**Security note** β€” :func:`_token_log_fragment` ensures the full API token
never appears in log output. Never widen the exposed fragment beyond the
current 8+4 character window without reviewing log-aggregation policy first.
**Versioning note** β€” Bump :data:`PROXY_VERSION` on every breaking change so
deployed Spaces and log aggregators can correlate errors to a specific release.
"""
from __future__ import annotations
import json
import os
from typing import Any
__all__ = [
# Constants
"DEFAULT_HF_BASE",
"DEFAULT_HF_SPACES_MODEL_NAMESPACES",
"DEFAULT_HF_SPACES_MODEL_URL",
"DEFAULT_MAX_BODY_BYTES",
"DEFAULT_MODEL",
"DEFAULT_PATH2_READ_TIMEOUT",
"DEFAULT_PATH3_READ_TIMEOUT",
"DEFAULT_PROXY_TIMEOUT",
"PROXY_VERSION",
# Helpers
"_build_cors_headers",
"_is_custom_model_namespace",
"_parse_model",
"_resolve_upstream_url",
"_safe_int",
"_token_log_fragment",
"_validate_env",
"load_proxy_env",
]
# ─────────────────────────────────────────────────────────────────────────────
# Module-level constants
# ─────────────────────────────────────────────────────────────────────────────
#: Proxy release version β€” bump on every breaking change.
PROXY_VERSION: str = "6.0.0"
#: HuggingFace Inference Providers router base URL (no trailing slash).
#: Only used for Path 3 (standard provider models) when ``BACKEND_URL`` is
#: empty and the model namespace is not in ``HF_SPACES_MODEL_NAMESPACES``.
#:
#: Migrated from ``https://api-inference.huggingface.co/models`` (v5.0.0) to
#: ``https://router.huggingface.co`` (v6.0.0).
#: Root cause: api-inference.huggingface.co was DNS-unresolvable ([Errno -5]
#: EAI_NODATA / EAI_NONAME) from within HF Docker Spaces; the router hostname
#: resolves correctly and is the current HF Inference Providers endpoint.
DEFAULT_HF_BASE: str = "https://router.huggingface.co"
#: Fallback model ID when the request body omits the ``model`` field.
#: Must have a registered HF Inference Provider for Path 3.
DEFAULT_MODEL: str = "scikit-plots/Qwen2.5-Coder-32B-Instruct"
#: Global upstream read timeout in seconds (used for Path 1 / backward compat).
#:
#: Raised from 120 s (v4.0.0) to 600 s (v5.0.0).
#:
#: Root cause of the increase: the ai-model HF Space runs a 7B model on CPU
#: basic hardware. Cold-start inference (model loading + generation) takes
#: 4-5 minutes. The 120 s ceiling caused every request to the ai-model Space
#: to return ``httpx.ReadTimeout``, which the browser reported as
#: "Sorry, something went wrong: network error".
DEFAULT_PROXY_TIMEOUT: int = 600
#: Per-path read timeout for Path 2 (ai-model HF Space, CPU inference).
#:
#: CPU inference on a 7B model takes 4-5 minutes. 600 s gives 1 minute of
#: additional headroom for cold-start model loading (~50 s tokenizer +
#: ~50 s model load + ~4.5 min generation on the first request).
DEFAULT_PATH2_READ_TIMEOUT: float = 600.0
#: Per-path read timeout for Path 3 (HF Serverless Inference API).
#:
#: The HF Serverless API runs inference on GPU hardware. Most responses
#: arrive within 30-90 s. 120 s gives a comfortable margin.
DEFAULT_PATH3_READ_TIMEOUT: float = 120.0
#: Maximum accepted request body size in bytes (10 MiB).
#: Prevents memory exhaustion from maliciously oversized POST bodies.
DEFAULT_MAX_BODY_BYTES: int = 10 * 1024 * 1024 # 10 MiB
#: Default URL for the custom ai-model HF Space (Path 2).
#: Requests for models whose namespace is in ``DEFAULT_HF_SPACES_MODEL_NAMESPACES``
#: are forwarded here instead of the HF Serverless Inference API.
#: Overridable via the ``HF_SPACES_MODEL_URL`` environment variable.
DEFAULT_HF_SPACES_MODEL_URL: str = (
"https://scikit-plots-ai-model.hf.space/v1/chat/completions"
)
#: Default model owner namespaces routed to :data:`DEFAULT_HF_SPACES_MODEL_URL`.
#: Models whose owner (the part before ``/``) matches any entry in this tuple
#: are routed to the ai-model Space (Path 2) rather than the HF API (Path 3).
#: Overridable via the ``HF_SPACES_MODEL_NAMESPACES`` environment variable.
DEFAULT_HF_SPACES_MODEL_NAMESPACES: tuple[str, ...] = ("scikit-plots",)
# ─────────────────────────────────────────────────────────────────────────────
# Pure helper functions
# ─────────────────────────────────────────────────────────────────────────────
def _safe_int(value: str | None, default: int) -> int:
"""
Parse *value* as an integer, returning *default* on any failure.
Parameters
----------
value : str or None
String to parse. Typically the raw value of an environment variable
(may be ``None`` when the variable is absent).
default : int
Returned when *value* is ``None``, empty, or cannot be converted.
Returns
-------
int
Parsed integer, or *default* on any ``ValueError`` / ``TypeError``.
Notes
-----
**Developer note** β€” This function is intentionally never-raise.
A misconfigured ``PROXY_TIMEOUT`` or ``MAX_BODY_BYTES`` must not prevent
the proxy from starting β€” the safe default is better than a crash.
Examples
--------
>>> _safe_int("120", 60)
120
>>> _safe_int("not-a-number", 60)
60
>>> _safe_int(None, 60)
60
>>> _safe_int("", 60)
60
"""
if value is None:
return default
try:
return int(value)
except (ValueError, TypeError):
return default
def _safe_float(value: str | None, default: float) -> float:
"""
Parse *value* as a float, returning *default* on any failure.
Parameters
----------
value : str or None
String to parse. Typically the raw value of an environment variable.
default : float
Returned when *value* is ``None``, empty, or cannot be converted.
Returns
-------
float
Parsed float, or *default* on any ``ValueError`` / ``TypeError``.
Notes
-----
**Developer note** β€” Like :func:`_safe_int`, this is intentionally
never-raise. A misconfigured ``PATH2_TIMEOUT`` or ``PATH3_TIMEOUT``
must not crash the proxy at startup.
Examples
--------
>>> _safe_float("600.0", 120.0)
600.0
>>> _safe_float("bad", 120.0)
120.0
>>> _safe_float(None, 120.0)
120.0
"""
if value is None:
return default
try:
return float(value)
except (ValueError, TypeError):
return default
def _parse_model(body: bytes, default: str = DEFAULT_MODEL) -> str:
"""
Extract the ``model`` field from a raw JSON request body.
Parameters
----------
body : bytes
Raw HTTP request body forwarded from the browser. Expected to be
valid JSON but the function never raises on malformed input.
default : str, optional
Fallback model ID when the field is absent or the body cannot be
decoded. Defaults to :data:`DEFAULT_MODEL`.
Returns
-------
str
The ``model`` value from the body, or *default* if the field is
absent, empty, or the body is not valid JSON.
Notes
-----
**Developer note** β€” This function is intentionally never-raise.
A malformed body must not crash the proxy; the upstream model backend
will return a meaningful error that the browser can display.
Examples
--------
>>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
'Qwen/Qwen2.5-Coder-7B-Instruct'
>>> _parse_model(b"{}")
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
>>> _parse_model(b"not-json")
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
>>> _parse_model(b'{"model": " "}')
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
"""
try:
data: Any = json.loads(body)
candidate = str(data.get("model", "")).strip()
return candidate or default
except (json.JSONDecodeError, ValueError, AttributeError, TypeError):
return default
def _is_custom_model_namespace(
model: str,
namespaces: tuple[str, ...] | list[str],
) -> bool:
"""
Return ``True`` when the model owner namespace is in *namespaces*.
The owner is the portion of the model ID before the first ``/``.
An optional HF Router variant suffix (e.g. ``:fastest``) is stripped
before comparison so ``"scikit-plots/Qwen2.5-Coder-7B-Instruct:fastest"``
is correctly identified as belonging to the ``"scikit-plots"`` namespace.
Parameters
----------
model : str
Model ID string, e.g. ``"scikit-plots/Qwen2.5-Coder-7B-Instruct"``
or ``"openai/gpt-oss-20b:fastest"``.
namespaces : tuple[str, ...] or list[str]
Iterable of owner namespace strings to match against (case-insensitive).
Typically :data:`DEFAULT_HF_SPACES_MODEL_NAMESPACES` or parsed from
the ``HF_SPACES_MODEL_NAMESPACES`` environment variable.
Returns
-------
bool
``True`` when the model owner is in *namespaces*, ``False`` otherwise.
Notes
-----
**Developer note** β€” Comparison is case-insensitive and strips leading /
trailing whitespace from both the model owner and each namespace entry.
A model string without a ``/`` separator (i.e. no namespace component)
always returns ``False``; such IDs are routed to Path 3 (HF Inference API).
Examples
--------
>>> _is_custom_model_namespace(
... "scikit-plots/Qwen2.5-Coder-7B-Instruct",
... ("scikit-plots",),
... )
True
>>> _is_custom_model_namespace(
... "scikit-plots/Qwen2.5-Coder-7B-Instruct:fastest",
... ("scikit-plots",),
... )
True
>>> _is_custom_model_namespace("openai/gpt-oss-20b", ("scikit-plots",))
False
>>> _is_custom_model_namespace("no-slash-model", ("scikit-plots",))
False
"""
base = model.split(":", maxsplit=1)[0].strip()
if not base or "/" not in base:
return False
owner = base.split("/", 1)[0].lower().strip()
normalised = {ns.lower().strip() for ns in namespaces if ns.strip()}
return owner in normalised
def _build_cors_headers(allowed_origin: str = "*") -> dict[str, str]:
"""
Return the standard CORS response-header mapping.
Parameters
----------
allowed_origin : str, optional
Value for the ``Access-Control-Allow-Origin`` header.
Defaults to ``"*"`` (allow all origins).
Returns
-------
dict[str, str]
CORS response headers.
Examples
--------
>>> headers = _build_cors_headers()
>>> headers["Access-Control-Allow-Origin"]
'*'
"""
return {
"Access-Control-Allow-Origin": allowed_origin,
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type",
}
def _token_log_fragment(token: str) -> str:
"""
Produce a safely-truncated token string for log output.
Parameters
----------
token : str
A HuggingFace API token (typically ``hf_xxx...``).
Returns
-------
str
A truncated representation showing the first 8 and last 4
characters separated by ``...``. Returns ``"<not-set>"`` when
*token* is empty or shorter than 12 characters.
Notes
-----
**Security note** β€” Never widen the exposed fragment beyond 8+4
characters without a log-aggregation security review.
Examples
--------
>>> _token_log_fragment("hf_abcdefghij1234")
'hf_abcde...1234'
>>> _token_log_fragment("")
'<not-set>'
"""
if not token or len(token) < 12: # noqa: PLR2004
return "<not-set>"
return f"{token[:8]}...{token[-4:]}"
def _resolve_upstream_url(
body: bytes,
*,
backend_url: str,
hf_token: str,
hf_base: str = DEFAULT_HF_BASE,
default_model: str = DEFAULT_MODEL,
hf_spaces_model_url: str = DEFAULT_HF_SPACES_MODEL_URL,
hf_spaces_model_namespaces: tuple[str, ...] | list[str] = DEFAULT_HF_SPACES_MODEL_NAMESPACES,
proxy_timeout: float = float(DEFAULT_PROXY_TIMEOUT),
path2_read_timeout: float = DEFAULT_PATH2_READ_TIMEOUT,
path3_read_timeout: float = DEFAULT_PATH3_READ_TIMEOUT,
) -> tuple[str, dict[str, str], float]:
"""
Centralised three-path routing β€” choose upstream endpoint, auth headers,
and per-path read timeout.
Priority
--------
1. *backend_url* is non-empty β†’ **Path 1**: explicit custom backend.
Forward to *backend_url* (Docker Model Runner, Ollama, any backend).
*hf_token* is injected only when it is also set.
Read timeout: *proxy_timeout* (env ``PROXY_TIMEOUT``, default 600 s).
2. Model namespace is in *hf_spaces_model_namespaces* β†’ **Path 2**: HF model Space.
Forward to *hf_spaces_model_url* (the ``scikit-plots/ai-model`` Space).
CPU inference on a 7B model takes 4-5 minutes; *path2_read_timeout*
(env ``PATH2_TIMEOUT``, default 600 s) prevents premature timeout.
*hf_token* is injected when set (needed for private Spaces).
3. Otherwise β†’ **Path 3**: HF Serverless Inference API (default).
Build ``{hf_base}/{model}/v1/chat/completions`` and inject *hf_token*
(always required for the HF API).
*path3_read_timeout* (env ``PATH3_TIMEOUT``, default 120 s) is
appropriate for GPU-backed HF API inference.
Parameters
----------
body : bytes
Raw JSON request body. Used to extract the ``model`` field for
Paths 2 and 3.
backend_url : str
Value of the ``BACKEND_URL`` environment variable. Non-empty string
triggers Path 1; empty string means "proceed to Path 2 / 3".
hf_token : str
HuggingFace API token. Required for Path 3; optional for Paths 1 and 2.
hf_base : str, optional
HF Serverless Inference API base URL (no trailing slash).
default_model : str, optional
Fallback model ID when the body omits the ``model`` field.
hf_spaces_model_url : str, optional
URL of the custom ai-model HF Space (Path 2 target).
hf_spaces_model_namespaces : tuple[str, ...] or list[str], optional
Model owner namespaces routed to *hf_spaces_model_url*.
proxy_timeout : float, optional
Read timeout (seconds) for Path 1. Default: 600 s.
path2_read_timeout : float, optional
Read timeout (seconds) for Path 2 (ai-model Space). Default: 600 s.
path3_read_timeout : float, optional
Read timeout (seconds) for Path 3 (HF Serverless API). Default: 120 s.
Returns
-------
url : str
Fully-qualified upstream endpoint URL.
headers : dict[str, str]
HTTP headers for the upstream POST request.
read_timeout_s : float
Per-path read timeout in seconds. Pass to ``httpx.Timeout(read=...)``.
Notes
-----
**Breaking change v5.0.0** β€” Return type changed from
``tuple[str, dict]`` to ``tuple[str, dict, float]``. All callers must
unpack the third element.
**Breaking change v6.0.0** β€” :data:`DEFAULT_HF_BASE` changed from
``https://api-inference.huggingface.co/models`` to
``https://router.huggingface.co``. The old hostname was DNS-unresolvable
from HF Docker Spaces ([Errno -5] EAI_NONAME).
**Developer note** β€” All routing logic lives here. To add a new backend
type, add a new branch in this function. Callers (``app.py``,
``dev_proxy.py``) remain unchanged when they already unpack 3 values.
Examples
--------
Path 2 β€” scikit-plots namespace β†’ ai-model Space:
>>> url, hdrs, t = _resolve_upstream_url(
... b'{"model":"scikit-plots/Qwen2.5-Coder-7B-Instruct","messages":[]}',
... backend_url="",
... hf_token="",
... )
>>> "scikit-plots-ai-model.hf.space" in url
True
>>> t
600.0
Path 3 β€” standard HF Inference API:
>>> url, hdrs, t = _resolve_upstream_url(
... b'{"model":"openai/gpt-oss-20b","messages":[]}',
... backend_url="",
... hf_token="hf_test_token_abc123",
... )
>>> "api-inference.huggingface.co" in url
True
>>> t
120.0
Path 1 β€” explicit BACKEND_URL:
>>> url, hdrs, t = _resolve_upstream_url(
... b"{}",
... backend_url="https://my-model.hf.space/v1/chat/completions",
... hf_token="",
... )
>>> url
'https://my-model.hf.space/v1/chat/completions'
>>> t
600.0
""" # noqa: D205
headers: dict[str, str] = {"Content-Type": "application/json"}
# ── Path 1: explicit custom backend override ──────────────────────────────
if backend_url:
if hf_token:
headers["Authorization"] = f"Bearer {hf_token}"
return backend_url, headers, proxy_timeout
# Extract model ID from request body (needed for Paths 2 and 3).
model: str = _parse_model(body, default=default_model)
# ── Path 2: custom model namespace β†’ HF Spaces model backend ─────────────
if hf_spaces_model_url and _is_custom_model_namespace(
model, hf_spaces_model_namespaces
):
if hf_token:
headers["Authorization"] = f"Bearer {hf_token}"
return hf_spaces_model_url, headers, path2_read_timeout
# ── Path 3: HF Serverless Inference API (provider models) ─────────────────
# router.huggingface.co is a flat OpenAI-compatible endpoint.
# The model is supplied in the request body (already present in `body`),
# NOT embedded in the URL path. The old api-inference.huggingface.co/models
# API DID embed the model in the path as /{model}/v1/chat/completions, but
# router.huggingface.co uses a single endpoint for all models:
# POST https://router.huggingface.co/v1/chat/completions
# body: {"model": "Qwen/Qwen2.5-Coder-7B-Instruct:nscale", ...}
# Embedding the model ID in the path produces a 404/422 with no log entry
# because _forward passes non-2xx upstream responses through transparently.
url = f"{hf_base.rstrip('/')}/v1/chat/completions"
headers["Authorization"] = f"Bearer {hf_token}"
return url, headers, path3_read_timeout
def _validate_env(
backend_url: str,
hf_token: str,
hf_spaces_model_url: str = DEFAULT_HF_SPACES_MODEL_URL,
) -> None:
"""
Validate the minimum required environment at proxy startup.
At least one of the three routing paths must be viable:
* **Path 1** β€” *backend_url* is non-empty.
* **Path 2** β€” *hf_spaces_model_url* is non-empty (serves custom namespace models).
* **Path 3** β€” *hf_token* is non-empty (HF Inference API for provider models).
Parameters
----------
backend_url : str
Value of the ``BACKEND_URL`` environment variable (may be empty).
hf_token : str
Value of the ``HF_TOKEN`` environment variable (may be empty).
hf_spaces_model_url : str, optional
Value of the ``HF_SPACES_MODEL_URL`` environment variable.
Raises
------
RuntimeError
When all three routing paths are disabled (all parameters are empty).
Examples
--------
>>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
>>> _validate_env("", "hf_mytoken", "")
>>> _validate_env(
... "", "", "https://scikit-plots-ai-model.hf.space/v1/chat/completions"
... )
>>> import pytest
>>> with pytest.raises(RuntimeError, match="no viable routing path"):
... _validate_env("", "", "")
"""
if not backend_url and not hf_token and not hf_spaces_model_url:
raise RuntimeError(
"Proxy configuration error: no viable routing path configured.\n\n"
"Set at least ONE of the following in Space β†’ Settings β†’ Repository secrets:\n\n"
" Option 1 β€” HF Inference API (standard provider models):\n"
" HF_TOKEN = hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"
" DEFAULT_MODEL = openai/gpt-oss-20b\n\n"
" Option 2 β€” Custom ai-model Space (scikit-plots/* models):\n"
" HF_SPACES_MODEL_URL = "
"https://scikit-plots-ai-model.hf.space/v1/chat/completions\n\n"
" Option 3 β€” Explicit custom backend (DMR, Ollama, or any backend):\n"
" BACKEND_URL = http://localhost:12434/engines/llama.cpp/v1/chat/completions\n\n"
"See FREE_PROXY_SOLUTIONS.md for the full path decision tree."
)
def load_proxy_env() -> dict[str, Any]:
"""
Read all proxy-relevant environment variables and return a typed dict.
Returns
-------
dict[str, Any]
Keys and types:
``backend_url`` : str
``hf_token`` : str
``hf_base`` : str
``default_model`` : str
``hf_spaces_model_url`` : str
``hf_spaces_model_namespaces`` : tuple[str, ...]
``proxy_timeout`` : int
Global / Path 1 read timeout (env ``PROXY_TIMEOUT``).
``path2_read_timeout`` : float
Path 2 read timeout (env ``PATH2_TIMEOUT``).
``path3_read_timeout`` : float
Path 3 read timeout (env ``PATH3_TIMEOUT``).
``max_body_bytes`` : int
``allowed_origins`` : str
Examples
--------
>>> import os
>>> os.environ["PROXY_TIMEOUT"] = "600"
>>> cfg = load_proxy_env()
>>> cfg["proxy_timeout"]
600
>>> os.environ["PATH2_TIMEOUT"] = "900"
>>> cfg = load_proxy_env()
>>> cfg["path2_read_timeout"]
900.0
"""
_raw_namespaces: str = os.environ.get(
"HF_SPACES_MODEL_NAMESPACES",
",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
)
_parsed_namespaces: tuple[str, ...] = (
tuple(ns.strip() for ns in _raw_namespaces.split(",") if ns.strip())
or DEFAULT_HF_SPACES_MODEL_NAMESPACES
)
return {
"backend_url": os.environ.get("BACKEND_URL", "").strip(),
"hf_token": os.environ.get("HF_TOKEN", "").strip(),
"hf_base": os.environ.get("HF_BASE", DEFAULT_HF_BASE).rstrip("/"),
"default_model": (
os.environ.get("DEFAULT_MODEL", DEFAULT_MODEL).strip() or DEFAULT_MODEL
),
"hf_spaces_model_url": (
os.environ.get("HF_SPACES_MODEL_URL", DEFAULT_HF_SPACES_MODEL_URL).strip()
),
"hf_spaces_model_namespaces": _parsed_namespaces,
"proxy_timeout": _safe_int(
os.environ.get("PROXY_TIMEOUT"),
DEFAULT_PROXY_TIMEOUT,
),
"path2_read_timeout": _safe_float(
os.environ.get("PATH2_TIMEOUT"),
DEFAULT_PATH2_READ_TIMEOUT,
),
"path3_read_timeout": _safe_float(
os.environ.get("PATH3_TIMEOUT"),
DEFAULT_PATH3_READ_TIMEOUT,
),
"max_body_bytes": _safe_int(
os.environ.get("MAX_BODY_BYTES"),
DEFAULT_MAX_BODY_BYTES,
),
"allowed_origins": os.environ.get("ALLOWED_ORIGINS", "*").strip(),
}