Spaces:
Running
Running
Upload _shared_logic.py
Browse files- _shared_logic.py +21 -9
_shared_logic.py
CHANGED
|
@@ -339,9 +339,9 @@ def _parse_model(body: bytes, default: str = DEFAULT_MODEL) -> str:
|
|
| 339 |
--------
|
| 340 |
>>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
|
| 341 |
'Qwen/Qwen2.5-Coder-7B-Instruct'
|
| 342 |
-
>>> _parse_model(b
|
| 343 |
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
|
| 344 |
-
>>> _parse_model(b
|
| 345 |
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
|
| 346 |
>>> _parse_model(b'{"model": " "}')
|
| 347 |
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
|
|
@@ -435,7 +435,7 @@ def _build_cors_headers(allowed_origin: str = "*") -> dict[str, str]:
|
|
| 435 |
'*'
|
| 436 |
"""
|
| 437 |
return {
|
| 438 |
-
"Access-Control-Allow-Origin":
|
| 439 |
"Access-Control-Allow-Methods": "POST, OPTIONS",
|
| 440 |
"Access-Control-Allow-Headers": "Content-Type",
|
| 441 |
}
|
|
@@ -588,7 +588,7 @@ def _resolve_upstream_url(
|
|
| 588 |
Path 1 — explicit BACKEND_URL:
|
| 589 |
|
| 590 |
>>> url, hdrs, t = _resolve_upstream_url(
|
| 591 |
-
... b
|
| 592 |
... backend_url="https://my-model.hf.space/v1/chat/completions",
|
| 593 |
... hf_token="",
|
| 594 |
... )
|
|
@@ -617,7 +617,16 @@ def _resolve_upstream_url(
|
|
| 617 |
return hf_spaces_model_url, headers, path2_read_timeout
|
| 618 |
|
| 619 |
# ── Path 3: HF Serverless Inference API (provider models) ─────────────────
|
| 620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
headers["Authorization"] = f"Bearer {hf_token}"
|
| 622 |
return url, headers, path3_read_timeout
|
| 623 |
|
|
@@ -654,7 +663,9 @@ def _validate_env(
|
|
| 654 |
--------
|
| 655 |
>>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
|
| 656 |
>>> _validate_env("", "hf_mytoken", "")
|
| 657 |
-
>>> _validate_env(
|
|
|
|
|
|
|
| 658 |
>>> import pytest
|
| 659 |
>>> with pytest.raises(RuntimeError, match="no viable routing path"):
|
| 660 |
... _validate_env("", "", "")
|
|
@@ -715,9 +726,10 @@ def load_proxy_env() -> dict[str, Any]:
|
|
| 715 |
"HF_SPACES_MODEL_NAMESPACES",
|
| 716 |
",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
|
| 717 |
)
|
| 718 |
-
_parsed_namespaces: tuple[str, ...] =
|
| 719 |
-
ns.strip() for ns in _raw_namespaces.split(",") if ns.strip()
|
| 720 |
-
|
|
|
|
| 721 |
|
| 722 |
return {
|
| 723 |
"backend_url": os.environ.get("BACKEND_URL", "").strip(),
|
|
|
|
| 339 |
--------
|
| 340 |
>>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
|
| 341 |
'Qwen/Qwen2.5-Coder-7B-Instruct'
|
| 342 |
+
>>> _parse_model(b"{}")
|
| 343 |
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
|
| 344 |
+
>>> _parse_model(b"not-json")
|
| 345 |
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
|
| 346 |
>>> _parse_model(b'{"model": " "}')
|
| 347 |
'scikit-plots/Qwen2.5-Coder-32B-Instruct'
|
|
|
|
| 435 |
'*'
|
| 436 |
"""
|
| 437 |
return {
|
| 438 |
+
"Access-Control-Allow-Origin": allowed_origin,
|
| 439 |
"Access-Control-Allow-Methods": "POST, OPTIONS",
|
| 440 |
"Access-Control-Allow-Headers": "Content-Type",
|
| 441 |
}
|
|
|
|
| 588 |
Path 1 — explicit BACKEND_URL:
|
| 589 |
|
| 590 |
>>> url, hdrs, t = _resolve_upstream_url(
|
| 591 |
+
... b"{}",
|
| 592 |
... backend_url="https://my-model.hf.space/v1/chat/completions",
|
| 593 |
... hf_token="",
|
| 594 |
... )
|
|
|
|
| 617 |
return hf_spaces_model_url, headers, path2_read_timeout
|
| 618 |
|
| 619 |
# ── Path 3: HF Serverless Inference API (provider models) ─────────────────
|
| 620 |
+
# router.huggingface.co is a flat OpenAI-compatible endpoint.
|
| 621 |
+
# The model is supplied in the request body (already present in `body`),
|
| 622 |
+
# NOT embedded in the URL path. The old api-inference.huggingface.co/models
|
| 623 |
+
# API DID embed the model in the path as /{model}/v1/chat/completions, but
|
| 624 |
+
# router.huggingface.co uses a single endpoint for all models:
|
| 625 |
+
# POST https://router.huggingface.co/v1/chat/completions
|
| 626 |
+
# body: {"model": "Qwen/Qwen2.5-Coder-7B-Instruct:nscale", ...}
|
| 627 |
+
# Embedding the model ID in the path produces a 404/422 with no log entry
|
| 628 |
+
# because _forward passes non-2xx upstream responses through transparently.
|
| 629 |
+
url = f"{hf_base.rstrip('/')}/v1/chat/completions"
|
| 630 |
headers["Authorization"] = f"Bearer {hf_token}"
|
| 631 |
return url, headers, path3_read_timeout
|
| 632 |
|
|
|
|
| 663 |
--------
|
| 664 |
>>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
|
| 665 |
>>> _validate_env("", "hf_mytoken", "")
|
| 666 |
+
>>> _validate_env(
|
| 667 |
+
... "", "", "https://scikit-plots-ai-model.hf.space/v1/chat/completions"
|
| 668 |
+
... )
|
| 669 |
>>> import pytest
|
| 670 |
>>> with pytest.raises(RuntimeError, match="no viable routing path"):
|
| 671 |
... _validate_env("", "", "")
|
|
|
|
| 726 |
"HF_SPACES_MODEL_NAMESPACES",
|
| 727 |
",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
|
| 728 |
)
|
| 729 |
+
_parsed_namespaces: tuple[str, ...] = (
|
| 730 |
+
tuple(ns.strip() for ns in _raw_namespaces.split(",") if ns.strip())
|
| 731 |
+
or DEFAULT_HF_SPACES_MODEL_NAMESPACES
|
| 732 |
+
)
|
| 733 |
|
| 734 |
return {
|
| 735 |
"backend_url": os.environ.get("BACKEND_URL", "").strip(),
|