Spaces:

scikit-plots
/

ai

Running

App Files Community

celik-muhammed committed on Jun 1

Commit

377a993

verified ·

1 Parent(s): 7da0afd

Upload _shared_logic.py

Browse files

Files changed (1) hide show

_shared_logic.py +21 -9

_shared_logic.py CHANGED Viewed

@@ -339,9 +339,9 @@ def _parse_model(body: bytes, default: str = DEFAULT_MODEL) -> str:
     --------
     >>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
     'Qwen/Qwen2.5-Coder-7B-Instruct'
-    >>> _parse_model(b'{}')
     'scikit-plots/Qwen2.5-Coder-32B-Instruct'
-    >>> _parse_model(b'not-json')
     'scikit-plots/Qwen2.5-Coder-32B-Instruct'
     >>> _parse_model(b'{"model": "  "}')
     'scikit-plots/Qwen2.5-Coder-32B-Instruct'
@@ -435,7 +435,7 @@ def _build_cors_headers(allowed_origin: str = "*") -> dict[str, str]:
     '*'
     """
     return {
-        "Access-Control-Allow-Origin":  allowed_origin,
         "Access-Control-Allow-Methods": "POST, OPTIONS",
         "Access-Control-Allow-Headers": "Content-Type",
     }
@@ -588,7 +588,7 @@ def _resolve_upstream_url(
     Path 1 — explicit BACKEND_URL:
     >>> url, hdrs, t = _resolve_upstream_url(
-    ...     b'{}',
     ...     backend_url="https://my-model.hf.space/v1/chat/completions",
     ...     hf_token="",
     ... )
@@ -617,7 +617,16 @@ def _resolve_upstream_url(
         return hf_spaces_model_url, headers, path2_read_timeout
     # ── Path 3: HF Serverless Inference API (provider models) ─────────────────
-    url = f"{hf_base.rstrip('/')}/{model}/v1/chat/completions"
     headers["Authorization"] = f"Bearer {hf_token}"
     return url, headers, path3_read_timeout
@@ -654,7 +663,9 @@ def _validate_env(
     --------
     >>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
     >>> _validate_env("", "hf_mytoken", "")
-    >>> _validate_env("", "", "https://scikit-plots-ai-model.hf.space/v1/chat/completions")
     >>> import pytest
     >>> with pytest.raises(RuntimeError, match="no viable routing path"):
     ...     _validate_env("", "", "")
@@ -715,9 +726,10 @@ def load_proxy_env() -> dict[str, Any]:
         "HF_SPACES_MODEL_NAMESPACES",
         ",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
     )
-    _parsed_namespaces: tuple[str, ...] = tuple(
-        ns.strip() for ns in _raw_namespaces.split(",") if ns.strip()
-    ) or DEFAULT_HF_SPACES_MODEL_NAMESPACES
     return {
         "backend_url": os.environ.get("BACKEND_URL", "").strip(),

     --------
     >>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
     'Qwen/Qwen2.5-Coder-7B-Instruct'
+    >>> _parse_model(b"{}")
     'scikit-plots/Qwen2.5-Coder-32B-Instruct'
+    >>> _parse_model(b"not-json")
     'scikit-plots/Qwen2.5-Coder-32B-Instruct'
     >>> _parse_model(b'{"model": "  "}')
     'scikit-plots/Qwen2.5-Coder-32B-Instruct'
     '*'
     """
     return {
+        "Access-Control-Allow-Origin": allowed_origin,
         "Access-Control-Allow-Methods": "POST, OPTIONS",
         "Access-Control-Allow-Headers": "Content-Type",
     }
     Path 1 — explicit BACKEND_URL:
     >>> url, hdrs, t = _resolve_upstream_url(
+    ...     b"{}",
     ...     backend_url="https://my-model.hf.space/v1/chat/completions",
     ...     hf_token="",
     ... )
         return hf_spaces_model_url, headers, path2_read_timeout
     # ── Path 3: HF Serverless Inference API (provider models) ─────────────────
+    # router.huggingface.co is a flat OpenAI-compatible endpoint.
+    # The model is supplied in the request body (already present in `body`),
+    # NOT embedded in the URL path.  The old api-inference.huggingface.co/models
+    # API DID embed the model in the path as /{model}/v1/chat/completions, but
+    # router.huggingface.co uses a single endpoint for all models:
+    #   POST https://router.huggingface.co/v1/chat/completions
+    #   body: {"model": "Qwen/Qwen2.5-Coder-7B-Instruct:nscale", ...}
+    # Embedding the model ID in the path produces a 404/422 with no log entry
+    # because _forward passes non-2xx upstream responses through transparently.
+    url = f"{hf_base.rstrip('/')}/v1/chat/completions"
     headers["Authorization"] = f"Bearer {hf_token}"
     return url, headers, path3_read_timeout
     --------
     >>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
     >>> _validate_env("", "hf_mytoken", "")
+    >>> _validate_env(
+    ...     "", "", "https://scikit-plots-ai-model.hf.space/v1/chat/completions"
+    ... )
     >>> import pytest
     >>> with pytest.raises(RuntimeError, match="no viable routing path"):
     ...     _validate_env("", "", "")
         "HF_SPACES_MODEL_NAMESPACES",
         ",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
     )
+    _parsed_namespaces: tuple[str, ...] = (
+        tuple(ns.strip() for ns in _raw_namespaces.split(",") if ns.strip())
+        or DEFAULT_HF_SPACES_MODEL_NAMESPACES
+    )
     return {
         "backend_url": os.environ.get("BACKEND_URL", "").strip(),