celik-muhammed committed on
Commit
377a993
·
verified ·
1 Parent(s): 7da0afd

Upload _shared_logic.py

Browse files
Files changed (1) hide show
  1. _shared_logic.py +21 -9
_shared_logic.py CHANGED
@@ -339,9 +339,9 @@ def _parse_model(body: bytes, default: str = DEFAULT_MODEL) -> str:
339
  --------
340
  >>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
341
  'Qwen/Qwen2.5-Coder-7B-Instruct'
342
- >>> _parse_model(b'{}')
343
  'scikit-plots/Qwen2.5-Coder-32B-Instruct'
344
- >>> _parse_model(b'not-json')
345
  'scikit-plots/Qwen2.5-Coder-32B-Instruct'
346
  >>> _parse_model(b'{"model": " "}')
347
  'scikit-plots/Qwen2.5-Coder-32B-Instruct'
@@ -435,7 +435,7 @@ def _build_cors_headers(allowed_origin: str = "*") -> dict[str, str]:
435
  '*'
436
  """
437
  return {
438
- "Access-Control-Allow-Origin": allowed_origin,
439
  "Access-Control-Allow-Methods": "POST, OPTIONS",
440
  "Access-Control-Allow-Headers": "Content-Type",
441
  }
@@ -588,7 +588,7 @@ def _resolve_upstream_url(
588
  Path 1 — explicit BACKEND_URL:
589
 
590
  >>> url, hdrs, t = _resolve_upstream_url(
591
- ... b'{}',
592
  ... backend_url="https://my-model.hf.space/v1/chat/completions",
593
  ... hf_token="",
594
  ... )
@@ -617,7 +617,16 @@ def _resolve_upstream_url(
617
  return hf_spaces_model_url, headers, path2_read_timeout
618
 
619
  # ── Path 3: HF Serverless Inference API (provider models) ─────────────────
620
- url = f"{hf_base.rstrip('/')}/{model}/v1/chat/completions"
 
 
 
 
 
 
 
 
 
621
  headers["Authorization"] = f"Bearer {hf_token}"
622
  return url, headers, path3_read_timeout
623
 
@@ -654,7 +663,9 @@ def _validate_env(
654
  --------
655
  >>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
656
  >>> _validate_env("", "hf_mytoken", "")
657
- >>> _validate_env("", "", "https://scikit-plots-ai-model.hf.space/v1/chat/completions")
 
 
658
  >>> import pytest
659
  >>> with pytest.raises(RuntimeError, match="no viable routing path"):
660
  ... _validate_env("", "", "")
@@ -715,9 +726,10 @@ def load_proxy_env() -> dict[str, Any]:
715
  "HF_SPACES_MODEL_NAMESPACES",
716
  ",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
717
  )
718
- _parsed_namespaces: tuple[str, ...] = tuple(
719
- ns.strip() for ns in _raw_namespaces.split(",") if ns.strip()
720
- ) or DEFAULT_HF_SPACES_MODEL_NAMESPACES
 
721
 
722
  return {
723
  "backend_url": os.environ.get("BACKEND_URL", "").strip(),
 
339
  --------
340
  >>> _parse_model(b'{"model": "Qwen/Qwen2.5-Coder-7B-Instruct"}')
341
  'Qwen/Qwen2.5-Coder-7B-Instruct'
342
+ >>> _parse_model(b"{}")
343
  'scikit-plots/Qwen2.5-Coder-32B-Instruct'
344
+ >>> _parse_model(b"not-json")
345
  'scikit-plots/Qwen2.5-Coder-32B-Instruct'
346
  >>> _parse_model(b'{"model": " "}')
347
  'scikit-plots/Qwen2.5-Coder-32B-Instruct'
 
435
  '*'
436
  """
437
  return {
438
+ "Access-Control-Allow-Origin": allowed_origin,
439
  "Access-Control-Allow-Methods": "POST, OPTIONS",
440
  "Access-Control-Allow-Headers": "Content-Type",
441
  }
 
588
  Path 1 — explicit BACKEND_URL:
589
 
590
  >>> url, hdrs, t = _resolve_upstream_url(
591
+ ... b"{}",
592
  ... backend_url="https://my-model.hf.space/v1/chat/completions",
593
  ... hf_token="",
594
  ... )
 
617
  return hf_spaces_model_url, headers, path2_read_timeout
618
 
619
  # ── Path 3: HF Serverless Inference API (provider models) ─────────────────
620
+ # router.huggingface.co is a flat OpenAI-compatible endpoint.
621
+ # The model is supplied in the request body (already present in `body`),
622
+ # NOT embedded in the URL path. The old api-inference.huggingface.co/models
623
+ # API DID embed the model in the path as /{model}/v1/chat/completions, but
624
+ # router.huggingface.co uses a single endpoint for all models:
625
+ # POST https://router.huggingface.co/v1/chat/completions
626
+ # body: {"model": "Qwen/Qwen2.5-Coder-7B-Instruct:nscale", ...}
627
+ # Embedding the model ID in the path produces a 404/422 with no log entry
628
+ # because _forward passes non-2xx upstream responses through transparently.
629
+ url = f"{hf_base.rstrip('/')}/v1/chat/completions"
630
  headers["Authorization"] = f"Bearer {hf_token}"
631
  return url, headers, path3_read_timeout
632
 
 
663
  --------
664
  >>> _validate_env("https://my-model.hf.space/v1/chat/completions", "", "")
665
  >>> _validate_env("", "hf_mytoken", "")
666
+ >>> _validate_env(
667
+ ... "", "", "https://scikit-plots-ai-model.hf.space/v1/chat/completions"
668
+ ... )
669
  >>> import pytest
670
  >>> with pytest.raises(RuntimeError, match="no viable routing path"):
671
  ... _validate_env("", "", "")
 
726
  "HF_SPACES_MODEL_NAMESPACES",
727
  ",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
728
  )
729
+ _parsed_namespaces: tuple[str, ...] = (
730
+ tuple(ns.strip() for ns in _raw_namespaces.split(",") if ns.strip())
731
+ or DEFAULT_HF_SPACES_MODEL_NAMESPACES
732
+ )
733
 
734
  return {
735
  "backend_url": os.environ.get("BACKEND_URL", "").strip(),