Spaces:
Sleeping
Sleeping
Update Deployment_UI_BE.py
Browse files- Deployment_UI_BE.py +37 -19
Deployment_UI_BE.py
CHANGED
|
@@ -111,35 +111,53 @@ def _as_json(r):
|
|
| 111 |
# ---------------------------------------------------------------------
|
| 112 |
# Probes and route discovery (new)
|
| 113 |
# ---------------------------------------------------------------------
|
| 114 |
-
def _probe(method: str, url: str, timeout=5):
    """Fire one HTTP request and report (status_code, latency_ms, body_snippet).

    Any failure (connection refused, timeout, decode error, ...) is folded
    into a best-effort result: status code -1 and the exception text in
    place of the body snippet.
    """
    started = time.time()
    try:
        response = requests.request(method, url, timeout=timeout)
        elapsed_ms = int((time.time() - started) * 1000)
        snippet = response.text[:200] if response.text else ""
        return response.status_code, elapsed_ms, snippet
    except Exception as exc:  # deliberate catch-all: probes must never raise
        return -1, int((time.time() - started) * 1000), str(exc)
|
| 122 |
|
| 123 |
-
|
| 124 |
-
_POSSIBLE_ROUTES = [
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
def _infer_routes_from_image(image_uri: str):
|
|
|
|
|
|
|
|
|
|
| 128 |
iu = (image_uri or "").lower()
|
|
|
|
|
|
|
| 129 |
if "vllm-serve" in iu:
|
| 130 |
return ("/generate", "/ping")
|
|
|
|
|
|
|
|
|
|
| 131 |
if "hf-inference-toolkit" in iu or "huggingface-pytorch-inference" in iu:
|
| 132 |
-
return ("/
|
|
|
|
|
|
|
| 133 |
return (None, None)
|
| 134 |
|
| 135 |
-
def
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
|
|
|
| 143 |
# ---------------------------------------------------------------------
|
| 144 |
# Blob ingest via Model Blob page JSON (with blob_url override)
|
| 145 |
# ---------------------------------------------------------------------
|
|
|
|
| 111 |
# ---------------------------------------------------------------------
|
| 112 |
# Probes and route discovery (new)
|
| 113 |
# ---------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
+
# Expanded set: will try these against https://pod:port/<route>
|
| 116 |
+
_POSSIBLE_ROUTES = [
|
| 117 |
+
"/invocations", # <— added and placed first
|
| 118 |
+
"/generate",
|
| 119 |
+
"/predict",
|
| 120 |
+
"/predictions",
|
| 121 |
+
"/v1/chat/completions",
|
| 122 |
+
"/v1/models/model:predict",
|
| 123 |
+
]
|
| 124 |
|
| 125 |
def _infer_routes_from_image(image_uri: str):
|
| 126 |
+
"""
|
| 127 |
+
Infer (predict_route, health_route) from known image patterns.
|
| 128 |
+
"""
|
| 129 |
iu = (image_uri or "").lower()
|
| 130 |
+
|
| 131 |
+
# vLLM images
|
| 132 |
if "vllm-serve" in iu:
|
| 133 |
return ("/generate", "/ping")
|
| 134 |
+
|
| 135 |
+
# HuggingFace / Vertex HF Inference Toolkit
|
| 136 |
+
# changed from "/predict" → "/invocations"
|
| 137 |
if "hf-inference-toolkit" in iu or "huggingface-pytorch-inference" in iu:
|
| 138 |
+
return ("/invocations", "/ping")
|
| 139 |
+
|
| 140 |
+
# Unknown image → allow route scanning fallback
|
| 141 |
return (None, None)
|
| 142 |
|
| 143 |
+
async def _probe_all_routes(base: str, port: str, session):
    """Scan _POSSIBLE_ROUTES on ``base:port`` until one responds 200/OK-ish.

    Returns (predict_route, health_route or None); (None, None) when no
    candidate answers with a status below 500.

    NOTE(review): no entry in _POSSIBLE_ROUTES contains "/ping", so the
    health-route element is always None here — confirm intent.
    NOTE(review): assumes the response object exposes ``.status_code``
    (requests-style); aiohttp responses use ``.status`` — verify the
    session type passed by callers.
    """
    from urllib.parse import urljoin

    root = f"{base}:{port}/"
    for candidate in _POSSIBLE_ROUTES:
        target = urljoin(root, candidate.lstrip("/"))
        try:
            reply = await session.get(target, timeout=3)
        except Exception:  # unreachable / timed out — try the next candidate
            continue
        if reply.status_code < 500:
            return candidate, ("/ping" if "/ping" in candidate else None)

    return None, None
|
| 161 |
# ---------------------------------------------------------------------
|
| 162 |
# Blob ingest via Model Blob page JSON (with blob_url override)
|
| 163 |
# ---------------------------------------------------------------------
|