Spaces:
Sleeping
Sleeping
Update Deployment_UI_BE.py
Browse files- Deployment_UI_BE.py +48 -68
Deployment_UI_BE.py
CHANGED
|
@@ -755,83 +755,63 @@ async def api_middleware_infer(req: Request):
|
|
| 755 |
route = _INST.get("predictRoute") or "/predict"
|
| 756 |
_INST["predictRoute"] = route
|
| 757 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
payload = await req.json()
|
| 759 |
prompt = payload.get("prompt")
|
| 760 |
if not isinstance(prompt, str) or not prompt.strip():
|
| 761 |
-
return JSONResponse({"error": "Missing 'prompt' in request body."},
|
| 762 |
|
| 763 |
-
# HF text-classification shim
|
| 764 |
img = (_get_container_spec().get("imageUri","")).lower()
|
| 765 |
if "huggingface-pytorch-inference" in img and isinstance(payload.get("prompt"), str):
|
| 766 |
payload = {"instances": [payload["prompt"]]}
|
| 767 |
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
proxy_base = None
|
| 772 |
try:
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
if
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
bodies = [
|
| 796 |
-
payload,
|
| 797 |
-
{"prompt": prompt},
|
| 798 |
-
{"text": prompt},
|
| 799 |
-
{"inputs": prompt},
|
| 800 |
-
{"input": prompt},
|
| 801 |
-
]
|
| 802 |
-
rp, data = None, None
|
| 803 |
-
for body in bodies:
|
| 804 |
-
try:
|
| 805 |
-
rp = requests.post(url, json=body, timeout=120)
|
| 806 |
-
_log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
|
| 807 |
-
if rp.ok:
|
| 808 |
-
break
|
| 809 |
-
except Exception as e:
|
| 810 |
-
_log_status(f"PREDICT_ERR {e}")
|
| 811 |
-
|
| 812 |
-
if not rp:
|
| 813 |
-
return JSONResponse({"error": "no response from model"}, status_code=504)
|
| 814 |
-
|
| 815 |
-
ct = (rp.headers.get("content-type") or "").lower()
|
| 816 |
-
data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
|
| 817 |
-
|
| 818 |
-
if isinstance(data, dict):
|
| 819 |
-
if "image_b64" in data:
|
| 820 |
-
return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, status_code=rp.status_code)
|
| 821 |
-
if isinstance(data.get("output"), str):
|
| 822 |
-
return JSONResponse({"output": data["output"]}, status_code=rp.status_code)
|
| 823 |
-
if "_raw" in data:
|
| 824 |
-
return JSONResponse({"output": data["_raw"]}, status_code=rp.status_code)
|
| 825 |
-
return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, status_code=rp.status_code)
|
| 826 |
-
|
| 827 |
-
return JSONResponse({"output": str(data)}, status_code=rp.status_code)
|
| 828 |
-
|
| 829 |
-
except HTTPException as he:
|
| 830 |
-
_job_log("compute", f"[MW] ERROR {he.status_code}: {he.detail}")
|
| 831 |
-
return JSONResponse({"error": he.detail}, status_code=he.status_code)
|
| 832 |
-
except Exception as e:
|
| 833 |
-
_job_log("compute", f"[MW] ERROR infer: {e}")
|
| 834 |
-
return JSONResponse({"error": f"middleware infer failed: {e}"}, status_code=502)
|
| 835 |
# ---------------------------------------------------------------------
|
| 836 |
# Job progress + callback routes
|
| 837 |
# ---------------------------------------------------------------------
|
|
|
|
| 755 |
route = _INST.get("predictRoute") or "/predict"
|
| 756 |
_INST["predictRoute"] = route
|
| 757 |
|
| 758 |
+
# Build deterministic proxy URL instead of waiting on readiness
|
| 759 |
+
pid = (_INST.get("podId") or "").strip()
|
| 760 |
+
if not pid:
|
| 761 |
+
try:
|
| 762 |
+
_load_state()
|
| 763 |
+
pid = (_INST.get("podId") or "").strip()
|
| 764 |
+
except Exception:
|
| 765 |
+
pass
|
| 766 |
+
if not pid:
|
| 767 |
+
return JSONResponse({"error": "no podId yet (create/start first)"}, status_code=400)
|
| 768 |
+
|
| 769 |
+
cspec = _get_container_spec()
|
| 770 |
+
internal, _ = _get_port_and_proto(cspec)
|
| 771 |
+
if not internal:
|
| 772 |
+
return JSONResponse({"error": "cannot resolve internal port from blob"}, status_code=400)
|
| 773 |
+
|
| 774 |
+
base = f"https://{pid}-{internal}.proxy.runpod.net"
|
| 775 |
+
url = f"{base}{route}"
|
| 776 |
+
_log_status(f"PROMPT_ENDPOINT {url}")
|
| 777 |
+
_job_log("compute", f"[MW] Forwarding infer to {url}")
|
| 778 |
+
|
| 779 |
payload = await req.json()
|
| 780 |
prompt = payload.get("prompt")
|
| 781 |
if not isinstance(prompt, str) or not prompt.strip():
|
| 782 |
+
return JSONResponse({"error": "Missing 'prompt' in request body."}, 400)
|
| 783 |
|
| 784 |
+
# HF text-classification shim
|
| 785 |
img = (_get_container_spec().get("imageUri","")).lower()
|
| 786 |
if "huggingface-pytorch-inference" in img and isinstance(payload.get("prompt"), str):
|
| 787 |
payload = {"instances": [payload["prompt"]]}
|
| 788 |
|
| 789 |
+
# Send immediately; no readiness checks or polling gates
|
| 790 |
+
bodies = [payload, {"prompt": prompt}, {"text": prompt}, {"inputs": prompt}, {"input": prompt}]
|
| 791 |
+
for body in bodies:
|
|
|
|
| 792 |
try:
|
| 793 |
+
rp = requests.post(url, json=body, timeout=120)
|
| 794 |
+
_log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
|
| 795 |
+
if rp.ok:
|
| 796 |
+
ct = (rp.headers.get("content-type") or "").lower()
|
| 797 |
+
data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
|
| 798 |
+
if isinstance(data, dict):
|
| 799 |
+
if "image_b64" in data:
|
| 800 |
+
return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, rp.status_code)
|
| 801 |
+
if isinstance(data.get("output"), str):
|
| 802 |
+
return JSONResponse({"output": data["output"]}, rp.status_code)
|
| 803 |
+
if "_raw" in data:
|
| 804 |
+
return JSONResponse({"output": data["_raw"]}, rp.status_code)
|
| 805 |
+
return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, rp.status_code)
|
| 806 |
+
return JSONResponse({"output": str(data)}, rp.status_code)
|
| 807 |
+
except Exception as e:
|
| 808 |
+
_log_status(f"PREDICT_ERR {e}")
|
| 809 |
+
|
| 810 |
+
# Fallthrough: show last response or generic error
|
| 811 |
+
try:
|
| 812 |
+
return JSONResponse({"error": rp.text[:400]}, status_code=rp.status_code)
|
| 813 |
+
except Exception:
|
| 814 |
+
return JSONResponse({"error": "no response from model"}, status_code=504)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
# ---------------------------------------------------------------------
|
| 816 |
# Job progress + callback routes
|
| 817 |
# ---------------------------------------------------------------------
|