Theflame47 committed on
Commit
c2b54fb
·
verified ·
1 Parent(s): deb1e79

Update Deployment_UI_BE.py

Browse files
Files changed (1) hide show
  1. Deployment_UI_BE.py +48 -68
Deployment_UI_BE.py CHANGED
@@ -755,83 +755,63 @@ async def api_middleware_infer(req: Request):
755
  route = _INST.get("predictRoute") or "/predict"
756
  _INST["predictRoute"] = route
757
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  payload = await req.json()
759
  prompt = payload.get("prompt")
760
  if not isinstance(prompt, str) or not prompt.strip():
761
- return JSONResponse({"error": "Missing 'prompt' in request body."}, status_code=400)
762
 
763
- # HF text-classification shim: wrap into Vertex-style instances
764
  img = (_get_container_spec().get("imageUri","")).lower()
765
  if "huggingface-pytorch-inference" in img and isinstance(payload.get("prompt"), str):
766
  payload = {"instances": [payload["prompt"]]}
767
 
768
- try:
769
- # Prefer proxy base; fall back to direct IP if proxy not cached
770
- pid = (_INST.get("podId") or "").strip()
771
- proxy_base = None
772
  try:
773
- cspec = _get_container_spec()
774
- internal, _ = _get_port_and_proto(cspec)
775
- if pid and internal:
776
- proxy_base = f"https://{pid}-{internal}.proxy.runpod.net"
777
- _log_status(f"PROMPT_BASE proxy={proxy_base}")
778
- except Exception:
779
- pass
780
- if not proxy_base:
781
- ip, port = _INST.get("ip"), _INST.get("port")
782
- if ip and port:
783
- proxy_base = f"http://{ip}:{port}"
784
- _log_status(f"PROMPT_BASE direct={proxy_base}")
785
-
786
- # If neither path is available, do not gate—return clear error
787
- if not proxy_base:
788
- return JSONResponse({"error": "no reachable base yet (no proxy or ip/port)"}, status_code=503)
789
-
790
- url = f"{proxy_base}{route}"
791
- _log_status(f"PROMPT_ENDPOINT {url}")
792
- _job_log("compute", f"[MW] Forwarding infer to {url}")
793
-
794
- # Try multiple prompt body formats until success
795
- bodies = [
796
- payload,
797
- {"prompt": prompt},
798
- {"text": prompt},
799
- {"inputs": prompt},
800
- {"input": prompt},
801
- ]
802
- rp, data = None, None
803
- for body in bodies:
804
- try:
805
- rp = requests.post(url, json=body, timeout=120)
806
- _log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
807
- if rp.ok:
808
- break
809
- except Exception as e:
810
- _log_status(f"PREDICT_ERR {e}")
811
-
812
- if not rp:
813
- return JSONResponse({"error": "no response from model"}, status_code=504)
814
-
815
- ct = (rp.headers.get("content-type") or "").lower()
816
- data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
817
-
818
- if isinstance(data, dict):
819
- if "image_b64" in data:
820
- return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, status_code=rp.status_code)
821
- if isinstance(data.get("output"), str):
822
- return JSONResponse({"output": data["output"]}, status_code=rp.status_code)
823
- if "_raw" in data:
824
- return JSONResponse({"output": data["_raw"]}, status_code=rp.status_code)
825
- return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, status_code=rp.status_code)
826
-
827
- return JSONResponse({"output": str(data)}, status_code=rp.status_code)
828
-
829
- except HTTPException as he:
830
- _job_log("compute", f"[MW] ERROR {he.status_code}: {he.detail}")
831
- return JSONResponse({"error": he.detail}, status_code=he.status_code)
832
- except Exception as e:
833
- _job_log("compute", f"[MW] ERROR infer: {e}")
834
- return JSONResponse({"error": f"middleware infer failed: {e}"}, status_code=502)
835
  # ---------------------------------------------------------------------
836
  # Job progress + callback routes
837
  # ---------------------------------------------------------------------
 
755
  route = _INST.get("predictRoute") or "/predict"
756
  _INST["predictRoute"] = route
757
 
758
+ # Build deterministic proxy URL instead of waiting on readiness
759
+ pid = (_INST.get("podId") or "").strip()
760
+ if not pid:
761
+ try:
762
+ _load_state()
763
+ pid = (_INST.get("podId") or "").strip()
764
+ except Exception:
765
+ pass
766
+ if not pid:
767
+ return JSONResponse({"error": "no podId yet (create/start first)"}, status_code=400)
768
+
769
+ cspec = _get_container_spec()
770
+ internal, _ = _get_port_and_proto(cspec)
771
+ if not internal:
772
+ return JSONResponse({"error": "cannot resolve internal port from blob"}, status_code=400)
773
+
774
+ base = f"https://{pid}-{internal}.proxy.runpod.net"
775
+ url = f"{base}{route}"
776
+ _log_status(f"PROMPT_ENDPOINT {url}")
777
+ _job_log("compute", f"[MW] Forwarding infer to {url}")
778
+
779
  payload = await req.json()
780
  prompt = payload.get("prompt")
781
  if not isinstance(prompt, str) or not prompt.strip():
782
+ return JSONResponse({"error": "Missing 'prompt' in request body."}, 400)
783
 
784
+ # HF text-classification shim
785
  img = (_get_container_spec().get("imageUri","")).lower()
786
  if "huggingface-pytorch-inference" in img and isinstance(payload.get("prompt"), str):
787
  payload = {"instances": [payload["prompt"]]}
788
 
789
+ # Send immediately; no readiness checks or polling gates
790
+ bodies = [payload, {"prompt": prompt}, {"text": prompt}, {"inputs": prompt}, {"input": prompt}]
791
+ for body in bodies:
 
792
  try:
793
+ rp = requests.post(url, json=body, timeout=120)
794
+ _log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
795
+ if rp.ok:
796
+ ct = (rp.headers.get("content-type") or "").lower()
797
+ data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
798
+ if isinstance(data, dict):
799
+ if "image_b64" in data:
800
+ return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, rp.status_code)
801
+ if isinstance(data.get("output"), str):
802
+ return JSONResponse({"output": data["output"]}, rp.status_code)
803
+ if "_raw" in data:
804
+ return JSONResponse({"output": data["_raw"]}, rp.status_code)
805
+ return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, rp.status_code)
806
+ return JSONResponse({"output": str(data)}, rp.status_code)
807
+ except Exception as e:
808
+ _log_status(f"PREDICT_ERR {e}")
809
+
810
+ # Fallthrough: show last response or generic error
811
+ try:
812
+ return JSONResponse({"error": rp.text[:400]}, status_code=rp.status_code)
813
+ except Exception:
814
+ return JSONResponse({"error": "no response from model"}, status_code=504)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
815
  # ---------------------------------------------------------------------
816
  # Job progress + callback routes
817
  # ---------------------------------------------------------------------