# Source: Hugging Face Spaces - Theflame47 / RunPodRun (Space status: Sleeping)
# File size: 36,382 Bytes

from fastapi import APIRouter, HTTPException, Request, Form
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
import os, json, requests, time, re, pathlib

# Router on which every endpoint in this module is registered via @router.<method>(...).
router = APIRouter()

# ---------------------------------------------------------------------
# In-memory job + instance store
# ---------------------------------------------------------------------
# job_id -> job record; records are created lazily by _job_log.
_JOBS = {}

# Cached description of the pod this process is currently working with.
_INST = {
    # pod identity and resolved network endpoint
    "podId": "",
    "status": "",
    "ip": "",
    "port": "",
    # deployment blob and the hints supplied alongside it
    "blob": None,
    "model_id": "",
    "container_image_hint": "",
    # HTTP routes declared by (or inferred from) the blob
    "predictRoute": None,
    "healthRoute": None,
    "readinessRoute": None,
    "livenessRoute": None,
}

def _now_ms():
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)

def _job_log(job_id, msg):
    """
    Record ``msg`` against ``job_id`` and echo it to stdout.

    A job record is created on first use. Each log entry is a dict holding
    the millisecond timestamp (``t``) and the message (``msg``).
    """
    if job_id not in _JOBS:
        _JOBS[job_id] = {"status": "created", "logs": [],
                         "image_b64": None, "timings": {}}
    entry = {"t": _now_ms(), "msg": msg}
    _JOBS[job_id]["logs"].append(entry)
    # flush so the line shows up immediately in the process output
    print(f"[{job_id}] {msg}", flush=True)

def _log_create(msg):
    """Log ``msg`` on the "compute" job, tagged [CREATE]."""
    _job_log("compute", f"[CREATE] {msg}")


def _log_status(msg):
    """Log ``msg`` on the "compute" job, tagged [STATUS]."""
    _job_log("compute", f"[STATUS] {msg}")


def _log_delete(msg):
    """Log ``msg`` on the "compute" job, tagged [DELETE]."""
    _job_log("compute", f"[DELETE] {msg}")


def _log_id(prefix, pid):
    """Log which pod id an operation is using, as "<prefix> ID: <pid>"."""
    _job_log("compute", f"{prefix} ID: {pid}")

# --- local blob ingest (landing page only) ---
# Path of the on-disk blob; overridable through the MODEL_BLOB_PATH env var.
_LOCAL_BLOB_PATH = os.getenv("MODEL_BLOB_PATH", "model_blob.json")


def _load_local_blob():
    """
    Read and parse the JSON blob stored at ``_LOCAL_BLOB_PATH``.

    Returns the parsed JSON value, or None when the file does not exist or
    cannot be read/parsed (the failure is logged on the "compute" job).
    """
    try:
        if not os.path.exists(_LOCAL_BLOB_PATH):
            return None
        with open(_LOCAL_BLOB_PATH, "r", encoding="utf-8") as fh:
            parsed = json.load(fh)
    except Exception as e:
        _job_log("compute", f"ERROR LocalBlobLoad: {e}")
        return None
    return parsed

def _ingest_blob(parsed: dict, model_id_hint: str = "", container_image_hint: str = ""):
    """
    Cache a deployment blob in ``_INST`` and derive its HTTP routes.

    NOTE(review): this module defines ``_ingest_blob`` a second time further
    down; that later definition replaces this one when the module is imported.

    The container spec is taken from ``supportedActions.deploy.containerSpec``
    or, failing that, from ``container``. Non-blank string routes declared
    there are cached; predict/health routes that are still unset are then
    filled from ``_infer_routes_from_image``.

    Raises HTTPException(400) when ``parsed`` is not a dict. Returns True.
    """
    if not isinstance(parsed, dict):
        raise HTTPException(400, "Invalid blob (expected JSON object).")

    _INST["blob"] = parsed
    _INST["model_id"] = model_id_hint or ""
    _INST["container_image_hint"] = container_image_hint or ""

    deploy = (parsed.get("supportedActions") or {}).get("deploy") or {}
    spec = deploy.get("containerSpec") or parsed.get("container") or {}

    # Routes spelled out in the blob are cached as-is (whitespace trimmed).
    for route_key in ("predictRoute", "healthRoute", "readinessRoute", "livenessRoute"):
        declared = spec.get(route_key)
        if not isinstance(declared, str):
            continue
        cleaned = declared.strip()
        if cleaned:
            _INST[route_key] = cleaned

    # Image-based guesses only fill routes that are still empty.
    uri = (spec.get("imageUri") or "").strip().lower()
    guessed_predict, guessed_health = _infer_routes_from_image(uri)
    if guessed_predict and not _INST.get("predictRoute"):
        _INST["predictRoute"] = guessed_predict
    if guessed_health and not _INST.get("healthRoute"):
        _INST["healthRoute"] = guessed_health

    return True

# ---------------------------------------------------------------------
# Disk persistence for recovery
# ---------------------------------------------------------------------
# File that holds the minimal pod state needed to recover after a restart.
_STATE_PATH = "/tmp/pod_state.json"


def _save_state():
    """
    Write podId/status/ip/port from ``_INST`` to ``_STATE_PATH`` as JSON.

    Best-effort: any failure is logged on the "compute" job and swallowed.
    """
    persisted_keys = ("podId", "status", "ip", "port")
    try:
        pathlib.Path("/tmp").mkdir(parents=True, exist_ok=True)
        with open(_STATE_PATH, "w") as fh:
            snapshot = {key: _INST.get(key, "") for key in persisted_keys}
            json.dump(snapshot, fh)
    except Exception as e:
        _job_log("compute", f"ERROR SaveState: {e}")

def _load_state():
    """
    Restore podId/status/ip/port into ``_INST`` from ``_STATE_PATH``.

    Only keys present in the file are overwritten. A missing file is a
    no-op; any other failure is logged on the "compute" job and swallowed.
    """
    try:
        if not os.path.exists(_STATE_PATH):
            return
        with open(_STATE_PATH) as fh:
            saved = json.load(fh)
        for key in ("podId", "status", "ip", "port"):
            if key in saved:
                _INST[key] = saved[key]
    except Exception as e:
        _job_log("compute", f"ERROR LoadState: {e}")

# ---------------------------------------------------------------------
# RunPod helpers
# ---------------------------------------------------------------------
# Base URL of the RunPod REST API used by every call in this module.
_RP_BASE = "https://rest.runpod.io/v1"


def _rp_headers():
    """
    Build the HTTP headers for a RunPod API call.

    The API key is read from the ``RunPod`` environment variable on every
    call. Raises HTTPException(500) when it is unset or blank.
    """
    api_key = os.getenv("RunPod", "").strip()
    if api_key:
        return {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    raise HTTPException(500, "Missing RunPod API key (env var 'RunPod').")

def _as_json(r):
    """
    Decode an HTTP response into a JSON-serialisable value.

    When the content-type mentions "json" and the body parses, the parsed
    body is returned. In every other case the raw text is wrapped as
    ``{"_raw": <text>}``.
    """
    content_type = r.headers.get("content-type") or ""
    if "json" not in content_type.lower():
        return {"_raw": r.text}
    try:
        return r.json()
    except Exception:
        # advertised as JSON but unparsable: fall back to the raw body
        return {"_raw": r.text}

# ---------------------------------------------------------------------
# Probes and route discovery (new)
# ---------------------------------------------------------------------

# Candidate predict routes, in the order _probe_all_routes tries them.
# "/invocations" is deliberately first.
_POSSIBLE_ROUTES = [
    "/invocations", "/generate", "/predict", "/predictions",
    "/v1/chat/completions", "/v1/models/model:predict",
]

def _infer_routes_from_image(image_uri: str):
    """
    Guess ``(predict_route, health_route)`` from substrings of the image URI.

    Matching is case-insensitive and the first matching family wins.
    ``(None, None)`` is returned for an unrecognised or empty URI so that
    callers can fall back to route scanning.
    """
    uri = (image_uri or "").lower()

    # (substrings identifying an image family, routes that family serves)
    known_families = (
        # vLLM images
        (("vllm-serve",), ("/generate", "/ping")),
        # HuggingFace / Vertex HF Inference Toolkit
        (("hf-inference-toolkit", "huggingface-pytorch-inference"), ("/invocations", "/ping")),
    )
    for markers, routes in known_families:
        if any(marker in uri for marker in markers):
            return routes

    return (None, None)

async def _probe_all_routes(base: str, port: str, session):
    """
    Find the first candidate route that the server at ``base:port`` serves.

    Each entry of ``_POSSIBLE_ROUTES`` is requested with GET through
    ``session`` (an async client whose ``get`` returns an object exposing
    ``status_code``). A route counts as present when the server answers with
    a status below 500 other than 404: 404 means "no such route", whereas
    codes such as 400 or 405 show that the route exists but rejected a bare
    GET. Accepting 404 would make the first candidate win on any reachable
    server.

    Returns ``(predict_route, health_route)``; ``health_route`` is "/ping"
    only when the matched route itself contains "/ping", otherwise None.
    Returns ``(None, None)`` when no candidate qualifies. Request errors
    are treated as "route not available" and never propagate.
    """
    from urllib.parse import urljoin

    proto_base = f"{base}:{port}"
    for route in _POSSIBLE_ROUTES:
        url = urljoin(proto_base + "/", route.lstrip("/"))
        try:
            r = await session.get(url, timeout=3)
            status = r.status_code
        except Exception:
            # best-effort probing: unreachable or odd responses just skip the route
            continue
        if status < 500 and status != 404:
            return route, ("/ping" if "/ping" in route else None)

    return None, None
# ---------------------------------------------------------------------
# Blob ingest via Model Blob page JSON (with blob_url override)
# ---------------------------------------------------------------------
_HF_SPACE_PORT = os.getenv("PORT", "7860")
_LOCAL_BASE = f"http://127.0.0.1:{_HF_SPACE_PORT}"

def _normalize_blob_url(u: str | None) -> str | None:
    if not u:
        return None
    u = str(u).strip()
    if u.startswith(("http://", "https://")):
        return u
    # Treat '/x' or 'x' as local to this app (same origin as FE)
    if u.startswith("/"):
        return f"{_LOCAL_BASE}{u}"
    return f"{_LOCAL_BASE}/{u}"

def _fetch_url(u: str):
    """
    GET ``u`` (8 second timeout) and return its body parsed as JSON.

    Returns None when the response is not OK, the body is not valid JSON,
    or the request fails; each failure is logged on the "compute" job.
    """
    try:
        resp = requests.get(u, timeout=8)
        if not resp.ok:
            _job_log("compute", f"ERROR BlobFetch code={resp.status_code} url={u} body={resp.text[:200]}")
            return None
        return resp.json()
    except Exception as e:
        _job_log("compute", f"ERROR BlobFetch url={u}: {e}")
    return None

def _fetch_blob_from_page():
    """Fetch and parse ``/modelblob.json`` from this app's own local base URL."""
    page_url = f"{_LOCAL_BASE}/modelblob.json"
    return _fetch_url(page_url)

def _ingest_blob(parsed: dict, model_id_hint: str = "", container_image_hint: str = ""):
    """
    Cache a deployment blob in ``_INST`` and derive its HTTP routes.

    The container spec is read from ``supportedActions.deploy.containerSpec``
    or, failing that, from ``container``. Route resolution, in order:

    1. All four cached routes are cleared, so nothing resolved for a
       previously ingested blob leaks into this one.
    2. Non-blank string routes declared in the container spec are cached.
    3. ``predictRoute`` / ``healthRoute`` still unset are filled from
       ``_infer_routes_from_image(imageUri)``.

    Args:
        parsed: the decoded blob; must be a dict.
        model_id_hint: optional model id stored alongside the blob.
        container_image_hint: optional image name stored alongside the blob.

    Returns:
        True once the blob is cached.

    Raises:
        HTTPException(400): ``parsed`` is not a dict.
    """
    if not isinstance(parsed, dict):
        raise HTTPException(400, "Invalid blob (expected JSON object).")

    route_keys = ("predictRoute", "healthRoute", "readinessRoute", "livenessRoute")

    _INST.update({
        "blob": parsed,
        "model_id": model_id_hint or "",
        "container_image_hint": container_image_hint or "",
        # start from a clean slate: routes belong to the blob being ingested
        **{k: None for k in route_keys},
    })

    c = (((parsed.get("supportedActions") or {}).get("deploy") or {}).get("containerSpec")
         or parsed.get("container") or {})
    if not isinstance(c, dict):
        # malformed spec: keep the blob cached but derive no routes from it
        c = {}

    for k in route_keys:
        v = c.get(k)
        if isinstance(v, str) and v.strip():
            _INST[k] = v.strip()

    raw_image = c.get("imageUri")
    image_uri = raw_image.strip().lower() if isinstance(raw_image, str) else ""
    pr, hr = _infer_routes_from_image(image_uri)
    if pr and not _INST.get("predictRoute"):
        _INST["predictRoute"] = pr
    if hr and not _INST.get("healthRoute"):
        _INST["healthRoute"] = hr

    return True

@router.post("/api/ingest/from_landing")
def api_ingest_from_landing(blob_url: str | None = None):
    """
    Fetch the deployment blob and cache it for later endpoints.

    ``blob_url`` may be absolute or relative to this app; when it is absent
    the blob is read from '/modelblob.json' on this app. Responds 404 when
    nothing usable could be fetched, otherwise ``{"ok": True, "source": <url>}``.
    """
    source = _normalize_blob_url(blob_url)
    if not source:
        source = _normalize_blob_url("/modelblob.json")

    blob = _fetch_url(source)
    if blob:
        _ingest_blob(blob, model_id_hint="", container_image_hint="")
        return JSONResponse({"ok": True, "source": source})
    return JSONResponse({"error": "Blob not available"}, 404)

# (Optional compatibility: UI posting to /Deployment_UI; accepts blob_url via query)
@router.post("/Deployment_UI")
async def deployment_ui_ingest(request: Request,
                               model_id: str = Form(""),
                               container_image: str = Form(""),
                               blob: str = Form("")):
    """
    Legacy form endpoint used by the Deployment UI page.

    The blob location comes from the ``blob_url`` query parameter, falling
    back to '/modelblob.json' on this app. The ``model_id`` and
    ``container_image`` form fields are stored as hints next to the blob;
    the ``blob`` form field is accepted but not read here.

    Responds 400 (HTML) when no blob could be fetched, otherwise redirects
    (303) to /Deployment_UI.
    """
    location = request.query_params.get("blob_url") or "/modelblob.json"
    target = _normalize_blob_url(location)

    fetched = _fetch_url(target)
    if fetched:
        _ingest_blob(fetched, model_id_hint=model_id, container_image_hint=container_image)
        return RedirectResponse("/Deployment_UI", 303)
    return HTMLResponse("<pre>Missing blob (no /modelblob.json and no blob_url)</pre>", 400)
# ---------------------------------------------------------------------
# Create instance
# ---------------------------------------------------------------------
@router.post("/api/compute/create_instance")
async def api_create_instance(req: Request):
    """
    Create a RunPod pod from the cached deployment blob and ask it to start.

    Steps, in order: resolve the blob (lazy-loading the local file when none
    is cached), translate its container spec (image, env, ports, command/args,
    GPU type and count) into a create payload, POST it to ``{_RP_BASE}/pods``,
    cache the returned pod id, POST ``/start`` for that pod, and take one
    status snapshot.

    Returns a JSONResponse:
      * 400 when the blob, container spec, imageUri or a usable port is missing;
      * 500 when the create request raises;
      * RunPod's own status code and body when the create call is not OK;
      * 502 when the create call is OK but no pod id is found in the response;
      * otherwise the create response body (plus ``_status`` or
        ``_status_error``) with the create call's status code.

    NOTE(review): ``req`` is never read in this handler.
    NOTE(review): the blocking ``requests`` calls run inside an ``async def``.
    """
    # Ensure blob is present (lazy-load from landing file if needed)
    if not _INST.get("blob"):
        lb = _load_local_blob()
        if lb:
            _ingest_blob(lb)
    blob = _INST.get("blob")
    if not blob:
        return JSONResponse({"error": "No deployment blob provided."}, 400)

    # Same lookup order as blob ingest: containerSpec first, then "container".
    c = ((blob.get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
        or blob.get("container")
    if not isinstance(c, dict) or not c:
        return JSONResponse({"error": "Blob missing containerSpec."}, 400)

    image = (c.get("imageUri") or "").strip()
    if not image:
        return JSONResponse({"error": "containerSpec.imageUri missing."}, 400)
    _log_create(f"imageName: {image}")

    # Flatten the list of {"name", "value"} entries into a plain dict.
    # NOTE(review): the values are logged verbatim here and again in
    # PAYLOAD_SENT below - confirm none of them are secrets.
    env_list = c.get("env") or []
    env_obj = {e.get("name"): e.get("value") for e in env_list
               if isinstance(e, dict) and e.get("name")}
    _log_create(f"env: {json.dumps(env_obj, ensure_ascii=False)}")

    # Build "<containerPort>/<protocol>" strings. Protocols other than
    # http/tcp fall back to http; entries without an int port are skipped.
    ports_list = c.get("ports") or []
    rp_ports = []
    for p in ports_list:
        if isinstance(p, dict):
            cp = p.get("containerPort")
            proto = (p.get("protocol") or "http").lower()
            if proto not in ("http", "tcp"):
                proto = "http"
            if isinstance(cp, int):
                rp_ports.append(f"{cp}/{proto}")
    if not rp_ports:
        return JSONResponse({"error": "ports[].containerPort required."}, 400)
    _log_create(f"ports: {rp_ports}")

    # command/args are forwarded only when they are lists.
    command = c.get("command") if isinstance(c.get("command"), list) else None
    args = c.get("args") if isinstance(c.get("args"), list) else None
    if command: _log_create(f"command: {command}")
    if args: _log_create(f"args: {args}")

    # GPU normalization (enum -> pretty string); include only if non-empty
    dr = c.get("dedicatedResources") or {}
    gpu_ids = None
    gpu_count = 1  # default when the blob gives no positive int count
    if isinstance(dr, dict):
        typ = (dr.get("machineSpec", {}) or {}).get("acceleratorType")
        cnt = (dr.get("machineSpec", {}) or {}).get("acceleratorCount")
        if typ: gpu_ids = [typ] if isinstance(typ, str) else typ
        if isinstance(cnt, int) and cnt > 0: gpu_count = cnt

    def _normalize_gpu_enum(s: str) -> str:
        # Upper-case, turn underscores into spaces, make sure the string
        # starts with a vendor name (NVIDIA unless it already says AMD),
        # and put a space before a trailing "GB" unit.
        # Returns "" for non-string or blank input.
        if not isinstance(s, str) or not s.strip():
            return ""
        t = s.strip().upper().replace("_", " ")
        vendor = "NVIDIA"
        if t.startswith("NVIDIA "):
            t = t[len("NVIDIA "):]
        elif t.startswith("AMD "):
            vendor = "AMD"; t = t[len("AMD "):]
        t = re.sub(r"(\d)(GB\b)", r"\1 \2", t)  # 80GB -> 80 GB
        return f"{vendor} {t}".strip()

    # Only the first accelerator type listed in the blob is used.
    rp_gpu = None
    if gpu_ids:
        rp_gpu = _normalize_gpu_enum(gpu_ids[0]).strip() or None
        _log_create(f"GPU_TRANSLATION original={gpu_ids[0]} -> runpod='{rp_gpu}'")

    _log_create("SECURE_PLACEMENT interruptible=false")

    # Optional keys are spread in only when they carry a value.
    payload = {
        "name": re.sub(r"[^a-z0-9-]", "-", f"ephemeral-{int(time.time())}".lower()),
        "computeType": "GPU",
        "interruptible": False,              # On-Demand (not community)
        "imageName": image,
        "gpuCount": gpu_count,
        "ports": rp_ports,
        "supportPublicIp": True,
        **({"gpuTypeIds": [rp_gpu]} if rp_gpu else {}),
        **({"env": env_obj} if env_obj else {}),
        **({"command": command} if command else {}),
        **({"args": args} if args else {}),
    }

    _log_create(f"PAYLOAD_SENT {json.dumps(payload, ensure_ascii=False)}")

    content = {}
    pid = None
    try:
        r = requests.post(f"{_RP_BASE}/pods", headers=_rp_headers(), json=payload, timeout=60)
        content = _as_json(r)
        _log_create(f"RUNPOD_RESPONSE {json.dumps(content, ensure_ascii=False)}")
        pid = content.get("id")
        # No top-level id: take the id of the first nested dict that has one.
        if not pid and isinstance(content, dict):
            for v in content.values():
                if isinstance(v, dict) and "id" in v:
                    pid = v["id"]; break
    except Exception as e:
        _log_create(f"ERROR Create: {e}")
        return JSONResponse({"error": f"RunPod create failed: {e}"}, 500)

    _log_create(f"ID: {pid}")

    # r is always bound here: the except branch above returns.
    if not isinstance(r, requests.Response):
        return JSONResponse({"error": "No HTTP response from RunPod create."}, 502)
    if not r.ok:
        return JSONResponse(content if isinstance(content, dict) else {"_raw": str(content)}, r.status_code)
    if not pid:
        return JSONResponse({"error": "Create succeeded but no pod ID in response.", "raw": content}, 502)

    # cache pod id
    try:
        _INST["podId"] = str(pid).strip()
        _log_id("CREATE_SET", _INST["podId"])
        _save_state()
    except Exception as e:
        return JSONResponse({"error": f"Could not cache pod ID: {e}"}, 502)

    # start the pod immediately so networking/IP can come up
    # (a failed start is only logged; the handler carries on)
    try:
        sr = requests.post(f"{_RP_BASE}/pods/{_INST['podId']}/start", headers=_rp_headers(), timeout=30)
        scontent = _as_json(sr)
        _log_status(f"START_RESPONSE {json.dumps(scontent, ensure_ascii=False)}")
    except Exception as e:
        _log_status(f"ERROR Start: {e}")

    # initial status snapshot
    try:
        rs = requests.get(f"{_RP_BASE}/pods/{_INST['podId']}", headers=_rp_headers(), timeout=30)
        st = _as_json(rs)
        _log_status(f"STATUS_POLL {json.dumps(st, ensure_ascii=False)}")
        content["_status"] = st
    except Exception as e:
        content["_status_error"] = str(e)
        _log_status(f"ERROR Status: {e}")

    # Status is read from the create response itself, not from the snapshot
    # stored under "_status".
    _INST["status"] = content.get("desiredStatus") or content.get("status") or ""
    # Normalise a missing/None ip or port to "".
    _INST["ip"] = _INST.get("ip") or ""
    _INST["port"] = _INST.get("port") or ""
    return JSONResponse(content, r.status_code)
# ---------------------------------------------------------------------
# Poll / read instance status + explicit readiness fields
# ---------------------------------------------------------------------
@router.get("/api/compute/pods/{pod_id}")
def api_get_instance(pod_id: str = None):
    """
    Poll RunPod for a pod and refresh the cached endpoint information.

    When the pod reports a public IP and port mappings, the public port is
    chosen (the mapping for the blob's first declared containerPort, else
    the first mapping), ``_INST`` is updated and persisted, and the health
    route is probed once for logging purposes.

    Returns the RunPod pod object merged with a ``cachedState`` dict, 400
    when no pod id is available, or 502 when the poll request raises.
    """
    pid = (pod_id or _INST.get("podId") or "").strip()
    if not pid:
        return JSONResponse({"error": "pod_id missing."}, 400)
    _log_id("STATUS_USES", pid)

    try:
        r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=30)
        last = _as_json(r)
        _log_status(f"STATUS_POLL {json.dumps(last, ensure_ascii=False)}")
    except Exception as e:
        return JSONResponse({"error": f"poll failed: {e}"}, 502)

    # Internal port declared by the blob (first ports[] entry); stays None
    # when the blob is missing or malformed.
    declared = None
    try:
        c = (((_INST.get("blob") or {}).get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
            or (_INST.get("blob") or {}).get("container") or {}
        declared = int((c.get("ports") or [])[0].get("containerPort"))
    except Exception:
        c = {}
        pass

    if isinstance(last, dict):
        ip = last.get("publicIp") or ""
        pm = last.get("portMappings") or {}

        # Nothing below runs until the pod reports both an IP and a mapping.
        if ip and isinstance(pm, dict) and pm:
            # choose mapped public port for the declared internal port; else first mapping
            if isinstance(declared, int) and str(declared) in pm:
                chosen = str(pm[str(declared)])
            else:
                k = next(iter(pm.keys()))
                chosen = str(pm[k])
                _log_status(f"PORT_MAPPING declared={declared} not_found_using_first key={k}")

            _INST.update({"podId": pid,
                          "status": last.get("desiredStatus", ""),
                          "ip": ip,
                          "port": chosen})
            _save_state()

            base = f"http://{_INST['ip']}:{_INST['port']}"
            _log_status(f"PORT_MAPPING declared={declared} chosen={chosen} all={pm}")
            _log_status(f"RESOLVED_ENDPOINT base={base}")

            # --- NEW: health-first readiness (mirror Vertex), fallback to predict existence ---
            hr = (_INST.get("healthRoute") or "/health").strip()
            pr = (_INST.get("predictRoute") or "/predict").strip()

            # _probe is not defined in this part of the file; from its use here it
            # returns (status code, elapsed ms, body snippet) - TODO confirm.
            # The result is only logged: the readiness logic below is disabled,
            # which also leaves `pr` unused.
            code_h, ms_h, snippet_h = _probe("GET", f"{base}{hr}")
            _log_status(f"HEALTH_PROBE path={hr} code={code_h} ms={ms_h} body_snippet={snippet_h[:120]}")
            #if code_h in (200, 204):
                #_INST["status"] = "READY"
            #else:
                #code_p, ms_p, _ = _probe("HEAD", f"{base}{pr}")
                #_log_status(f"PREDICT_PROBE path={pr} code={code_p} ms={ms_p}")
                #if code_p in (200, 204, 400, 405):
                   # _INST["status"] = "READY"

            # Final prompt URL (prefer IP; else proxy host)
            proute = _INST.get("predictRoute") or "/predict"
            if _INST.get("ip") and _INST.get("port"):
                prompt_url = f"http://{_INST['ip']}:{_INST['port']}{proute}"
            else:
                proxy_base = f"https://{pid}-{declared}.proxy.runpod.net" if declared else ""
                prompt_url = f"{proxy_base}{proute}" if proxy_base else ""
            if prompt_url:
                _log_status(f"PROMPT_ENDPOINT {prompt_url}")

    # Always include cached readiness data for the UI
    merged = {**last, "cachedState": {
        "podId": _INST.get("podId"),
        "status": _INST.get("status"),
        "ip": _INST.get("ip"),
        "port": _INST.get("port"),
        "predictRoute": _INST.get("predictRoute"),
        "healthRoute": _INST.get("healthRoute"),
    }}
    return JSONResponse(merged)
# ---------------------------------------------------------------------
# Start, Stop, End All — same as before
# ---------------------------------------------------------------------
@router.post("/api/compute/pods/{pod_id}/start")
def api_start_instance(pod_id: str):
    """
    Ask RunPod to start the pod ``pod_id``.

    Relays RunPod's response body and status code; responds 500 when the
    request (or building its headers) raises.
    """
    _log_id("START_USES", pod_id)
    start_url = f"{_RP_BASE}/pods/{pod_id}/start"
    try:
        resp = requests.post(start_url, headers=_rp_headers(), timeout=30)
        body = _as_json(resp)
        _log_status(f"START_RESPONSE {json.dumps(body, ensure_ascii=False)}")
        return JSONResponse(body, resp.status_code)
    except Exception as e:
        _log_status(f"ERROR Start: {e}")
        return JSONResponse({"error": f"RunPod start failed: {e}"}, 500)

@router.delete("/api/compute/delete_instance")
async def api_delete_instance():
    """
    Stop the cached pod.

    Despite the route name this POSTs to RunPod's ``/stop`` endpoint; the
    cached pod state is left untouched. Relays RunPod's response body and
    status code, responds 400 when no pod id is cached and 500 when the
    request raises.
    """
    cached_id = (_INST.get("podId") or "").strip()
    if cached_id == "":
        return JSONResponse({"error": "pod_id missing and no cached pod found."}, status_code=400)

    _log_id("STOP_USES", cached_id)
    try:
        _log_delete(">>> STOP endpoint triggered")
        stop_url = f"{_RP_BASE}/pods/{cached_id}/stop"
        resp = requests.post(stop_url, headers=_rp_headers(), timeout=60)
        body = _as_json(resp)
        _log_delete(f"STOP_RESPONSE {json.dumps(body, ensure_ascii=False)}")
        return JSONResponse(status_code=resp.status_code, content=body)
    except Exception as e:
        _log_delete(f"ERROR Stop: {e}")
        return JSONResponse(status_code=500, content={"error": f"RunPod stop failed: {e}"})

@router.delete("/api/compute/end_all")
async def api_end_all():
    """
    Delete the cached pod on RunPod.

    When RunPod answers 200, 202 or 204 the cached podId/status/ip/port are
    cleared and persisted. Relays RunPod's response body and status code,
    responds 400 when no pod id is cached and 500 when the request raises.
    """
    cached_id = (_INST.get("podId") or "").strip()
    if cached_id == "":
        return JSONResponse({"error": "pod_id missing and no cached pod found."}, status_code=400)

    _log_id("DELETE_USES", cached_id)
    try:
        _log_delete(">>> END-ALL endpoint triggered")
        pod_url = f"{_RP_BASE}/pods/{cached_id}"
        resp = requests.delete(pod_url, headers=_rp_headers(), timeout=60)
        body = _as_json(resp)
        _log_delete(f"DELETE_RESPONSE {json.dumps(body, ensure_ascii=False)}")
        deleted = resp.status_code in (200, 202, 204)
        if deleted:
            # forget the pod so later calls do not target a deleted instance
            _INST.update({"podId": "", "status": "", "ip": "", "port": ""})
            _save_state()
        return JSONResponse(status_code=resp.status_code, content=body)
    except Exception as e:
        _log_delete(f"ERROR Delete: {e}")
        return JSONResponse(status_code=500, content={"error": f"RunPod delete failed: {e}"})

# ---------------------------------------------------------------------
# Wait instance
# ---------------------------------------------------------------------
@router.get("/api/compute/wait_instance")
def api_wait_instance(pod_id: str = None):
    """
    Poll RunPod once for a pod and refresh the cached ip/port/status/base.

    The public port is resolved from the pod's port mappings in this order:
    the mapping for the blob's first declared containerPort, the mapping
    for "8080", then the first mapping. When an IP and port are known the
    health route is probed once (result only logged). The proxy base URL is
    cached under ``_INST["base"]`` when the internal port can be resolved.

    Returns the RunPod pod object merged with a ``cachedState`` dict, 400
    when no pod id is available, or 502 when the poll request raises.
    """
    pid = (pod_id or _INST.get("podId") or "").strip()
    if not pid:
        return JSONResponse({"error": "pod_id missing."}, status_code=400)
    try:
        r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=30)
        last = _as_json(r)
        _log_status(f"WAIT_STATUS {json.dumps(last, ensure_ascii=False)}")
    except Exception as e:
        return JSONResponse({"error": f"wait poll failed: {e}"}, status_code=502)

    # Fall back to the cached IP when the pod object has none.
    ip = last.get("publicIp") or _INST.get("ip")
    pm = last.get("portMappings") or {}
    port = None
    # Internal port declared by the blob (first ports[] entry); stays None
    # when the blob is missing or malformed.
    declared = None
    try:
        c = (((_INST.get("blob") or {}).get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
            or (_INST.get("blob") or {}).get("container") or {}
        declared = int((c.get("ports") or [])[0].get("containerPort"))
    except Exception:
        c = {}
        pass

    # Resolution order: declared port -> "8080" -> first mapping.
    if ip and pm:
        try:
            if isinstance(declared, int) and str(declared) in pm:
                port = str(pm[str(declared)])
        except Exception:
            pass
        if not port and "8080" in pm:
            port = str(pm["8080"])
        elif not port and pm:
            port = str(pm[next(iter(pm.keys()))])

    if ip and port:
        base = f"http://{ip}:{port}"
        _log_status(f"RESOLVED_IP {base}")

        # --- NEW: health-first readiness (mirror Vertex), fallback to predict existence ---
        hr = (_INST.get("healthRoute") or "/health").strip()
        pr = (_INST.get("predictRoute") or "/predict").strip()

        # _probe is not defined in this part of the file; from its use here it
        # returns (status code, elapsed ms, body snippet) - TODO confirm.
        # The result is only logged: the readiness logic below is disabled,
        # which also leaves `pr` unused.
        code_h, ms_h, snippet_h = _probe("GET", f"{base}{hr}")
        _log_status(f"HEALTH_PROBE path={hr} code={code_h} ms={ms_h} body_snippet={snippet_h[:120]}")
       # if code_h in (200, 204):
           # _INST["status"] = "READY"
        #else:
            #code_p, ms_p, _ = _probe("HEAD", f"{base}{pr}")
            #_log_status(f"PREDICT_PROBE path={pr} code={code_p} ms={ms_p}")
            #if code_p in (200, 204, 400, 405):
                #_INST["status"] = "READY"

        if _INST.get("predictRoute"):
            _log_status(f"PROMPT_ENDPOINT {base}{_INST['predictRoute']}")

    # Best-effort: cache the proxy base URL; any failure here is ignored.
    # Note that _save_state only persists podId/status/ip/port, so "base"
    # itself is kept in memory only.
    try:
        cspec = _get_container_spec()
        internal, _ = _get_port_and_proto(cspec)
        if internal:
            proxy_base = f"https://{pid}-{internal}.proxy.runpod.net"
            _log_status(f"RESOLVED_PROXY {proxy_base}")
            _INST["base"] = proxy_base
            _save_state()
    except Exception:
        pass

    # An unresolved ip/port is stored as "" (overwriting any earlier value).
    _INST.update({"ip": ip or "", "port": port or "", "status": last.get("desiredStatus", "")})
    _save_state()

    merged = {
        **last,
        "cachedState": {
            "podId": _INST.get("podId"),
            "status": _INST.get("status"),
            "ip": _INST.get("ip"),
            "port": _INST.get("port"),
            "base": _INST.get("base"),
            "predictRoute": _INST.get("predictRoute"),
            "healthRoute": _INST.get("healthRoute"),
        },
    }
    return JSONResponse(merged)
# ---------------------------------------------------------------------
# Debug: live probes against the instance (IP + Proxy)
# ---------------------------------------------------------------------
@router.get("/api/compute/debug/probes")
def api_debug_probes(pod_id: str = None):
    """
    Probe the pod's health and predict routes and report every result.

    Fetches the pod object from RunPod, works out the internal and public
    ports, then sends GET to each candidate health path and HEAD to each
    candidate predict path on every known base URL (direct IP and RunPod
    proxy). Nothing in ``_INST`` is modified by this handler itself.

    Returns 200 with ``{"podId", "ip", "internalPort", "publicPort",
    "probes": [...]}``, 400 when no pod id is available, or 502 when the
    pod fetch raises.
    """
    pid = (pod_id or _INST.get("podId") or "").strip()
    if not pid:
        return JSONResponse({"error": "pod_id missing."}, 400)

    # latest pod object (for portMappings/publicIp)
    try:
        r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=20)
        pod = _as_json(r)
        _log_status(f"DEBUG_POD_OBJ {json.dumps(pod, ensure_ascii=False)}")
    except Exception as e:
        return JSONResponse({"error": f"pod fetch failed: {e}"}, 502)

    ip = pod.get("publicIp") or _INST.get("ip")
    pm = pod.get("portMappings") or {}

    # choose internal/public ports
    internal = None
    try:
        cs = _get_container_spec()
        internal = int((cs.get("ports") or [])[0].get("containerPort"))
    except Exception:
        pass

    # Resolution order: declared port -> "8080" -> first mapping -> none.
    # In the fallbacks, `internal` is overwritten to match the mapping used.
    if internal and str(internal) in pm:
        public = str(pm[str(internal)])
    elif "8080" in pm:
        internal, public = 8080, str(pm["8080"])
    elif pm:
        k = next(iter(pm.keys()))
        internal, public = int(k), str(pm[k])
    else:
        public = None

    # candidate paths
    # (the cached route comes first; blank entries are dropped, duplicates are not)
    healths = [(_INST.get("healthRoute") or "").strip(), "/health", "/ping", "/healthz", "/v1/models"]
    healths = [p for p in healths if p]
    predicts = [(_INST.get("predictRoute") or "").strip(), "/generate", "/predict", "/predictions",
                "/v1/chat/completions", "/v1/models/model:predict"]
    predicts = [p for p in predicts if p]

    results = {"podId": pid, "ip": ip, "internalPort": internal, "publicPort": public, "probes": []}

    # base URLs (IP and proxy)
    bases = []
    if ip and public:
        bases.append(f"http://{ip}:{public}")
    if internal:
        bases.append(f"https://{pid}-{internal}.proxy.runpod.net")

    # _probe is not defined in this part of the file; from its use here it
    # returns (status code, elapsed ms, body snippet) - TODO confirm.

    # probe health
    for base in bases:
        for hp in healths:
            code, ms, snippet = _probe("GET", f"{base}{hp}")
            _log_status(f"DEBUG_HEALTH base={base} path={hp} code={code} ms={ms}")
            results["probes"].append({"base": base, "path": hp, "kind": "health", "code": code, "ms": ms, "snippet": snippet})

    # probe predict (HEAD)
    for base in bases:
        for pp in predicts:
            code, ms, _ = _probe("HEAD", f"{base}{pp}")
            _log_status(f"DEBUG_PREDICT base={base} path={pp} code={code} ms={ms}")
            results["probes"].append({"base": base, "path": pp, "kind": "predict", "code": code, "ms": ms})

    return JSONResponse(results, 200)

# ---------------------------------------------------------------------
# Helper functions for containerSpec parsing
# ---------------------------------------------------------------------
def _get_container_spec():
    """
    Return the container spec dict of the current deployment blob.

    Uses the blob cached in ``_INST``; when none is cached, the local blob
    file is loaded and ingested first. The spec is taken from
    ``supportedActions.deploy.containerSpec`` or, failing that, from
    ``container``.

    Always returns a dict. ``{}`` is returned when no blob is available or
    the spec is missing or not a dict, so callers can keep using ``.get``
    and report "port not resolvable" instead of failing with an
    AttributeError on None.
    """
    blob = _INST.get("blob")
    if not blob:
        lb = _load_local_blob()
        if lb:
            _ingest_blob(lb)
            blob = lb
    if not isinstance(blob, dict):
        return {}
    deploy = (blob.get("supportedActions") or {}).get("deploy") or {}
    spec = deploy.get("containerSpec") or blob.get("container") or {}
    return spec if isinstance(spec, dict) else {}

def _get_port_and_proto(cspec: dict):
    """
    Extract ``(internal_port, protocol)`` from the first ``ports`` entry.

    ``internal_port`` is an int when ``containerPort`` is made of digits
    only, else None. ``protocol`` is the lower-cased ``protocol`` value, or
    None when absent or empty. Any malformed input yields ``(None, None)``.
    """
    try:
        port_specs = cspec.get("ports") or []
        if not (isinstance(port_specs, list) and port_specs):
            return (None, None)
        first = port_specs[0]
        raw_port = first.get("containerPort")
        protocol = (first.get("protocol") or "").lower() or None
        port = int(raw_port) if str(raw_port).isdigit() else None
        return (port, protocol)
    except Exception:
        return (None, None)

def _build_proxy_url(route: str) -> str:
    """
    Build the RunPod proxy URL for ``route`` on the cached pod.

    The host is ``<podId>-<internal port>.proxy.runpod.net``, with the port
    taken from the first ``ports`` entry of the container spec.

    Raises HTTPException(400) when no pod id is cached or the internal port
    cannot be resolved.
    """
    pod_id = (_INST.get("podId") or "").strip()
    if pod_id == "":
        raise HTTPException(status_code=400, detail="No podId in cache. Create/Start the instance first.")

    port_and_proto = _get_port_and_proto(_get_container_spec())
    internal_port = port_and_proto[0]
    if not internal_port:
        raise HTTPException(status_code=400, detail="Cannot resolve internal port from containerSpec.ports[].")

    return f"https://{pod_id}-{internal_port}.proxy.runpod.net{route}"

def _build_ip_url(route: str) -> str:
    """
    Build the direct ``http://<ip>:<port><route>`` URL from the cached endpoint.

    Raises HTTPException(400) when the cached ip or port is missing.
    """
    host = _INST.get("ip")
    public_port = _INST.get("port")
    if host and public_port:
        return f"http://{host}:{public_port}{route}"
    raise HTTPException(status_code=400, detail="No running instance (ip/port missing).")

def _resolve_infer_url(route: str) -> str:
    """
    Pick the URL that inference requests for ``route`` should be sent to.

    The direct IP endpoint is used when both ip and port are cached;
    otherwise the RunPod proxy URL is used. The container spec is consulted
    only by the proxy path, so a missing or unreadable blob cannot block
    the direct IP path.

    Raises HTTPException(400) (from ``_build_proxy_url``) when neither
    endpoint can be resolved.
    """
    try:
        if _INST.get("ip") and _INST.get("port"):
            url = _build_ip_url(route)
            _job_log("compute", f"[MW] Using IP path: {url}")
            return url
    except HTTPException:
        # the cached endpoint vanished between the check and the build:
        # fall through to the proxy
        pass

    url = _build_proxy_url(route)
    _job_log("compute", f"[MW] Using Proxy path: {url}")
    return url

# ---------------------------------------------------------------------
# /api/infer — updated to use resolver
# ---------------------------------------------------------------------
@router.post("/api/infer")
async def api_infer(req: Request):
    """
    Forward the request's JSON body to the pod's predict route.

    Responds 428 when no predict route is cached. The upstream reply is
    relayed with its status code: as JSON when its content-type contains
    "application/json", otherwise as HTML text. An HTTPException raised
    while resolving the URL is converted to a JSON error with its own
    status; any other failure becomes a 502.
    """
    predict_route = _INST.get("predictRoute")
    if not predict_route:
        return JSONResponse(
            {"error": "predictRoute unresolved; check ROUTE_PROBE logs and HEALTH results."},
            status_code=428
        )

    request_body = await req.json()
    try:
        target = _resolve_infer_url(predict_route)
        upstream = requests.post(target, json=request_body, timeout=120)
        content_type = (upstream.headers.get("content-type") or "").lower()
        if "application/json" not in content_type:
            return HTMLResponse(status_code=upstream.status_code, content=upstream.text)
        return JSONResponse(status_code=upstream.status_code, content=upstream.json())
    except HTTPException as he:
        return JSONResponse({"error": he.detail}, status_code=he.status_code)
    except Exception as e:
        return JSONResponse({"error": f"inference request failed: {e}"}, status_code=502)

# ---------------------------------------------------------------------
# /api/middleware/infer — middleware prompt routing and normalization
# ---------------------------------------------------------------------
@router.post("/api/middleware/infer")
async def api_middleware_infer(req: Request):
    """Send a text prompt to the pod through the RunPod proxy and normalize the reply.

    Steps, in order:
        1. Default ``_INST["predictRoute"]`` to ``/predict`` when unset.
        2. Read the pod id from ``_INST`` (trying ``_load_state()`` once if
           it is empty) and the internal port from the container spec.
        3. When the container env has ``HF_TASK=text-to-image`` and a
           ``MODEL_ID``/``HF_MODEL_ID``, switch the route to
           ``/predictions/<model_id>`` and store it in ``_INST``.
        4. Read ``prompt`` from the JSON request body.
        5. For text-to-image, post one fixed-parameter payload and return
           its result; otherwise try several payload shapes until one gets
           an OK response.

    Responses:
        * 400 for a missing pod id, unresolved internal port, invalid JSON
          body, or a missing/blank ``prompt``.
        * ``{"image_b64", "timings"}`` or ``{"output": ...}`` with the
          upstream status on success.
        * ``{"error": ...}`` with the upstream status when the model
          answered but never with an OK status.
        * 502 (text-to-image) or 504 (fallback loop) when no response was
          obtained at all.
    """
    # Always ensure predictRoute exists
    route = _INST.get("predictRoute") or "/predict"
    _INST["predictRoute"] = route

    # Build deterministic proxy URL instead of waiting on readiness
    pid = (_INST.get("podId") or "").strip()
    if not pid:
        try:
            _load_state()
            pid = (_INST.get("podId") or "").strip()
        except Exception as e:
            # Best-effort reload: keep going, but leave a trace of the failure.
            _log_status(f"STATE_LOAD_ERR {e}")
    if not pid:
        return JSONResponse({"error": "no podId yet (create/start first)"}, status_code=400)

    cspec = _get_container_spec()
    internal, _ = _get_port_and_proto(cspec)
    if not internal:
        return JSONResponse({"error": "cannot resolve internal port from blob"}, status_code=400)

    # Routing override for HF_TASK=text-to-image
    env = (cspec.get("env") or [])
    kv = {e.get("name"): e.get("value") for e in env if isinstance(e, dict) and e.get("name")}
    hf_task = (kv.get("HF_TASK") or "").strip().lower()
    model_id = (kv.get("MODEL_ID") or kv.get("HF_MODEL_ID") or "").strip()

    if hf_task == "text-to-image" and model_id:
        route = f"/predictions/{model_id}"
        _INST["predictRoute"] = route

    base = f"https://{pid}-{internal}.proxy.runpod.net"
    url = f"{base}{route}"
    _log_status(f"PROMPT_ENDPOINT {url}")
    _job_log("compute", f"[MW] Forwarding infer to {url}")

    # A malformed or non-object body is a client error, not a server crash.
    try:
        payload = await req.json()
    except ValueError as e:
        return JSONResponse({"error": f"invalid JSON body: {e}"}, status_code=400)
    prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not isinstance(prompt, str) or not prompt.strip():
        return JSONResponse({"error": "Missing 'prompt' in request body."}, status_code=400)

    # Canonical text-to-image payload: a single attempt, no fallbacks.
    if hf_task == "text-to-image":
        body = {
            "inputs": prompt,
            "parameters": {
                "num_inference_steps": 30,
                "guidance_scale": 7.5,
                "width": 1024,
                "height": 1024
            }
        }
        try:
            rp = requests.post(url, json=body, timeout=120)
            _log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
            if rp.ok:
                ct = (rp.headers.get("content-type") or "").lower()
                data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
                if isinstance(data, dict) and "image_b64" in data:
                    return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, rp.status_code)
                return JSONResponse(data, rp.status_code)
            return JSONResponse({"error": rp.text[:400]}, status_code=rp.status_code)
        except Exception as e:
            _log_status(f"PREDICT_ERR {e}")
            return JSONResponse({"error": f"inference request failed: {e}"}, status_code=502)

    # HF text-classification shim. The spec snapshot taken above is reused so
    # the image check matches the spec the URL was built from.
    img = (cspec.get("imageUri") or "").lower()
    if "huggingface-pytorch-inference" in img:
        payload = {"instances": [prompt]}

    # Non-image fallback: try each payload shape until one gets an OK response.
    bodies = [payload, {"prompt": prompt}, {"text": prompt}, {"inputs": prompt}, {"input": prompt}]
    last_resp = None  # most recent response received, OK or not
    for body in bodies:
        try:
            rp = requests.post(url, json=body, timeout=120)
            last_resp = rp
            _log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
            if rp.ok:
                ct = (rp.headers.get("content-type") or "").lower()
                data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
                if isinstance(data, dict):
                    if "image_b64" in data:
                        return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, rp.status_code)
                    if isinstance(data.get("output"), str):
                        return JSONResponse({"output": data["output"]}, rp.status_code)
                    if "_raw" in data:
                        return JSONResponse({"output": data["_raw"]}, rp.status_code)
                    return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, rp.status_code)
                return JSONResponse({"output": str(data)}, rp.status_code)
        except Exception as e:
            _log_status(f"PREDICT_ERR {e}")

    # Fallthrough: show last response or generic error
    if last_resp is None:
        return JSONResponse({"error": "no response from model"}, status_code=504)
    return JSONResponse({"error": last_resp.text[:400]}, status_code=last_resp.status_code)
# ---------------------------------------------------------------------
# Job progress + callback routes
# ---------------------------------------------------------------------
@router.post("/api/job/ready")
async def api_job_ready(req: Request):
    """Acknowledge a job-ready callback; the request itself is not read."""
    ack = {"ok": True}
    return JSONResponse(content=ack)

@router.post("/api/job/progress")
async def api_job_progress(req: Request):
    """Append a progress message to a job's log.

    Reads ``job_id`` (default ``"unknown"``) and ``message`` from the JSON
    body and records the message via ``_job_log``; an empty/missing message
    is logged as ``"progress"``.

    Responses:
        * ``{"ok": true}`` once the message is logged.
        * 400 when the body is not valid JSON or is not a JSON object.
    """
    # Reject bad callbacks with 400 rather than crashing with a 500.
    try:
        data = await req.json()
    except ValueError as e:
        return JSONResponse({"error": f"invalid JSON body: {e}"}, status_code=400)
    if not isinstance(data, dict):
        return JSONResponse({"error": "expected a JSON object"}, status_code=400)

    job_id = str(data.get("job_id", "unknown"))
    msg = data.get("message", "")
    _job_log(job_id, msg or "progress")
    return JSONResponse({"ok": True})

@router.post("/api/job/done")
async def api_job_done(req: Request):
    """Mark a job as done and store its result.

    Reads ``job_id`` (default ``"unknown"``), ``image_b64`` and ``timings``
    from the JSON body, creates the job record in ``_JOBS`` if needed, sets
    its status to ``"done"`` and logs ``"completed"``.

    Responses:
        * ``{"ok": true}`` once the record is updated.
        * 400 when the body is not valid JSON or is not a JSON object.
    """
    # Reject bad callbacks with 400 rather than crashing with a 500.
    try:
        data = await req.json()
    except ValueError as e:
        return JSONResponse({"error": f"invalid JSON body: {e}"}, status_code=400)
    if not isinstance(data, dict):
        return JSONResponse({"error": "expected a JSON object"}, status_code=400)

    job_id = str(data.get("job_id", "unknown"))
    j = _JOBS.setdefault(job_id, {"status": "created", "logs": [], "image_b64": None, "timings": {}})
    j["status"] = "done"
    j["image_b64"] = data.get("image_b64")
    j["timings"] = data.get("timings", {})
    _job_log(job_id, "completed")
    return JSONResponse({"ok": True})

@router.get("/api/job/status")
def api_job_status(job_id: str):
    """Return the stored record for *job_id*, or ``{"status": "unknown"}`` if none exists."""
    if job_id in _JOBS:
        record = _JOBS[job_id]
    else:
        record = {"status": "unknown"}
    return JSONResponse(content=record)