Theflame47 committed on
Commit
9dd3a51
·
verified ·
1 Parent(s): e1eefef

Update Deployment_UI_BE.py

Browse files
Files changed (1) hide show
  1. Deployment_UI_BE.py +838 -239
Deployment_UI_BE.py CHANGED
@@ -1,244 +1,843 @@
1
- # Deployment_UI.py UI-only (binds to /api/* provided by Deployment_UI_BE.py)
2
- from fastapi import APIRouter
3
- from fastapi.responses import HTMLResponse
4
 
5
  router = APIRouter()
6
 
7
- @router.get("/Deployment_UI", response_class=HTMLResponse)
8
- def deployment_ui_page():
9
- html_head = """
10
- <!doctype html>
11
- <html><head><meta charset="utf-8"/><title>Deployment UI</title>
12
- <style>
13
- body { font-family: system-ui, sans-serif; margin: 24px; display: flex; flex-direction: column; height: 90vh; }
14
- a.button { display:inline-block; padding:8px 14px; margin-bottom:10px; border:1px solid #ccc; border-radius:6px; text-decoration:none; color:#000; background:#f7f7f7; }
15
- a.button:hover { background:#eee; }
16
- #chat-window { flex:1; border:1px solid #ccc; border-radius:8px; padding:12px; overflow-y:auto; background:#fafafa; margin-bottom:10px; }
17
- #input-area { display:flex; gap:8px; margin-bottom:10px; }
18
- #user-input { flex:1; padding:10px; border:1px solid #ccc; border-radius:8px; }
19
- #send-btn { padding:10px 16px; border:none; border-radius:8px; background:#0078d7; color:#fff; cursor:pointer; }
20
- #send-btn:hover { background:#005fa3; }
21
- #control-bar { display:flex; gap:8px; margin:0 0 10px; }
22
- #start-btn, #stop-btn, #endall-btn { padding:8px 14px; border:1px solid #ccc; border-radius:8px; background:#f7f7f7; cursor:pointer; }
23
- #start-btn:hover, #stop-btn:hover, #endall-btn:hover { background:#eee; }
24
- #log-toggle { cursor:pointer; font-size:14px; color:#0078d7; text-decoration:underline; margin-bottom:6px; align-self:flex-start; }
25
- #logs { border:1px dashed #bbb; border-radius:8px; padding:10px; font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace; font-size:12px; background:#fff; max-height:220px; overflow-y:auto; transition:max-height .25s ease, opacity .25s ease; opacity:1; }
26
- #logs.collapsed { max-height:0; padding:0; opacity:0; overflow:hidden; border:none; }
27
- .log-line { margin:0 0 6px; white-space:pre-wrap; }
28
- .log-raw { color:#333; } .log-ok { color:#2d7c2d; } .log-err { color:#9d1c1c; }
29
- #results { border:1px solid #ddd; border-radius:8px; padding:10px; background:#fff; margin-top:10px; }
30
- .result { margin:8px 0; }
31
- .thumb { max-width: 420px; border:1px solid #ccc; border-radius:6px; }
32
- .meta { font-size:12px; color:#555; margin-top:4px; }
33
- .download { display:inline-block; margin-top:6px; }
34
- /* loading mask */
35
- #boot-mask{position:fixed;inset:0;background:rgba(255,255,255,.85);display:none;
36
- align-items:center;justify-content:center;flex-direction:column;z-index:9999}
37
- #boot-msg{margin-top:12px;color:#000;font-weight:600}
38
- .spinner{width:36px;height:36px;border:3px solid #ddd;border-top-color:#0078d7;border-radius:50%;
39
- animation:spin 1s linear infinite}
40
- @keyframes spin{to{transform:rotate(360deg)}}
41
- </style>
42
- </head>
43
- <body>
44
- <a href="/trythis" class="button">← Back</a>
45
- <div id="chat-window"></div>
46
- <div id="input-area">
47
- <input type="text" id="user-input" placeholder="Describe an image to generate…" />
48
- <button id="send-btn">Send</button>
49
- </div>
50
- <div id="control-bar">
51
- <button id="start-btn">Create inst.</button>
52
- <button id="stop-btn">Stop</button>
53
- <button id="endall-btn">End All</button>
54
- </div>
55
- <div id="log-toggle">Hide Logs</div>
56
- <div id="logs" aria-live="polite"></div>
57
- <div id="results"></div>
58
- <div id="boot-mask" aria-live="polite" role="status">
59
- <div class="spinner"></div>
60
- <div id="boot-msg">Starting…</div>
61
- </div>
62
- <script>
63
- """
64
- html_tail = """
65
- const logs = document.getElementById('logs');
66
- const toggleBtn = document.getElementById('log-toggle');
67
- const startBtn = document.getElementById('start-btn');
68
- const stopBtn = document.getElementById('stop-btn');
69
- const endAllBtn = document.getElementById('endall-btn');
70
- const sendBtn = document.getElementById('send-btn');
71
- const userInput = document.getElementById('user-input');
72
- const results = document.getElementById('results');
73
- const chat = document.getElementById('chat-window');
74
- const bootMask = document.getElementById('boot-mask');
75
- const bootMsg = document.getElementById('boot-msg');
76
- let running = false, ready = false;
77
- let MODEL_BASE = null, PREDICT_ROUTE = null;
78
-
79
- // NEW: auto-ingest blob from FE query param on page load
80
- (async () => {
81
- try {
82
- const u = new URL(window.location.href);
83
- const blobUrl = u.searchParams.get('blob_url') || '/modelblob.json';
84
- const r = await fetch(`/api/ingest/from_landing?blob_url=${encodeURIComponent(blobUrl)}`, { method: 'POST' });
85
- const t = await r.text();
86
- console.log('ingest-from-landing', r.status, t.slice(0,200));
87
- } catch (e) {
88
- console.warn('ingest bootstrap failed:', e);
89
- }
90
- })();
91
-
92
- function showBoot(msg){ bootMsg.textContent = msg || 'Starting…'; bootMask.style.display = 'flex'; }
93
- function setBoot(msg){ bootMsg.textContent = msg || 'Working…'; }
94
- function hideBoot(){ bootMask.style.display = 'none'; }
95
- function log(msg, cls='log-raw') {
96
- const line = document.createElement('div');
97
- line.className = 'log-line ' + cls;
98
- line.textContent = String(msg);
99
- logs.appendChild(line);
100
- logs.scrollTop = logs.scrollHeight;
101
- }
102
- toggleBtn.addEventListener('click', () => {
103
- const collapsed = logs.classList.toggle('collapsed');
104
- toggleBtn.textContent = collapsed ? "Show Logs" : "Hide Logs";
105
- });
106
- async function post(path, payload) {
107
- const r = await fetch(path, { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify(payload || {}) });
108
- const text = await r.text();
109
- log(text, r.ok ? 'log-ok' : 'log-err');
110
- try { return { ok: r.ok, json: JSON.parse(text) }; } catch { return { ok: r.ok, json: { _raw: text } }; }
111
- }
112
- async function del(path) {
113
- const r = await fetch(path, { method: 'DELETE' });
114
- const text = await r.text();
115
- log(text, r.ok ? 'log-ok' : 'log-err');
116
- try { return { ok: r.ok, json: JSON.parse(text) }; } catch { return { ok: r.ok, json: { _raw: text } }; }
117
- }
118
- // create + wait for true readiness (isReady)
119
- async function begin() {
120
- if (running) return;
121
- running = true; ready = false;
122
- showBoot('Creating instance…');
123
- const create = await post('/api/compute/create_instance');
124
- if (!create.ok) { running = false; hideBoot(); return; }
125
- const ok = await ensureReady(true);
126
- hideBoot();
127
- }
128
- async function stopOnce() {
129
- await del('/api/compute/delete_instance');
130
- hideBoot(); setBoot(''); ready = false; running = false;
131
- }
132
- async function endAll() {
133
- await stopOnce();
134
- MODEL_BASE = null; PREDICT_ROUTE = null;
135
- }
136
- function appendResult(job_id, b64, timings) {
137
- const div = document.createElement('div'); div.className = 'result';
138
- const img = document.createElement('img'); img.className = 'thumb'; img.src = 'data:image/png;base64,' + b64;
139
- const a = document.createElement('a'); a.className = 'download'; a.textContent = 'Download'; a.href = img.src; a.download = `job_${job_id}.png`;
140
- const meta = document.createElement('div'); meta.className = 'meta'; meta.textContent = `job ${job_id} | ${JSON.stringify(timings || {})}`;
141
- div.append(img, document.createTextNode(' '), a, meta); results.prepend(div);
142
- }
143
- // chat helpers
144
- function escapeHtml(s){ return String(s).replace(/[&<>"']/g, m => ({ '&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;' }[m])); }
145
- function addBlock(sender, html) {
146
- const wrap = document.createElement('div');
147
- wrap.style.margin = '8px 0';
148
- wrap.innerHTML = `<div class="meta"><strong>${sender}:</strong></div><div>${html}</div>`;
149
- chat.appendChild(wrap);
150
- chat.scrollTop = chat.scrollHeight;
151
- return wrap;
152
- }
153
- function addUser(text) { return addBlock('You', `<div>${escapeHtml(text)}</div>`); }
154
- function addModel(text) { return addBlock('Model', `<div>${escapeHtml(text)}</div>`); }
155
- function addModelImg(b64){
156
- const wrap = addBlock('Model', '');
157
- const img = document.createElement('img');
158
- img.className = 'thumb';
159
- img.src = 'data:image/png;base64,' + b64;
160
- wrap.lastElementChild.appendChild(img);
161
- return wrap;
162
- }
163
- function addLoader() {
164
- const wrap = addBlock('Model', `<div id="spinner" style="display:inline-block">Loading…</div>`);
165
- return wrap;
166
- }
167
- function looksBase64(s){ return /^[A-Za-z0-9+/=\\s]+$/.test(s||'') && String(s||'').length > 100; }
168
- // poll for true readiness provided by BE via cachedState.isReady
169
- async function ensureReady(verbose=false) {
170
- for (let i = 0; i < 60; i++) {
171
- const r = await fetch('/api/compute/wait_instance');
172
- const j = await r.json();
173
- const cs = j.cachedState || {};
174
- const status = (cs.status || '').toUpperCase();
175
- if (verbose) {
176
- if (cs.isReady === true) setBoot('Ready');
177
- else if (status === 'RUNNING' && cs.base) setBoot('Warming model…');
178
- else setBoot(`Status: ${status || '…'}`);
179
- }
180
- if (cs.base && cs.predictRoute && cs.isReady === true) {
181
- MODEL_BASE = cs.base;
182
- PREDICT_ROUTE = cs.predictRoute.startsWith('/') ? cs.predictRoute : `/${cs.predictRoute}`;
183
- log(`PROMPT_ENDPOINT ${MODEL_BASE}${PREDICT_ROUTE}`, 'log-ok');
184
- ready = true;
185
- return true;
186
- }
187
- log(`READY_POLL base=${cs.base ? 'yes' : 'no'} route=${cs.predictRoute || ''} isReady=${cs.isReady === true} status=${status}`, 'log-raw');
188
- await new Promise(res => setTimeout(res, 1000));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
- ready = false;
191
- return false;
192
- }
193
- // backend hop for prompts
194
- async function sendViaBackend(prompt) {
195
- const r = await fetch('/api/middleware/infer', {
196
- method: 'POST',
197
- headers: { 'content-type': 'application/json' },
198
- body: JSON.stringify({ prompt })
199
- });
200
- const text = await r.text();
201
- log(`POST /api/middleware/infer ${r.status}`, r.ok ? 'log-ok' : 'log-err');
202
- try { return { ok: r.ok, json: JSON.parse(text) }; } catch { return { ok: r.ok, json: { _raw: text } }; }
203
- }
204
- async function sendMessage() {
205
- const prompt = (userInput.value || '').trim();
206
- if (!prompt) return;
207
- if (!ready) { addModel('Instance not ready yet. Try Start, or wait a moment.'); return; }
208
- addUser(prompt);
209
- const loader = addLoader();
210
- userInput.disabled = true;
211
- try {
212
- const { ok, json } = await sendViaBackend(prompt);
213
- loader.remove();
214
- if (!ok && json?.error) { addModel(`Error: ${json.error}`); return; }
215
- if (json?.image_b64) {
216
- addModelImg(json.image_b64);
217
- if (json.timings) appendResult(String(Date.now()), json.image_b64, json.timings);
218
- } else if (typeof json?.output === 'string' && looksBase64(json.output)) {
219
- addModelImg(json.output);
220
- } else if (typeof json?.output === 'string') {
221
- addModel(json.output);
222
- } else if (json?._raw) {
223
- addModel(json._raw);
224
- } else {
225
- addModel(JSON.stringify(json || {}, null, 2));
226
- }
227
- } catch (e) {
228
- loader.remove();
229
- addModel(`Error: ${String(e)}`);
230
- } finally {
231
- userInput.disabled = false; userInput.value = '';
232
- userInput.focus();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
- }
235
- document.getElementById('send-btn').addEventListener('click', sendMessage);
236
- document.getElementById('user-input').addEventListener('keypress', (e) => { if (e.key === 'Enter') { e.preventDefault(); sendMessage(); } });
237
- document.getElementById('start-btn').addEventListener('click', begin);
238
- document.getElementById('stop-btn').addEventListener('click', stopOnce);
239
- document.getElementById('endall-btn').addEventListener('click', endAll);
240
- (function init(){})();
241
- </script>
242
- </body></html>
243
- """
244
- return HTMLResponse(content=html_head + html_tail)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Request, Form
2
+ from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
3
+ import os, json, requests, time, re, pathlib
4
 
5
  router = APIRouter()
6
 
7
+ # ---------------------------------------------------------------------
8
+ # In-memory job + instance store
9
+ # ---------------------------------------------------------------------
10
# Process-wide job ledger: job_id -> {"status", "logs", "image_b64", "timings"}.
_JOBS = {}

# Cached state of the single managed compute instance; mutated in place by the
# ingest / create / status handlers elsewhere in this module.
_INST = {
    "podId": "",
    "status": "",
    "ip": "",
    "port": "",
    "blob": None,
    "model_id": "",
    "container_image_hint": "",
    "predictRoute": None,
    "healthRoute": None,
    "readinessRoute": None,
    "livenessRoute": None,
}


def _now_ms():
    """Current wall-clock time in whole milliseconds."""
    return int(time.time() * 1000)


def _job_log(job_id, msg):
    """Append a timestamped message to a job's log, creating the entry on first use."""
    entry = _JOBS.setdefault(
        job_id,
        {"status": "created", "logs": [], "image_b64": None, "timings": {}},
    )
    entry["logs"].append({"t": _now_ms(), "msg": msg})
    print(f"[{job_id}] {msg}", flush=True)


# Convenience wrappers that tag messages for the shared "compute" job.
def _log_create(msg):
    _job_log("compute", f"[CREATE] {msg}")


def _log_status(msg):
    _job_log("compute", f"[STATUS] {msg}")


def _log_delete(msg):
    _job_log("compute", f"[DELETE] {msg}")


def _log_id(prefix, pid):
    _job_log("compute", f"{prefix} ID: {pid}")
30
+
31
+ # --- local blob ingest (landing page only) ---
32
# Path of the locally stored deployment blob; overridable via env var.
_LOCAL_BLOB_PATH = os.getenv("MODEL_BLOB_PATH", "model_blob.json")


def _load_local_blob():
    """Parse the local blob JSON file; return None when absent or unreadable."""
    try:
        if not os.path.exists(_LOCAL_BLOB_PATH):
            return None
        with open(_LOCAL_BLOB_PATH, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception as exc:
        _job_log("compute", f"ERROR LocalBlobLoad: {exc}")
        return None
42
+
43
def _ingest_blob(parsed: dict, model_id_hint: str = "", container_image_hint: str = ""):
    """Cache a deployment blob in _INST and pull route hints from its container spec.

    Explicit routes in the spec win; otherwise routes inferred from the image
    URI fill in any gaps. Raises HTTPException(400) for a non-dict blob.
    Returns True on success.
    """
    if not isinstance(parsed, dict):
        raise HTTPException(400, "Invalid blob (expected JSON object).")

    _INST["blob"] = parsed
    _INST["model_id"] = model_id_hint or ""
    _INST["container_image_hint"] = container_image_hint or ""

    deploy = (parsed.get("supportedActions") or {}).get("deploy") or {}
    spec = (deploy.get("containerSpec") or parsed.get("container") or {}) or {}

    # Explicit routes declared in the blob take priority.
    for route_key in ("predictRoute", "healthRoute", "readinessRoute", "livenessRoute"):
        value = spec.get(route_key)
        if isinstance(value, str) and value.strip():
            _INST[route_key] = value.strip()

    # Fall back to image-derived defaults without overriding explicit routes.
    inferred_predict, inferred_health = _infer_routes_from_image(
        (spec.get("imageUri") or "").strip().lower())
    if inferred_predict and not _INST.get("predictRoute"):
        _INST["predictRoute"] = inferred_predict
    if inferred_health and not _INST.get("healthRoute"):
        _INST["healthRoute"] = inferred_health

    return True
69
+
70
+ # ---------------------------------------------------------------------
71
+ # Disk persistence for recovery
72
+ # ---------------------------------------------------------------------
73
+ _STATE_PATH = "/tmp/pod_state.json"
74
+
75
+ def _save_state():
76
+ try:
77
+ pathlib.Path("/tmp").mkdir(parents=True, exist_ok=True)
78
+ with open(_STATE_PATH, "w") as f:
79
+ json.dump({k:_INST.get(k,"") for k in
80
+ ("podId","status","ip","port")}, f)
81
+ except Exception as e:
82
+ _job_log("compute", f"ERROR SaveState: {e}")
83
+
84
+ def _load_state():
85
+ try:
86
+ if os.path.exists(_STATE_PATH):
87
+ with open(_STATE_PATH) as f: d = json.load(f)
88
+ for k in ("podId","status","ip","port"):
89
+ if k in d: _INST[k]=d[k]
90
+ except Exception as e:
91
+ _job_log("compute", f"ERROR LoadState: {e}")
92
+
93
+ # ---------------------------------------------------------------------
94
+ # RunPod helpers
95
+ # ---------------------------------------------------------------------
96
+ _RP_BASE = "https://rest.runpod.io/v1"
97
+
98
+ def _rp_headers():
99
+ key=os.getenv("RunPod","").strip()
100
+ if not key:
101
+ raise HTTPException(500,"Missing RunPod API key (env var 'RunPod').")
102
+ return {"Authorization":f"Bearer {key}","Content-Type":"application/json"}
103
+
104
+ def _as_json(r):
105
+ c=(r.headers.get("content-type") or "").lower()
106
+ if "json" in c:
107
+ try: return r.json()
108
+ except Exception: return {"_raw":r.text}
109
+ return {"_raw":r.text}
110
+
111
+ # ---------------------------------------------------------------------
112
+ # Probes and route discovery (new)
113
+ # ---------------------------------------------------------------------
114
def _probe(method: str, url: str, timeout=5):
    """Issue one HTTP request; return (status_code, elapsed_ms, body_snippet).

    Any failure (network error, missing client, timeout) yields status -1 with
    the exception text in place of the body snippet.
    """
    started = time.time()
    try:
        resp = requests.request(method, url, timeout=timeout)
        elapsed = int((time.time() - started) * 1000)
        snippet = resp.text[:200] if resp.text else ""
        return resp.status_code, elapsed, snippet
    except Exception as exc:
        return -1, int((time.time() - started) * 1000), str(exc)


# Candidate health endpoints exposed by common serving containers.
_HEALTH_PATHS = ["/health", "/ping", "/healthz", "/v1/models"]
# Candidate inference routes, probed in order by _discover_route.
_POSSIBLE_ROUTES = ["/generate", "/predict", "/predictions",
                    "/v1/chat/completions", "/v1/models/model:predict"]
126
+
127
def _infer_routes_from_image(image_uri: str):
    """Guess (predictRoute, healthRoute) from a known serving-container image URI.

    Returns (None, None) when the image is not recognized.
    """
    lowered = (image_uri or "").lower()
    known = (
        ("vllm-serve", ("/generate", "/ping")),
        ("hf-inference-toolkit", ("/predict", "/ping")),
        ("huggingface-pytorch-inference", ("/predict", "/ping")),
    )
    for marker, routes in known:
        if marker in lowered:
            return routes
    return (None, None)
134
+
135
def _discover_route(base_url: str):
    """Probe candidate inference paths on base_url; return the first that exists.

    A HEAD response of 200/204/405 counts as "present" (405 = path exists but
    HEAD is not allowed). Returns None when nothing matches.
    """
    for candidate in _POSSIBLE_ROUTES:
        status, elapsed, _ = _probe("HEAD", f"{base_url}{candidate}")
        _log_status(f"ROUTE_PROBE path={candidate} code={status} ms={elapsed}")
        if status in (200, 204, 405):
            return candidate
    return None
142
+
143
+ # ---------------------------------------------------------------------
144
+ # Blob ingest via Model Blob page JSON (with blob_url override)
145
+ # ---------------------------------------------------------------------
146
+ _HF_SPACE_PORT = os.getenv("PORT", "7860")
147
+ _LOCAL_BASE = f"http://127.0.0.1:{_HF_SPACE_PORT}"
148
+
149
+ def _fetch_url(u: str):
150
+ try:
151
+ r = requests.get(u, timeout=8)
152
+ if r.ok:
153
+ return r.json()
154
+ _job_log("compute", f"ERROR BlobFetch code={r.status_code} url={u} body={r.text[:200]}")
155
+ except Exception as e:
156
+ _job_log("compute", f"ERROR BlobFetch url={u}: {e}")
157
+ return None
158
+
159
+ def _fetch_blob_from_page():
160
+ return _fetch_url(f"{_LOCAL_BASE}/modelblob.json")
161
+
162
+ @router.post("/api/ingest/from_landing")
163
+ def api_ingest_from_landing(blob_url: str | None = None):
164
+ """
165
+ Ingest the deployment blob for downstream use.
166
+ Source priority:
167
+ 1) explicit blob_url provided by FE (e.g., /modelblob.json)
168
+ 2) local modelblob page JSON outlet
169
+ """
170
+ parsed = _fetch_url(blob_url) if blob_url else _fetch_blob_from_page()
171
+ if not parsed:
172
+ return JSONResponse({"error": "Blob not available"}, 404)
173
+ _ingest_blob(parsed, model_id_hint="", container_image_hint="")
174
+ return JSONResponse({"ok": True, "source": blob_url or "/modelblob.json"})
175
+
176
+ # (Optional compatibility: UI posting to /Deployment_UI; accepts blob_url via query)
177
+ @router.post("/Deployment_UI")
178
+ async def deployment_ui_ingest(request: Request,
179
+ model_id: str = Form(""),
180
+ container_image: str = Form(""),
181
+ blob: str = Form("")):
182
+ """
183
+ Legacy entry used by the Deployment UI page.
184
+ Prefers blob_url from query string; falls back to the modelblob page JSON.
185
+ """
186
+ blob_url = request.query_params.get("blob_url")
187
+ parsed = _fetch_url(blob_url) if blob_url else _fetch_blob_from_page()
188
+ if not parsed:
189
+ return HTMLResponse("<pre>Missing blob (no /modelblob.json and no blob_url)</pre>", 400)
190
+ _ingest_blob(parsed, model_id_hint=model_id, container_image_hint=container_image)
191
+ return RedirectResponse("/Deployment_UI", 303)
192
+
193
+ # ---------------------------------------------------------------------
194
+ # Create instance
195
+ # ---------------------------------------------------------------------
196
+ @router.post("/api/compute/create_instance")
197
+ async def api_create_instance(req: Request):
198
+ # Ensure blob is present (lazy-load from landing file if needed)
199
+ if not _INST.get("blob"):
200
+ lb = _load_local_blob()
201
+ if lb:
202
+ _ingest_blob(lb)
203
+ blob = _INST.get("blob")
204
+ if not blob:
205
+ return JSONResponse({"error": "No deployment blob provided."}, 400)
206
+
207
+ c = ((blob.get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
208
+ or blob.get("container")
209
+ if not isinstance(c, dict) or not c:
210
+ return JSONResponse({"error": "Blob missing containerSpec."}, 400)
211
+
212
+ image = (c.get("imageUri") or "").strip()
213
+ if not image:
214
+ return JSONResponse({"error": "containerSpec.imageUri missing."}, 400)
215
+ _log_create(f"imageName: {image}")
216
+
217
+ env_list = c.get("env") or []
218
+ env_obj = {e.get("name"): e.get("value") for e in env_list
219
+ if isinstance(e, dict) and e.get("name")}
220
+ _log_create(f"env: {json.dumps(env_obj, ensure_ascii=False)}")
221
+
222
+ ports_list = c.get("ports") or []
223
+ rp_ports = []
224
+ for p in ports_list:
225
+ if isinstance(p, dict):
226
+ cp = p.get("containerPort")
227
+ proto = (p.get("protocol") or "http").lower()
228
+ if proto not in ("http", "tcp"):
229
+ proto = "http"
230
+ if isinstance(cp, int):
231
+ rp_ports.append(f"{cp}/{proto}")
232
+ if not rp_ports:
233
+ return JSONResponse({"error": "ports[].containerPort required."}, 400)
234
+ _log_create(f"ports: {rp_ports}")
235
+
236
+ command = c.get("command") if isinstance(c.get("command"), list) else None
237
+ args = c.get("args") if isinstance(c.get("args"), list) else None
238
+ if command: _log_create(f"command: {command}")
239
+ if args: _log_create(f"args: {args}")
240
+
241
+ # GPU normalization (enum -> pretty string); include only if non-empty
242
+ dr = c.get("dedicatedResources") or {}
243
+ gpu_ids = None
244
+ gpu_count = 1
245
+ if isinstance(dr, dict):
246
+ typ = (dr.get("machineSpec", {}) or {}).get("acceleratorType")
247
+ cnt = (dr.get("machineSpec", {}) or {}).get("acceleratorCount")
248
+ if typ: gpu_ids = [typ] if isinstance(typ, str) else typ
249
+ if isinstance(cnt, int) and cnt > 0: gpu_count = cnt
250
+
251
+ def _normalize_gpu_enum(s: str) -> str:
252
+ if not isinstance(s, str) or not s.strip():
253
+ return ""
254
+ t = s.strip().upper().replace("_", " ")
255
+ vendor = "NVIDIA"
256
+ if t.startswith("NVIDIA "):
257
+ t = t[len("NVIDIA "):]
258
+ elif t.startswith("AMD "):
259
+ vendor = "AMD"; t = t[len("AMD "):]
260
+ t = re.sub(r"(\d)(GB\b)", r"\1 \2", t) # 80GB -> 80 GB
261
+ return f"{vendor} {t}".strip()
262
+
263
+ rp_gpu = None
264
+ if gpu_ids:
265
+ rp_gpu = _normalize_gpu_enum(gpu_ids[0]).strip() or None
266
+ _log_create(f"GPU_TRANSLATION original={gpu_ids[0]} -> runpod='{rp_gpu}'")
267
+
268
+ _log_create("SECURE_PLACEMENT interruptible=false")
269
+
270
+ payload = {
271
+ "name": re.sub(r"[^a-z0-9-]", "-", f"ephemeral-{int(time.time())}".lower()),
272
+ "computeType": "GPU",
273
+ "interruptible": False, # On-Demand (not community)
274
+ "imageName": image,
275
+ "gpuCount": gpu_count,
276
+ "ports": rp_ports,
277
+ "supportPublicIp": True,
278
+ **({"gpuTypeIds": [rp_gpu]} if rp_gpu else {}),
279
+ **({"env": env_obj} if env_obj else {}),
280
+ **({"command": command} if command else {}),
281
+ **({"args": args} if args else {}),
282
  }
283
+
284
+ _log_create(f"PAYLOAD_SENT {json.dumps(payload, ensure_ascii=False)}")
285
+
286
+ content = {}
287
+ pid = None
288
+ try:
289
+ r = requests.post(f"{_RP_BASE}/pods", headers=_rp_headers(), json=payload, timeout=60)
290
+ content = _as_json(r)
291
+ _log_create(f"RUNPOD_RESPONSE {json.dumps(content, ensure_ascii=False)}")
292
+ pid = content.get("id")
293
+ if not pid and isinstance(content, dict):
294
+ for v in content.values():
295
+ if isinstance(v, dict) and "id" in v:
296
+ pid = v["id"]; break
297
+ except Exception as e:
298
+ _log_create(f"ERROR Create: {e}")
299
+ return JSONResponse({"error": f"RunPod create failed: {e}"}, 500)
300
+
301
+ _log_create(f"ID: {pid}")
302
+
303
+ if not isinstance(r, requests.Response):
304
+ return JSONResponse({"error": "No HTTP response from RunPod create."}, 502)
305
+ if not r.ok:
306
+ return JSONResponse(content if isinstance(content, dict) else {"_raw": str(content)}, r.status_code)
307
+ if not pid:
308
+ return JSONResponse({"error": "Create succeeded but no pod ID in response.", "raw": content}, 502)
309
+
310
+ # cache pod id
311
+ try:
312
+ _INST["podId"] = str(pid).strip()
313
+ _log_id("CREATE_SET", _INST["podId"])
314
+ _save_state()
315
+ except Exception as e:
316
+ return JSONResponse({"error": f"Could not cache pod ID: {e}"}, 502)
317
+
318
+ # start the pod immediately so networking/IP can come up
319
+ try:
320
+ sr = requests.post(f"{_RP_BASE}/pods/{_INST['podId']}/start", headers=_rp_headers(), timeout=30)
321
+ scontent = _as_json(sr)
322
+ _log_status(f"START_RESPONSE {json.dumps(scontent, ensure_ascii=False)}")
323
+ except Exception as e:
324
+ _log_status(f"ERROR Start: {e}")
325
+
326
+ # initial status snapshot
327
+ try:
328
+ rs = requests.get(f"{_RP_BASE}/pods/{_INST['podId']}", headers=_rp_headers(), timeout=30)
329
+ st = _as_json(rs)
330
+ _log_status(f"STATUS_POLL {json.dumps(st, ensure_ascii=False)}")
331
+ content["_status"] = st
332
+ except Exception as e:
333
+ content["_status_error"] = str(e)
334
+ _log_status(f"ERROR Status: {e}")
335
+
336
+ _INST["status"] = content.get("desiredStatus") or content.get("status") or ""
337
+ _INST["ip"] = _INST.get("ip") or ""
338
+ _INST["port"] = _INST.get("port") or ""
339
+ return JSONResponse(content, r.status_code)
340
+ # ---------------------------------------------------------------------
341
+ # Poll / read instance status + explicit readiness fields
342
+ # ---------------------------------------------------------------------
343
+ @router.get("/api/compute/pods/{pod_id}")
344
+ def api_get_instance(pod_id: str = None):
345
+ pid = (pod_id or _INST.get("podId") or "").strip()
346
+ if not pid:
347
+ return JSONResponse({"error": "pod_id missing."}, 400)
348
+ _log_id("STATUS_USES", pid)
349
+
350
+ try:
351
+ r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=30)
352
+ last = _as_json(r)
353
+ _log_status(f"STATUS_POLL {json.dumps(last, ensure_ascii=False)}")
354
+ except Exception as e:
355
+ return JSONResponse({"error": f"poll failed: {e}"}, 502)
356
+
357
+ declared = None
358
+ try:
359
+ c = (((_INST.get("blob") or {}).get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
360
+ or (_INST.get("blob") or {}).get("container") or {}
361
+ declared = int((c.get("ports") or [])[0].get("containerPort"))
362
+ except Exception:
363
+ c = {}
364
+ pass
365
+
366
+ if isinstance(last, dict):
367
+ ip = last.get("publicIp") or ""
368
+ pm = last.get("portMappings") or {}
369
+
370
+ if ip and isinstance(pm, dict) and pm:
371
+ # choose mapped public port for the declared internal port; else first mapping
372
+ chosen = None
373
+ if isinstance(declared, int) and str(declared) in pm:
374
+ chosen = str(pm[str(declared)])
375
+ else:
376
+ k = next(iter(pm.keys()))
377
+ chosen = str(pm[k])
378
+ _log_status(f"PORT_MAPPING declared={declared} not_found_using_first key={k}")
379
+
380
+ _INST.update({"podId": pid,
381
+ "status": last.get("desiredStatus", ""),
382
+ "ip": ip,
383
+ "port": chosen})
384
+ _save_state()
385
+
386
+ base = f"http://{_INST['ip']}:{_INST['port']}"
387
+ _log_status(f"PORT_MAPPING declared={declared} chosen={chosen} all={pm}")
388
+ _log_status(f"RESOLVED_ENDPOINT base={base}")
389
+
390
+ # health + route discovery (no 8080 fallback, no proxy)
391
+ code, ms, snippet = _probe("GET", f"{base}/health")
392
+ _log_status(f"HEALTH code={code} ms={ms} body_snippet={snippet}")
393
+
394
+ # --- Vertex-mirrored route + final prompt URL log ---
395
+ pr_explicit = (c.get("predictRoute") or "").strip() if isinstance(c, dict) else ""
396
+ if pr_explicit:
397
+ _INST["predictRoute"] = pr_explicit
398
+ _log_status(f"PREDICT_ROUTE_SET {pr_explicit} (from blob)")
399
+ elif not _INST.get("predictRoute"):
400
+ _INST["predictRoute"] = "/predict"
401
+ _log_status("PREDICT_ROUTE_SET /predict (Vertex default)")
402
+
403
+ # If still unset for some reason, try lightweight discovery once
404
+ if not _INST.get("predictRoute"):
405
+ route = _discover_route(base)
406
+ if route:
407
+ _INST["predictRoute"] = route
408
+ _log_status(f"PREDICT_ROUTE_SET {route} (discovered)")
409
+ else:
410
+ _log_status("PREDICT_ROUTE_SET none")
411
+
412
+ # Final prompt URL (prefer IP; else proxy host)
413
+ pr = _INST.get("predictRoute") or "/predict"
414
+ if _INST.get("ip") and _INST.get("port"):
415
+ prompt_url = f"http://{_INST['ip']}:{_INST['port']}{pr}"
416
+ else:
417
+ proxy_base = f"https://{pid}-{declared}.proxy.runpod.net" if declared else ""
418
+ prompt_url = f"{proxy_base}{pr}" if proxy_base else ""
419
+ if prompt_url:
420
+ _log_status(f"PROMPT_ENDPOINT {prompt_url}")
421
+
422
+ # Always include cached readiness data for the UI
423
+ merged = {**last, "cachedState": {
424
+ "podId": _INST.get("podId"),
425
+ "status": _INST.get("status"),
426
+ "ip": _INST.get("ip"),
427
+ "port": _INST.get("port"),
428
+ "predictRoute": _INST.get("predictRoute"),
429
+ }}
430
+ return JSONResponse(merged)
431
+
432
+ # ---------------------------------------------------------------------
433
+ # Start, Stop, End All — same as before
434
+ # ---------------------------------------------------------------------
435
+ @router.post("/api/compute/pods/{pod_id}/start")
436
+ def api_start_instance(pod_id:str):
437
+ _log_id("START_USES",pod_id)
438
+ try:
439
+ r=requests.post(f"{_RP_BASE}/pods/{pod_id}/start",
440
+ headers=_rp_headers(),timeout=30)
441
+ payload=_as_json(r)
442
+ _log_status(f"START_RESPONSE {json.dumps(payload,ensure_ascii=False)}")
443
+ return JSONResponse(payload, r.status_code)
444
+ except Exception as e:
445
+ _log_status(f"ERROR Start: {e}")
446
+ return JSONResponse({"error":f"RunPod start failed: {e}"},500)
447
+
448
+ @router.delete("/api/compute/delete_instance")
449
+ async def api_delete_instance():
450
+ pid = (_INST.get("podId") or "").strip()
451
+ if not pid:
452
+ return JSONResponse({"error": "pod_id missing and no cached pod found."}, status_code=400)
453
+ _log_id("STOP_USES", pid)
454
+ try:
455
+ _log_delete(">>> STOP endpoint triggered")
456
+ r = requests.post(f"{_RP_BASE}/pods/{pid}/stop", headers=_rp_headers(), timeout=60)
457
+ payload = _as_json(r)
458
+ _log_delete(f"STOP_RESPONSE {json.dumps(payload, ensure_ascii=False)}")
459
+ return JSONResponse(status_code=r.status_code, content=payload)
460
+ except Exception as e:
461
+ _log_delete(f"ERROR Stop: {e}")
462
+ return JSONResponse(status_code=500, content={"error": f"RunPod stop failed: {e}"})
463
+
464
+ @router.delete("/api/compute/end_all")
465
+ async def api_end_all():
466
+ pid = (_INST.get("podId") or "").strip()
467
+ if not pid:
468
+ return JSONResponse({"error": "pod_id missing and no cached pod found."}, status_code=400)
469
+ _log_id("DELETE_USES", pid)
470
+ try:
471
+ _log_delete(">>> END-ALL endpoint triggered")
472
+ r = requests.delete(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=60)
473
+ payload = _as_json(r)
474
+ _log_delete(f"DELETE_RESPONSE {json.dumps(payload, ensure_ascii=False)}")
475
+ if r.status_code in (200, 202, 204):
476
+ _INST.update({"podId": "", "status": "", "ip": "", "port": ""})
477
+ _save_state()
478
+ return JSONResponse(status_code=r.status_code, content=payload)
479
+ except Exception as e:
480
+ _log_delete(f"ERROR Delete: {e}")
481
+ return JSONResponse(status_code=500, content={"error": f"RunPod delete failed: {e}"})
482
+
483
+ # ---------------------------------------------------------------------
484
+ # Wait instance
485
+ # ---------------------------------------------------------------------
486
+ @router.get("/api/compute/wait_instance")
487
+ def api_wait_instance(pod_id: str = None):
488
+ pid = (pod_id or _INST.get("podId") or "").strip()
489
+ if not pid:
490
+ return JSONResponse({"error": "pod_id missing."}, status_code=400)
491
+ try:
492
+ r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=30)
493
+ last = _as_json(r)
494
+ _log_status(f"WAIT_STATUS {json.dumps(last, ensure_ascii=False)}")
495
+ except Exception as e:
496
+ return JSONResponse({"error": f"wait poll failed: {e}"}, status_code=502)
497
+
498
+ ip = last.get("publicIp") or _INST.get("ip")
499
+ pm = last.get("portMappings") or {}
500
+ port = None
501
+ declared = None
502
+ try:
503
+ c = (((_INST.get("blob") or {}).get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
504
+ or (_INST.get("blob") or {}).get("container") or {}
505
+ declared = int((c.get("ports") or [])[0].get("containerPort"))
506
+ except Exception:
507
+ c = {}
508
+ pass
509
+
510
+ if ip and pm:
511
+ try:
512
+ if isinstance(declared, int) and str(declared) in pm:
513
+ port = str(pm[str(declared)])
514
+ except Exception:
515
+ pass
516
+ if not port and "8080" in pm:
517
+ port = str(pm["8080"])
518
+ elif not port and pm:
519
+ port = str(pm[next(iter(pm.keys()))])
520
+
521
+ if ip and port:
522
+ base = f"http://{ip}:{port}"
523
+ _log_status(f"RESOLVED_IP {base}")
524
+ code, ms, snippet = _probe("GET", f"{base}/health")
525
+ _log_status(f"HEALTH code={code} ms={ms} body_snippet={snippet}")
526
+
527
+ # --- Vertex-mirrored route + final prompt URL log ---
528
+ pr_explicit = (c.get("predictRoute") or "").strip() if isinstance(c, dict) else ""
529
+ if pr_explicit:
530
+ _INST["predictRoute"] = pr_explicit
531
+ _log_status(f"PREDICT_ROUTE_SET {pr_explicit} (from blob)")
532
+ elif not _INST.get("predictRoute"):
533
+ _INST["predictRoute"] = "/predict"
534
+ _log_status("PREDICT_ROUTE_SET /predict (Vertex default)")
535
+
536
+ if not _INST.get("predictRoute"):
537
+ route = _discover_route(base)
538
+ if route:
539
+ _INST["predictRoute"] = route
540
+ _log_status(f"PREDICT_ROUTE_SET {route} (discovered)")
541
+ else:
542
+ _log_status("PREDICT_ROUTE_SET none")
543
+
544
+ pr = _INST.get("predictRoute") or "/predict"
545
+ prompt_url = f"{base}{pr}"
546
+ _log_status(f"PROMPT_ENDPOINT {prompt_url}")
547
+
548
+ try:
549
+ cspec = _get_container_spec()
550
+ internal, _ = _get_port_and_proto(cspec)
551
+ if internal:
552
+ proxy_base = f"https://{pid}-{internal}.proxy.runpod.net"
553
+ _log_status(f"RESOLVED_PROXY {proxy_base}")
554
+ _INST["base"] = proxy_base
555
+ _save_state()
556
+ except Exception:
557
+ pass
558
+
559
+ _INST.update({"ip": ip or "", "port": port or "", "status": last.get("desiredStatus", "")})
560
+ _save_state()
561
+
562
+ merged = {
563
+ **last,
564
+ "cachedState": {
565
+ "podId": _INST.get("podId"),
566
+ "status": _INST.get("status"),
567
+ "ip": _INST.get("ip"),
568
+ "port": _INST.get("port"),
569
+ "base": _INST.get("base"),
570
+ "predictRoute": _INST.get("predictRoute"),
571
+ },
572
  }
573
+ return JSONResponse(merged)
574
+
575
+ # ---------------------------------------------------------------------
576
+ # Debug: live probes against the instance (IP + Proxy)
577
+ # ---------------------------------------------------------------------
578
+ @router.get("/api/compute/debug/probes")
579
+ def api_debug_probes(pod_id: str = None):
580
+ pid = (pod_id or _INST.get("podId") or "").strip()
581
+ if not pid:
582
+ return JSONResponse({"error": "pod_id missing."}, 400)
583
+
584
+ # latest pod object (for portMappings/publicIp)
585
+ try:
586
+ r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=20)
587
+ pod = _as_json(r)
588
+ _log_status(f"DEBUG_POD_OBJ {json.dumps(pod, ensure_ascii=False)}")
589
+ except Exception as e:
590
+ return JSONResponse({"error": f"pod fetch failed: {e}"}, 502)
591
+
592
+ ip = pod.get("publicIp") or _INST.get("ip")
593
+ pm = pod.get("portMappings") or {}
594
+
595
+ # choose internal/public ports
596
+ internal = None
597
+ try:
598
+ cs = _get_container_spec()
599
+ internal = int((cs.get("ports") or [])[0].get("containerPort"))
600
+ except Exception:
601
+ pass
602
+
603
+ if internal and str(internal) in pm:
604
+ public = str(pm[str(internal)])
605
+ elif "8080" in pm:
606
+ internal, public = 8080, str(pm["8080"])
607
+ elif pm:
608
+ k = next(iter(pm.keys()))
609
+ internal, public = int(k), str(pm[k])
610
+ else:
611
+ public = None
612
+
613
+ # candidate paths
614
+ healths = [(_INST.get("healthRoute") or "").strip(), "/health", "/ping", "/healthz", "/v1/models"]
615
+ healths = [p for p in healths if p]
616
+ predicts = [(_INST.get("predictRoute") or "").strip(), "/generate", "/predict", "/predictions",
617
+ "/v1/chat/completions", "/v1/models/model:predict"]
618
+ predicts = [p for p in predicts if p]
619
+
620
+ results = {"podId": pid, "ip": ip, "internalPort": internal, "publicPort": public, "probes": []}
621
+
622
+ # base URLs (IP and proxy)
623
+ bases = []
624
+ if ip and public:
625
+ bases.append(f"http://{ip}:{public}")
626
+ if internal:
627
+ bases.append(f"https://{pid}-{internal}.proxy.runpod.net")
628
+
629
+ # probe health
630
+ for base in bases:
631
+ for hp in healths:
632
+ code, ms, snippet = _probe("GET", f"{base}{hp}")
633
+ _log_status(f"DEBUG_HEALTH base={base} path={hp} code={code} ms={ms}")
634
+ results["probes"].append({"base": base, "path": hp, "kind": "health", "code": code, "ms": ms, "snippet": snippet})
635
+
636
+ # probe predict (HEAD)
637
+ for base in bases:
638
+ for pp in predicts:
639
+ code, ms, _ = _probe("HEAD", f"{base}{pp}")
640
+ _log_status(f"DEBUG_PREDICT base={base} path={pp} code={code} ms={ms}")
641
+ results["probes"].append({"base": base, "path": pp, "kind": "predict", "code": code, "ms": ms})
642
+
643
+ return JSONResponse(results, 200)
644
+
645
+ # ---------------------------------------------------------------------
646
+ # Helper functions for containerSpec parsing
647
+ # ---------------------------------------------------------------------
648
def _get_container_spec() -> dict:
    """Return the container spec from the cached deployment blob.

    Prefers the Vertex-style supportedActions.deploy.containerSpec, falling
    back to a flat "container" key. When no blob is cached yet, attempts to
    load and ingest the local blob first.

    Fix: previously `blob = lb` was assigned even when `_load_local_blob()`
    returned None, so the trailing `blob.get(...)` raised AttributeError.
    Now returns {} when no blob is available.
    """
    blob = _INST.get("blob")
    if not blob:
        lb = _load_local_blob()
        if lb:
            _ingest_blob(lb)
            blob = lb
    if not blob:
        return {}
    return (((blob.get("supportedActions") or {}).get("deploy") or {}).get("containerSpec")
            or blob.get("container") or {})
657
+
658
+ def _get_port_and_proto(cspec: dict):
659
+ try:
660
+ ports = cspec.get("ports") or []
661
+ if isinstance(ports, list) and ports:
662
+ p0 = ports[0]
663
+ internal = p0.get("containerPort")
664
+ proto = (p0.get("protocol") or "").lower() or None
665
+ return (int(internal) if str(internal).isdigit() else None, proto)
666
+ except Exception:
667
+ pass
668
+ return (None, None)
669
+
670
def _build_proxy_url(route: str) -> str:
    """Build the RunPod proxy URL https://<pod>-<port>.proxy.runpod.net<route>.

    Raises HTTPException(400) when the pod id or internal port is unknown.
    """
    pid = (_INST.get("podId") or "").strip()
    if not pid:
        raise HTTPException(status_code=400, detail="No podId in cache. Create/Start the instance first.")
    internal_port, _ = _get_port_and_proto(_get_container_spec())
    if not internal_port:
        raise HTTPException(status_code=400, detail="Cannot resolve internal port from containerSpec.ports[].")
    return f"https://{pid}-{internal_port}.proxy.runpod.net{route}"
679
+
680
def _build_ip_url(route: str) -> str:
    """Build the direct http://<ip>:<port><route> URL for the cached instance.

    Raises HTTPException(400) when ip or port is not cached.
    """
    ip = _INST.get("ip")
    port = _INST.get("port")
    if not ip or not port:
        raise HTTPException(status_code=400, detail="No running instance (ip/port missing).")
    return f"http://{ip}:{port}{route}"
685
+
686
def _resolve_infer_url(route: str) -> str:
    """Resolve the full inference URL for *route*: the direct ip:port path
    when both are cached, otherwise the RunPod proxy path.

    Raises HTTPException(400) (from _build_proxy_url) when neither can be
    resolved.
    """
    # Prefer the direct IP path when one is cached.
    try:
        if _INST.get("ip") and _INST.get("port"):
            url = _build_ip_url(route)
            _job_log("compute", f"[MW] Using IP path: {url}")
            return url
    except HTTPException:
        pass
    # Fix: the old guard `if proto == "http" or True:` was always true, which
    # made the trailing `return _build_ip_url(route)` unreachable and left
    # `proto` (and its containerSpec lookup) dead. The proxy is simply the
    # unconditional fallback.
    url = _build_proxy_url(route)
    _job_log("compute", f"[MW] Using Proxy path: {url}")
    return url
701
+
702
+ # ---------------------------------------------------------------------
703
+ # /api/infer — updated to use resolver
704
+ # ---------------------------------------------------------------------
705
+ @router.post("/api/infer")
706
+ async def api_infer(req: Request):
707
+ route = _INST.get("predictRoute")
708
+ if not route:
709
+ return JSONResponse(
710
+ {"error": "predictRoute unresolved; check ROUTE_PROBE logs and HEALTH results."},
711
+ status_code=428
712
+ )
713
+ body = await req.json()
714
+ try:
715
+ url = _resolve_infer_url(route)
716
+ r = requests.post(url, json=body, timeout=120)
717
+ ct = (r.headers.get("content-type") or "").lower()
718
+ if "application/json" in ct:
719
+ return JSONResponse(status_code=r.status_code, content=r.json())
720
+ return HTMLResponse(status_code=r.status_code, content=r.text)
721
+ except HTTPException as he:
722
+ return JSONResponse({"error": he.detail}, status_code=he.status_code)
723
+ except Exception as e:
724
+ return JSONResponse({"error": f"inference request failed: {e}"}, status_code=502)
725
+
726
+ # ---------------------------------------------------------------------
727
+ # /api/middleware/infer — middleware prompt routing and normalization
728
+ # ---------------------------------------------------------------------
729
+ @router.post("/api/middleware/infer")
730
+ async def api_middleware_infer(req: Request):
731
+ route = _INST.get("predictRoute")
732
+ if not route:
733
+ return JSONResponse(
734
+ {"error": "predictRoute unresolved; check ROUTE_PROBE logs and HEALTH results."},
735
+ status_code=428
736
+ )
737
+
738
+ payload = await req.json()
739
+ prompt = payload.get("prompt")
740
+ if not isinstance(prompt, str) or not prompt.strip():
741
+ return JSONResponse({"error": "Missing 'prompt' in request body."}, status_code=400)
742
+
743
+ # HF text-classification shim: wrap into Vertex-style instances
744
+ img = (_get_container_spec().get("imageUri","")).lower()
745
+ if "huggingface-pytorch-inference" in img and isinstance(payload.get("prompt"), str):
746
+ payload = {"instances": [payload["prompt"]]}
747
+
748
+ try:
749
+ # Prefer proxy base; fall back to direct IP if proxy not cached
750
+ pid = (_INST.get("podId") or "").strip()
751
+ proxy_base = None
752
+ try:
753
+ cspec = _get_container_spec()
754
+ internal, _ = _get_port_and_proto(cspec)
755
+ if pid and internal:
756
+ proxy_base = f"https://{pid}-{internal}.proxy.runpod.net"
757
+ _log_status(f"PROMPT_BASE proxy={proxy_base}")
758
+ except Exception:
759
+ pass
760
+ if not proxy_base:
761
+ ip, port = _INST.get("ip"), _INST.get("port")
762
+ if ip and port:
763
+ proxy_base = f"http://{ip}:{port}"
764
+ _log_status(f"PROMPT_BASE direct={proxy_base}")
765
+
766
+ if not proxy_base:
767
+ return JSONResponse({"error": "instance not ready (no base URL)"}, status_code=409)
768
+
769
+ url = f"{proxy_base}{route}"
770
+ _log_status(f"PROMPT_ENDPOINT {url}")
771
+ _job_log("compute", f"[MW] Forwarding infer to {url}")
772
+
773
+ # Try multiple prompt body formats until success
774
+ bodies = [
775
+ payload,
776
+ {"prompt": prompt},
777
+ {"text": prompt},
778
+ {"inputs": prompt},
779
+ {"input": prompt},
780
+ ]
781
+ rp, data = None, None
782
+ for body in bodies:
783
+ try:
784
+ rp = requests.post(url, json=body, timeout=120)
785
+ _log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
786
+ if rp.ok:
787
+ break
788
+ except Exception as e:
789
+ _log_status(f"PREDICT_ERR {e}")
790
+
791
+ if not rp:
792
+ return JSONResponse({"error": "no response from model"}, status_code=504)
793
+
794
+ ct = (rp.headers.get("content-type") or "").lower()
795
+ data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
796
+
797
+ if isinstance(data, dict):
798
+ if "image_b64" in data:
799
+ return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, status_code=rp.status_code)
800
+ if isinstance(data.get("output"), str):
801
+ return JSONResponse({"output": data["output"]}, status_code=rp.status_code)
802
+ if "_raw" in data:
803
+ return JSONResponse({"output": data["_raw"]}, status_code=rp.status_code)
804
+ return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, status_code=rp.status_code)
805
+
806
+ return JSONResponse({"output": str(data)}, status_code=rp.status_code)
807
+
808
+ except HTTPException as he:
809
+ _job_log("compute", f"[MW] ERROR {he.status_code}: {he.detail}")
810
+ return JSONResponse({"error": he.detail}, status_code=he.status_code)
811
+ except Exception as e:
812
+ _job_log("compute", f"[MW] ERROR infer: {e}")
813
+ return JSONResponse({"error": f"middleware infer failed: {e}"}, status_code=502)
814
+
815
+ # ---------------------------------------------------------------------
816
+ # Job progress + callback routes
817
+ # ---------------------------------------------------------------------
818
+ @router.post("/api/job/ready")
819
+ async def api_job_ready(req: Request):
820
+ return JSONResponse({"ok": True})
821
+
822
+ @router.post("/api/job/progress")
823
+ async def api_job_progress(req: Request):
824
+ data = await req.json()
825
+ job_id = str(data.get("job_id", "unknown"))
826
+ msg = data.get("message", "")
827
+ _job_log(job_id, msg or "progress")
828
+ return JSONResponse({"ok": True})
829
+
830
+ @router.post("/api/job/done")
831
+ async def api_job_done(req: Request):
832
+ data = await req.json()
833
+ job_id = str(data.get("job_id", "unknown"))
834
+ j = _JOBS.setdefault(job_id, {"status": "created", "logs": [], "image_b64": None, "timings": {}})
835
+ j["status"] = "done"
836
+ j["image_b64"] = data.get("image_b64")
837
+ j["timings"] = data.get("timings", {})
838
+ _job_log(job_id, "completed")
839
+ return JSONResponse({"ok": True})
840
+
841
+ @router.get("/api/job/status")
842
+ def api_job_status(job_id: str):
843
+ return JSONResponse(_JOBS.get(job_id, {"status": "unknown"}))