Theflame47 committed on
Commit
9dd3a51
·
verified ·
1 Parent(s): e1eefef

Update Deployment_UI_BE.py

Browse files
Files changed (1) hide show
  1. Deployment_UI_BE.py +838 -239
Deployment_UI_BE.py CHANGED
@@ -1,244 +1,843 @@
1
- # Deployment_UI.py UI-only (binds to /api/* provided by Deployment_UI_BE.py)
2
- from fastapi import APIRouter
3
- from fastapi.responses import HTMLResponse
4
 
5
  router = APIRouter()
6
 
7
- @router.get("/Deployment_UI", response_class=HTMLResponse)
8
- def deployment_ui_page():
9
- html_head = """
10
- <!doctype html>
11
- <html><head><meta charset="utf-8"/><title>Deployment UI</title>
12
- <style>
13
- body { font-family: system-ui, sans-serif; margin: 24px; display: flex; flex-direction: column; height: 90vh; }
14
- a.button { display:inline-block; padding:8px 14px; margin-bottom:10px; border:1px solid #ccc; border-radius:6px; text-decoration:none; color:#000; background:#f7f7f7; }
15
- a.button:hover { background:#eee; }
16
- #chat-window { flex:1; border:1px solid #ccc; border-radius:8px; padding:12px; overflow-y:auto; background:#fafafa; margin-bottom:10px; }
17
- #input-area { display:flex; gap:8px; margin-bottom:10px; }
18
- #user-input { flex:1; padding:10px; border:1px solid #ccc; border-radius:8px; }
19
- #send-btn { padding:10px 16px; border:none; border-radius:8px; background:#0078d7; color:#fff; cursor:pointer; }
20
- #send-btn:hover { background:#005fa3; }
21
- #control-bar { display:flex; gap:8px; margin:0 0 10px; }
22
- #start-btn, #stop-btn, #endall-btn { padding:8px 14px; border:1px solid #ccc; border-radius:8px; background:#f7f7f7; cursor:pointer; }
23
- #start-btn:hover, #stop-btn:hover, #endall-btn:hover { background:#eee; }
24
- #log-toggle { cursor:pointer; font-size:14px; color:#0078d7; text-decoration:underline; margin-bottom:6px; align-self:flex-start; }
25
- #logs { border:1px dashed #bbb; border-radius:8px; padding:10px; font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace; font-size:12px; background:#fff; max-height:220px; overflow-y:auto; transition:max-height .25s ease, opacity .25s ease; opacity:1; }
26
- #logs.collapsed { max-height:0; padding:0; opacity:0; overflow:hidden; border:none; }
27
- .log-line { margin:0 0 6px; white-space:pre-wrap; }
28
- .log-raw { color:#333; } .log-ok { color:#2d7c2d; } .log-err { color:#9d1c1c; }
29
- #results { border:1px solid #ddd; border-radius:8px; padding:10px; background:#fff; margin-top:10px; }
30
- .result { margin:8px 0; }
31
- .thumb { max-width: 420px; border:1px solid #ccc; border-radius:6px; }
32
- .meta { font-size:12px; color:#555; margin-top:4px; }
33
- .download { display:inline-block; margin-top:6px; }
34
- /* loading mask */
35
- #boot-mask{position:fixed;inset:0;background:rgba(255,255,255,.85);display:none;
36
- align-items:center;justify-content:center;flex-direction:column;z-index:9999}
37
- #boot-msg{margin-top:12px;color:#000;font-weight:600}
38
- .spinner{width:36px;height:36px;border:3px solid #ddd;border-top-color:#0078d7;border-radius:50%;
39
- animation:spin 1s linear infinite}
40
- @keyframes spin{to{transform:rotate(360deg)}}
41
- </style>
42
- </head>
43
- <body>
44
- <a href="/trythis" class="button">← Back</a>
45
- <div id="chat-window"></div>
46
- <div id="input-area">
47
- <input type="text" id="user-input" placeholder="Describe an image to generate…" />
48
- <button id="send-btn">Send</button>
49
- </div>
50
- <div id="control-bar">
51
- <button id="start-btn">Create inst.</button>
52
- <button id="stop-btn">Stop</button>
53
- <button id="endall-btn">End All</button>
54
- </div>
55
- <div id="log-toggle">Hide Logs</div>
56
- <div id="logs" aria-live="polite"></div>
57
- <div id="results"></div>
58
- <div id="boot-mask" aria-live="polite" role="status">
59
- <div class="spinner"></div>
60
- <div id="boot-msg">Starting…</div>
61
- </div>
62
- <script>
63
- """
64
- html_tail = """
65
- const logs = document.getElementById('logs');
66
- const toggleBtn = document.getElementById('log-toggle');
67
- const startBtn = document.getElementById('start-btn');
68
- const stopBtn = document.getElementById('stop-btn');
69
- const endAllBtn = document.getElementById('endall-btn');
70
- const sendBtn = document.getElementById('send-btn');
71
- const userInput = document.getElementById('user-input');
72
- const results = document.getElementById('results');
73
- const chat = document.getElementById('chat-window');
74
- const bootMask = document.getElementById('boot-mask');
75
- const bootMsg = document.getElementById('boot-msg');
76
- let running = false, ready = false;
77
- let MODEL_BASE = null, PREDICT_ROUTE = null;
78
-
79
- // NEW: auto-ingest blob from FE query param on page load
80
- (async () => {
81
- try {
82
- const u = new URL(window.location.href);
83
- const blobUrl = u.searchParams.get('blob_url') || '/modelblob.json';
84
- const r = await fetch(`/api/ingest/from_landing?blob_url=${encodeURIComponent(blobUrl)}`, { method: 'POST' });
85
- const t = await r.text();
86
- console.log('ingest-from-landing', r.status, t.slice(0,200));
87
- } catch (e) {
88
- console.warn('ingest bootstrap failed:', e);
89
- }
90
- })();
91
-
92
- function showBoot(msg){ bootMsg.textContent = msg || 'Starting…'; bootMask.style.display = 'flex'; }
93
- function setBoot(msg){ bootMsg.textContent = msg || 'Working…'; }
94
- function hideBoot(){ bootMask.style.display = 'none'; }
95
- function log(msg, cls='log-raw') {
96
- const line = document.createElement('div');
97
- line.className = 'log-line ' + cls;
98
- line.textContent = String(msg);
99
- logs.appendChild(line);
100
- logs.scrollTop = logs.scrollHeight;
101
- }
102
- toggleBtn.addEventListener('click', () => {
103
- const collapsed = logs.classList.toggle('collapsed');
104
- toggleBtn.textContent = collapsed ? "Show Logs" : "Hide Logs";
105
- });
106
- async function post(path, payload) {
107
- const r = await fetch(path, { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify(payload || {}) });
108
- const text = await r.text();
109
- log(text, r.ok ? 'log-ok' : 'log-err');
110
- try { return { ok: r.ok, json: JSON.parse(text) }; } catch { return { ok: r.ok, json: { _raw: text } }; }
111
- }
112
- async function del(path) {
113
- const r = await fetch(path, { method: 'DELETE' });
114
- const text = await r.text();
115
- log(text, r.ok ? 'log-ok' : 'log-err');
116
- try { return { ok: r.ok, json: JSON.parse(text) }; } catch { return { ok: r.ok, json: { _raw: text } }; }
117
- }
118
- // create + wait for true readiness (isReady)
119
- async function begin() {
120
- if (running) return;
121
- running = true; ready = false;
122
- showBoot('Creating instance…');
123
- const create = await post('/api/compute/create_instance');
124
- if (!create.ok) { running = false; hideBoot(); return; }
125
- const ok = await ensureReady(true);
126
- hideBoot();
127
- }
128
- async function stopOnce() {
129
- await del('/api/compute/delete_instance');
130
- hideBoot(); setBoot(''); ready = false; running = false;
131
- }
132
- async function endAll() {
133
- await stopOnce();
134
- MODEL_BASE = null; PREDICT_ROUTE = null;
135
- }
136
- function appendResult(job_id, b64, timings) {
137
- const div = document.createElement('div'); div.className = 'result';
138
- const img = document.createElement('img'); img.className = 'thumb'; img.src = 'data:image/png;base64,' + b64;
139
- const a = document.createElement('a'); a.className = 'download'; a.textContent = 'Download'; a.href = img.src; a.download = `job_${job_id}.png`;
140
- const meta = document.createElement('div'); meta.className = 'meta'; meta.textContent = `job ${job_id} | ${JSON.stringify(timings || {})}`;
141
- div.append(img, document.createTextNode(' '), a, meta); results.prepend(div);
142
- }
143
- // chat helpers
144
- function escapeHtml(s){ return String(s).replace(/[&<>"']/g, m => ({ '&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;' }[m])); }
145
- function addBlock(sender, html) {
146
- const wrap = document.createElement('div');
147
- wrap.style.margin = '8px 0';
148
- wrap.innerHTML = `<div class="meta"><strong>${sender}:</strong></div><div>${html}</div>`;
149
- chat.appendChild(wrap);
150
- chat.scrollTop = chat.scrollHeight;
151
- return wrap;
152
- }
153
- function addUser(text) { return addBlock('You', `<div>${escapeHtml(text)}</div>`); }
154
- function addModel(text) { return addBlock('Model', `<div>${escapeHtml(text)}</div>`); }
155
- function addModelImg(b64){
156
- const wrap = addBlock('Model', '');
157
- const img = document.createElement('img');
158
- img.className = 'thumb';
159
- img.src = 'data:image/png;base64,' + b64;
160
- wrap.lastElementChild.appendChild(img);
161
- return wrap;
162
- }
163
- function addLoader() {
164
- const wrap = addBlock('Model', `<div id="spinner" style="display:inline-block">Loading…</div>`);
165
- return wrap;
166
- }
167
- function looksBase64(s){ return /^[A-Za-z0-9+/=\\s]+$/.test(s||'') && String(s||'').length > 100; }
168
- // poll for true readiness provided by BE via cachedState.isReady
169
- async function ensureReady(verbose=false) {
170
- for (let i = 0; i < 60; i++) {
171
- const r = await fetch('/api/compute/wait_instance');
172
- const j = await r.json();
173
- const cs = j.cachedState || {};
174
- const status = (cs.status || '').toUpperCase();
175
- if (verbose) {
176
- if (cs.isReady === true) setBoot('Ready');
177
- else if (status === 'RUNNING' && cs.base) setBoot('Warming model…');
178
- else setBoot(`Status: ${status || '…'}`);
179
- }
180
- if (cs.base && cs.predictRoute && cs.isReady === true) {
181
- MODEL_BASE = cs.base;
182
- PREDICT_ROUTE = cs.predictRoute.startsWith('/') ? cs.predictRoute : `/${cs.predictRoute}`;
183
- log(`PROMPT_ENDPOINT ${MODEL_BASE}${PREDICT_ROUTE}`, 'log-ok');
184
- ready = true;
185
- return true;
186
- }
187
- log(`READY_POLL base=${cs.base ? 'yes' : 'no'} route=${cs.predictRoute || ''} isReady=${cs.isReady === true} status=${status}`, 'log-raw');
188
- await new Promise(res => setTimeout(res, 1000));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
- ready = false;
191
- return false;
192
- }
193
- // backend hop for prompts
194
- async function sendViaBackend(prompt) {
195
- const r = await fetch('/api/middleware/infer', {
196
- method: 'POST',
197
- headers: { 'content-type': 'application/json' },
198
- body: JSON.stringify({ prompt })
199
- });
200
- const text = await r.text();
201
- log(`POST /api/middleware/infer ${r.status}`, r.ok ? 'log-ok' : 'log-err');
202
- try { return { ok: r.ok, json: JSON.parse(text) }; } catch { return { ok: r.ok, json: { _raw: text } }; }
203
- }
204
- async function sendMessage() {
205
- const prompt = (userInput.value || '').trim();
206
- if (!prompt) return;
207
- if (!ready) { addModel('Instance not ready yet. Try Start, or wait a moment.'); return; }
208
- addUser(prompt);
209
- const loader = addLoader();
210
- userInput.disabled = true;
211
- try {
212
- const { ok, json } = await sendViaBackend(prompt);
213
- loader.remove();
214
- if (!ok && json?.error) { addModel(`Error: ${json.error}`); return; }
215
- if (json?.image_b64) {
216
- addModelImg(json.image_b64);
217
- if (json.timings) appendResult(String(Date.now()), json.image_b64, json.timings);
218
- } else if (typeof json?.output === 'string' && looksBase64(json.output)) {
219
- addModelImg(json.output);
220
- } else if (typeof json?.output === 'string') {
221
- addModel(json.output);
222
- } else if (json?._raw) {
223
- addModel(json._raw);
224
- } else {
225
- addModel(JSON.stringify(json || {}, null, 2));
226
- }
227
- } catch (e) {
228
- loader.remove();
229
- addModel(`Error: ${String(e)}`);
230
- } finally {
231
- userInput.disabled = false; userInput.value = '';
232
- userInput.focus();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
- }
235
- document.getElementById('send-btn').addEventListener('click', sendMessage);
236
- document.getElementById('user-input').addEventListener('keypress', (e) => { if (e.key === 'Enter') { e.preventDefault(); sendMessage(); } });
237
- document.getElementById('start-btn').addEventListener('click', begin);
238
- document.getElementById('stop-btn').addEventListener('click', stopOnce);
239
- document.getElementById('endall-btn').addEventListener('click', endAll);
240
- (function init(){})();
241
- </script>
242
- </body></html>
243
- """
244
- return HTMLResponse(content=html_head + html_tail)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Request, Form
2
+ from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
3
+ import os, json, requests, time, re, pathlib
4
 
5
  router = APIRouter()
6
 
7
+ # ---------------------------------------------------------------------
8
+ # In-memory job + instance store
9
+ # ---------------------------------------------------------------------
10
# Process-wide job ledger: job_id -> {"status", "logs", "image_b64", "timings"}.
_JOBS = {}

# Cached state of the single managed compute instance; mutated in place by the
# ingest / create / status handlers elsewhere in this module.
_INST = {
    "podId": "",
    "status": "",
    "ip": "",
    "port": "",
    "blob": None,
    "model_id": "",
    "container_image_hint": "",
    "predictRoute": None,
    "healthRoute": None,
    "readinessRoute": None,
    "livenessRoute": None,
}


def _now_ms():
    """Current wall-clock time in whole milliseconds."""
    return int(time.time() * 1000)


def _job_log(job_id, msg):
    """Append a timestamped message to a job's log, creating the entry on first use."""
    entry = _JOBS.setdefault(
        job_id,
        {"status": "created", "logs": [], "image_b64": None, "timings": {}},
    )
    entry["logs"].append({"t": _now_ms(), "msg": msg})
    print(f"[{job_id}] {msg}", flush=True)


# Convenience wrappers that tag messages for the shared "compute" job.
def _log_create(msg):
    _job_log("compute", f"[CREATE] {msg}")


def _log_status(msg):
    _job_log("compute", f"[STATUS] {msg}")


def _log_delete(msg):
    _job_log("compute", f"[DELETE] {msg}")


def _log_id(prefix, pid):
    _job_log("compute", f"{prefix} ID: {pid}")
30
+
31
+ # --- local blob ingest (landing page only) ---
32
# Path of the locally stored deployment blob; overridable via env var.
_LOCAL_BLOB_PATH = os.getenv("MODEL_BLOB_PATH", "model_blob.json")


def _load_local_blob():
    """Parse the local blob JSON file; return None when absent or unreadable."""
    try:
        if not os.path.exists(_LOCAL_BLOB_PATH):
            return None
        with open(_LOCAL_BLOB_PATH, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception as exc:
        _job_log("compute", f"ERROR LocalBlobLoad: {exc}")
        return None
42
+
43
def _ingest_blob(parsed: dict, model_id_hint: str = "", container_image_hint: str = ""):
    """Cache a deployment blob in _INST and pull route hints from its container spec.

    Explicit routes in the spec win; otherwise routes inferred from the image
    URI fill in any gaps. Raises HTTPException(400) for a non-dict blob.
    Returns True on success.
    """
    if not isinstance(parsed, dict):
        raise HTTPException(400, "Invalid blob (expected JSON object).")

    _INST["blob"] = parsed
    _INST["model_id"] = model_id_hint or ""
    _INST["container_image_hint"] = container_image_hint or ""

    deploy = (parsed.get("supportedActions") or {}).get("deploy") or {}
    spec = (deploy.get("containerSpec") or parsed.get("container") or {}) or {}

    # Explicit routes declared in the blob take priority.
    for route_key in ("predictRoute", "healthRoute", "readinessRoute", "livenessRoute"):
        value = spec.get(route_key)
        if isinstance(value, str) and value.strip():
            _INST[route_key] = value.strip()

    # Fall back to image-derived defaults without overriding explicit routes.
    inferred_predict, inferred_health = _infer_routes_from_image(
        (spec.get("imageUri") or "").strip().lower())
    if inferred_predict and not _INST.get("predictRoute"):
        _INST["predictRoute"] = inferred_predict
    if inferred_health and not _INST.get("healthRoute"):
        _INST["healthRoute"] = inferred_health

    return True
69
+
70
+ # ---------------------------------------------------------------------
71
+ # Disk persistence for recovery
72
+ # ---------------------------------------------------------------------
73
+ _STATE_PATH = "/tmp/pod_state.json"
74
+
75
+ def _save_state():
76
+ try:
77
+ pathlib.Path("/tmp").mkdir(parents=True, exist_ok=True)
78
+ with open(_STATE_PATH, "w") as f:
79
+ json.dump({k:_INST.get(k,"") for k in
80
+ ("podId","status","ip","port")}, f)
81
+ except Exception as e:
82
+ _job_log("compute", f"ERROR SaveState: {e}")
83
+
84
+ def _load_state():
85
+ try:
86
+ if os.path.exists(_STATE_PATH):
87
+ with open(_STATE_PATH) as f: d = json.load(f)
88
+ for k in ("podId","status","ip","port"):
89
+ if k in d: _INST[k]=d[k]
90
+ except Exception as e:
91
+ _job_log("compute", f"ERROR LoadState: {e}")
92
+
93
+ # ---------------------------------------------------------------------
94
+ # RunPod helpers
95
+ # ---------------------------------------------------------------------
96
+ _RP_BASE = "https://rest.runpod.io/v1"
97
+
98
+ def _rp_headers():
99
+ key=os.getenv("RunPod","").strip()
100
+ if not key:
101
+ raise HTTPException(500,"Missing RunPod API key (env var 'RunPod').")
102
+ return {"Authorization":f"Bearer {key}","Content-Type":"application/json"}
103
+
104
+ def _as_json(r):
105
+ c=(r.headers.get("content-type") or "").lower()
106
+ if "json" in c:
107
+ try: return r.json()
108
+ except Exception: return {"_raw":r.text}
109
+ return {"_raw":r.text}
110
+
111
+ # ---------------------------------------------------------------------
112
+ # Probes and route discovery (new)
113
+ # ---------------------------------------------------------------------
114
def _probe(method: str, url: str, timeout=5):
    """Issue one HTTP request; return (status_code, elapsed_ms, body_snippet).

    Any failure (network error, missing client, timeout) yields status -1 with
    the exception text in place of the body snippet.
    """
    started = time.time()
    try:
        resp = requests.request(method, url, timeout=timeout)
        elapsed = int((time.time() - started) * 1000)
        snippet = resp.text[:200] if resp.text else ""
        return resp.status_code, elapsed, snippet
    except Exception as exc:
        return -1, int((time.time() - started) * 1000), str(exc)


# Candidate health endpoints exposed by common serving containers.
_HEALTH_PATHS = ["/health", "/ping", "/healthz", "/v1/models"]
# Candidate inference routes, probed in order by _discover_route.
_POSSIBLE_ROUTES = ["/generate", "/predict", "/predictions",
                    "/v1/chat/completions", "/v1/models/model:predict"]
126
+
127
def _infer_routes_from_image(image_uri: str):
    """Guess (predictRoute, healthRoute) from a known serving-container image URI.

    Returns (None, None) when the image is not recognized.
    """
    lowered = (image_uri or "").lower()
    known = (
        ("vllm-serve", ("/generate", "/ping")),
        ("hf-inference-toolkit", ("/predict", "/ping")),
        ("huggingface-pytorch-inference", ("/predict", "/ping")),
    )
    for marker, routes in known:
        if marker in lowered:
            return routes
    return (None, None)
134
+
135
def _discover_route(base_url: str):
    """Probe candidate inference paths on base_url; return the first that exists.

    A HEAD response of 200/204/405 counts as "present" (405 = path exists but
    HEAD is not allowed). Returns None when nothing matches.
    """
    for candidate in _POSSIBLE_ROUTES:
        status, elapsed, _ = _probe("HEAD", f"{base_url}{candidate}")
        _log_status(f"ROUTE_PROBE path={candidate} code={status} ms={elapsed}")
        if status in (200, 204, 405):
            return candidate
    return None
142
+
143
+ # ---------------------------------------------------------------------
144
+ # Blob ingest via Model Blob page JSON (with blob_url override)
145
+ # ---------------------------------------------------------------------
146
+ _HF_SPACE_PORT = os.getenv("PORT", "7860")
147
+ _LOCAL_BASE = f"http://127.0.0.1:{_HF_SPACE_PORT}"
148
+
149
+ def _fetch_url(u: str):
150
+ try:
151
+ r = requests.get(u, timeout=8)
152
+ if r.ok:
153
+ return r.json()
154
+ _job_log("compute", f"ERROR BlobFetch code={r.status_code} url={u} body={r.text[:200]}")
155
+ except Exception as e:
156
+ _job_log("compute", f"ERROR BlobFetch url={u}: {e}")
157
+ return None
158
+
159
+ def _fetch_blob_from_page():
160
+ return _fetch_url(f"{_LOCAL_BASE}/modelblob.json")
161
+
162
+ @router.post("/api/ingest/from_landing")
163
+ def api_ingest_from_landing(blob_url: str | None = None):
164
+ """
165
+ Ingest the deployment blob for downstream use.
166
+ Source priority:
167
+ 1) explicit blob_url provided by FE (e.g., /modelblob.json)
168
+ 2) local modelblob page JSON outlet
169
+ """
170
+ parsed = _fetch_url(blob_url) if blob_url else _fetch_blob_from_page()
171
+ if not parsed:
172
+ return JSONResponse({"error": "Blob not available"}, 404)
173
+ _ingest_blob(parsed, model_id_hint="", container_image_hint="")
174
+ return JSONResponse({"ok": True, "source": blob_url or "/modelblob.json"})
175
+
176
+ # (Optional compatibility: UI posting to /Deployment_UI; accepts blob_url via query)
177
+ @router.post("/Deployment_UI")
178
+ async def deployment_ui_ingest(request: Request,
179
+ model_id: str = Form(""),
180
+ container_image: str = Form(""),
181
+ blob: str = Form("")):
182
+ """
183
+ Legacy entry used by the Deployment UI page.
184
+ Prefers blob_url from query string; falls back to the modelblob page JSON.
185
+ """
186
+ blob_url = request.query_params.get("blob_url")
187
+ parsed = _fetch_url(blob_url) if blob_url else _fetch_blob_from_page()
188
+ if not parsed:
189
+ return HTMLResponse("<pre>Missing blob (no /modelblob.json and no blob_url)</pre>", 400)
190
+ _ingest_blob(parsed, model_id_hint=model_id, container_image_hint=container_image)
191
+ return RedirectResponse("/Deployment_UI", 303)
192
+
193
+ # ---------------------------------------------------------------------
194
+ # Create instance
195
+ # ---------------------------------------------------------------------
196
+ @router.post("/api/compute/create_instance")
197
+ async def api_create_instance(req: Request):
198
+ # Ensure blob is present (lazy-load from landing file if needed)
199
+ if not _INST.get("blob"):
200
+ lb = _load_local_blob()
201
+ if lb:
202
+ _ingest_blob(lb)
203
+ blob = _INST.get("blob")
204
+ if not blob:
205
+ return JSONResponse({"error": "No deployment blob provided."}, 400)
206
+
207
+ c = ((blob.get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
208
+ or blob.get("container")
209
+ if not isinstance(c, dict) or not c:
210
+ return JSONResponse({"error": "Blob missing containerSpec."}, 400)
211
+
212
+ image = (c.get("imageUri") or "").strip()
213
+ if not image:
214
+ return JSONResponse({"error": "containerSpec.imageUri missing."}, 400)
215
+ _log_create(f"imageName: {image}")
216
+
217
+ env_list = c.get("env") or []
218
+ env_obj = {e.get("name"): e.get("value") for e in env_list
219
+ if isinstance(e, dict) and e.get("name")}
220
+ _log_create(f"env: {json.dumps(env_obj, ensure_ascii=False)}")
221
+
222
+ ports_list = c.get("ports") or []
223
+ rp_ports = []
224
+ for p in ports_list:
225
+ if isinstance(p, dict):
226
+ cp = p.get("containerPort")
227
+ proto = (p.get("protocol") or "http").lower()
228
+ if proto not in ("http", "tcp"):
229
+ proto = "http"
230
+ if isinstance(cp, int):
231
+ rp_ports.append(f"{cp}/{proto}")
232
+ if not rp_ports:
233
+ return JSONResponse({"error": "ports[].containerPort required."}, 400)
234
+ _log_create(f"ports: {rp_ports}")
235
+
236
+ command = c.get("command") if isinstance(c.get("command"), list) else None
237
+ args = c.get("args") if isinstance(c.get("args"), list) else None
238
+ if command: _log_create(f"command: {command}")
239
+ if args: _log_create(f"args: {args}")
240
+
241
+ # GPU normalization (enum -> pretty string); include only if non-empty
242
+ dr = c.get("dedicatedResources") or {}
243
+ gpu_ids = None
244
+ gpu_count = 1
245
+ if isinstance(dr, dict):
246
+ typ = (dr.get("machineSpec", {}) or {}).get("acceleratorType")
247
+ cnt = (dr.get("machineSpec", {}) or {}).get("acceleratorCount")
248
+ if typ: gpu_ids = [typ] if isinstance(typ, str) else typ
249
+ if isinstance(cnt, int) and cnt > 0: gpu_count = cnt
250
+
251
+ def _normalize_gpu_enum(s: str) -> str:
252
+ if not isinstance(s, str) or not s.strip():
253
+ return ""
254
+ t = s.strip().upper().replace("_", " ")
255
+ vendor = "NVIDIA"
256
+ if t.startswith("NVIDIA "):
257
+ t = t[len("NVIDIA "):]
258
+ elif t.startswith("AMD "):
259
+ vendor = "AMD"; t = t[len("AMD "):]
260
+ t = re.sub(r"(\d)(GB\b)", r"\1 \2", t) # 80GB -> 80 GB
261
+ return f"{vendor} {t}".strip()
262
+
263
+ rp_gpu = None
264
+ if gpu_ids:
265
+ rp_gpu = _normalize_gpu_enum(gpu_ids[0]).strip() or None
266
+ _log_create(f"GPU_TRANSLATION original={gpu_ids[0]} -> runpod='{rp_gpu}'")
267
+
268
+ _log_create("SECURE_PLACEMENT interruptible=false")
269
+
270
+ payload = {
271
+ "name": re.sub(r"[^a-z0-9-]", "-", f"ephemeral-{int(time.time())}".lower()),
272
+ "computeType": "GPU",
273
+ "interruptible": False, # On-Demand (not community)
274
+ "imageName": image,
275
+ "gpuCount": gpu_count,
276
+ "ports": rp_ports,
277
+ "supportPublicIp": True,
278
+ **({"gpuTypeIds": [rp_gpu]} if rp_gpu else {}),
279
+ **({"env": env_obj} if env_obj else {}),
280
+ **({"command": command} if command else {}),
281
+ **({"args": args} if args else {}),
282
  }
283
+
284
+ _log_create(f"PAYLOAD_SENT {json.dumps(payload, ensure_ascii=False)}")
285
+
286
+ content = {}
287
+ pid = None
288
+ try:
289
+ r = requests.post(f"{_RP_BASE}/pods", headers=_rp_headers(), json=payload, timeout=60)
290
+ content = _as_json(r)
291
+ _log_create(f"RUNPOD_RESPONSE {json.dumps(content, ensure_ascii=False)}")
292
+ pid = content.get("id")
293
+ if not pid and isinstance(content, dict):
294
+ for v in content.values():
295
+ if isinstance(v, dict) and "id" in v:
296
+ pid = v["id"]; break
297
+ except Exception as e:
298
+ _log_create(f"ERROR Create: {e}")
299
+ return JSONResponse({"error": f"RunPod create failed: {e}"}, 500)
300
+
301
+ _log_create(f"ID: {pid}")
302
+
303
+ if not isinstance(r, requests.Response):
304
+ return JSONResponse({"error": "No HTTP response from RunPod create."}, 502)
305
+ if not r.ok:
306
+ return JSONResponse(content if isinstance(content, dict) else {"_raw": str(content)}, r.status_code)
307
+ if not pid:
308
+ return JSONResponse({"error": "Create succeeded but no pod ID in response.", "raw": content}, 502)
309
+
310
+ # cache pod id
311
+ try:
312
+ _INST["podId"] = str(pid).strip()
313
+ _log_id("CREATE_SET", _INST["podId"])
314
+ _save_state()
315
+ except Exception as e:
316
+ return JSONResponse({"error": f"Could not cache pod ID: {e}"}, 502)
317
+
318
+ # start the pod immediately so networking/IP can come up
319
+ try:
320
+ sr = requests.post(f"{_RP_BASE}/pods/{_INST['podId']}/start", headers=_rp_headers(), timeout=30)
321
+ scontent = _as_json(sr)
322
+ _log_status(f"START_RESPONSE {json.dumps(scontent, ensure_ascii=False)}")
323
+ except Exception as e:
324
+ _log_status(f"ERROR Start: {e}")
325
+
326
+ # initial status snapshot
327
+ try:
328
+ rs = requests.get(f"{_RP_BASE}/pods/{_INST['podId']}", headers=_rp_headers(), timeout=30)
329
+ st = _as_json(rs)
330
+ _log_status(f"STATUS_POLL {json.dumps(st, ensure_ascii=False)}")
331
+ content["_status"] = st
332
+ except Exception as e:
333
+ content["_status_error"] = str(e)
334
+ _log_status(f"ERROR Status: {e}")
335
+
336
+ _INST["status"] = content.get("desiredStatus") or content.get("status") or ""
337
+ _INST["ip"] = _INST.get("ip") or ""
338
+ _INST["port"] = _INST.get("port") or ""
339
+ return JSONResponse(content, r.status_code)
340
+ # ---------------------------------------------------------------------
341
+ # Poll / read instance status + explicit readiness fields
342
+ # ---------------------------------------------------------------------
343
+ @router.get("/api/compute/pods/{pod_id}")
344
+ def api_get_instance(pod_id: str = None):
345
+ pid = (pod_id or _INST.get("podId") or "").strip()
346
+ if not pid:
347
+ return JSONResponse({"error": "pod_id missing."}, 400)
348
+ _log_id("STATUS_USES", pid)
349
+
350
+ try:
351
+ r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=30)
352
+ last = _as_json(r)
353
+ _log_status(f"STATUS_POLL {json.dumps(last, ensure_ascii=False)}")
354
+ except Exception as e:
355
+ return JSONResponse({"error": f"poll failed: {e}"}, 502)
356
+
357
+ declared = None
358
+ try:
359
+ c = (((_INST.get("blob") or {}).get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
360
+ or (_INST.get("blob") or {}).get("container") or {}
361
+ declared = int((c.get("ports") or [])[0].get("containerPort"))
362
+ except Exception:
363
+ c = {}
364
+ pass
365
+
366
+ if isinstance(last, dict):
367
+ ip = last.get("publicIp") or ""
368
+ pm = last.get("portMappings") or {}
369
+
370
+ if ip and isinstance(pm, dict) and pm:
371
+ # choose mapped public port for the declared internal port; else first mapping
372
+ chosen = None
373
+ if isinstance(declared, int) and str(declared) in pm:
374
+ chosen = str(pm[str(declared)])
375
+ else:
376
+ k = next(iter(pm.keys()))
377
+ chosen = str(pm[k])
378
+ _log_status(f"PORT_MAPPING declared={declared} not_found_using_first key={k}")
379
+
380
+ _INST.update({"podId": pid,
381
+ "status": last.get("desiredStatus", ""),
382
+ "ip": ip,
383
+ "port": chosen})
384
+ _save_state()
385
+
386
+ base = f"http://{_INST['ip']}:{_INST['port']}"
387
+ _log_status(f"PORT_MAPPING declared={declared} chosen={chosen} all={pm}")
388
+ _log_status(f"RESOLVED_ENDPOINT base={base}")
389
+
390
+ # health + route discovery (no 8080 fallback, no proxy)
391
+ code, ms, snippet = _probe("GET", f"{base}/health")
392
+ _log_status(f"HEALTH code={code} ms={ms} body_snippet={snippet}")
393
+
394
+ # --- Vertex-mirrored route + final prompt URL log ---
395
+ pr_explicit = (c.get("predictRoute") or "").strip() if isinstance(c, dict) else ""
396
+ if pr_explicit:
397
+ _INST["predictRoute"] = pr_explicit
398
+ _log_status(f"PREDICT_ROUTE_SET {pr_explicit} (from blob)")
399
+ elif not _INST.get("predictRoute"):
400
+ _INST["predictRoute"] = "/predict"
401
+ _log_status("PREDICT_ROUTE_SET /predict (Vertex default)")
402
+
403
+ # If still unset for some reason, try lightweight discovery once
404
+ if not _INST.get("predictRoute"):
405
+ route = _discover_route(base)
406
+ if route:
407
+ _INST["predictRoute"] = route
408
+ _log_status(f"PREDICT_ROUTE_SET {route} (discovered)")
409
+ else:
410
+ _log_status("PREDICT_ROUTE_SET none")
411
+
412
+ # Final prompt URL (prefer IP; else proxy host)
413
+ pr = _INST.get("predictRoute") or "/predict"
414
+ if _INST.get("ip") and _INST.get("port"):
415
+ prompt_url = f"http://{_INST['ip']}:{_INST['port']}{pr}"
416
+ else:
417
+ proxy_base = f"https://{pid}-{declared}.proxy.runpod.net" if declared else ""
418
+ prompt_url = f"{proxy_base}{pr}" if proxy_base else ""
419
+ if prompt_url:
420
+ _log_status(f"PROMPT_ENDPOINT {prompt_url}")
421
+
422
+ # Always include cached readiness data for the UI
423
+ merged = {**last, "cachedState": {
424
+ "podId": _INST.get("podId"),
425
+ "status": _INST.get("status"),
426
+ "ip": _INST.get("ip"),
427
+ "port": _INST.get("port"),
428
+ "predictRoute": _INST.get("predictRoute"),
429
+ }}
430
+ return JSONResponse(merged)
431
+
432
+ # ---------------------------------------------------------------------
433
+ # Start, Stop, End All — same as before
434
+ # ---------------------------------------------------------------------
435
+ @router.post("/api/compute/pods/{pod_id}/start")
436
+ def api_start_instance(pod_id:str):
437
+ _log_id("START_USES",pod_id)
438
+ try:
439
+ r=requests.post(f"{_RP_BASE}/pods/{pod_id}/start",
440
+ headers=_rp_headers(),timeout=30)
441
+ payload=_as_json(r)
442
+ _log_status(f"START_RESPONSE {json.dumps(payload,ensure_ascii=False)}")
443
+ return JSONResponse(payload, r.status_code)
444
+ except Exception as e:
445
+ _log_status(f"ERROR Start: {e}")
446
+ return JSONResponse({"error":f"RunPod start failed: {e}"},500)
447
+
448
+ @router.delete("/api/compute/delete_instance")
449
+ async def api_delete_instance():
450
+ pid = (_INST.get("podId") or "").strip()
451
+ if not pid:
452
+ return JSONResponse({"error": "pod_id missing and no cached pod found."}, status_code=400)
453
+ _log_id("STOP_USES", pid)
454
+ try:
455
+ _log_delete(">>> STOP endpoint triggered")
456
+ r = requests.post(f"{_RP_BASE}/pods/{pid}/stop", headers=_rp_headers(), timeout=60)
457
+ payload = _as_json(r)
458
+ _log_delete(f"STOP_RESPONSE {json.dumps(payload, ensure_ascii=False)}")
459
+ return JSONResponse(status_code=r.status_code, content=payload)
460
+ except Exception as e:
461
+ _log_delete(f"ERROR Stop: {e}")
462
+ return JSONResponse(status_code=500, content={"error": f"RunPod stop failed: {e}"})
463
+
464
+ @router.delete("/api/compute/end_all")
465
+ async def api_end_all():
466
+ pid = (_INST.get("podId") or "").strip()
467
+ if not pid:
468
+ return JSONResponse({"error": "pod_id missing and no cached pod found."}, status_code=400)
469
+ _log_id("DELETE_USES", pid)
470
+ try:
471
+ _log_delete(">>> END-ALL endpoint triggered")
472
+ r = requests.delete(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=60)
473
+ payload = _as_json(r)
474
+ _log_delete(f"DELETE_RESPONSE {json.dumps(payload, ensure_ascii=False)}")
475
+ if r.status_code in (200, 202, 204):
476
+ _INST.update({"podId": "", "status": "", "ip": "", "port": ""})
477
+ _save_state()
478
+ return JSONResponse(status_code=r.status_code, content=payload)
479
+ except Exception as e:
480
+ _log_delete(f"ERROR Delete: {e}")
481
+ return JSONResponse(status_code=500, content={"error": f"RunPod delete failed: {e}"})
482
+
483
+ # ---------------------------------------------------------------------
484
+ # Wait instance
485
+ # ---------------------------------------------------------------------
486
+ @router.get("/api/compute/wait_instance")
487
+ def api_wait_instance(pod_id: str = None):
488
+ pid = (pod_id or _INST.get("podId") or "").strip()
489
+ if not pid:
490
+ return JSONResponse({"error": "pod_id missing."}, status_code=400)
491
+ try:
492
+ r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=30)
493
+ last = _as_json(r)
494
+ _log_status(f"WAIT_STATUS {json.dumps(last, ensure_ascii=False)}")
495
+ except Exception as e:
496
+ return JSONResponse({"error": f"wait poll failed: {e}"}, status_code=502)
497
+
498
+ ip = last.get("publicIp") or _INST.get("ip")
499
+ pm = last.get("portMappings") or {}
500
+ port = None
501
+ declared = None
502
+ try:
503
+ c = (((_INST.get("blob") or {}).get("supportedActions") or {}).get("deploy") or {}).get("containerSpec") \
504
+ or (_INST.get("blob") or {}).get("container") or {}
505
+ declared = int((c.get("ports") or [])[0].get("containerPort"))
506
+ except Exception:
507
+ c = {}
508
+ pass
509
+
510
+ if ip and pm:
511
+ try:
512
+ if isinstance(declared, int) and str(declared) in pm:
513
+ port = str(pm[str(declared)])
514
+ except Exception:
515
+ pass
516
+ if not port and "8080" in pm:
517
+ port = str(pm["8080"])
518
+ elif not port and pm:
519
+ port = str(pm[next(iter(pm.keys()))])
520
+
521
+ if ip and port:
522
+ base = f"http://{ip}:{port}"
523
+ _log_status(f"RESOLVED_IP {base}")
524
+ code, ms, snippet = _probe("GET", f"{base}/health")
525
+ _log_status(f"HEALTH code={code} ms={ms} body_snippet={snippet}")
526
+
527
+ # --- Vertex-mirrored route + final prompt URL log ---
528
+ pr_explicit = (c.get("predictRoute") or "").strip() if isinstance(c, dict) else ""
529
+ if pr_explicit:
530
+ _INST["predictRoute"] = pr_explicit
531
+ _log_status(f"PREDICT_ROUTE_SET {pr_explicit} (from blob)")
532
+ elif not _INST.get("predictRoute"):
533
+ _INST["predictRoute"] = "/predict"
534
+ _log_status("PREDICT_ROUTE_SET /predict (Vertex default)")
535
+
536
+ if not _INST.get("predictRoute"):
537
+ route = _discover_route(base)
538
+ if route:
539
+ _INST["predictRoute"] = route
540
+ _log_status(f"PREDICT_ROUTE_SET {route} (discovered)")
541
+ else:
542
+ _log_status("PREDICT_ROUTE_SET none")
543
+
544
+ pr = _INST.get("predictRoute") or "/predict"
545
+ prompt_url = f"{base}{pr}"
546
+ _log_status(f"PROMPT_ENDPOINT {prompt_url}")
547
+
548
+ try:
549
+ cspec = _get_container_spec()
550
+ internal, _ = _get_port_and_proto(cspec)
551
+ if internal:
552
+ proxy_base = f"https://{pid}-{internal}.proxy.runpod.net"
553
+ _log_status(f"RESOLVED_PROXY {proxy_base}")
554
+ _INST["base"] = proxy_base
555
+ _save_state()
556
+ except Exception:
557
+ pass
558
+
559
+ _INST.update({"ip": ip or "", "port": port or "", "status": last.get("desiredStatus", "")})
560
+ _save_state()
561
+
562
+ merged = {
563
+ **last,
564
+ "cachedState": {
565
+ "podId": _INST.get("podId"),
566
+ "status": _INST.get("status"),
567
+ "ip": _INST.get("ip"),
568
+ "port": _INST.get("port"),
569
+ "base": _INST.get("base"),
570
+ "predictRoute": _INST.get("predictRoute"),
571
+ },
572
  }
573
+ return JSONResponse(merged)
574
+
575
+ # ---------------------------------------------------------------------
576
+ # Debug: live probes against the instance (IP + Proxy)
577
+ # ---------------------------------------------------------------------
578
+ @router.get("/api/compute/debug/probes")
579
+ def api_debug_probes(pod_id: str = None):
580
+ pid = (pod_id or _INST.get("podId") or "").strip()
581
+ if not pid:
582
+ return JSONResponse({"error": "pod_id missing."}, 400)
583
+
584
+ # latest pod object (for portMappings/publicIp)
585
+ try:
586
+ r = requests.get(f"{_RP_BASE}/pods/{pid}", headers=_rp_headers(), timeout=20)
587
+ pod = _as_json(r)
588
+ _log_status(f"DEBUG_POD_OBJ {json.dumps(pod, ensure_ascii=False)}")
589
+ except Exception as e:
590
+ return JSONResponse({"error": f"pod fetch failed: {e}"}, 502)
591
+
592
+ ip = pod.get("publicIp") or _INST.get("ip")
593
+ pm = pod.get("portMappings") or {}
594
+
595
+ # choose internal/public ports
596
+ internal = None
597
+ try:
598
+ cs = _get_container_spec()
599
+ internal = int((cs.get("ports") or [])[0].get("containerPort"))
600
+ except Exception:
601
+ pass
602
+
603
+ if internal and str(internal) in pm:
604
+ public = str(pm[str(internal)])
605
+ elif "8080" in pm:
606
+ internal, public = 8080, str(pm["8080"])
607
+ elif pm:
608
+ k = next(iter(pm.keys()))
609
+ internal, public = int(k), str(pm[k])
610
+ else:
611
+ public = None
612
+
613
+ # candidate paths
614
+ healths = [(_INST.get("healthRoute") or "").strip(), "/health", "/ping", "/healthz", "/v1/models"]
615
+ healths = [p for p in healths if p]
616
+ predicts = [(_INST.get("predictRoute") or "").strip(), "/generate", "/predict", "/predictions",
617
+ "/v1/chat/completions", "/v1/models/model:predict"]
618
+ predicts = [p for p in predicts if p]
619
+
620
+ results = {"podId": pid, "ip": ip, "internalPort": internal, "publicPort": public, "probes": []}
621
+
622
+ # base URLs (IP and proxy)
623
+ bases = []
624
+ if ip and public:
625
+ bases.append(f"http://{ip}:{public}")
626
+ if internal:
627
+ bases.append(f"https://{pid}-{internal}.proxy.runpod.net")
628
+
629
+ # probe health
630
+ for base in bases:
631
+ for hp in healths:
632
+ code, ms, snippet = _probe("GET", f"{base}{hp}")
633
+ _log_status(f"DEBUG_HEALTH base={base} path={hp} code={code} ms={ms}")
634
+ results["probes"].append({"base": base, "path": hp, "kind": "health", "code": code, "ms": ms, "snippet": snippet})
635
+
636
+ # probe predict (HEAD)
637
+ for base in bases:
638
+ for pp in predicts:
639
+ code, ms, _ = _probe("HEAD", f"{base}{pp}")
640
+ _log_status(f"DEBUG_PREDICT base={base} path={pp} code={code} ms={ms}")
641
+ results["probes"].append({"base": base, "path": pp, "kind": "predict", "code": code, "ms": ms})
642
+
643
+ return JSONResponse(results, 200)
644
+
645
+ # ---------------------------------------------------------------------
646
+ # Helper functions for containerSpec parsing
647
+ # ---------------------------------------------------------------------
648
def _get_container_spec() -> dict:
    """Return the container spec from the cached deployment blob.

    Prefers the Vertex-style supportedActions.deploy.containerSpec, falling
    back to a flat "container" key. When no blob is cached yet, attempts to
    load and ingest the local blob first.

    Fix: previously `blob = lb` was assigned even when `_load_local_blob()`
    returned None, so the trailing `blob.get(...)` raised AttributeError.
    Now returns {} when no blob is available.
    """
    blob = _INST.get("blob")
    if not blob:
        lb = _load_local_blob()
        if lb:
            _ingest_blob(lb)
            blob = lb
    if not blob:
        return {}
    return (((blob.get("supportedActions") or {}).get("deploy") or {}).get("containerSpec")
            or blob.get("container") or {})
657
+
658
+ def _get_port_and_proto(cspec: dict):
659
+ try:
660
+ ports = cspec.get("ports") or []
661
+ if isinstance(ports, list) and ports:
662
+ p0 = ports[0]
663
+ internal = p0.get("containerPort")
664
+ proto = (p0.get("protocol") or "").lower() or None
665
+ return (int(internal) if str(internal).isdigit() else None, proto)
666
+ except Exception:
667
+ pass
668
+ return (None, None)
669
+
670
def _build_proxy_url(route: str) -> str:
    """Build the RunPod proxy URL https://<pod>-<port>.proxy.runpod.net<route>.

    Raises HTTPException(400) when the pod id or internal port is unknown.
    """
    pid = (_INST.get("podId") or "").strip()
    if not pid:
        raise HTTPException(status_code=400, detail="No podId in cache. Create/Start the instance first.")
    internal_port, _ = _get_port_and_proto(_get_container_spec())
    if not internal_port:
        raise HTTPException(status_code=400, detail="Cannot resolve internal port from containerSpec.ports[].")
    return f"https://{pid}-{internal_port}.proxy.runpod.net{route}"
679
+
680
def _build_ip_url(route: str) -> str:
    """Build the direct http://<ip>:<port><route> URL for the cached instance.

    Raises HTTPException(400) when ip or port is not cached.
    """
    ip = _INST.get("ip")
    port = _INST.get("port")
    if not ip or not port:
        raise HTTPException(status_code=400, detail="No running instance (ip/port missing).")
    return f"http://{ip}:{port}{route}"
685
+
686
def _resolve_infer_url(route: str) -> str:
    """Resolve the full inference URL for *route*: the direct ip:port path
    when both are cached, otherwise the RunPod proxy path.

    Raises HTTPException(400) (from _build_proxy_url) when neither can be
    resolved.
    """
    # Prefer the direct IP path when one is cached.
    try:
        if _INST.get("ip") and _INST.get("port"):
            url = _build_ip_url(route)
            _job_log("compute", f"[MW] Using IP path: {url}")
            return url
    except HTTPException:
        pass
    # Fix: the old guard `if proto == "http" or True:` was always true, which
    # made the trailing `return _build_ip_url(route)` unreachable and left
    # `proto` (and its containerSpec lookup) dead. The proxy is simply the
    # unconditional fallback.
    url = _build_proxy_url(route)
    _job_log("compute", f"[MW] Using Proxy path: {url}")
    return url
701
+
702
+ # ---------------------------------------------------------------------
703
+ # /api/infer — updated to use resolver
704
+ # ---------------------------------------------------------------------
705
+ @router.post("/api/infer")
706
+ async def api_infer(req: Request):
707
+ route = _INST.get("predictRoute")
708
+ if not route:
709
+ return JSONResponse(
710
+ {"error": "predictRoute unresolved; check ROUTE_PROBE logs and HEALTH results."},
711
+ status_code=428
712
+ )
713
+ body = await req.json()
714
+ try:
715
+ url = _resolve_infer_url(route)
716
+ r = requests.post(url, json=body, timeout=120)
717
+ ct = (r.headers.get("content-type") or "").lower()
718
+ if "application/json" in ct:
719
+ return JSONResponse(status_code=r.status_code, content=r.json())
720
+ return HTMLResponse(status_code=r.status_code, content=r.text)
721
+ except HTTPException as he:
722
+ return JSONResponse({"error": he.detail}, status_code=he.status_code)
723
+ except Exception as e:
724
+ return JSONResponse({"error": f"inference request failed: {e}"}, status_code=502)
725
+
726
+ # ---------------------------------------------------------------------
727
+ # /api/middleware/infer — middleware prompt routing and normalization
728
+ # ---------------------------------------------------------------------
729
+ @router.post("/api/middleware/infer")
730
+ async def api_middleware_infer(req: Request):
731
+ route = _INST.get("predictRoute")
732
+ if not route:
733
+ return JSONResponse(
734
+ {"error": "predictRoute unresolved; check ROUTE_PROBE logs and HEALTH results."},
735
+ status_code=428
736
+ )
737
+
738
+ payload = await req.json()
739
+ prompt = payload.get("prompt")
740
+ if not isinstance(prompt, str) or not prompt.strip():
741
+ return JSONResponse({"error": "Missing 'prompt' in request body."}, status_code=400)
742
+
743
+ # HF text-classification shim: wrap into Vertex-style instances
744
+ img = (_get_container_spec().get("imageUri","")).lower()
745
+ if "huggingface-pytorch-inference" in img and isinstance(payload.get("prompt"), str):
746
+ payload = {"instances": [payload["prompt"]]}
747
+
748
+ try:
749
+ # Prefer proxy base; fall back to direct IP if proxy not cached
750
+ pid = (_INST.get("podId") or "").strip()
751
+ proxy_base = None
752
+ try:
753
+ cspec = _get_container_spec()
754
+ internal, _ = _get_port_and_proto(cspec)
755
+ if pid and internal:
756
+ proxy_base = f"https://{pid}-{internal}.proxy.runpod.net"
757
+ _log_status(f"PROMPT_BASE proxy={proxy_base}")
758
+ except Exception:
759
+ pass
760
+ if not proxy_base:
761
+ ip, port = _INST.get("ip"), _INST.get("port")
762
+ if ip and port:
763
+ proxy_base = f"http://{ip}:{port}"
764
+ _log_status(f"PROMPT_BASE direct={proxy_base}")
765
+
766
+ if not proxy_base:
767
+ return JSONResponse({"error": "instance not ready (no base URL)"}, status_code=409)
768
+
769
+ url = f"{proxy_base}{route}"
770
+ _log_status(f"PROMPT_ENDPOINT {url}")
771
+ _job_log("compute", f"[MW] Forwarding infer to {url}")
772
+
773
+ # Try multiple prompt body formats until success
774
+ bodies = [
775
+ payload,
776
+ {"prompt": prompt},
777
+ {"text": prompt},
778
+ {"inputs": prompt},
779
+ {"input": prompt},
780
+ ]
781
+ rp, data = None, None
782
+ for body in bodies:
783
+ try:
784
+ rp = requests.post(url, json=body, timeout=120)
785
+ _log_status(f"PREDICT_RESP code={rp.status_code} len={len(rp.text)}")
786
+ if rp.ok:
787
+ break
788
+ except Exception as e:
789
+ _log_status(f"PREDICT_ERR {e}")
790
+
791
+ if not rp:
792
+ return JSONResponse({"error": "no response from model"}, status_code=504)
793
+
794
+ ct = (rp.headers.get("content-type") or "").lower()
795
+ data = _as_json(rp) if "application/json" in ct else {"_raw": rp.text}
796
+
797
+ if isinstance(data, dict):
798
+ if "image_b64" in data:
799
+ return JSONResponse({"image_b64": data["image_b64"], "timings": data.get("timings")}, status_code=rp.status_code)
800
+ if isinstance(data.get("output"), str):
801
+ return JSONResponse({"output": data["output"]}, status_code=rp.status_code)
802
+ if "_raw" in data:
803
+ return JSONResponse({"output": data["_raw"]}, status_code=rp.status_code)
804
+ return JSONResponse({"output": json.dumps(data, ensure_ascii=False)}, status_code=rp.status_code)
805
+
806
+ return JSONResponse({"output": str(data)}, status_code=rp.status_code)
807
+
808
+ except HTTPException as he:
809
+ _job_log("compute", f"[MW] ERROR {he.status_code}: {he.detail}")
810
+ return JSONResponse({"error": he.detail}, status_code=he.status_code)
811
+ except Exception as e:
812
+ _job_log("compute", f"[MW] ERROR infer: {e}")
813
+ return JSONResponse({"error": f"middleware infer failed: {e}"}, status_code=502)
814
+
815
+ # ---------------------------------------------------------------------
816
+ # Job progress + callback routes
817
+ # ---------------------------------------------------------------------
818
+ @router.post("/api/job/ready")
819
+ async def api_job_ready(req: Request):
820
+ return JSONResponse({"ok": True})
821
+
822
+ @router.post("/api/job/progress")
823
+ async def api_job_progress(req: Request):
824
+ data = await req.json()
825
+ job_id = str(data.get("job_id", "unknown"))
826
+ msg = data.get("message", "")
827
+ _job_log(job_id, msg or "progress")
828
+ return JSONResponse({"ok": True})
829
+
830
+ @router.post("/api/job/done")
831
+ async def api_job_done(req: Request):
832
+ data = await req.json()
833
+ job_id = str(data.get("job_id", "unknown"))
834
+ j = _JOBS.setdefault(job_id, {"status": "created", "logs": [], "image_b64": None, "timings": {}})
835
+ j["status"] = "done"
836
+ j["image_b64"] = data.get("image_b64")
837
+ j["timings"] = data.get("timings", {})
838
+ _job_log(job_id, "completed")
839
+ return JSONResponse({"ok": True})
840
+
841
+ @router.get("/api/job/status")
842
+ def api_job_status(job_id: str):
843
+ return JSONResponse(_JOBS.get(job_id, {"status": "unknown"}))