Hug0endob committed on
Commit
58d3ae4
·
verified ·
1 Parent(s): d64e77d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -193
app.py CHANGED
@@ -2,14 +2,14 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  from __future__ import annotations
5
- import os, shutil, subprocess, tempfile, base64, json
6
  from io import BytesIO
7
  from typing import List, Tuple
8
  import requests
9
  from PIL import Image, ImageFile, UnidentifiedImageError
10
  import gradio as gr
11
 
12
- # --- Config
13
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
14
  PIXTRAL_MODEL = "pixtral-12b-2409"
15
  VIDEO_MODEL = "voxtral-mini-latest"
@@ -18,10 +18,8 @@ FFMPEG_BIN = shutil.which("ffmpeg")
18
  IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
19
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
20
 
21
- SYSTEM_INSTRUCTION = (
22
- "You are a clinical visual analyst. Only analyze media actually provided (image or video data). If video review entirely, not frame by frame. "
23
- "Provide factual, clinical descriptions of observable features. Avoid inventing sensory information."
24
- )
25
 
26
  ImageFile.LOAD_TRUNCATED_IMAGES = True
27
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
@@ -34,13 +32,13 @@ except Exception:
34
  def get_client(key: str | None = None):
35
  api_key = (key or "").strip() or DEFAULT_KEY
36
  if Mistral is None:
37
- class Dummy:
38
- def __init__(self, k): self.api_key = k
39
  return Dummy(api_key)
40
  return Mistral(api_key=api_key)
41
 
42
  def is_remote(src: str) -> bool:
43
- return bool(src) and src.startswith(("http://", "https://"))
44
 
45
  def ext_from_src(src: str) -> str:
46
  if not src: return ""
@@ -68,13 +66,12 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
68
  if cl and int(cl) > stream_threshold:
69
  with requests.get(src, timeout=timeout, stream=True) as r:
70
  r.raise_for_status()
71
- fd, p = tempfile.mkstemp()
72
- os.close(fd)
73
  try:
74
- with open(p, "wb") as fh:
75
  for chunk in r.iter_content(8192):
76
  if chunk: fh.write(chunk)
77
- with open(p, "rb") as fh: return fh.read()
78
  finally:
79
  try: os.remove(p)
80
  except Exception: pass
@@ -83,242 +80,115 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
83
  r = safe_get(src, timeout=timeout)
84
  return r.content
85
  else:
86
- with open(src, "rb") as f: return f.read()
87
 
88
  def save_bytes_to_temp(b: bytes, suffix: str) -> str:
89
  fd, path = tempfile.mkstemp(suffix=suffix); os.close(fd)
90
- with open(path, "wb") as f: f.write(b)
91
  return path
92
 
93
  def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
94
  img = Image.open(BytesIO(img_bytes))
95
  try:
96
- if getattr(img, "is_animated", False): img.seek(0)
97
  except Exception: pass
98
  if img.mode != "RGB": img = img.convert("RGB")
99
- h = base_h
100
- w = max(1, int(img.width * (h / img.height)))
101
- img = img.resize((w, h), Image.LANCZOS)
102
  buf = BytesIO(); img.save(buf, format="JPEG", quality=85)
103
  return buf.getvalue()
104
 
105
  def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
106
  return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
107
 
108
- def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
109
- frames: List[bytes] = []
110
- if not FFMPEG_BIN or not os.path.exists(media_path): return frames
111
- timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
112
- for i, t in enumerate(timestamps):
113
- fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg"); os.close(fd)
114
- cmd = [FFMPEG_BIN, "-nostdin", "-y", "-ss", str(t), "-i", media_path, "-frames:v", "1", "-q:v", "2", tmp]
115
- try:
116
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
117
- if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
118
- with open(tmp, "rb") as f: frames.append(f.read())
119
- except Exception:
120
- pass
121
- finally:
122
- try: os.remove(tmp)
123
- except Exception: pass
124
- return frames
125
 
126
- def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
127
- try:
128
- if hasattr(client, "chat") and hasattr(client.chat, "complete"):
129
- res = client.chat.complete(model=model, messages=messages, stream=False)
130
- else:
131
- api_key = getattr(client, "api_key", "") or DEFAULT_KEY
132
- url = "https://api.mistral.ai/v1/chat/completions"
133
- headers = ({"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if api_key else {"Content-Type": "application/json"})
134
- r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
135
- r.raise_for_status(); res = r.json()
136
- choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
137
- if not choices: return str(res)
138
- first = choices[0]
139
- msg = first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first)
140
- content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
141
- return content.strip() if isinstance(content, str) else str(content)
142
- except Exception as e:
143
- return f"Error during model call: {e}"
144
-
145
- def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120) -> str:
146
- fname = filename or os.path.basename(path)
147
- try:
148
- if hasattr(client, "files") and hasattr(client.files, "upload"):
149
- with open(path, "rb") as fh:
150
- res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
151
- fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
152
- if not fid: fid = res["data"][0]["id"]
153
- return fid
154
- except Exception:
155
- pass
156
- api_key = getattr(client, "api_key", "") or DEFAULT_KEY
157
- url = "https://api.mistral.ai/v1/files"
158
- headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
159
- with open(path, "rb") as fh:
160
- files = {"file": (fname, fh)}; data = {"purpose": purpose}
161
- r = requests.post(url, headers=headers, files=files, data=data, timeout=timeout); r.raise_for_status(); jr = r.json()
162
- return jr.get("id") or jr.get("data", [{}])[0].get("id")
163
-
164
- def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
165
- jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
166
- data_url = b64_bytes(jpeg, mime="image/jpeg")
167
- messages = [{"role": "system", "content": SYSTEM_INSTRUCTION},
168
- {"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": data_url}]}]
169
- return chat_complete(client, PIXTRAL_MODEL, messages)
170
-
171
- def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
172
- try:
173
- file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
174
- extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
175
- messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
176
- return chat_complete(client, VIDEO_MODEL, messages)
177
- except Exception:
178
- frames = extract_best_frames_bytes(video_path, sample_count=6)
179
- if not frames: return "Error: could not upload video and no frames could be extracted."
180
- image_entries = []
181
- for i, fb in enumerate(frames, start=1):
182
- try:
183
- j = convert_to_jpeg_bytes(fb, base_h=720)
184
- image_entries.append({"type": "image_url", "image_url": b64_bytes(j, mime="image/jpeg"), "meta": {"frame_index": i}})
185
- except Exception:
186
- continue
187
- content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
188
- messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
189
- return chat_complete(client, PIXTRAL_MODEL, messages)
190
-
191
- def determine_media_type(src: str) -> Tuple[bool, bool]:
192
- is_image = False; is_video = False
193
- ext = ext_from_src(src)
194
- if ext in IMAGE_EXTS: is_image = True
195
- if ext in VIDEO_EXTS: is_video = True
196
- if is_remote(src):
197
- head = safe_head(src)
198
- if head:
199
- ctype = (head.headers.get("content-type") or "").lower()
200
- if ctype.startswith("image/"): is_image, is_video = True, False
201
- elif ctype.startswith("video/"): is_video, is_image = True, False
202
- return is_image, is_video
203
-
204
- def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
205
- client = get_client(api_key)
206
- prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
207
- if not src: return "No URL or path provided."
208
- progress(0.05, desc="Determining media type")
209
- is_image, is_video = determine_media_type(src)
210
- if is_image:
211
- try:
212
- raw = fetch_bytes(src)
213
- except Exception as e:
214
- return f"Error fetching image: {e}"
215
- progress(0.2, desc="Analyzing image")
216
- try:
217
- return analyze_image_structured(client, raw, prompt)
218
- except UnidentifiedImageError:
219
- return "Error: provided file is not a valid image."
220
- except Exception as e:
221
- return f"Error analyzing image: {e}"
222
- if is_video:
223
- try:
224
- raw = fetch_bytes(src, timeout=120)
225
- except Exception as e:
226
- return f"Error fetching video: {e}"
227
- tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
228
- try:
229
- progress(0.2, desc="Analyzing video")
230
- return analyze_video_cohesive(client, tmp_path, prompt)
231
- finally:
232
- try: os.remove(tmp_path)
233
- except Exception: pass
234
- try:
235
- raw = fetch_bytes(src)
236
- progress(0.2, desc="Treating as image")
237
- return analyze_image_structured(client, raw, prompt)
238
- except Exception as e:
239
- return f"Unable to determine media type or fetch file: {e}"
240
-
241
- # --- Gradio UI
242
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
243
 
244
  def _btn_label_for_status(status: str) -> str:
245
- return {"idle": "Submit", "busy": "Processing…", "done": "Submit", "error": "Retry"}.get(status or "idle", "Submit")
246
 
247
  def create_demo():
248
  with gr.Blocks(title="Flux Multimodal", css=css) as demo:
249
- with gr.Row():
250
- with gr.Column(scale=1):
251
  preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
252
  preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
253
- pip_button = gr.Button("Open Video in PiP", visible=False)
254
- with gr.Column(scale=2):
255
- url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
256
- with gr.Accordion("Prompt (optional)", open=False):
257
- custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
258
- with gr.Accordion("Mistral API Key (optional)", open=False):
259
- api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
 
260
  submit_btn = gr.Button(_btn_label_for_status("idle"))
261
  clear_btn = gr.Button("Clear")
262
- output_md = gr.Markdown("")
263
- status_state = gr.State("idle")
264
-
265
- pip_html = gr.HTML("""<div id="pip-root" style="display:none"></div>
266
- <script>
267
- window.openPiP = (sel) => {
268
- try {
269
- const v = document.querySelector(sel);
270
- if (!v) return "no-video";
271
- if (v.requestPictureInPicture) { v.requestPictureInPicture(); return "opened"; }
272
- return "unsupported";
273
- } catch(e){ return "error:"+e; }
274
- };
275
- </script>""")
276
 
277
  def load_preview(url: str):
278
  empty_img = gr.update(value=None, visible=False)
279
  empty_vid = gr.update(value=None, visible=False)
280
- pip_vis = gr.update(visible=False)
281
- if not url: return empty_img, empty_vid, pip_vis
 
282
  if not is_remote(url) and os.path.exists(url):
283
  ext = ext_from_src(url)
284
- if ext in VIDEO_EXTS: return empty_img, gr.update(value=os.path.abspath(url), visible=True), gr.update(visible=True)
 
285
  if ext in IMAGE_EXTS:
286
  try:
287
  img = Image.open(url)
288
- if getattr(img, "is_animated", False): img.seek(0)
289
- return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
290
- except Exception: return empty_img, empty_vid, pip_vis
 
 
291
  head = safe_head(url)
292
  if head:
293
  ctype = (head.headers.get("content-type") or "").lower()
294
  if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
295
- return empty_img, gr.update(value=url, visible=True), gr.update(visible=True)
 
 
 
 
 
 
 
 
 
296
  try:
297
  r = safe_get(url, timeout=15)
298
  img = Image.open(BytesIO(r.content))
299
- if getattr(img, "is_animated", False): img.seek(0)
300
- return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
301
  except Exception:
302
- return empty_img, empty_vid, pip_vis
 
303
 
304
- url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, pip_button])
305
 
306
  def clear_all():
307
  return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
308
  clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
309
 
310
- def pip_click(_):
311
- js = "<script>setTimeout(()=>window.openPiP('video.preview_media'),50);</script>"
312
- return gr.HTML.update(value=js)
313
- pip_button.click(fn=pip_click, inputs=[url_input], outputs=[pip_html])
314
-
315
  def start_busy():
316
  s = "busy"
317
  return s, gr.update(value=_btn_label_for_status(s))
 
318
  submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
319
 
320
  def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
321
  return process_media(url or "", prompt or "", key or "", progress=progress)
 
322
  submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
323
  fn=lambda res: ("error", "**Error:** no result returned.") if not res else
324
  ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
@@ -333,4 +203,6 @@ window.openPiP = (sel) => {
333
  return demo
334
 
335
  if __name__ == "__main__":
336
- create_demo().launch()
 
 
 
2
  # -*- coding: utf-8 -*-
3
 
4
  from __future__ import annotations
5
+ import os, shutil, subprocess, tempfile, base64
6
  from io import BytesIO
7
  from typing import List, Tuple
8
  import requests
9
  from PIL import Image, ImageFile, UnidentifiedImageError
10
  import gradio as gr
11
 
12
+ # --- CONFIG (keep or set env MISTRAL_API_KEY)
13
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
14
  PIXTRAL_MODEL = "pixtral-12b-2409"
15
  VIDEO_MODEL = "voxtral-mini-latest"
 
18
  IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
19
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
20
 
21
+ SYSTEM_INSTRUCTION = ("You are a clinical visual analyst. Only analyze media actually provided. "
22
+ "Provide factual descriptions; do not invent sensory info.")
 
 
23
 
24
  ImageFile.LOAD_TRUNCATED_IMAGES = True
25
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
 
32
  def get_client(key: str | None = None):
33
  api_key = (key or "").strip() or DEFAULT_KEY
34
  if Mistral is None:
35
+ class Dummy:
36
+ def __init__(self,k): self.api_key=k
37
  return Dummy(api_key)
38
  return Mistral(api_key=api_key)
39
 
40
  def is_remote(src: str) -> bool:
41
+ return bool(src) and src.startswith(("http://","https://"))
42
 
43
  def ext_from_src(src: str) -> str:
44
  if not src: return ""
 
66
  if cl and int(cl) > stream_threshold:
67
  with requests.get(src, timeout=timeout, stream=True) as r:
68
  r.raise_for_status()
69
+ fd, p = tempfile.mkstemp(); os.close(fd)
 
70
  try:
71
+ with open(p,"wb") as fh:
72
  for chunk in r.iter_content(8192):
73
  if chunk: fh.write(chunk)
74
+ with open(p,"rb") as fh: return fh.read()
75
  finally:
76
  try: os.remove(p)
77
  except Exception: pass
 
80
  r = safe_get(src, timeout=timeout)
81
  return r.content
82
  else:
83
+ with open(src,"rb") as f: return f.read()
84
 
85
  def save_bytes_to_temp(b: bytes, suffix: str) -> str:
86
  fd, path = tempfile.mkstemp(suffix=suffix); os.close(fd)
87
+ with open(path,"wb") as f: f.write(b)
88
  return path
89
 
90
  def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
91
  img = Image.open(BytesIO(img_bytes))
92
  try:
93
+ if getattr(img,"is_animated",False): img.seek(0)
94
  except Exception: pass
95
  if img.mode != "RGB": img = img.convert("RGB")
96
+ h = base_h; w = max(1, int(img.width * (h / img.height)))
97
+ img = img.resize((w,h), Image.LANCZOS)
 
98
  buf = BytesIO(); img.save(buf, format="JPEG", quality=85)
99
  return buf.getvalue()
100
 
101
  def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
102
  return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
103
 
104
+ # --- model wrappers (chat_complete, upload_file_to_mistral, analyze_image_structured, analyze_video_cohesive)
105
+ # Keep your existing implementations here unchanged (omitted in this snippet for brevity).
106
+ # Insert the exact helper implementations from your prior file for chat_complete, upload_file_to_mistral,
107
+ # analyze_image_structured, analyze_video_cohesive, extract_best_frames_bytes, determine_media_type, process_media.
108
+ # (To run, paste the helper functions you already have above this UI block.)
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ # --- UI ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
112
 
113
  def _btn_label_for_status(status: str) -> str:
114
+ return {"idle":"Submit","busy":"Processing…","done":"Submit","error":"Retry"}.get(status or "idle","Submit")
115
 
116
  def create_demo():
117
  with gr.Blocks(title="Flux Multimodal", css=css) as demo:
118
+ with gr.Column():
119
+ with gr.Row():
120
  preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
121
  preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
122
+ url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
123
+ with gr.Accordion("Prompt (optional)", open=False):
124
+ custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
125
+ with gr.Accordion("Mistral API Key (optional)", open=False):
126
+ api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
127
+
128
+ # buttons on same row
129
+ with gr.Row():
130
  submit_btn = gr.Button(_btn_label_for_status("idle"))
131
  clear_btn = gr.Button("Clear")
132
+ output_md = gr.Markdown("")
133
+ status_state = gr.State("idle")
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  def load_preview(url: str):
136
  empty_img = gr.update(value=None, visible=False)
137
  empty_vid = gr.update(value=None, visible=False)
138
+ if not url:
139
+ return empty_img, empty_vid
140
+ # local files
141
  if not is_remote(url) and os.path.exists(url):
142
  ext = ext_from_src(url)
143
+ if ext in VIDEO_EXTS:
144
+ return empty_img, gr.update(value=os.path.abspath(url), visible=True)
145
  if ext in IMAGE_EXTS:
146
  try:
147
  img = Image.open(url)
148
+ if getattr(img,"is_animated",False): img.seek(0)
149
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid
150
+ except Exception:
151
+ return empty_img, empty_vid
152
+ # remote: header-based check
153
  head = safe_head(url)
154
  if head:
155
  ctype = (head.headers.get("content-type") or "").lower()
156
  if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
157
+ return empty_img, gr.update(value=url, visible=True)
158
+ if ctype.startswith("image/") or any(url.lower().endswith(ext) for ext in IMAGE_EXTS):
159
+ try:
160
+ r = safe_get(url, timeout=15)
161
+ img = Image.open(BytesIO(r.content))
162
+ if getattr(img,"is_animated",False): img.seek(0)
163
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid
164
+ except Exception:
165
+ return empty_img, empty_vid
166
+ # fallback: try GET as image, otherwise treat as video URL
167
  try:
168
  r = safe_get(url, timeout=15)
169
  img = Image.open(BytesIO(r.content))
170
+ if getattr(img,"is_animated",False): img.seek(0)
171
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid
172
  except Exception:
173
+ # image fetch/decode failed: hand the URL to the video preview as a last resort (no extension or header check is made here)
174
+ return empty_img, gr.update(value=url, visible=True)
175
 
176
+ url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
177
 
178
  def clear_all():
179
  return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
180
  clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
181
 
182
+ # start_busy returns exactly two outputs: (status_state, submit_btn)
 
 
 
 
183
  def start_busy():
184
  s = "busy"
185
  return s, gr.update(value=_btn_label_for_status(s))
186
+
187
  submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
188
 
189
  def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
190
  return process_media(url or "", prompt or "", key or "", progress=progress)
191
+
192
  submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
193
  fn=lambda res: ("error", "**Error:** no result returned.") if not res else
194
  ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
 
203
  return demo
204
 
205
  if __name__ == "__main__":
206
+ # Ensure you pasted your helper/model functions above
207
+ demo = create_demo()
208
+ demo.launch()