Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on Dec 18, 2025

Commit

02d7acf

verified ·

1 Parent(s): 1a185c8

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -83

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-from __future__ import annotations
 import os, shutil, subprocess, tempfile, base64, json
 from io import BytesIO
 from typing import List, Tuple
@@ -9,7 +8,6 @@ import requests
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
-# --- Config
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
@@ -75,8 +73,10 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
                         try:
                             with open(p, "wb") as fh:
                                 for chunk in r.iter_content(8192):
-                                    if chunk: fh.write(chunk)
-                            with open(p, "rb") as fh: return fh.read()
                         finally:
                             try: os.remove(p)
                             except Exception: pass
@@ -85,23 +85,30 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
         r = safe_get(src, timeout=timeout)
         return r.content
     else:
-        with open(src, "rb") as f: return f.read()
 def save_bytes_to_temp(b: bytes, suffix: str) -> str:
-    fd, path = tempfile.mkstemp(suffix=suffix); os.close(fd)
-    with open(path, "wb") as f: f.write(b)
     return path
 def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
     img = Image.open(BytesIO(img_bytes))
     try:
-        if getattr(img, "is_animated", False): img.seek(0)
-    except Exception: pass
-    if img.mode != "RGB": img = img.convert("RGB")
     h = base_h
     w = max(1, int(img.width * (h / img.height)))
     img = img.resize((w, h), Image.LANCZOS)
-    buf = BytesIO(); img.save(buf, format="JPEG", quality=85)
     return buf.getvalue()
 def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
@@ -109,20 +116,43 @@ def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
 def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
     frames: List[bytes] = []
-    if not FFMPEG_BIN or not os.path.exists(media_path): return frames
     timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
     for i, t in enumerate(timestamps):
-        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg"); os.close(fd)
-        cmd = [FFMPEG_BIN, "-nostdin", "-y", "-ss", str(t), "-i", media_path, "-frames:v", "1", "-q:v", "2", tmp]
         try:
-            subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
-                with open(tmp, "rb") as f: frames.append(f.read())
         except Exception:
             pass
         finally:
-            try: os.remove(tmp)
-            except Exception: pass
     return frames
 def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
@@ -132,14 +162,33 @@ def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
         else:
             api_key = getattr(client, "api_key", "") or DEFAULT_KEY
             url = "https://api.mistral.ai/v1/chat/completions"
-            headers = ({"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if api_key else {"Content-Type": "application/json"})
-            r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
-            r.raise_for_status(); res = r.json()
         choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
-        if not choices: return str(res)
         first = choices[0]
-        msg = first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first)
-        content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
         return content.strip() if isinstance(content, str) else str(content)
     except Exception as e:
         return f"Error during model call: {e}"
@@ -151,7 +200,8 @@ def upload_file_to_mistral(client, path: str, filename: str | None = None, purpo
             with open(path, "rb") as fh:
                 res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
             fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
-            if not fid: fid = res["data"][0]["id"]
             return fid
     except Exception:
         pass
@@ -159,56 +209,95 @@ def upload_file_to_mistral(client, path: str, filename: str | None = None, purpo
     url = "https://api.mistral.ai/v1/files"
     headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
     with open(path, "rb") as fh:
-        files = {"file": (fname, fh)}; data = {"purpose": purpose}
-        r = requests.post(url, headers=headers, files=files, data=data, timeout=timeout); r.raise_for_status(); jr = r.json()
         return jr.get("id") or jr.get("data", [{}])[0].get("id")
 def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
     jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
     data_url = b64_bytes(jpeg, mime="image/jpeg")
-    messages = [{"role": "system", "content": SYSTEM_INSTRUCTION},
-                {"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": data_url}]}]
     return chat_complete(client, PIXTRAL_MODEL, messages)
 def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
     try:
         file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
-        extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
-        messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
         return chat_complete(client, VIDEO_MODEL, messages)
     except Exception:
         frames = extract_best_frames_bytes(video_path, sample_count=6)
-        if not frames: return "Error: could not upload video and no frames could be extracted."
         image_entries = []
         for i, fb in enumerate(frames, start=1):
             try:
                 j = convert_to_jpeg_bytes(fb, base_h=720)
-                image_entries.append({"type": "image_url", "image_url": b64_bytes(j, mime="image/jpeg"), "meta": {"frame_index": i}})
             except Exception:
                 continue
-        content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
-        messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
         return chat_complete(client, PIXTRAL_MODEL, messages)
-def determine_media_type(src: str) -> Tuple[bool, bool]:
-    is_image = False; is_video = False
-    ext = ext_from_src(src)
-    if ext in IMAGE_EXTS: is_image = True
-    if ext in VIDEO_EXTS: is_video = True
-    if is_remote(src):
-        head = safe_head(src)
-        if head:
-            ctype = (head.headers.get("content-type") or "").lower()
-            if ctype.startswith("image/"): is_image, is_video = True, False
-            elif ctype.startswith("video/"): is_video, is_image = True, False
-    return is_image, is_video
 def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
     client = get_client(api_key)
     prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
-    if not src: return "No URL or path provided."
     progress(0.05, desc="Determining media type")
     is_image, is_video = determine_media_type(src)
     if is_image:
         try:
             raw = fetch_bytes(src)
@@ -221,6 +310,7 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progre
             return "Error: provided file is not a valid image."
         except Exception as e:
             return f"Error analyzing image: {e}"
     if is_video:
         try:
             raw = fetch_bytes(src, timeout=120)
@@ -231,8 +321,12 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progre
             progress(0.2, desc="Analyzing video")
             return analyze_video_cohesive(client, tmp_path, prompt)
         finally:
-            try: os.remove(tmp_path)
-            except Exception: pass
     try:
         raw = fetch_bytes(src)
         progress(0.2, desc="Treating as image")
@@ -240,11 +334,12 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progre
     except Exception as e:
         return f"Unable to determine media type or fetch file: {e}"
-# --- Gradio UI
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
-    return {"idle": "Submit", "busy": "Processing…", "done": "Submit", "error": "Retry"}.get(status or "idle", "Submit")
 def create_demo():
     with gr.Blocks(title="Flux Multimodal", css=css) as demo:
@@ -252,44 +347,35 @@ def create_demo():
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
-                pip_button = gr.Button("Open Video in PiP", visible=False)
             with gr.Column(scale=2):
                 url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
                 with gr.Accordion("Prompt (optional)", open=False):
                     custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
                 with gr.Accordion("Mistral API Key (optional)", open=False):
                     api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
-                submit_btn = gr.Button(_btn_label_for_status("idle"))
-                clear_btn = gr.Button("Clear")
                 output_md = gr.Markdown("")
                 status_state = gr.State("idle")
-        pip_html = gr.HTML("""<div id="pip-root" style="display:none"></div>
-<script>
-window.openPiP = (sel) => {
-  try {
-    const v = document.querySelector(sel);
-    if (!v) return "no-video";
-    if (v.requestPictureInPicture) { v.requestPictureInPicture(); return "opened"; }
-    return "unsupported";
-  } catch(e){ return "error:"+e; }
-};
-</script>""")
         def load_preview(url: str):
             empty_img = gr.update(value=None, visible=False)
             empty_vid = gr.update(value=None, visible=False)
-            pip_vis = gr.update(visible=False)
-            if not url: return empty_img, empty_vid, pip_vis
             if not is_remote(url) and os.path.exists(url):
                 ext = ext_from_src(url)
-                if ext in VIDEO_EXTS: return empty_img, gr.update(value=os.path.abspath(url), visible=True), gr.update(visible=True)
                 if ext in IMAGE_EXTS:
                     try:
                         img = Image.open(url)
-                        if getattr(img, "is_animated", False): img.seek(0)
-                        return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
-                    except Exception: return empty_img, empty_vid, pip_vis
             head = safe_head(url)
             if head:
                 ctype = (head.headers.get("content-type") or "").lower()
@@ -298,22 +384,19 @@ window.openPiP = (sel) => {
             try:
                 r = safe_get(url, timeout=15)
                 img = Image.open(BytesIO(r.content))
-                if getattr(img, "is_animated", False): img.seek(0)
-                return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
             except Exception:
-                return empty_img, empty_vid, pip_vis
-        url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, pip_button])
         def clear_all():
             return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
         clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
-        def pip_click(_):
-            js = "<script>setTimeout(()=>window.openPiP('video.preview_media'),50);</script>"
-            return gr.HTML.update(value=js)
-        pip_button.click(fn=pip_click, inputs=[url_input], outputs=[pip_html])
         def start_busy():
             s = "busy"
             return s, gr.update(value=_btn_label_for_status(s))
@@ -321,7 +404,9 @@ window.openPiP = (sel) => {
         def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             return process_media(url or "", prompt or "", key or "", progress=progress)
-        submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
             fn=lambda res: ("error", "**Error:** no result returned.") if not res else
                            ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
             inputs=[output_md],

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import os, shutil, subprocess, tempfile, base64, json
 from io import BytesIO
 from typing import List, Tuple
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
                         try:
                             with open(p, "wb") as fh:
                                 for chunk in r.iter_content(8192):
+                                    if chunk:
+                                        fh.write(chunk)
+                            with open(p, "rb") as fh:
+                                return fh.read()
                         finally:
                             try: os.remove(p)
                             except Exception: pass
         r = safe_get(src, timeout=timeout)
         return r.content
     else:
+        with open(src, "rb") as f:
+            return f.read()
 def save_bytes_to_temp(b: bytes, suffix: str) -> str:
+    fd, path = tempfile.mkstemp(suffix=suffix)
+    os.close(fd)
+    with open(path, "wb") as f:
+        f.write(b)
     return path
 def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
     img = Image.open(BytesIO(img_bytes))
     try:
+        if getattr(img, "is_animated", False):
+            img.seek(0)
+    except Exception:
+        pass
+    if img.mode != "RGB":
+        img = img.convert("RGB")
     h = base_h
     w = max(1, int(img.width * (h / img.height)))
     img = img.resize((w, h), Image.LANCZOS)
+    buf = BytesIO()
+    img.save(buf, format="JPEG", quality=85)
     return buf.getvalue()
 def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
 def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
     frames: List[bytes] = []
+    if not FFMPEG_BIN or not os.path.exists(media_path):
+        return frames
     timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
     for i, t in enumerate(timestamps):
+        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
+        os.close(fd)
+        cmd = [
+            FFMPEG_BIN,
+            "-nostdin",
+            "-y",
+            "-ss",
+            str(t),
+            "-i",
+            media_path,
+            "-frames:v",
+            "1",
+            "-q:v",
+            "2",
+            tmp,
+        ]
         try:
+            subprocess.run(
+                cmd,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                timeout=timeout_extract,
+            )
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
+                with open(tmp, "rb") as f:
+                    frames.append(f.read())
         except Exception:
             pass
         finally:
+            try:
+                os.remove(tmp)
+            except Exception:
+                pass
     return frames
 def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
         else:
             api_key = getattr(client, "api_key", "") or DEFAULT_KEY
             url = "https://api.mistral.ai/v1/chat/completions"
+            headers = (
+                {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+                if api_key
+                else {"Content-Type": "application/json"}
+            )
+            r = requests.post(
+                url,
+                json={"model": model, "messages": messages},
+                headers=headers,
+                timeout=timeout,
+            )
+            r.raise_for_status()
+            res = r.json()
         choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
+        if not choices:
+            return str(res)
         first = choices[0]
+        msg = (
+            first.message
+            if hasattr(first, "message")
+            else (first.get("message") if isinstance(first, dict) else first)
+        )
+        content = (
+            msg.get("content")
+            if isinstance(msg, dict)
+            else getattr(msg, "content", None)
+        )
         return content.strip() if isinstance(content, str) else str(content)
     except Exception as e:
         return f"Error during model call: {e}"
             with open(path, "rb") as fh:
                 res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
             fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
+            if not fid:
+                fid = res["data"][0]["id"]
             return fid
     except Exception:
         pass
     url = "https://api.mistral.ai/v1/files"
     headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
     with open(path, "rb") as fh:
+        files = {"file": (fname, fh)}
+        data = {"purpose": purpose}
+        r = requests.post(url, headers=headers, files=files, data=data, timeout=timeout)
+        r.raise_for_status()
+        jr = r.json()
         return jr.get("id") or jr.get("data", [{}])[0].get("id")
def determine_media_type(src: str) -> Tuple[bool, bool]:
    """Classify ``src`` as image and/or video.

    The file extension gives the initial guess.  For remote sources, a
    truthy ``safe_head`` response whose content type starts with ``image/``
    or ``video/`` replaces that guess.

    Args:
        src: URL or local path of the media.

    Returns:
        ``(is_image, is_video)``.
    """
    ext = ext_from_src(src)
    by_ext = (ext in IMAGE_EXTS, ext in VIDEO_EXTS)

    if not is_remote(src):
        return by_ext

    head = safe_head(src)
    if not head:
        return by_ext

    ctype = (head.headers.get("content-type") or "").lower()
    if ctype.startswith("image/"):
        return True, False
    if ctype.startswith("video/"):
        return False, True
    return by_ext
 def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
     jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
     data_url = b64_bytes(jpeg, mime="image/jpeg")
+    messages = [
+        {"role": "system", "content": SYSTEM_INSTRUCTION},
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": data_url},
+            ],
+        },
+    ]
     return chat_complete(client, PIXTRAL_MODEL, messages)
 def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
     try:
         file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
+        extra_msg = (
+            f"Uploaded video file id: {file_id}\n\n"
+            "Instruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
+        )
+        messages = [
+            {"role": "system", "content": SYSTEM_INSTRUCTION},
+            {"role": "user", "content": extra_msg + "\n\n" + prompt},
+        ]
         return chat_complete(client, VIDEO_MODEL, messages)
     except Exception:
         frames = extract_best_frames_bytes(video_path, sample_count=6)
+        if not frames:
+            return "Error: could not upload video and no frames could be extracted."
         image_entries = []
         for i, fb in enumerate(frames, start=1):
             try:
                 j = convert_to_jpeg_bytes(fb, base_h=720)
+                image_entries.append(
+                    {
+                        "type": "image_url",
+                        "image_url": b64_bytes(j, mime="image/jpeg"),
+                        "meta": {"frame_index": i},
+                    }
+                )
             except Exception:
                 continue
+        content = [
+            {"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}
+        ] + image_entries
+        messages = [
+            {"role": "system", "content": SYSTEM_INSTRUCTION},
+            {"role": "user", "content": content},
+        ]
         return chat_complete(client, PIXTRAL_MODEL, messages)
 def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
     client = get_client(api_key)
     prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
+    if not src:
+        return "No URL or path provided."
     progress(0.05, desc="Determining media type")
     is_image, is_video = determine_media_type(src)
     if is_image:
         try:
             raw = fetch_bytes(src)
             return "Error: provided file is not a valid image."
         except Exception as e:
             return f"Error analyzing image: {e}"
     if is_video:
         try:
             raw = fetch_bytes(src, timeout=120)
             progress(0.2, desc="Analyzing video")
             return analyze_video_cohesive(client, tmp_path, prompt)
         finally:
+            try:
+                os.remove(tmp_path)
+            except Exception:
+                pass
+    # Fallback: treat as image
     try:
         raw = fetch_bytes(src)
         progress(0.2, desc="Treating as image")
     except Exception as e:
         return f"Unable to determine media type or fetch file: {e}"
+# ------------------- Gradio UI -------------------
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
+    return {"idle": "Submit", "busy": "Processing…", "done": "Submit", "error": "Retry"}.get(status, "Submit")
 def create_demo():
     with gr.Blocks(title="Flux Multimodal", css=css) as demo:
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
             with gr.Column(scale=2):
                 url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
                 with gr.Accordion("Prompt (optional)", open=False):
                     custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
                 with gr.Accordion("Mistral API Key (optional)", open=False):
                     api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
+                with gr.Row():
+                    submit_btn = gr.Button(_btn_label_for_status("idle"))
+                    clear_btn = gr.Button("Clear")
                 output_md = gr.Markdown("")
                 status_state = gr.State("idle")
         def load_preview(url: str):
             empty_img = gr.update(value=None, visible=False)
             empty_vid = gr.update(value=None, visible=False)
+            if not url:
+                return empty_img, empty_vid, gr.update(visible=False)
             if not is_remote(url) and os.path.exists(url):
                 ext = ext_from_src(url)
+                if ext in VIDEO_EXTS:
+                    return empty_img, gr.update(value=os.path.abspath(url), visible=True), gr.update(visible=True)
                 if ext in IMAGE_EXTS:
                     try:
                         img = Image.open(url)
+                        if getattr(img, "is_animated", False):
+                            img.seek(0)
+                        return gr.update(value=img.convert("RGB"), visible=True), empty_vid, gr.update(visible=False)
+                    except Exception:
+                        return empty_img, empty_vid, gr.update(visible=False)
             head = safe_head(url)
             if head:
                 ctype = (head.headers.get("content-type") or "").lower()
             try:
                 r = safe_get(url, timeout=15)
                 img = Image.open(BytesIO(r.content))
+                if getattr(img, "is_animated", False):
+                    img.seek(0)
+                return gr.update(value=img.convert("RGB"), visible=True), empty_vid, gr.update(visible=False)
             except Exception:
+                return empty_img, empty_vid, gr.update(visible=False)
+        url_input.change(fn=load_preview, inputs=[url_input],
+                         outputs=[preview_image, preview_video, preview_video])
         def clear_all():
             return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
         clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
         def start_busy():
             s = "busy"
             return s, gr.update(value=_btn_label_for_status(s))
         def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             return process_media(url or "", prompt or "", key or "", progress=progress)
+        submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key],
+                        outputs=[output_md], queue=True).then(
             fn=lambda res: ("error", "**Error:** no result returned.") if not res else
                            ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
             inputs=[output_md],