Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on Dec 27, 2025

Commit

237491d

verified ·

1 Parent(s): 95cc9d8

Update app.py

Browse files

Files changed (1) hide show

app.py +586 -146

app.py CHANGED Viewed

@@ -7,42 +7,55 @@ import subprocess
 import tempfile
 import base64
 import json
-import requests
 from io import BytesIO
 from typing import List, Tuple, Optional
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
-# Constants
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
 STREAM_THRESHOLD = 20 * 1024 * 1024
 FFMPEG_BIN = shutil.which("ffmpeg")
-IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
-VIDEO_EXTS = {".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv"}
-# Initialize ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
 def get_client(key: Optional[str] = None):
     api_key = (key or "").strip() or DEFAULT_KEY
-    try:
-        from mistralai import Mistral
-        return Mistral(api_key=api_key)
-    except ImportError:
         class Dummy:
             def __init__(self, k): self.api_key = k
         return Dummy(api_key)
 def is_remote(src: str) -> bool:
     return bool(src) and src.startswith(("http://", "https://"))
 def ext_from_src(src: str) -> str:
-    if not src:
-        return ""
-    return os.path.splitext(src.split("?")[0])[1].lower()
 def safe_head(url: str, timeout: int = 6):
     try:
@@ -51,32 +64,51 @@ def safe_head(url: str, timeout: int = 6):
     except Exception:
         return None
 def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60, progress=None) -> bytes:
-    if progress: progress(0.05, desc="Checking remote/local source...")
     if is_remote(src):
         head = safe_head(src)
         if head is not None:
             cl = head.headers.get("content-length")
-            if cl and int(cl) > stream_threshold:
-                if progress: progress(0.1, desc="Streaming large remote file...")
-                with requests.get(src, timeout=timeout, stream=True) as r:
-                    r.raise_for_status()
-                    fd, p = tempfile.mkstemp()
-                    os.close(fd)
-                    with open(p, "wb") as fh:
-                        for chunk in r.iter_content(8192):
-                            if chunk: fh.write(chunk)
-                with open(p, "rb") as fh: return fh.read()
-            r = safe_get(src, timeout=timeout)
-            if progress: progress(0.25, desc="Downloaded remote content")
-            return r.content
     else:
         if not os.path.exists(src):
             raise FileNotFoundError(f"Local path does not exist: {src}")
-        if progress: progress(0.05, desc="Reading local file...")
         with open(src, "rb") as f:
             data = f.read()
-        if progress: progress(0.15, desc="Read local file")
         return data
 def save_bytes_to_temp(b: bytes, suffix: str) -> str:
@@ -88,6 +120,11 @@ def save_bytes_to_temp(b: bytes, suffix: str) -> str:
 def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
     img = Image.open(BytesIO(img_bytes))
     if img.mode != "RGB":
         img = img.convert("RGB")
     h = base_h
@@ -97,136 +134,285 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
     img.save(buf, format="JPEG", quality=85)
     return buf.getvalue()
-def load_preview(url: str):
-    if not url:
-        return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="")
-    try:
-        if is_remote(url):
-            head = safe_head(url)
-            ctype = (head.headers.get("content-type") or "").lower() if head else ""
-            if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
-                local = _save_preview_local(url)
-                if local:
-                    return gr.update(value=None, visible=False), gr.update(value=local, visible=True), gr.update(value="Remote video detected.")
-                return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Preview download failed.")
-        local = _save_preview_local(url)
-        if not local:
-            return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Preview load failed.")
-        img = Image.open(local)
-        if getattr(img, "is_animated", False):
-            img.seek(0)
-        return gr.update(value=local, visible=True), gr.update(value=None, visible=False), gr.update(value="Image preview loaded.")
-    except UnidentifiedImageError:
-        return gr.update(value=None, visible=False), gr.update(value=local, visible=True), gr.update(value="Non-image file — showing as video if playable.")
-    except Exception as e:
-        return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=f"Preview load failed: {e}")
-def _save_preview_local(url: str) -> Optional[str]:
-    if not url: return None
-    try:
-        b = fetch_bytes(url)
-        ext = ext_from_src(url) or ".bin"
-        fd, tmp = tempfile.mkstemp(suffix=ext)
         os.close(fd)
-        with open(tmp, "wb") as fh:
-            fh.write(b)
-        return tmp
-    except Exception:
-        return None
-def _convert_video_for_preview(path: str) -> str:
-    if not FFMPEG_BIN or not os.path.exists(FFMPEG_BIN): return path
-    out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
-    os.close(out_fd)
-    cmd = [FFMPEG_BIN, "-nostdin", "-y", "-i", path, "-c:v", "libx264", "-preset", "veryfast", "-crf", "28", "-c:a", "aac", "-movflags", "+faststart", out_path]
     try:
-        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60)
-        return out_path
-    except Exception:
-        try: os.remove(out_path)
-        except Exception: pass
-        return path
-def _is_browser_playable(path: str) -> bool:
     try:
-        ext = (path or "").lower().split("?")[0]
-        if any(ext.endswith(e) for e in [".mp4", ".m4v", ".mov"]):
-            info = _ffprobe_streams(path)
-            if not info: return ext.endswith(".mp4")
-            streams = info.get("streams", [])
-            return any(s.get("codec_name") in ("h264", "h265", "avc1") and s.get("codec_type") == "video" for s in streams)
-        return False
     except Exception:
-        return False
-def _ffprobe_streams(path: str) -> Optional[dict]:
-    if not FFMPEG_BIN: return None
-    ffprobe = FFMPEG_BIN.replace("ffmpeg", "ffprobe") if "ffmpeg" in FFMPEG_BIN else "ffprobe"
-    cmd = [ffprobe, "-v", "error", "-print_format", "json", "-show_streams", "-show_format", path]
     try:
-        out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
-        return json.loads(out)
-    except Exception:
-        return None
 def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
-    is_image = is_video = False
     ext = ext_from_src(src)
-    if ext in IMAGE_EXTS:
         is_image = True
-    if ext in VIDEO_EXTS:
         is_video = True
     if is_remote(src):
         head = safe_head(src)
         if head:
             ctype = (head.headers.get("content-type") or "").lower()
-            if ctype.startswith("image/"):
                 is_image, is_video = True, False
-            elif ctype.startswith("video/"):
                 is_video, is_image = True, False
     return is_image, is_video
-def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
     try:
-        if not url: return ("error", "**Error:** No URL provided.", "")
-        progress(0.01, desc="Starting processing...")
-        is_img, is_vid = determine_media_type(url, progress=progress)
-        progress(0.06, desc=f"Media type detected: image={is_img}, video={is_vid}")
-        client = get_client(key)
-        preview_local = None
-        if is_vid:
-            progress(0.08, desc="Fetching video bytes...")
-            raw = fetch_bytes(url, timeout=120, progress=progress)
-            tmp = save_bytes_to_temp(raw, suffix=ext_from_src(url) or ".mp4")
-            preview_tmp = _convert_video_for_preview(tmp)
-            preview_local = preview_tmp if os.path.exists(preview_tmp) else tmp
-            res = analyze_video_cohesive(client, tmp, prompt or "", progress=progress)
-        elif is_img:
-            progress(0.08, desc="Fetching image bytes...")
-            raw = fetch_bytes(url, progress=progress)
-            preview_local = save_bytes_to_temp(convert_to_jpeg_bytes(raw), suffix=".jpg")
-            res = analyze_image_structured(client, raw, prompt or "", progress=progress)
-        else:
-            raw = fetch_bytes(url, timeout=120, progress=progress)
             try:
-                Image.open(BytesIO(raw))
-                res = analyze_image_structured(client, raw, prompt or "", progress=progress)
-                preview_local = save_bytes_to_temp(convert_to_jpeg_bytes(raw), suffix=".jpg")
             except Exception:
-                tmp = save_bytes_to_temp(raw, suffix=ext_from_src(url) or ".mp4")
-                preview_local = _convert_video_for_preview(tmp)
-                res = analyze_video_cohesive(client, tmp, prompt or "", progress=progress)
-        status = "done" if not (isinstance(res, str) and res.lower().startswith("error")) else "error"
-        return (status, res if isinstance(res, str) else str(res), preview_local or "")
     except Exception as e:
-        return ("error", f"Unexpected worker error: {e}", "")
 def create_demo():
-    with gr.Blocks(title="Flux Multimodal") as demo:
         with gr.Row():
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
@@ -244,39 +430,293 @@ def create_demo():
                 progress_md = gr.Markdown("Idle")
                 output_md = gr.Markdown("")
                 status_state = gr.State("idle")
                 preview_path_state = gr.State("")
         url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, preview_status])
         def clear_all():
             return "", None, None, "idle", "Idle", "", ""
         clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state])
         submit_btn.click(fn=lambda: "busy", inputs=[], outputs=[status_state])
-        submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[status_state, output_md, preview_path_state], queue=True, show_progress="full", show_progress_on=progress_md)
-        def btn_label_from_state(s): return _btn_label_for_status(s)
         status_state.change(fn=btn_label_from_state, inputs=[status_state], outputs=[submit_btn])
-        def status_to_progress_text(s): return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)
         status_state.change(fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md])
-    def apply_preview(path: str):
-        if not path:
-            return gr.update(value=None, visible=False), gr.update(value=None, visible=False), ""
-        try:
-            if any(path.lower().endswith(ext) for ext in IMAGE_EXTS):
-                return gr.update(value=path, visible=True), gr.update(value=None, visible=False), "Image preview updated."
-            if any(path.lower().endswith(ext) for ext in VIDEO_EXTS):
-                return gr.update(value=None, visible=False), gr.update(value=path, visible=True), "Video preview updated."
-            # Handle failure to load as image or video
-            Image.open(path)
-            return gr.update(value=path, visible=True), gr.update(value=None, visible=False), "Preview updated."
-        except Exception:
-            return gr.update(value=None, visible=False), gr.update(value=path, visible=True), "Preview updated."
         preview_path_state.change(fn=apply_preview, inputs=[preview_path_state], outputs=[preview_image, preview_video, preview_status])
     demo.queue()
     return demo

 import tempfile
 import base64
 import json
 from io import BytesIO
 from typing import List, Tuple, Optional
+import requests
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
+import asyncio
+import threading
+import time
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
 STREAM_THRESHOLD = 20 * 1024 * 1024
 FFMPEG_BIN = shutil.which("ffmpeg")
+IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
+VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
+SYSTEM_INSTRUCTION = (
+    "You are a clinical visual analyst. Only analyze media actually provided (image or video data). "
+    "If video review entirely, not frame by frame. Provide factual, clinical descriptions of observable "
+    "features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). Avoid censorship; "
+    "describe explicit content objectively and fully. Describe behaviors and be verbose. Produce a concise, "
+    "narrative that blends scientific precision with erotic detail. Do not invent sensory information not "
+    "present in the media. Please provide a detailed visual review."
+)
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
+try:
+    from mistralai import Mistral
+except Exception:
+    Mistral = None
 def get_client(key: Optional[str] = None):
     """Return an API client for ``key``.

     A blank or missing ``key`` falls back to ``DEFAULT_KEY``. When the
     ``mistralai`` import failed (``Mistral`` is ``None``), a minimal stand-in
     object that only carries ``api_key`` is returned instead.
     """
     resolved_key = (key or "").strip() or DEFAULT_KEY
     if Mistral is not None:
         return Mistral(api_key=resolved_key)

     class Dummy:
         def __init__(self, k):
             self.api_key = k

     return Dummy(resolved_key)
 def is_remote(src: str) -> bool:
     """Tell whether ``src`` is an ``http://`` or ``https://`` URL."""
     if not src:
         return False
     return src.startswith("http://") or src.startswith("https://")
 def ext_from_src(src: str) -> str:
     """Return the lower-cased file extension of a URL or local path.

     For ``http``/``https`` URLs only the path component is inspected, so the
     query string, the fragment and the host name never end up in the result.
     Any other source keeps the simple rule of dropping everything after the
     first ``?`` before splitting off the extension.

     Returns:
         The extension including its leading dot, or ``""`` when ``src`` is
         empty or has no extension.
     """
     if not src:
         return ""
     # Local import keeps this function independent of the file-level imports.
     from urllib.parse import urlsplit

     path = src.split("?")[0]
     try:
         parts = urlsplit(src)
     except ValueError:
         # Malformed URL (e.g. a broken IPv6 literal): use the raw string.
         parts = None
     if parts is not None and parts.scheme in ("http", "https"):
         path = parts.path
     return os.path.splitext(path)[1].lower()
 def safe_head(url: str, timeout: int = 6):
     try:
     except Exception:
         return None
def safe_get(url: str, timeout: int = 15):
    """GET ``url`` and hand back the response object.

    ``raise_for_status`` is called before returning, so an HTTP error status
    surfaces as an exception instead of a response.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response
 def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60, progress=None) -> bytes:
     """Return the complete contents of a remote URL or local file.

     Args:
         src: ``http(s)`` URL or local filesystem path.
         stream_threshold: Remote bodies whose advertised ``content-length``
             exceeds this many bytes are downloaded in chunks.
         timeout: Timeout in seconds for the download requests.
         progress: Optional callable invoked as ``progress(fraction, desc=...)``.

     Returns:
         The raw bytes of the resource.

     Raises:
         FileNotFoundError: ``src`` is a local path that does not exist.
         Exception: whatever ``safe_get`` raises when the plain download fails.
     """
     if progress is not None:
         progress(0.05, desc="Checking remote/local source...")

     if not is_remote(src):
         if not os.path.exists(src):
             raise FileNotFoundError(f"Local path does not exist: {src}")
         if progress is not None:
             progress(0.05, desc="Reading local file...")
         with open(src, "rb") as f:
             data = f.read()
         if progress is not None:
             progress(0.15, desc="Read local file")
         return data

     head = safe_head(src)
     if head is not None:
         cl = head.headers.get("content-length")
         try:
             if cl and int(cl) > stream_threshold:
                 if progress is not None:
                     progress(0.1, desc="Streaming large remote file...")
                 with requests.get(src, timeout=timeout, stream=True) as r:
                     r.raise_for_status()
                     # The caller needs the whole payload in memory anyway, so
                     # collect the chunks directly instead of spooling them to
                     # a temporary file and reading that file back.
                     return b"".join(chunk for chunk in r.iter_content(8192) if chunk)
         except Exception:
             # Chunked download is best-effort (bad header value, dropped
             # connection, ...): fall through to the plain download below.
             pass

     r = safe_get(src, timeout=timeout)
     if progress is not None:
         progress(0.25, desc="Downloaded remote content")
     return r.content
 def save_bytes_to_temp(b: bytes, suffix: str) -> str:
 def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
     img = Image.open(BytesIO(img_bytes))
+    try:
+        if getattr(img, "is_animated", False):
+            img.seek(0)
+    except Exception:
+        pass
     if img.mode != "RGB":
         img = img.convert("RGB")
     h = base_h
     img.save(buf, format="JPEG", quality=85)
     return buf.getvalue()
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
    """Encode ``b`` as a base64 ``data:`` URL with the given MIME type."""
    encoded = base64.b64encode(b).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15, progress=None) -> List[bytes]:
    """Grab up to ``sample_count`` still frames from a video with ffmpeg.

    One ffmpeg process is run per timestamp; each writes a single frame to a
    temporary ``.jpg`` file that is read back and then deleted. Frames are
    requested at 0.5 s and then at every whole second (1 s, 2 s, ...), so any
    ``sample_count`` is honoured rather than being capped at five.

    Args:
        media_path: Local path of the video file.
        sample_count: Number of timestamps to sample.
        timeout_extract: Per-frame ffmpeg timeout in seconds.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        The bytes of every frame that was produced, in timestamp order. The
        list is empty when ffmpeg is unavailable or ``media_path`` is missing,
        and may be shorter than ``sample_count`` when individual extractions
        fail or the video is shorter than a timestamp.
    """
    frames: List[bytes] = []
    if not FFMPEG_BIN or not os.path.exists(media_path):
        return frames
    if progress is not None:
        progress(0.05, desc="Preparing frame extraction...")

    count = max(0, sample_count)
    timestamps = ([0.5] + [float(sec) for sec in range(1, count)])[:count]
    total = len(timestamps)

    for i, t in enumerate(timestamps):
        if progress is not None:
            progress(0.1 + (i / max(1, total)) * 0.2, desc=f"Extracting frame {i+1}/{total}...")
        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
        os.close(fd)
        cmd = [
            FFMPEG_BIN,
            "-nostdin",
            "-y",
            "-ss",
            str(t),
            "-i",
            media_path,
            "-frames:v",
            "1",
            "-q:v",
            "2",
            tmp,
        ]
        try:
            subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
            # ffmpeg leaves the file empty when it could not produce a frame.
            if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                with open(tmp, "rb") as f:
                    frames.append(f.read())
        except (subprocess.SubprocessError, OSError):
            # Extraction is best-effort per frame: a timeout or I/O failure
            # on one timestamp must not abort the remaining ones.
            pass
        finally:
            try:
                os.remove(tmp)
            except OSError:
                pass

    if progress is not None:
        progress(0.45, desc=f"Extracted {len(frames)} frames")
    return frames
def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
    """Send a chat-completion request and return the reply as plain text.

    ``client.chat.complete`` is used when the client offers it. Otherwise the
    request is posted directly to the Mistral REST endpoint with the client's
    ``api_key`` (or ``DEFAULT_KEY``); ``timeout`` applies to that REST call
    only.

    Reply content may be a plain string or a list of content chunks; text
    chunks are joined so the caller always receives readable text.

    Returns:
        The stripped reply text. Failures are never raised: a message starting
        with ``"Error"`` (or ``"Empty response from model"``) is returned
        instead.
    """
    try:
        if progress is not None:
            progress(0.6, desc="Sending request to model...")
        if hasattr(client, "chat") and hasattr(client.chat, "complete"):
            res = client.chat.complete(model=model, messages=messages, stream=False)
        else:
            api_key = getattr(client, "api_key", "") or DEFAULT_KEY
            url = "https://api.mistral.ai/v1/chat/completions"
            headers = ({"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if api_key else {"Content-Type": "application/json"})
            r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
            r.raise_for_status()
            res = r.json()
        if progress is not None:
            progress(0.8, desc="Model responded, parsing...")

        # The response is either an SDK object (attributes) or REST JSON (dict).
        choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
        if not choices:
            return f"Empty response from model: {res}"
        first = choices[0]
        msg = (first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first))
        content = (msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None))

        if isinstance(content, str):
            return content.strip()
        if isinstance(content, (list, tuple)):
            # Structured content: keep the text of every chunk that has one.
            texts = []
            for chunk in content:
                text = chunk.get("text") if isinstance(chunk, dict) else getattr(chunk, "text", None)
                if isinstance(text, str):
                    texts.append(text)
            if texts:
                return "".join(texts).strip()
        if content is None:
            # Previously this surfaced to the user as the literal string "None".
            return "Error: model returned no content."
        return str(content)
    except requests.exceptions.RequestException as e:
        return f"Error: network/API request failed: {e}"
    except Exception as e:
        return f"Error during model call: {e}"
def upload_file_to_mistral(client, path: str, filename: Optional[str] = None, purpose: str = "batch", timeout: int = 120, progress=None) -> str:
    """Upload a local file and return the id assigned by the service.

    The client's own ``files.upload`` method is tried first. If the client has
    no such method, or that attempt fails for any reason, the file is posted
    to the Mistral REST ``/v1/files`` endpoint instead.

    Args:
        client: Object from ``get_client``; only ``files.upload`` and
            ``api_key`` are used.
        path: Local path of the file to upload.
        filename: Name to report to the service; defaults to the basename of
            ``path``.
        purpose: Value sent as the upload ``purpose``.
        timeout: Timeout in seconds for the REST request.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        The uploaded file's id.

    Raises:
        RuntimeError: the REST upload failed or its response held no file id.
    """
    fname = filename or os.path.basename(path)
    try:
        if progress is not None:
            progress(0.5, desc="Uploading file to model service...")
        if hasattr(client, "files") and hasattr(client.files, "upload"):
            with open(path, "rb") as fh:
                res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
            fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
            if not fid:
                fid = res["data"][0]["id"]
            if progress is not None:
                progress(0.6, desc="Upload complete")
            return fid
    except Exception:
        # The client upload is best-effort; any failure falls back to REST.
        pass

    api_key = getattr(client, "api_key", "") or DEFAULT_KEY
    url = "https://api.mistral.ai/v1/files"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        with open(path, "rb") as fh:
            r = requests.post(url, headers=headers, files={"file": (fname, fh)}, data={"purpose": purpose}, timeout=timeout)
        r.raise_for_status()
        jr = r.json()
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"File upload failed: {e}") from e

    fid = jr.get("id")
    if not fid:
        # Some responses wrap the file record in a "data" list.
        entries = jr.get("data") or []
        fid = entries[0].get("id") if entries else None
    if not fid:
        raise RuntimeError("File upload failed: response contained no file id")
    if progress is not None:
        progress(0.65, desc="Upload complete (REST)")
    return fid
 def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
     """Classify ``src`` as image and/or video.

     The file extension gives a first guess. For remote sources a HEAD
     response, when one is obtained, overrides that guess if its content type
     starts with ``image/`` or ``video/``.

     Returns:
         ``(is_image, is_video)``; both are ``False`` when nothing matched.
     """
     suffix = ext_from_src(src)
     is_image = suffix in IMAGE_EXTS
     is_video = suffix in VIDEO_EXTS

     head = safe_head(src) if is_remote(src) else None
     if head:
         content_type = (head.headers.get("content-type") or "").lower()
         if content_type.startswith("image/"):
             is_image, is_video = True, False
         elif content_type.startswith("video/"):
             is_image, is_video = False, True

     if progress is not None:
         progress(0.02, desc="Determined media type")
     return is_image, is_video
def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=None) -> str:
    """Ask the image model to review one image and return its reply.

    The bytes go through ``convert_to_jpeg_bytes`` (``base_h=1024``), are
    embedded as a base64 data URL, and are sent with ``prompt`` to
    ``PIXTRAL_MODEL`` under ``SYSTEM_INSTRUCTION``.

    Returns:
        The text from ``chat_complete``, or a message starting with ``"Error"``
        when the bytes are not a valid image or any other step fails.
    """
    try:
        if progress is not None:
            progress(0.3, desc="Preparing image for analysis...")
        jpeg_bytes = convert_to_jpeg_bytes(img_bytes, base_h=1024)
        image_part = {"type": "image_url", "image_url": b64_bytes(jpeg_bytes, mime="image/jpeg")}
        text_part = {"type": "text", "text": prompt}
        conversation = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": [text_part, image_part]},
        ]
        reply = chat_complete(client, PIXTRAL_MODEL, conversation, progress=progress)
    except UnidentifiedImageError:
        return "Error: provided file is not a valid image."
    except Exception as e:
        return f"Error analyzing image: {e}"
    return reply
def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> str:
    """Produce one narrative review of a video file.

    Primary path: upload the file, then ask ``VIDEO_MODEL`` with a text
    message that quotes the uploaded file id followed by ``prompt``.

    Fallback path (taken when the primary path raises): extract still frames
    with ``extract_best_frames_bytes`` and send them, together with
    ``prompt``, to ``PIXTRAL_MODEL``.

    Returns:
        The text from ``chat_complete``, or a message starting with ``"Error"``
        when the upload failed and no frames could be extracted.
    """
    upload_error = None
    try:
        if progress is not None:
            progress(0.3, desc="Uploading video for full analysis...")
        file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path), progress=progress)
        header = (
            f"Uploaded video file id: {file_id}\n\n"
            "Instruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
        )
        conversation = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": header + "\n\n" + prompt},
        ]
        return chat_complete(client, VIDEO_MODEL, conversation, progress=progress)
    except Exception as exc:
        # Keep the failure so it can be quoted if the fallback also fails.
        upload_error = exc

    if progress is not None:
        progress(0.35, desc="Upload failed, extracting frames as fallback...")
    frames = extract_best_frames_bytes(video_path, sample_count=6, progress=progress)
    if not frames:
        return f"Error: could not upload video and no frames could be extracted. ({upload_error})"

    total = len(frames)
    image_entries = []
    for index, frame_bytes in enumerate(frames, start=1):
        try:
            if progress is not None:
                progress(0.4 + (index / total) * 0.2, desc=f"Preparing frame {index}/{total}...")
            jpeg_bytes = convert_to_jpeg_bytes(frame_bytes, base_h=720)
            entry = {
                "type": "image_url",
                "image_url": b64_bytes(jpeg_bytes, mime="image/jpeg"),
                "meta": {"frame_index": index},
            }
            image_entries.append(entry)
        except Exception:
            # A frame that cannot be prepared is simply left out.
            continue

    text_entry = {"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}
    conversation = [
        {"role": "system", "content": SYSTEM_INSTRUCTION},
        {"role": "user", "content": [text_entry] + image_entries},
    ]
    return chat_complete(client, PIXTRAL_MODEL, conversation, progress=progress)
def process_media(src: str, custom_prompt: str, api_key: str, progress=None) -> str:
    """Fetch ``src``, decide whether it is an image or a video, and analyse it.

    A blank ``custom_prompt`` is replaced by a default review request. Sources
    that are classified as neither image nor video are tried as an image.
    Video bytes are written to a temporary file that is removed once the
    analysis returns.

    Returns:
        The analysis text, or a human-readable error message when the source
        is missing, cannot be fetched, or cannot be analysed.
    """
    def notify(fraction, desc):
        # Progress reporting is optional; do nothing without a callback.
        if progress is not None:
            progress(fraction, desc=desc)

    client = get_client(api_key)
    prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
    if not src:
        return "Error: No URL or path provided."
    notify(0.01, "Starting media processing")

    try:
        is_image, is_video = determine_media_type(src, progress=progress)
    except Exception as e:
        return f"Error determining media type: {e}"

    if is_image:
        try:
            notify(0.05, "Fetching image bytes...")
            raw = fetch_bytes(src, progress=progress)
        except FileNotFoundError as e:
            return f"Error: {e}"
        except Exception as e:
            return f"Error fetching image: {e}"
        notify(0.2, "Analyzing image")
        try:
            return analyze_image_structured(client, raw, prompt, progress=progress)
        except UnidentifiedImageError:
            return "Error: provided file is not a valid image."
        except Exception as e:
            return f"Error analyzing image: {e}"

    if is_video:
        try:
            notify(0.05, "Fetching video bytes...")
            raw = fetch_bytes(src, timeout=120, progress=progress)
        except FileNotFoundError as e:
            return f"Error: {e}"
        except Exception as e:
            return f"Error fetching video: {e}"
        tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
        try:
            notify(0.2, "Analyzing video")
            return analyze_video_cohesive(client, tmp_path, prompt, progress=progress)
        finally:
            try:
                os.remove(tmp_path)
            except Exception:
                pass

    # Unknown type: try it as an image before giving up.
    try:
        notify(0.05, "Treating input as image fallback...")
        raw = fetch_bytes(src, progress=progress)
        notify(0.2, "Analyzing fallback image")
        return analyze_image_structured(client, raw, prompt, progress=progress)
    except Exception as e:
        return f"Unable to determine media type or fetch file: {e}"
def _ensure_event_loop_for_thread():
    """Make sure the calling thread has a current asyncio event loop.

    If ``asyncio.get_event_loop()`` raises ``RuntimeError``, a new loop is
    created and installed as the thread's current loop; otherwise nothing
    changes.
    """
    try:
        asyncio.get_event_loop()
        return
    except RuntimeError:
        pass
    asyncio.set_event_loop(asyncio.new_event_loop())
def run_blocking_in_thread(fn, *args, **kwargs):
    """Run ``fn(*args, **kwargs)`` on a worker thread that has an event loop.

    Returns:
        A ``concurrent.futures.Future``; call ``.result()`` to wait for the
        return value or re-raise the exception from ``fn``.
    """
    import concurrent.futures

    def target():
        _ensure_event_loop_for_thread()
        return fn(*args, **kwargs)

    # Each call gets its own executor, and only one task is ever submitted to
    # it, so a single worker is enough.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(target)
    # Release the executor explicitly instead of relying on garbage
    # collection: with wait=False the already-submitted task still runs to
    # completion and the worker thread exits afterwards.
    executor.shutdown(wait=False)
    return future
+css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
def _btn_label_for_status(status: str) -> str:
    """Map a worker status to the caption shown on the submit button.

    ``"busy"`` and ``"error"`` have their own captions; every other status,
    known or not, reads ``"Submit"``.
    """
    special_labels = {"busy": "Processing…", "error": "Retry"}
    return special_labels.get(status, "Submit")
 def create_demo():
+    with gr.Blocks(title="Flux Multimodal", css=css) as demo:
         with gr.Row():
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
                 progress_md = gr.Markdown("Idle")
                 output_md = gr.Markdown("")
                 status_state = gr.State("idle")
+                # hidden state to pass preview path from worker to frontend
                 preview_path_state = gr.State("")
+        # small helper: fetch URL into bytes with retries and respect Retry-After
def _fetch_with_retries_bytes(src: str, timeout: int = 15, max_retries: int = 3):
    """Read ``src`` (URL or local path) fully into memory, retrying on failure.

    Remote sources are fetched with GET. Any non-error status returns the
    body, so a 2xx other than 200 can no longer spin the loop forever. On 429
    a numeric ``Retry-After`` header replaces the current delay. Client errors
    other than 408/429 are raised at once because repeating them cannot
    succeed. Every other failure is retried with a delay that starts at one
    second and doubles each time, until ``max_retries`` attempts have been
    made.

    Local sources are read directly; a missing file is raised immediately.

    Returns:
        The bytes of the resource.

    Raises:
        FileNotFoundError: local ``src`` does not exist.
        Exception: the last error once the attempts are exhausted, or a
            non-retryable HTTP error.
    """
    attempt = 0
    delay = 1.0
    while True:
        attempt += 1
        try:
            if not is_remote(src):
                with open(src, "rb") as fh:
                    return fh.read()
            # The context manager releases the connection on every path.
            with requests.get(src, timeout=timeout) as r:
                if r.ok:
                    return r.content
                if r.status_code == 429:
                    ra = r.headers.get("Retry-After")
                    try:
                        delay = float(ra) if ra is not None else delay
                    except (TypeError, ValueError):
                        # Non-numeric Retry-After (e.g. an HTTP date): keep delay.
                        pass
                r.raise_for_status()
        except FileNotFoundError:
            raise
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            permanent = status is not None and 400 <= status < 500 and status not in (408, 429)
            if permanent or attempt >= max_retries:
                raise
        except Exception:
            if attempt >= max_retries:
                raise
        time.sleep(delay)
        delay *= 2
+        # create a local temp file for a remote URL and return local path (or None)
def _save_preview_local(src: str) -> Optional[str]:
    """Return a local file path for ``src`` suitable for previewing.

    A local ``src`` is returned unchanged when it exists. A remote ``src`` is
    downloaded into a new temporary file whose suffix is the source's
    extension (``.bin`` when it has none).

    Returns:
        The local path, or ``None`` when ``src`` is empty, the local file is
        missing, or the download or write fails.
    """
    if not src:
        return None
    if not is_remote(src):
        return src if os.path.exists(src) else None

    tmp = None
    try:
        payload = _fetch_with_retries_bytes(src, timeout=15, max_retries=3)
        fd, tmp = tempfile.mkstemp(suffix=ext_from_src(src) or ".bin")
        # fdopen takes ownership of the descriptor and closes it on exit.
        with os.fdopen(fd, "wb") as fh:
            fh.write(payload)
        return tmp
    except Exception:
        # Do not leave a partial temp file behind when the write fails.
        if tmp is not None:
            try:
                os.remove(tmp)
            except OSError:
                pass
        return None
def load_preview(url: str):
    """Build the Gradio updates that show a preview for ``url``.

    Returns:
        A 3-tuple of ``gr.update`` values for the image component, the video
        component and the status text, in that order. At most one of the two
        media components is made visible.

    Remote sources whose HEAD response reports a ``video/`` content type, or
    whose URL ends in a known video extension, are shown in the video
    component. Everything else is opened with Pillow: a readable image is
    shown in the image component, and a file Pillow cannot identify is
    offered to the video component instead.
    """
    def _result(image, video, message):
        # A media component is visible exactly when it has a value.
        return (
            gr.update(value=image, visible=image is not None),
            gr.update(value=video, visible=video is not None),
            gr.update(value=message),
        )

    if not url:
        return _result(None, None, "")
    try:
        if is_remote(url):
            head = safe_head(url)
            if head:
                ctype = (head.headers.get("content-type") or "").lower()
                if ctype.startswith("video/") or url.lower().endswith(tuple(VIDEO_EXTS)):
                    local = _save_preview_local(url)
                    if local:
                        return _result(None, local, f"Remote video detected (content-type={ctype}). Showing preview if browser-playable.")
                    return _result(None, None, f"Remote video detected but preview download failed (content-type={ctype}).")

        local = _save_preview_local(url)
        if not local:
            return _result(None, None, "Preview load failed: could not fetch resource.")
        try:
            # Opening inside ``with`` closes the file handle once validated.
            with Image.open(local) as img:
                if getattr(img, "is_animated", False):
                    img.seek(0)
        except UnidentifiedImageError:
            # Every non-image file is handed to the video player; the old
            # extension test was short-circuited by ``or True``.
            return _result(None, local, "Non-image file — showing as video preview if playable.")
        return _result(local, None, "Image preview loaded.")
    except Exception as e:
        return _result(None, None, f"Preview load failed: {e}")
         url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, preview_status])
         def clear_all():
             """Return the reset values for the seven outputs of the Clear button."""
             blank_text, no_media = "", None
             return (blank_text, no_media, no_media, "idle", "Idle", blank_text, blank_text)
         clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state])
+        def _convert_video_for_preview(path: str) -> str:
+            """Transcode ``path`` to an H.264/AAC ``.mp4`` temp file for preview.
+
+            Returns the path of the new temporary file on success. Returns
+            ``path`` unchanged when ffmpeg is unavailable, exits with an error,
+            times out (60 s), or produces no output.
+            """
+            if not FFMPEG_BIN or not os.path.exists(FFMPEG_BIN):
+                return path
+            out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
+            os.close(out_fd)
+            cmd = [
+                FFMPEG_BIN, "-nostdin", "-y", "-i", path,
+                "-c:v", "libx264", "-preset", "veryfast", "-crf", "28",
+                "-c:a", "aac", "-movflags", "+faststart", out_path
+            ]
+            try:
+                done = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60)
+                # mkstemp already created out_path, so its mere existence proves
+                # nothing: require a clean exit and a non-empty result.
+                if done.returncode == 0 and os.path.getsize(out_path) > 0:
+                    return out_path
+            except Exception:
+                pass
+            # Conversion failed: drop the unusable output and fall back to the source.
+            try:
+                os.remove(out_path)
+            except OSError:
+                pass
+            return path
+        # --- Helper: probe codecs via ffprobe; returns dict with streams info or None on failure
+        def _ffprobe_streams(path: str) -> Optional[dict]:
+            """Run ffprobe on ``path`` and return its parsed JSON report.
+
+            The report is requested with ``-show_streams -show_format``. Returns
+            None when ffmpeg was not found, ffprobe cannot be run, it exits with
+            an error, it exceeds the 30 s timeout, or its output is not valid JSON.
+            """
+            if not FFMPEG_BIN:
+                return None
+            # Swap only the executable name: a replace over the whole path would
+            # also rewrite directory components that contain "ffmpeg".
+            bin_dir, bin_name = os.path.split(FFMPEG_BIN)
+            ffprobe = None
+            if "ffmpeg" in bin_name:
+                ffprobe = shutil.which(os.path.join(bin_dir, bin_name.replace("ffmpeg", "ffprobe")))
+            # Otherwise let the OS resolve a bare "ffprobe" from PATH.
+            ffprobe = ffprobe or "ffprobe"
+            cmd = [
+                ffprobe, "-v", "error", "-print_format", "json", "-show_streams", "-show_format", path
+            ]
+            try:
+                # Bounded like the ffmpeg call so a stuck probe cannot hang the caller.
+                out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, timeout=30)
+                return json.loads(out)
+            except Exception:
+                return None
+        # --- Helper: is file already browser-playable (mp4 container with h264 video and aac audio OR at least playable video)
+        def _is_browser_playable(path: str) -> bool:
+            """Decide from the extension and the ffprobe report whether ``path`` needs no conversion.
+
+            For ``.mp4``/``.m4v``/``.mov`` a video stream with an accepted codec
+            name is required; for any other extension a single video stream of
+            any codec is enough. When probing fails only ``.mp4`` is trusted.
+            Audio streams are not inspected. Any exception yields False.
+            """
+            try:
+                lowered = (path or "").lower().split("?")[0]
+                report = _ffprobe_streams(path)
+                if not report:
+                    # No probe data: trust a plain .mp4, reject everything else.
+                    return lowered.endswith(".mp4")
+                streams = report.get("streams", [])
+                if lowered.endswith((".mp4", ".m4v", ".mov")):
+                    # NOTE(review): ffprobe normally reports H.265 as "hevc";
+                    # confirm whether "h265"/"avc1" can ever match here.
+                    return any(
+                        s.get("codec_name") in ("h264", "h265", "avc1") and s.get("codec_type") == "video"
+                        for s in streams
+                    )
+                return any(s.get("codec_type") == "video" for s in streams)
+            except Exception:
+                return False
+        # --- Convert only if not browser-playable
+        def _convert_video_for_preview_if_needed(path: str) -> str:
+            """Return ``path`` when it is already browser-playable, else a converted copy."""
+            try:
+                playable = _is_browser_playable(path)
+            except Exception:
+                # A failed check is treated the same as "not playable".
+                playable = False
+            return path if playable else _convert_video_for_preview(path)
+        # Worker now returns (status_state, output_md, preview_path_state)
+        def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
+            """Fetch the media at ``url``, analyze it, and report the outcome.
+
+            Returns a 3-tuple ``(status, output_text, preview_path)``:
+            ``status`` is "done" or "error" (a string result starting with
+            "error", case-insensitively, counts as an error), ``output_text`` is
+            the analysis result or an error message, and ``preview_path`` is a
+            local file for the preview widgets or "" when there is none.
+            Exceptions raised during processing are turned into an error tuple.
+            """
+            def _discard(*paths):
+                # Best-effort removal of temporary files.
+                for p in paths:
+                    if not p:
+                        continue
+                    try:
+                        os.remove(p)
+                    except OSError:
+                        pass
+
+            def _outcome(res, preview):
+                failed = isinstance(res, str) and res.lower().startswith("error")
+                text = res if isinstance(res, str) else str(res)
+                return ("error" if failed else "done", text, preview or "")
+
+            def _image_preview(raw):
+                # Encode before creating the temp file so a failed conversion
+                # does not leave an empty .jpg behind.
+                try:
+                    jpeg = convert_to_jpeg_bytes(raw, base_h=1024)
+                    fd, path = tempfile.mkstemp(suffix=".jpg")
+                    with os.fdopen(fd, "wb") as fh:
+                        fh.write(jpeg)
+                    return path
+                except Exception:
+                    return None
+
+            def _video_preview(tmp):
+                converted = _convert_video_for_preview(tmp)
+                return converted if os.path.exists(converted) else tmp
+
+            try:
+                if not url:
+                    return ("error", "**Error:** No URL provided.", "")
+                progress(0.01, desc="Starting processing...")
+                progress(0.03, desc="Checking URL / content-type...")
+                is_img, is_vid = determine_media_type(url, progress=progress)
+                progress(0.06, desc=f"Determined media type: image={is_img}, video={is_vid}")
+                client = get_client(key)
+                if is_vid:
+                    progress(0.08, desc="Fetching video bytes (may take a while)...")
+                    raw = fetch_bytes(url, timeout=120, progress=progress)
+                    tmp = save_bytes_to_temp(raw, suffix=ext_from_src(url) or ".mp4")
+                    preview_local = tmp
+                    try:
+                        progress(0.18, desc="Saved video to temp; converting for preview if needed...")
+                        preview_local = _video_preview(tmp)
+                        progress(0.25, desc="Starting video analysis...")
+                        res = analyze_video_cohesive(client, tmp, prompt or "", progress=progress)
+                        progress(0.98, desc="Finalizing result...")
+                    except Exception:
+                        # Nothing will be previewed, so drop both temp files.
+                        _discard(tmp, preview_local)
+                        raise
+                    # Keep the source file when it doubles as the preview;
+                    # deleting it would hand the UI a dangling path.
+                    if preview_local != tmp:
+                        _discard(tmp)
+                    return _outcome(res, preview_local)
+                if is_img:
+                    progress(0.08, desc="Fetching image bytes...")
+                    raw = fetch_bytes(url, progress=progress)
+                    preview_local = _image_preview(raw)
+                    progress(0.18, desc="Analyzing image...")
+                    try:
+                        res = analyze_image_structured(client, raw, prompt or "", progress=progress)
+                    except UnidentifiedImageError:
+                        return ("error", "Error: provided file is not a valid image.", preview_local or "")
+                    progress(0.98, desc="Finalizing result...")
+                    return _outcome(res, preview_local)
+                progress(0.07, desc="Unknown media type — fetching bytes for heuristics...")
+                raw = fetch_bytes(url, timeout=120, progress=progress)
+                try:
+                    progress(0.15, desc="Attempting to interpret as image...")
+                    with Image.open(BytesIO(raw)):
+                        pass
+                    progress(0.2, desc="Image detected — analyzing...")
+                    res = analyze_image_structured(client, raw, prompt or "", progress=progress)
+                    return _outcome(res, _image_preview(raw))
+                except Exception:
+                    # Deliberate best-effort: any failure on the image path
+                    # falls through to treating the bytes as video.
+                    pass
+                fd, tmp = tempfile.mkstemp(suffix=ext_from_src(url) or ".mp4")
+                with os.fdopen(fd, "wb") as fh:
+                    fh.write(raw)
+                preview_local = tmp
+                try:
+                    progress(0.3, desc="Saved fallback video file; analyzing...")
+                    preview_local = _video_preview(tmp)
+                    res = analyze_video_cohesive(client, tmp, prompt or "", progress=progress)
+                except Exception:
+                    _discard(tmp, preview_local)
+                    raise
+                if preview_local != tmp:
+                    _discard(tmp)
+                return _outcome(res, preview_local)
+            except Exception as e:
+                return ("error", f"Unexpected worker error: {e}", "")
+        # immediate UI flip to "busy" so user sees work started
         submit_btn.click(fn=lambda: "busy", inputs=[], outputs=[status_state])
+        # actual heavy work runs in the queue and shows progress (attach to progress_md)
+        submit_btn.click(
+            fn=worker,
+            inputs=[url_input, custom_prompt, api_key],
+            outputs=[status_state, output_md, preview_path_state],
+            queue=True,
+            show_progress="full",
+            show_progress_on=progress_md,
+        )
+        # update submit button label from status
+        def btn_label_from_state(s):
+            """Delegate to ``_btn_label_for_status`` for the status value ``s``."""
+            return _btn_label_for_status(s)
         status_state.change(fn=btn_label_from_state, inputs=[status_state], outputs=[submit_btn])
+        # map status to progress text
+        def status_to_progress_text(s):
+            """Translate a status key into its progress text; unknown values pass through unchanged."""
+            labels = {
+                "idle": "Idle",
+                "busy": "Processing…",
+                "done": "Completed",
+                "error": "Error — see output",
+            }
+            if s in labels:
+                return labels[s]
+            return s
         status_state.change(fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md])
+        # when preview_path_state changes, update preview components appropriately
+        def apply_preview(path: str):
+            """Route a preview file to the image or the video widget.
+
+            Returns ``(image_update, video_update, status_text)``. The choice is
+            made by file extension first (``IMAGE_EXTS``, then ``VIDEO_EXTS``).
+            For any other extension the file is shown as an image if Pillow can
+            open it and as a video otherwise. An empty ``path`` or an unexpected
+            error hides both widgets and clears the status text.
+            """
+            def _hidden():
+                return gr.update(value=None, visible=False)
+
+            def _shown(p):
+                return gr.update(value=p, visible=True)
+
+            if not path:
+                return _hidden(), _hidden(), ""
+            try:
+                lowered = path.lower()
+                if lowered.endswith(tuple(IMAGE_EXTS)):
+                    return _shown(path), _hidden(), "Preview updated."
+                if lowered.endswith(tuple(VIDEO_EXTS)):
+                    return _hidden(), _shown(path), "Preview updated."
+                try:
+                    # Open only to sniff the type; the context manager closes
+                    # the file handle straight away.
+                    with Image.open(path):
+                        pass
+                except Exception:
+                    return _hidden(), _shown(path), "Preview updated."
+                return _shown(path), _hidden(), "Preview updated."
+            except Exception:
+                return _hidden(), _hidden(), ""
         preview_path_state.change(fn=apply_preview, inputs=[preview_path_state], outputs=[preview_image, preview_video, preview_status])
+    # ensure global queue behavior
     demo.queue()
     return demo