Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on Dec 14, 2025

Commit

3c2126a

verified ·

1 Parent(s): e6159f2

Update app.py

Browse files

Files changed (1) hide show

app.py +207 -110

app.py CHANGED Viewed

@@ -1,20 +1,38 @@
 #!/usr/bin/env python3
 import os
 import subprocess
 import tempfile
-import shutil
 import base64
 import requests
 from io import BytesIO
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
-from mistralai import Mistral
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
 STREAM_THRESHOLD = 20 * 1024 * 1024
 FFMPEG_BIN = shutil.which("ffmpeg")
 SYSTEM_INSTRUCTION = (
     "You are a clinical visual analyst. Only analyze media actually provided (image data or extracted frames). "
@@ -23,51 +41,75 @@ SYSTEM_INSTRUCTION = (
     "Do not invent sensory information not present in the media."
 )
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
-IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
-VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
-def get_client(key: str = None) -> Mistral:
     api_key = (key or "").strip() or DEFAULT_KEY
     return Mistral(api_key=api_key)
 def is_remote(src: str) -> bool:
     return bool(src) and src.startswith(("http://", "https://"))
 def ext_from_src(src: str) -> str:
     _, ext = os.path.splitext((src or "").split("?")[0])
     return ext.lower()
-def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> bytes:
     if is_remote(src):
-        try:
-            h = requests.head(src, timeout=6, allow_redirects=True)
-            h.raise_for_status()
-            cl = h.headers.get("content-length")
-            if cl and int(cl) > stream_threshold:
-                with requests.get(src, timeout=timeout, stream=True) as r:
-                    r.raise_for_status()
-                    fd, path = tempfile.mkstemp()
-                    os.close(fd)
-                    try:
-                        with open(path, "wb") as f:
-                            for chunk in r.iter_content(8192):
-                                if chunk:
-                                    f.write(chunk)
-                        with open(path, "rb") as f:
-                            return f.read()
-                    finally:
-                        try: os.remove(path)
-                        except Exception: pass
-        except Exception:
-            pass
-        with requests.get(src, timeout=timeout) as r:
-            r.raise_for_status()
-            return r.content
-    with open(src, "rb") as f:
-        return f.read()
 def save_bytes_to_temp(b: bytes, suffix: str) -> str:
     fd, path = tempfile.mkstemp(suffix=suffix)
@@ -86,6 +128,7 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
     if img.mode != "RGB":
         img = img.convert("RGB")
     h = base_h
     w = max(1, int(img.width * (h / img.height)))
     img = img.resize((w, h), Image.LANCZOS)
     buf = BytesIO()
@@ -95,11 +138,11 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
 def b64_jpeg(img_bytes: bytes) -> str:
     return base64.b64encode(img_bytes).decode("utf-8")
-def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> list:
     if not FFMPEG_BIN or not os.path.exists(media_path):
-        return []
     timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
-    frames = []
     for i, t in enumerate(timestamps):
         fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
         os.close(fd)
@@ -109,37 +152,19 @@ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_ex
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                 with open(tmp, "rb") as f:
                     frames.append(f.read())
         finally:
             try: os.remove(tmp)
             except Exception: pass
     return frames
-def upload_file_to_mistral(client: Mistral, path: str, filename: str | None = None, purpose: str = "batch") -> str:
-    fname = filename or os.path.basename(path)
-    try:
-        with open(path, "rb") as fh:
-            res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
-        fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
-        if not fid:
-            fid = res["data"][0]["id"]
-        return fid
-    except Exception:
-        api_key = client.api_key if hasattr(client, "api_key") else os.getenv("MISTRAL_API_KEY", "")
-        url = "https://api.mistral.ai/v1/files"
-        headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
-        with open(path, "rb") as fh:
-            files = {"file": (fname, fh)}
-            data = {"purpose": purpose}
-            r = requests.post(url, headers=headers, files=files, data=data, timeout=120)
-            r.raise_for_status()
-            jr = r.json()
-            return jr.get("id") or jr.get("data", [{}])[0].get("id")
-def build_messages_for_image(prompt: str, b64_jpg: str) -> list:
     content = f"{prompt}\n\nImage (base64): data:image/jpeg;base64,{b64_jpg}"
     return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
-def build_messages_for_text(prompt: str, extra: str) -> list:
     return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": f"{prompt}\n\n{extra}"}]
 def extract_text_from_response(res, parts: list):
@@ -164,19 +189,58 @@ def extract_text_from_response(res, parts: list):
     except Exception:
         parts.append(str(res))
-def chat_complete(client: Mistral, model: str, messages: list) -> str:
     parts = []
-    res = client.chat.complete(model=model, messages=messages, stream=False)
-    extract_text_from_response(res, parts)
     return "".join(parts).strip()
-def analyze_image(client: Mistral, img_bytes: bytes, prompt: str) -> str:
     jpeg = convert_to_jpeg_bytes(img_bytes, base_h=640)
     b64 = b64_jpeg(jpeg)
     msgs = build_messages_for_image(prompt, b64)
     return chat_complete(client, PIXTRAL_MODEL, msgs)
-def analyze_frames_and_consolidate(client: Mistral, frames: list, prompt: str) -> str:
     per_frame = []
     for i, fb in enumerate(frames):
         txt = analyze_image(client, fb, f"{prompt}\n\nFrame index: {i + 1}")
@@ -191,6 +255,29 @@ def analyze_frames_and_consolidate(client: Mistral, frames: list, prompt: str) -
     summary = chat_complete(client, PIXTRAL_MODEL, msgs)
     return "\n\n".join(per_frame + [f"Consolidated summary:\n{summary}"])
 def process_media(src: str, custom_prompt: str, api_key: str) -> str:
     client = get_client(api_key)
     prompt = custom_prompt.strip() or "Please provide a detailed visual review."
@@ -198,16 +285,9 @@ def process_media(src: str, custom_prompt: str, api_key: str) -> str:
     is_image = ext in IMAGE_EXTS
     is_video = ext in VIDEO_EXTS
     if is_remote(src):
-        try:
-            h = requests.head(src, timeout=6, allow_redirects=True)
-            h.raise_for_status()
-            ctype = (h.headers.get("content-type") or "").lower()
-            if ctype.startswith("video/"):
-                is_video = True; is_image = False
-            elif ctype.startswith("image/"):
-                is_image = True; is_video = False
-        except Exception:
-            pass
     if is_image:
         try:
             raw = fetch_bytes(src)
@@ -224,75 +304,92 @@ def process_media(src: str, custom_prompt: str, api_key: str) -> str:
             raw = fetch_bytes(src, timeout=120)
         except Exception as e:
             return f"Error fetching video: {e}"
-        tmp_path = save_bytes_to_temp(raw, suffix=ext or ".mp4")
         try:
             try:
                 file_id = upload_file_to_mistral(client, tmp_path, filename=os.path.basename(src.split("?")[0]))
                 extra = f"Uploaded video to Mistral Files with id: {file_id}\n\nInstruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
                 msgs = build_messages_for_text(prompt, extra)
                 return chat_complete(client, VIDEO_MODEL, msgs)
             except Exception:
                 frames = extract_best_frames_bytes(tmp_path, sample_count=5)
                 if not frames:
-                    return "Error: could not upload remote video and no frames extracted."
                 return analyze_frames_and_consolidate(client, frames, prompt)
         finally:
             try: os.remove(tmp_path)
             except Exception: pass
     return "Unable to determine media type from the provided URL or file extension."
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
 def load_preview(url: str):
     if not url:
-        return gr.update(value=None, visible=False), gr.update(value=None, visible=False)
     if not is_remote(url) and os.path.exists(url):
         ext = ext_from_src(url)
         if ext in VIDEO_EXTS:
-            return gr.update(value=None, visible=False), gr.update(value=os.path.abspath(url), visible=True)
         if ext in IMAGE_EXTS:
             try:
                 img = Image.open(url)
                 if getattr(img, "is_animated", False):
                     img.seek(0)
-                return gr.update(value=img.convert("RGB"), visible=True), gr.update(value=None, visible=False)
             except Exception:
-                return gr.update(value=None, visible=False), gr.update(value=None, visible=False)
     try:
-        h = requests.head(url, timeout=6, allow_redirects=True)
-        if h.ok:
-            ctype = (h.headers.get("content-type") or "").lower()
-            if ctype.startswith("video/") or any(url.lower().split("?")[0].endswith(ext) for ext in VIDEO_EXTS):
-                return gr.update(value=None, visible=False), gr.update(value=url, visible=True)
-    except Exception:
-        pass
-    try:
-        r = requests.get(url, timeout=15)
-        r.raise_for_status()
         img = Image.open(BytesIO(r.content))
         if getattr(img, "is_animated", False):
             img.seek(0)
-        return gr.update(value=img.convert("RGB"), visible=True), gr.update(value=None, visible=False)
     except Exception:
-        return gr.update(value=None, visible=False), gr.update(value=None, visible=False)
-with gr.Blocks(title="Flux Multimodal (fixed)", css=css) as demo:
-    with gr.Row():
-        with gr.Column(scale=1):
-            url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
-            custom_prompt = gr.Textbox(label="Prompt (optional)", lines=2, value="")
-            with gr.Accordion("Mistral API Key (optional)", open=False):
-                api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
-            submit_btn = gr.Button("Submit")
-            preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
-            preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
-        with gr.Column(scale=2):
-            final_md = gr.Markdown(value="")
-    url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
-    def submit_wrapper(url, prompt, key):
-        return process_media(url, prompt, key)
-    submit_btn.click(fn=submit_wrapper, inputs=[url_input, custom_prompt, api_key], outputs=[final_md])
 if __name__ == "__main__":
     demo.queue().launch()

 #!/usr/bin/env python3
+"""
+flux_multimodal_fixed.py
+Streamlined Gradio app to preview an image/video from URL or local path,
+send media (or extracted frames) to Mistral API for analysis using the
+default SYSTEM_INSTRUCTION prompt (unless user supplies one).
+"""
+from __future__ import annotations
 import os
+import shutil
 import subprocess
 import tempfile
 import base64
 import requests
 from io import BytesIO
+from typing import List, Tuple
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
+# Import Mistral client in the same way original code did.
+# If you have a different client interface, adjust get_client/upload_file_to_mistral accordingly.
+try:
+    from mistralai import Mistral
+except Exception:
+    Mistral = None  # Fallback; upload will use raw HTTP if needed
+# --- Configuration / constants ---
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
 STREAM_THRESHOLD = 20 * 1024 * 1024
 FFMPEG_BIN = shutil.which("ffmpeg")
+IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
+VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
 SYSTEM_INSTRUCTION = (
     "You are a clinical visual analyst. Only analyze media actually provided (image data or extracted frames). "
     "Do not invent sensory information not present in the media."
 )
+# Pillow config
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
+# --- Utilities ---
def get_client(key: str | None = None):
    """Build the API client for the given key.

    A blank or missing ``key`` falls back to ``DEFAULT_KEY``. When the
    ``mistralai`` import failed (``Mistral`` is ``None``), a minimal stand-in
    object exposing only ``api_key`` is returned instead of an SDK client.
    """
    stripped = (key or "").strip()
    api_key = stripped if stripped else DEFAULT_KEY
    if Mistral is not None:
        return Mistral(api_key=api_key)

    # No SDK available: hand back a thin holder so callers can still read api_key.
    class Dummy:
        def __init__(self, k):
            self.api_key = k

    return Dummy(api_key)
 def is_remote(src: str) -> bool:
     return bool(src) and src.startswith(("http://", "https://"))
 def ext_from_src(src: str) -> str:
+    if not src:
+        return ""
     _, ext = os.path.splitext((src or "").split("?")[0])
     return ext.lower()
def safe_head(url: str, timeout: int = 6) -> requests.Response | None:
    """Issue a HEAD request that never raises.

    Redirects are followed. Returns the response, or ``None`` when the request
    fails for any reason or the final status code is 400 or above.
    """
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        acceptable = response.status_code < 400
    except Exception:
        return None
    return response if acceptable else None
def safe_get(url: str, timeout: int = 15) -> requests.Response:
    """GET *url* and return the response, raising on an HTTP error status.

    Failures are not suppressed: whatever ``requests.get`` or
    ``raise_for_status`` raises is passed through to the caller.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response
def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60) -> bytes:
    """Return the complete contents of a remote URL or a local file.

    Args:
        src: HTTP(S) URL or local filesystem path.
        stream_threshold: When a HEAD probe advertises a Content-Length above
            this many bytes, the body is fetched with a streamed request.
        timeout: Timeout in seconds passed to the download requests.

    Returns:
        The whole payload as ``bytes``.

    Raises:
        OSError: If a local path cannot be opened or read.
        Exception: Whatever the final plain GET (``safe_get``) raises.

    The payload is always returned in memory, so large bodies are assembled
    directly from streamed chunks; spooling them to a temporary file first
    only added disk I/O without lowering peak memory.
    """
    if not is_remote(src):
        with open(src, "rb") as fh:
            return fh.read()

    # HEAD probe is best effort: no response or an unparsable length means "not large".
    head = safe_head(src)
    declared = head.headers.get("content-length") if head is not None else None
    try:
        is_large = bool(declared) and int(declared) > stream_threshold
    except (TypeError, ValueError):
        is_large = False

    if is_large:
        try:
            with requests.get(src, timeout=timeout, stream=True) as resp:
                resp.raise_for_status()
                return b"".join(chunk for chunk in resp.iter_content(64 * 1024) if chunk)
        except Exception:
            # Deliberate best effort: retry below with a plain GET.
            pass

    return safe_get(src, timeout=timeout).content
 def save_bytes_to_temp(b: bytes, suffix: str) -> str:
     fd, path = tempfile.mkstemp(suffix=suffix)
     if img.mode != "RGB":
         img = img.convert("RGB")
     h = base_h
+    # maintain aspect
     w = max(1, int(img.width * (h / img.height)))
     img = img.resize((w, h), Image.LANCZOS)
     buf = BytesIO()
 def b64_jpeg(img_bytes: bytes) -> str:
     return base64.b64encode(img_bytes).decode("utf-8")
+def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
+    frames = []
     if not FFMPEG_BIN or not os.path.exists(media_path):
+        return frames
     timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
     for i, t in enumerate(timestamps):
         fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
         os.close(fd)
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                 with open(tmp, "rb") as f:
                     frames.append(f.read())
+        except Exception:
+            pass
         finally:
             try: os.remove(tmp)
             except Exception: pass
     return frames
+# --- Mistral interaction helpers ---
def build_messages_for_image(prompt: str, b64_jpg: str):
    """Build the chat messages for a single-image vision request.

    Args:
        prompt: Instruction text shown to the model alongside the image.
        b64_jpg: Base64-encoded JPEG data (no ``data:`` prefix).

    Returns:
        A two-item message list: the system instruction, then a user message
        whose content is a text chunk plus an ``image_url`` chunk carrying the
        JPEG as a data URI.

    The image must travel as its own ``image_url`` chunk: pasting the Base64
    payload into the text prompt makes the model read it as characters
    instead of decoding it as an image.
    """
    data_uri = f"data:image/jpeg;base64,{b64_jpg}"
    user_content = [
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": data_uri},
    ]
    return [
        {"role": "system", "content": SYSTEM_INSTRUCTION},
        {"role": "user", "content": user_content},
    ]
def build_messages_for_text(prompt: str, extra: str):
    """Build a text-only message list: system instruction, then prompt and extra joined by a blank line."""
    system_msg = {"role": "system", "content": SYSTEM_INSTRUCTION}
    user_msg = {"role": "user", "content": f"{prompt}\n\n{extra}"}
    return [system_msg, user_msg]
 def extract_text_from_response(res, parts: list):
     except Exception:
         parts.append(str(res))
def chat_complete(client, model: str, messages: list) -> str:
    """Run one non-streaming chat completion and return the extracted text.

    Uses ``client.chat.complete`` when the client offers it; otherwise posts
    the request to the Mistral REST endpoint with the client's ``api_key``
    (or ``DEFAULT_KEY``). Any failure is reported in the returned string as
    ``"Error during model call: ..."`` rather than raised.
    """
    collected = []
    try:
        sdk_chat = getattr(client, "chat", None)
        if sdk_chat is not None and hasattr(sdk_chat, "complete"):
            res = sdk_chat.complete(model=model, messages=messages, stream=False)
        else:
            # No SDK chat interface on this client: call the REST API directly.
            key = getattr(client, "api_key", "") or DEFAULT_KEY
            http = requests.post(
                "https://api.mistral.ai/v1/chat/completions",
                json={"model": model, "messages": messages},
                headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
                timeout=120,
            )
            http.raise_for_status()
            res = http.json()
        extract_text_from_response(res, collected)
    except Exception as e:
        collected.append(f"Error during model call: {e}")
    return "".join(collected).strip()
+def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch") -> str:
+    fname = filename or os.path.basename(path)
+    # Prefer SDK upload if available
+    try:
+        if hasattr(client, "files") and hasattr(client.files, "upload"):
+            with open(path, "rb") as fh:
+                res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
+            fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
+            if not fid:
+                fid = res["data"][0]["id"]
+            return fid
+    except Exception:
+        pass
+    # Fallback to HTTP upload
+    api_key = getattr(client, "api_key", "") or DEFAULT_KEY
+    url = "https://api.mistral.ai/v1/files"
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+    with open(path, "rb") as fh:
+        files = {"file": (fname, fh)}
+        data = {"purpose": purpose}
+        r = requests.post(url, headers=headers, files=files, data=data, timeout=120)
+        r.raise_for_status()
+        jr = r.json()
+        return jr.get("id") or jr.get("data", [{}])[0].get("id")
def analyze_image(client, img_bytes: bytes, prompt: str) -> str:
    """Send one image with *prompt* to the Pixtral model and return the reply text.

    The image bytes are passed through ``convert_to_jpeg_bytes`` with
    ``base_h=640`` and Base64-encoded before the request is built.
    """
    resized_jpeg = convert_to_jpeg_bytes(img_bytes, base_h=640)
    messages = build_messages_for_image(prompt, b64_jpeg(resized_jpeg))
    return chat_complete(client, PIXTRAL_MODEL, messages)
+def analyze_frames_and_consolidate(client, frames: List[bytes], prompt: str) -> str:
     per_frame = []
     for i, fb in enumerate(frames):
         txt = analyze_image(client, fb, f"{prompt}\n\nFrame index: {i + 1}")
     summary = chat_complete(client, PIXTRAL_MODEL, msgs)
     return "\n\n".join(per_frame + [f"Consolidated summary:\n{summary}"])
+# --- Core processing ---
def determine_media_type_from_remote(url: str) -> Tuple[bool, bool]:
    """
    Classify a URL as ``(is_image, is_video)``.

    The URL's extension gives the initial guess. If a HEAD request succeeds
    and its Content-Type starts with ``video/`` or ``image/``, that header
    overrides the guess. An empty URL yields ``(False, False)``.
    """
    if not url:
        return False, False

    ext = ext_from_src(url)
    guess = (ext in IMAGE_EXTS, ext in VIDEO_EXTS)

    head = safe_head(url)
    if head is None:
        return guess

    ctype = (head.headers.get("content-type") or "").lower()
    if ctype.startswith("video/"):
        return False, True
    if ctype.startswith("image/"):
        return True, False
    return guess
 def process_media(src: str, custom_prompt: str, api_key: str) -> str:
     client = get_client(api_key)
     prompt = custom_prompt.strip() or "Please provide a detailed visual review."
     is_image = ext in IMAGE_EXTS
     is_video = ext in VIDEO_EXTS
     if is_remote(src):
+        ri, rv = determine_media_type_from_remote(src)
+        if ri or rv:
+            is_image, is_video = ri, rv
     if is_image:
         try:
             raw = fetch_bytes(src)
             raw = fetch_bytes(src, timeout=120)
         except Exception as e:
             return f"Error fetching video: {e}"
+        tmp_suffix = ext or ".mp4"
+        tmp_path = save_bytes_to_temp(raw, suffix=tmp_suffix)
         try:
+            # Try uploading file to Mistral first
             try:
                 file_id = upload_file_to_mistral(client, tmp_path, filename=os.path.basename(src.split("?")[0]))
                 extra = f"Uploaded video to Mistral Files with id: {file_id}\n\nInstruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
                 msgs = build_messages_for_text(prompt, extra)
                 return chat_complete(client, VIDEO_MODEL, msgs)
             except Exception:
+                # fallback to extracting frames
                 frames = extract_best_frames_bytes(tmp_path, sample_count=5)
                 if not frames:
+                    return "Error: could not upload remote video and no frames extracted (ffmpeg missing or extraction failed)."
                 return analyze_frames_and_consolidate(client, frames, prompt)
         finally:
             try: os.remove(tmp_path)
             except Exception: pass
     return "Unable to determine media type from the provided URL or file extension."
+# --- Gradio app UI helpers ---
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
 def load_preview(url: str):
+    # Returns (preview_image, preview_video) where only one is visible at a time
+    empty_img = gr.update(value=None, visible=False)
+    empty_vid = gr.update(value=None, visible=False)
     if not url:
+        return empty_img, empty_vid
+    # Local file
     if not is_remote(url) and os.path.exists(url):
         ext = ext_from_src(url)
         if ext in VIDEO_EXTS:
+            return empty_img, gr.update(value=os.path.abspath(url), visible=True)
         if ext in IMAGE_EXTS:
             try:
                 img = Image.open(url)
                 if getattr(img, "is_animated", False):
                     img.seek(0)
+                return gr.update(value=img.convert("RGB"), visible=True), empty_vid
             except Exception:
+                return empty_img, empty_vid
+    # Remote: first try HEAD
+    head = safe_head(url)
+    if head:
+        ctype = (head.headers.get("content-type") or "").lower()
+        if ctype.startswith("video/") or any(url.lower().split("?")[0].endswith(ext) for ext in VIDEO_EXTS):
+            return empty_img, gr.update(value=url, visible=True)
+    # Finally try GET and attempt to open as image
     try:
+        r = safe_get(url, timeout=15)
         img = Image.open(BytesIO(r.content))
         if getattr(img, "is_animated", False):
             img.seek(0)
+        return gr.update(value=img.convert("RGB"), visible=True), empty_vid
     except Exception:
+        return empty_img, empty_vid
+# --- Gradio app layout ---
def create_app():
    """Build the Gradio Blocks interface and return it without launching.

    The narrow left column holds the source textbox, the optional prompt, a
    collapsed API-key accordion, the submit button and the two initially
    hidden preview widgets. The wider right column holds the Markdown result.
    """

    def submit_wrapper(url, prompt, key):
        # Treat missing inputs as "" and show any failure as text in the UI.
        try:
            result = process_media(url or "", prompt or "", key or "")
        except Exception as e:
            result = f"Unhandled error: {e}"
        return result

    with gr.Blocks(title="Flux Multimodal (fixed)", css=css) as demo:
        with gr.Row():
            with gr.Column(scale=1):
                source_box = gr.Textbox(
                    label="Image / Video URL or local path",
                    placeholder="https://... or /path/to/file",
                    lines=1,
                )
                prompt_box = gr.Textbox(label="Prompt (optional)", lines=2, value="")
                with gr.Accordion("Mistral API Key (optional)", open=False):
                    key_box = gr.Textbox(label="API Key", type="password", max_lines=1)
                run_button = gr.Button("Submit")
                image_preview = gr.Image(
                    label="Preview Image",
                    type="pil",
                    elem_classes="preview_media",
                    visible=False,
                )
                video_preview = gr.Video(
                    label="Preview Video",
                    elem_classes="preview_media",
                    visible=False,
                )
            with gr.Column(scale=2):
                result_md = gr.Markdown(value="")

        # Refresh the preview whenever the source text changes.
        source_box.change(
            fn=load_preview,
            inputs=[source_box],
            outputs=[image_preview, video_preview],
        )
        run_button.click(
            fn=submit_wrapper,
            inputs=[source_box, prompt_box, key_box],
            outputs=[result_md],
        )
    return demo
 if __name__ == "__main__":
+    demo = create_app()
     demo.queue().launch()