Image-To-Flux-Prompt

Running

App Files Community

Hug0endob committed on Dec 18, 2025

Commit

1a185c8

verified ·

1 Parent(s): 1c90917

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -72

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import os, shutil, subprocess, tempfile, base64, json
 from io import BytesIO
 from typing import List, Tuple
 import requests
-from PIL import Image, ImageFile, UnidentifiedImageError, ImageSequence
 import gradio as gr
 # --- Config
@@ -87,6 +87,26 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
     else:
         with open(src, "rb") as f: return f.read()
 def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
     frames: List[bytes] = []
     if not FFMPEG_BIN or not os.path.exists(media_path): return frames
@@ -150,50 +170,15 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
                 {"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": data_url}]}]
     return chat_complete(client, PIXTRAL_MODEL, messages)
-def ffmpeg_make_browser_mp4(input_path: str, output_path: str, max_width: int = 1280, crf: int = 28, preset: str = "fast", timeout: int = 60) -> bool:
-    """
-    Re-encode to H.264/AAC and move moov atom to front for browser playback.
-    Returns True on success.
-    """
-    if not FFMPEG_BIN:
-        return False
-    cmd = [
-        FFMPEG_BIN, "-nostdin", "-y", "-i", input_path,
-        "-vf", f"scale='min({max_width},iw)':-2",
-        "-c:v", "libx264", "-crf", str(crf), "-preset", preset,
-        "-c:a", "aac", "-b:a", "128k",
-        "-movflags", "+faststart",
-        output_path
-        ]
-    try:
-        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout, check=True)
-        return os.path.exists(output_path) and os.path.getsize(output_path) > 0
-    except Exception:
-        try:
-            if os.path.exists(output_path): os.remove(output_path)
-        except Exception: pass
-        return False
 def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
-    # Try upload first (preferred). If upload fails, try to ensure browser-playable mp4 and fall back to frames.
     try:
         file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
         extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
         messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
         return chat_complete(client, VIDEO_MODEL, messages)
     except Exception:
-        pass
-    tmp_fixed = None
-    try:
-        tmp_fd, tmp_fixed = tempfile.mkstemp(suffix=".mp4"); os.close(tmp_fd)
-        ok = ffmpeg_make_browser_mp4(video_path, tmp_fixed, max_width=1280, crf=28, preset="fast", timeout=120)
-        if ok:
-            frames = extract_best_frames_bytes(tmp_fixed, sample_count=6)
-        else:
-            frames = extract_best_frames_bytes(video_path, sample_count=6)
-        if not frames:
-            return "Error: could not upload video and no frames could be extracted."
         image_entries = []
         for i, fb in enumerate(frames, start=1):
             try:
@@ -204,11 +189,6 @@ def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
         content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
         messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
         return chat_complete(client, PIXTRAL_MODEL, messages)
-    finally:
-        try:
-            if tmp_fixed and os.path.exists(tmp_fixed): os.remove(tmp_fixed)
-        except Exception:
-            pass
 def determine_media_type(src: str) -> Tuple[bool, bool]:
     is_image = False; is_video = False
@@ -223,30 +203,6 @@ def determine_media_type(src: str) -> Tuple[bool, bool]:
             elif ctype.startswith("video/"): is_video, is_image = True, False
     return is_image, is_video
-def save_bytes_to_temp(data: bytes, suffix: str = ".dat") -> str:
-    fd, path = tempfile.mkstemp(suffix=suffix)
-    os.close(fd)
-    with open(path, "wb") as fh:
-        fh.write(data)
-    return path
-def convert_to_jpeg_bytes(data: bytes, base_h: int = 1024) -> bytes:
-    buf = BytesIO(data)
-    img = Image.open(buf)
-    if getattr(img, "is_animated", False):
-        img = next(ImageSequence.Iterator(img))
-    img = img.convert("RGB")
-    w, h = img.size
-    if h > base_h:
-        new_w = int(w * (base_h / h))
-        img = img.resize((new_w, base_h), Image.LANCZOS)
-    out = BytesIO()
-    img.save(out, format="JPEG", quality=90, optimize=True)
-    return out.getvalue()
-def b64_bytes(data: bytes, mime: str = "image/jpeg") -> str:
-    return "data:" + mime + ";base64," + base64.b64encode(data).decode("ascii")
 def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
     client = get_client(api_key)
     prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
@@ -284,7 +240,7 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progre
     except Exception as e:
         return f"Unable to determine media type or fetch file: {e}"
-# --- Gradio UI (modified: removed PiP, keep preview left, Submit+Clear on same row)
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
@@ -296,15 +252,87 @@ def create_demo():
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
             with gr.Column(scale=2):
                 url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
                 with gr.Accordion("Prompt (optional)", open=False):
                     custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
                 with gr.Accordion("Mistral API Key (optional)", open=False):
                     api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
-                # Buttons on same row
-                with gr.Row():
-                    submit_btn = gr.Button(_btn_label_for_status("idle"))
-                    clear_btn = gr.Button("Clear")
                 output_md = gr.Markdown("")
                 status_state = gr.State("idle")

 from io import BytesIO
 from typing import List, Tuple
 import requests
+from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
 # --- Config
     else:
         with open(src, "rb") as f: return f.read()
+def save_bytes_to_temp(b: bytes, suffix: str) -> str:
+    fd, path = tempfile.mkstemp(suffix=suffix); os.close(fd)
+    with open(path, "wb") as f: f.write(b)
+    return path
+def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
+    img = Image.open(BytesIO(img_bytes))
+    try:
+        if getattr(img, "is_animated", False): img.seek(0)
+    except Exception: pass
+    if img.mode != "RGB": img = img.convert("RGB")
+    h = base_h
+    w = max(1, int(img.width * (h / img.height)))
+    img = img.resize((w, h), Image.LANCZOS)
+    buf = BytesIO(); img.save(buf, format="JPEG", quality=85)
+    return buf.getvalue()
+def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
+    return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
 def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
     frames: List[bytes] = []
     if not FFMPEG_BIN or not os.path.exists(media_path): return frames
                 {"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": data_url}]}]
     return chat_complete(client, PIXTRAL_MODEL, messages)
 def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
     try:
         file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
         extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
         messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
         return chat_complete(client, VIDEO_MODEL, messages)
     except Exception:
+        frames = extract_best_frames_bytes(video_path, sample_count=6)
+        if not frames: return "Error: could not upload video and no frames could be extracted."
         image_entries = []
         for i, fb in enumerate(frames, start=1):
             try:
         content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
         messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
         return chat_complete(client, PIXTRAL_MODEL, messages)
 def determine_media_type(src: str) -> Tuple[bool, bool]:
     is_image = False; is_video = False
             elif ctype.startswith("video/"): is_video, is_image = True, False
     return is_image, is_video
 def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
     client = get_client(api_key)
     prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
     except Exception as e:
         return f"Unable to determine media type or fetch file: {e}"
+# --- Gradio UI
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
+                pip_button = gr.Button("Open Video in PiP", visible=False)
             with gr.Column(scale=2):
                 url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
                 with gr.Accordion("Prompt (optional)", open=False):
                     custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
                 with gr.Accordion("Mistral API Key (optional)", open=False):
                     api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
+                submit_btn = gr.Button(_btn_label_for_status("idle"))
+                clear_btn = gr.Button("Clear")
                 output_md = gr.Markdown("")
                 status_state = gr.State("idle")
+        pip_html = gr.HTML("""<div id="pip-root" style="display:none"></div>
+<script>
+window.openPiP = (sel) => {
+  try {
+    const v = document.querySelector(sel);
+    if (!v) return "no-video";
+    if (v.requestPictureInPicture) { v.requestPictureInPicture(); return "opened"; }
+    return "unsupported";
+  } catch(e){ return "error:"+e; }
+};
+</script>""")
+        def load_preview(url: str):
+            empty_img = gr.update(value=None, visible=False)
+            empty_vid = gr.update(value=None, visible=False)
+            pip_vis = gr.update(visible=False)
+            if not url: return empty_img, empty_vid, pip_vis
+            if not is_remote(url) and os.path.exists(url):
+                ext = ext_from_src(url)
+                if ext in VIDEO_EXTS: return empty_img, gr.update(value=os.path.abspath(url), visible=True), gr.update(visible=True)
+                if ext in IMAGE_EXTS:
+                    try:
+                        img = Image.open(url)
+                        if getattr(img, "is_animated", False): img.seek(0)
+                        return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
+                    except Exception: return empty_img, empty_vid, pip_vis
+            head = safe_head(url)
+            if head:
+                ctype = (head.headers.get("content-type") or "").lower()
+                if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
+                    return empty_img, gr.update(value=url, visible=True), gr.update(visible=True)
+            try:
+                r = safe_get(url, timeout=15)
+                img = Image.open(BytesIO(r.content))
+                if getattr(img, "is_animated", False): img.seek(0)
+                return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
+            except Exception:
+                return empty_img, empty_vid, pip_vis
+        url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, pip_button])
+        def clear_all():
+            return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
+        clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
+        def pip_click(_):
+            js = "<script>setTimeout(()=>window.openPiP('video.preview_media'),50);</script>"
+            return gr.HTML.update(value=js)
+        pip_button.click(fn=pip_click, inputs=[url_input], outputs=[pip_html])
+        def start_busy():
+            s = "busy"
+            return s, gr.update(value=_btn_label_for_status(s))
+        submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
+        def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
+            return process_media(url or "", prompt or "", key or "", progress=progress)
+        submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
+            fn=lambda res: ("error", "**Error:** no result returned.") if not res else
+                           ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
+            inputs=[output_md],
+            outputs=[status_state, output_md],
+        )
+        def btn_label_for_state(s: str):
+            return _btn_label_for_status(s)
+        status_state.change(fn=btn_label_for_state, inputs=[status_state], outputs=[submit_btn])
+    return demo
+if __name__ == "__main__":
+    create_demo().launch()