Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on Dec 18, 2025

Commit

785aaa3

verified ·

1 Parent(s): 58d3ae4

Update app.py

Browse files

Files changed (1) hide show

app.py +212 -77

app.py CHANGED Viewed

@@ -2,14 +2,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import annotations
-import os, shutil, subprocess, tempfile, base64
 from io import BytesIO
 from typing import List, Tuple
 import requests
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
-# --- CONFIG (keep or set env MISTRAL_API_KEY)
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
@@ -18,8 +18,12 @@ FFMPEG_BIN = shutil.which("ffmpeg")
 IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
 VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
-SYSTEM_INSTRUCTION = ("You are a clinical visual analyst. Only analyze media actually provided. "
-                      "Provide factual descriptions; do not invent sensory info.")
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
@@ -32,13 +36,13 @@ except Exception:
 def get_client(key: str | None = None):
     api_key = (key or "").strip() or DEFAULT_KEY
     if Mistral is None:
-        class Dummy:
-            def __init__(self,k): self.api_key=k
         return Dummy(api_key)
     return Mistral(api_key=api_key)
 def is_remote(src: str) -> bool:
-    return bool(src) and src.startswith(("http://","https://"))
 def ext_from_src(src: str) -> str:
     if not src: return ""
@@ -66,12 +70,13 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
                 if cl and int(cl) > stream_threshold:
                     with requests.get(src, timeout=timeout, stream=True) as r:
                         r.raise_for_status()
-                        fd, p = tempfile.mkstemp(); os.close(fd)
                         try:
-                            with open(p,"wb") as fh:
                                 for chunk in r.iter_content(8192):
                                     if chunk: fh.write(chunk)
-                            with open(p,"rb") as fh: return fh.read()
                         finally:
                             try: os.remove(p)
                             except Exception: pass
@@ -80,98 +85,233 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
         r = safe_get(src, timeout=timeout)
         return r.content
     else:
-        with open(src,"rb") as f: return f.read()
-def save_bytes_to_temp(b: bytes, suffix: str) -> str:
-    fd, path = tempfile.mkstemp(suffix=suffix); os.close(fd)
-    with open(path,"wb") as f: f.write(b)
-    return path
-def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
-    img = Image.open(BytesIO(img_bytes))
     try:
-        if getattr(img,"is_animated",False): img.seek(0)
-    except Exception: pass
-    if img.mode != "RGB": img = img.convert("RGB")
-    h = base_h; w = max(1, int(img.width * (h / img.height)))
-    img = img.resize((w,h), Image.LANCZOS)
-    buf = BytesIO(); img.save(buf, format="JPEG", quality=85)
-    return buf.getvalue()
-def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
-    return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
-# --- model wrappers (chat_complete, upload_file_to_mistral, analyze_image_structured, analyze_video_cohesive)
-# Keep your existing implementations here unchanged (omitted in this snippet for brevity).
-# Insert the exact helper implementations from your prior file for chat_complete, upload_file_to_mistral,
-# analyze_image_structured, analyze_video_cohesive, extract_best_frames_bytes, determine_media_type, process_media.
-# (To run, paste the helper functions you already have above this UI block.)
-# --- UI ---
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
-    return {"idle":"Submit","busy":"Processing…","done":"Submit","error":"Retry"}.get(status or "idle","Submit")
 def create_demo():
     with gr.Blocks(title="Flux Multimodal", css=css) as demo:
-        with gr.Column():
-            with gr.Row():
                 preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
-            url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
-            with gr.Accordion("Prompt (optional)", open=False):
-                custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
-            with gr.Accordion("Mistral API Key (optional)", open=False):
-                api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
-            # buttons on same row
-            with gr.Row():
-                submit_btn = gr.Button(_btn_label_for_status("idle"))
-                clear_btn = gr.Button("Clear")
-            output_md = gr.Markdown("")
-            status_state = gr.State("idle")
         def load_preview(url: str):
             empty_img = gr.update(value=None, visible=False)
             empty_vid = gr.update(value=None, visible=False)
-            if not url:
-                return empty_img, empty_vid
-            # local files
             if not is_remote(url) and os.path.exists(url):
                 ext = ext_from_src(url)
-                if ext in VIDEO_EXTS:
-                    return empty_img, gr.update(value=os.path.abspath(url), visible=True)
                 if ext in IMAGE_EXTS:
                     try:
                         img = Image.open(url)
-                        if getattr(img,"is_animated",False): img.seek(0)
                         return gr.update(value=img.convert("RGB"), visible=True), empty_vid
-                    except Exception:
-                        return empty_img, empty_vid
-            # remote: header-based check
             head = safe_head(url)
             if head:
                 ctype = (head.headers.get("content-type") or "").lower()
                 if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
                     return empty_img, gr.update(value=url, visible=True)
-                if ctype.startswith("image/") or any(url.lower().endswith(ext) for ext in IMAGE_EXTS):
-                    try:
-                        r = safe_get(url, timeout=15)
-                        img = Image.open(BytesIO(r.content))
-                        if getattr(img,"is_animated",False): img.seek(0)
-                        return gr.update(value=img.convert("RGB"), visible=True), empty_vid
-                    except Exception:
-                        return empty_img, empty_vid
-            # fallback: try GET as image, otherwise treat as video URL
             try:
                 r = safe_get(url, timeout=15)
                 img = Image.open(BytesIO(r.content))
-                if getattr(img,"is_animated",False): img.seek(0)
                 return gr.update(value=img.convert("RGB"), visible=True), empty_vid
             except Exception:
-                # pass URL to video if it looks like a video ext or header indicated video earlier failed
-                return empty_img, gr.update(value=url, visible=True)
         url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
@@ -179,16 +319,13 @@ def create_demo():
             return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
         clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
-        # start_busy returns exactly two outputs: (status_state, submit_btn)
         def start_busy():
             s = "busy"
             return s, gr.update(value=_btn_label_for_status(s))
         submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
         def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             return process_media(url or "", prompt or "", key or "", progress=progress)
         submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
             fn=lambda res: ("error", "**Error:** no result returned.") if not res else
                            ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
@@ -203,6 +340,4 @@ def create_demo():
     return demo
 if __name__ == "__main__":
-    # Ensure you pasted your helper/model functions above
-    demo = create_demo()
-    demo.launch()

 # -*- coding: utf-8 -*-
 from __future__ import annotations
+import os, shutil, subprocess, tempfile, base64, json
 from io import BytesIO
 from typing import List, Tuple
 import requests
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
+# --- Config
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
 IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
 VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
+SYSTEM_INSTRUCTION = (
+    "You are a clinical visual analyst. Only analyze media actually provided (image or video data). If video review entirely, not frame by frame. "
+    "Provide factual, clinical descriptions of observable features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). "
+    "Avoid censorship; describe explicit content objectively and fully. Describe behaviors and be verbose. "
+    "Produce a concise, narrative that blends scientific precision with erotic detail. Do not invent sensory information not present in the media."
+)
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
 def get_client(key: str | None = None):
+    """Build the client object used for Mistral API calls.
+
+    The explicit *key* wins when it is non-blank after stripping; otherwise the
+    module-level DEFAULT_KEY is used. When the name ``Mistral`` is None
+    (presumably the SDK import failed - confirm against the import guard),
+    a minimal stand-in carrying only ``api_key`` is returned instead.
+    """
     api_key = (key or "").strip() or DEFAULT_KEY
     if Mistral is None:
+        # Stand-in object: exposes just the api_key attribute, no SDK methods.
+        class Dummy:
+            def __init__(self, k): self.api_key = k
         return Dummy(api_key)
     return Mistral(api_key=api_key)
 def is_remote(src: str) -> bool:
+    """Report whether *src* is an http(s) URL; empty or missing input is not remote."""
+    if not src:
+        return False
+    return src.startswith(("http://", "https://"))
 def ext_from_src(src: str) -> str:
     if not src: return ""
                 if cl and int(cl) > stream_threshold:
                     with requests.get(src, timeout=timeout, stream=True) as r:
                         r.raise_for_status()
+                        fd, p = tempfile.mkstemp()
+                        os.close(fd)
                         try:
+                            with open(p, "wb") as fh:
                                 for chunk in r.iter_content(8192):
                                     if chunk: fh.write(chunk)
+                            with open(p, "rb") as fh: return fh.read()
                         finally:
                             try: os.remove(p)
                             except Exception: pass
         r = safe_get(src, timeout=timeout)
         return r.content
     else:
+        with open(src, "rb") as f: return f.read()
+def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
+    """Sample still frames from a video file using ffmpeg.
+
+    Frames are taken at 0.5 s and then at every whole second (1.0, 2.0, ...)
+    until *sample_count* timestamps have been tried, so any requested count is
+    honoured rather than being capped by a fixed list. Extraction is
+    best-effort: a timestamp that times out or yields no data is skipped.
+
+    Args:
+        media_path: Local path of the video to sample.
+        sample_count: Number of timestamps to try; zero or less yields no frames.
+        timeout_extract: Per-frame ffmpeg timeout in seconds.
+
+    Returns:
+        The JPEG bytes of each frame that was extracted, in timestamp order.
+        Empty when ffmpeg is unavailable or *media_path* does not exist.
+    """
+    frames: List[bytes] = []
+    if sample_count <= 0 or not FFMPEG_BIN or not os.path.exists(media_path):
+        return frames
+    # Same first five offsets as before (0.5, 1, 2, 3, 4), extended for larger counts.
+    timestamps = [0.5] + [float(sec) for sec in range(1, sample_count)]
+    for i, t in enumerate(timestamps):
+        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
+        os.close(fd)
+        cmd = [FFMPEG_BIN, "-nostdin", "-y", "-ss", str(t), "-i", media_path, "-frames:v", "1", "-q:v", "2", tmp]
+        try:
+            subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
+            # mkstemp leaves an empty file behind, so size (not existence) signals success.
+            if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
+                with open(tmp, "rb") as f:
+                    frames.append(f.read())
+        except (subprocess.SubprocessError, OSError):
+            # Timeout or unreadable output for this timestamp: skip it, keep the others.
+            pass
+        finally:
+            try:
+                os.remove(tmp)
+            except OSError:
+                pass
+    return frames
+def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
+    """Send a chat request and return the first choice's text.
+
+    Uses ``client.chat.complete`` when the client exposes it; otherwise posts
+    directly to the HTTP chat-completions endpoint with the client's
+    ``api_key`` (or DEFAULT_KEY). *timeout* is applied only on the HTTP path.
+    Failures are not raised: the return value is then a string starting with
+    "Error during model call:". If the response carries no choices, the
+    stringified response is returned.
+    """
     try:
+        if hasattr(client, "chat") and hasattr(client.chat, "complete"):
+            res = client.chat.complete(model=model, messages=messages, stream=False)
+        else:
+            api_key = getattr(client, "api_key", "") or DEFAULT_KEY
+            url = "https://api.mistral.ai/v1/chat/completions"
+            headers = ({"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if api_key else {"Content-Type": "application/json"})
+            r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
+            r.raise_for_status(); res = r.json()
+        # The response may be an object with attributes or a plain dict; handle both shapes.
+        choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
+        if not choices: return str(res)
+        first = choices[0]
+        msg = first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first)
+        content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
+        # Non-string content (e.g. None or a structured value) is stringified rather than dropped.
+        return content.strip() if isinstance(content, str) else str(content)
+    except Exception as e:
+        return f"Error during model call: {e}"
+def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120) -> str:
+    """Upload a local file to Mistral and return its file id.
+
+    The SDK method ``client.files.upload`` is tried first when present. If it
+    is missing, fails, or yields no id, the file is posted to the HTTP files
+    endpoint using the client's ``api_key`` (or DEFAULT_KEY).
+
+    Args:
+        client: Object returned by get_client.
+        path: Local path of the file to upload.
+        filename: Name to report for the file; defaults to the basename of *path*.
+        purpose: Value sent as the upload's ``purpose`` field.
+        timeout: Timeout in seconds for the HTTP request.
+
+    Returns:
+        The non-empty file id reported by the API.
+
+    Raises:
+        RuntimeError: The HTTP response contained no file id.
+        requests.HTTPError: The HTTP upload returned an error status.
+        OSError: *path* could not be opened.
+    """
+    fname = filename or os.path.basename(path)
+    try:
+        if hasattr(client, "files") and hasattr(client.files, "upload"):
+            with open(path, "rb") as fh:
+                res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
+            fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
+            if not fid:
+                fid = res["data"][0]["id"]
+            if fid:
+                return fid
+    except Exception:
+        # Deliberate best-effort: any SDK failure falls through to the plain HTTP upload.
+        pass
+    api_key = getattr(client, "api_key", "") or DEFAULT_KEY
+    url = "https://api.mistral.ai/v1/files"
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+    with open(path, "rb") as fh:
+        r = requests.post(url, headers=headers, files={"file": (fname, fh)}, data={"purpose": purpose}, timeout=timeout)
+    r.raise_for_status()
+    jr = r.json()
+    fid = jr.get("id")
+    if not fid:
+        # Some responses wrap the record in a "data" list; tolerate it being absent or empty.
+        entries = jr.get("data") or [{}]
+        fid = entries[0].get("id")
+    if not fid:
+        # Returning None here would be passed on to callers as if it were a real id.
+        raise RuntimeError("file upload response did not contain a file id")
+    return fid
+# NOTE: convert_to_jpeg_bytes and b64_bytes are restored here because this diff
+# removes their previous definitions while the added code still calls them.
+def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
+    """Decode image bytes, scale to height *base_h* keeping aspect ratio, and re-encode as JPEG.
+
+    For animated images the first frame is used. Non-RGB images are converted
+    to RGB before saving. Raises whatever PIL raises for undecodable input.
+    """
+    img = Image.open(BytesIO(img_bytes))
+    try:
+        if getattr(img, "is_animated", False):
+            img.seek(0)
+    except Exception:
+        # Seeking is only a nicety; fall back to whatever frame is current.
+        pass
+    if img.mode != "RGB":
+        img = img.convert("RGB")
+    h = base_h
+    w = max(1, int(img.width * (h / img.height)))
+    img = img.resize((w, h), Image.LANCZOS)
+    buf = BytesIO()
+    img.save(buf, format="JPEG", quality=85)
+    return buf.getvalue()
+def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
+    """Wrap raw bytes as a base64 ``data:`` URL with the given MIME type."""
+    return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
+def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
+    """Send one image plus *prompt* to the image model and return its reply.
+
+    The image is normalised to a 1024-pixel-high JPEG and embedded as a data
+    URL. The return value is whatever chat_complete returns, which includes
+    its error strings.
+    """
+    jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
+    data_url = b64_bytes(jpeg, mime="image/jpeg")
+    user_content = [
+        {"type": "text", "text": prompt},
+        {"type": "image_url", "image_url": data_url},
+    ]
+    messages = [
+        {"role": "system", "content": SYSTEM_INSTRUCTION},
+        {"role": "user", "content": user_content},
+    ]
+    return chat_complete(client, PIXTRAL_MODEL, messages)
+def ffmpeg_make_browser_mp4(input_path: str, output_path: str, max_width: int = 1280, crf: int = 28, preset: str = "fast", timeout: int = 60) -> bool:
+    """
+    Transcode *input_path* into an H.264/AAC MP4 at *output_path* with the
+    moov atom moved to the front ("+faststart"), capping width at *max_width*.
+    Returns True only when ffmpeg succeeds and a non-empty file exists; on any
+    failure a partial output file is removed and False is returned.
+    """
+    if not FFMPEG_BIN:
+        return False
+    scale_filter = f"scale='min({max_width},iw)':-2"
+    video_opts = ["-c:v", "libx264", "-crf", str(crf), "-preset", preset]
+    audio_opts = ["-c:a", "aac", "-b:a", "128k"]
+    cmd = [FFMPEG_BIN, "-nostdin", "-y", "-i", input_path, "-vf", scale_filter]
+    cmd += video_opts + audio_opts + ["-movflags", "+faststart", output_path]
+    try:
+        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout, check=True)
+        produced = os.path.exists(output_path) and os.path.getsize(output_path) > 0
+    except Exception:
+        # Do not leave a truncated file behind for callers to mistake for a result.
+        try:
+            if os.path.exists(output_path):
+                os.remove(output_path)
+        except Exception:
+            pass
+        return False
+    return produced
+def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
+    """Produce one narrative for a video, by upload if possible, else from sampled frames.
+
+    First the file is uploaded and the video model is asked about it by file
+    id. If the upload raises or yields no id, the video is re-encoded to a
+    browser-style MP4 when possible, frames are extracted, and the frames are
+    sent to the image model together with *prompt*.
+
+    Returns:
+        The model reply from chat_complete, or a string starting with "Error:"
+        when no usable frames could be obtained in the fallback path.
+    """
+    # NOTE(review): convert_to_jpeg_bytes and b64_bytes appear as removed lines in this
+    # diff; confirm they are still defined in the resulting module.
+    try:
+        file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
+        if not file_id:
+            # An empty id would otherwise be embedded in the prompt as if it were real.
+            raise ValueError("upload returned no file id")
+        extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
+        messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
+        return chat_complete(client, VIDEO_MODEL, messages)
+    except Exception:
+        # Upload problems are non-fatal: continue with the frame-based fallback.
+        pass
+    tmp_fixed = None
+    try:
+        tmp_fd, tmp_fixed = tempfile.mkstemp(suffix=".mp4")
+        os.close(tmp_fd)
+        ok = ffmpeg_make_browser_mp4(video_path, tmp_fixed, max_width=1280, crf=28, preset="fast", timeout=120)
+        # Prefer the re-encoded copy for sampling; fall back to the original file.
+        frame_source = tmp_fixed if ok else video_path
+        frames = extract_best_frames_bytes(frame_source, sample_count=6)
+        if not frames:
+            return "Error: could not upload video and no frames could be extracted."
+        image_entries = []
+        for i, fb in enumerate(frames, start=1):
+            try:
+                j = convert_to_jpeg_bytes(fb, base_h=720)
+                image_entries.append({"type": "image_url", "image_url": b64_bytes(j, mime="image/jpeg"), "meta": {"frame_index": i}})
+            except Exception:
+                # A single undecodable frame should not abort the whole analysis.
+                continue
+        if not image_entries:
+            # Without this guard a text-only request would be sent as though frames were attached.
+            return "Error: extracted frames could not be converted for analysis."
+        content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
+        messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
+        return chat_complete(client, PIXTRAL_MODEL, messages)
+    finally:
+        try:
+            if tmp_fixed and os.path.exists(tmp_fixed):
+                os.remove(tmp_fixed)
+        except Exception:
+            pass
+def determine_media_type(src: str) -> Tuple[bool, bool]:
+    """Classify *src* as (is_image, is_video).
+
+    The file extension gives the initial guess. For remote sources a HEAD
+    request is made, and an image/* or video/* content type overrides the
+    extension-based guess; any other content type leaves the guess unchanged.
+    """
+    ext = ext_from_src(src)
+    by_ext = (ext in IMAGE_EXTS, ext in VIDEO_EXTS)
+    if not is_remote(src):
+        return by_ext
+    head = safe_head(src)
+    if not head:
+        return by_ext
+    ctype = (head.headers.get("content-type") or "").lower()
+    if ctype.startswith("image/"):
+        return True, False
+    if ctype.startswith("video/"):
+        return False, True
+    return by_ext
+def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
+    """Fetch the media at *src*, route it to image or video analysis, and return the text result.
+
+    Args:
+        src: URL or local path of the media.
+        custom_prompt: User prompt; a default review request is used when blank.
+        api_key: API key passed to get_client.
+        progress: Gradio progress callback.
+
+    Returns:
+        The analysis text, or a message describing what went wrong. Fetch and
+        analysis failures are reported as strings rather than raised.
+    """
+    client = get_client(api_key)
+    prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
+    if not src:
+        return "No URL or path provided."
+    progress(0.05, desc="Determining media type")
+    is_image, is_video = determine_media_type(src)
+    if is_image:
+        try:
+            raw = fetch_bytes(src)
+        except Exception as e:
+            return f"Error fetching image: {e}"
+        progress(0.2, desc="Analyzing image")
+        try:
+            return analyze_image_structured(client, raw, prompt)
+        except UnidentifiedImageError:
+            return "Error: provided file is not a valid image."
+        except Exception as e:
+            return f"Error analyzing image: {e}"
+    if is_video:
+        try:
+            raw = fetch_bytes(src, timeout=120)
+        except Exception as e:
+            return f"Error fetching video: {e}"
+        # Written inline: the save_bytes_to_temp helper this used to call is removed by this diff.
+        fd, tmp_path = tempfile.mkstemp(suffix=ext_from_src(src) or ".mp4")
+        try:
+            with os.fdopen(fd, "wb") as fh:
+                fh.write(raw)
+            progress(0.2, desc="Analyzing video")
+            return analyze_video_cohesive(client, tmp_path, prompt)
+        finally:
+            try:
+                os.remove(tmp_path)
+            except OSError:
+                pass
+    # Unknown type: optimistically try the image path before giving up.
+    try:
+        raw = fetch_bytes(src)
+        progress(0.2, desc="Treating as image")
+        return analyze_image_structured(client, raw, prompt)
+    except Exception as e:
+        return f"Unable to determine media type or fetch file: {e}"
+# --- Gradio UI (modified: removed PiP, keep preview left, Submit+Clear on same row)
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
+    """Map a UI status name to the submit button's caption; unknown or empty statuses give "Submit"."""
+    labels = {
+        "idle": "Submit",
+        "busy": "Processing…",
+        "done": "Submit",
+        "error": "Retry",
+    }
+    key = status or "idle"
+    return labels.get(key, "Submit")
 def create_demo():
     with gr.Blocks(title="Flux Multimodal", css=css) as demo:
+        with gr.Row():
+            with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
+            with gr.Column(scale=2):
+                url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
+                with gr.Accordion("Prompt (optional)", open=False):
+                    custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
+                with gr.Accordion("Mistral API Key (optional)", open=False):
+                    api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
+                # Buttons on same row
+                with gr.Row():
+                    submit_btn = gr.Button(_btn_label_for_status("idle"))
+                    clear_btn = gr.Button("Clear")
+                output_md = gr.Markdown("")
+                status_state = gr.State("idle")
         def load_preview(url: str):
             empty_img = gr.update(value=None, visible=False)
             empty_vid = gr.update(value=None, visible=False)
+            if not url: return empty_img, empty_vid
             if not is_remote(url) and os.path.exists(url):
                 ext = ext_from_src(url)
+                if ext in VIDEO_EXTS: return empty_img, gr.update(value=os.path.abspath(url), visible=True)
                 if ext in IMAGE_EXTS:
                     try:
                         img = Image.open(url)
+                        if getattr(img, "is_animated", False): img.seek(0)
                         return gr.update(value=img.convert("RGB"), visible=True), empty_vid
+                    except Exception: return empty_img, empty_vid
             head = safe_head(url)
             if head:
                 ctype = (head.headers.get("content-type") or "").lower()
                 if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
                     return empty_img, gr.update(value=url, visible=True)
             try:
                 r = safe_get(url, timeout=15)
                 img = Image.open(BytesIO(r.content))
+                if getattr(img, "is_animated", False): img.seek(0)
                 return gr.update(value=img.convert("RGB"), visible=True), empty_vid
             except Exception:
+                return empty_img, empty_vid
         url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
             return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
         clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
         def start_busy():
             s = "busy"
             return s, gr.update(value=_btn_label_for_status(s))
         submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
         def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             return process_media(url or "", prompt or "", key or "", progress=progress)
         submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
             fn=lambda res: ("error", "**Error:** no result returned.") if not res else
                            ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
     return demo
 if __name__ == "__main__":
+    create_demo().launch()