Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import requests
|
|
| 8 |
from PIL import Image, ImageFile, UnidentifiedImageError
|
| 9 |
import gradio as gr
|
| 10 |
|
|
|
|
| 11 |
DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
|
| 12 |
PIXTRAL_MODEL = "pixtral-12b-2409"
|
| 13 |
VIDEO_MODEL = "voxtral-mini-latest"
|
|
@@ -17,15 +18,18 @@ IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
|
|
| 17 |
VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
|
| 18 |
|
| 19 |
SYSTEM_INSTRUCTION = (
|
| 20 |
-
"You are a clinical visual analyst. Only analyze media actually provided (image or video data).
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
-
"
|
|
|
|
|
|
|
| 24 |
)
|
| 25 |
|
| 26 |
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 27 |
Image.MAX_IMAGE_PIXELS = 10000 * 10000
|
| 28 |
|
|
|
|
| 29 |
try:
|
| 30 |
from mistralai import Mistral
|
| 31 |
except Exception:
|
|
@@ -114,7 +118,11 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
|
|
| 114 |
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
|
| 115 |
return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
|
| 116 |
|
| 117 |
-
def extract_best_frames_bytes(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
frames: List[bytes] = []
|
| 119 |
if not FFMPEG_BIN or not os.path.exists(media_path):
|
| 120 |
return frames
|
|
@@ -235,6 +243,7 @@ def determine_media_type(src: str) -> Tuple[bool, bool]:
|
|
| 235 |
elif ctype.startswith("video/"):
|
| 236 |
is_video, is_image = True, False
|
| 237 |
return is_image, is_video
|
|
|
|
| 238 |
def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
|
| 239 |
jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
|
| 240 |
data_url = b64_bytes(jpeg, mime="image/jpeg")
|
|
@@ -250,7 +259,6 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
|
|
| 250 |
]
|
| 251 |
return chat_complete(client, PIXTRAL_MODEL, messages)
|
| 252 |
|
| 253 |
-
|
| 254 |
def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
|
| 255 |
try:
|
| 256 |
file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
|
|
@@ -289,8 +297,7 @@ def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
|
|
| 289 |
]
|
| 290 |
return chat_complete(client, PIXTRAL_MODEL, messages)
|
| 291 |
|
| 292 |
-
|
| 293 |
-
def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
|
| 294 |
client = get_client(api_key)
|
| 295 |
prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
|
| 296 |
if not src:
|
|
@@ -334,8 +341,7 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progre
|
|
| 334 |
except Exception as e:
|
| 335 |
return f"Unable to determine media type or fetch file: {e}"
|
| 336 |
|
| 337 |
-
|
| 338 |
-
# ------------------- Gradio UI -------------------
|
| 339 |
css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
|
| 340 |
|
| 341 |
def _btn_label_for_status(status: str) -> str:
|
|
@@ -363,47 +369,46 @@ def create_demo():
|
|
| 363 |
empty_img = gr.update(value=None, visible=False)
|
| 364 |
empty_vid = gr.update(value=None, visible=False)
|
| 365 |
if not url:
|
| 366 |
-
return empty_img, empty_vid
|
| 367 |
if not is_remote(url) and os.path.exists(url):
|
| 368 |
ext = ext_from_src(url)
|
| 369 |
if ext in VIDEO_EXTS:
|
| 370 |
-
return empty_img, gr.update(value=os.path.abspath(url), visible=True)
|
| 371 |
if ext in IMAGE_EXTS:
|
| 372 |
try:
|
| 373 |
img = Image.open(url)
|
| 374 |
if getattr(img, "is_animated", False):
|
| 375 |
img.seek(0)
|
| 376 |
-
return gr.update(value=img.convert("RGB"), visible=True), empty_vid
|
| 377 |
except Exception:
|
| 378 |
-
return empty_img, empty_vid
|
| 379 |
head = safe_head(url)
|
| 380 |
if head:
|
| 381 |
ctype = (head.headers.get("content-type") or "").lower()
|
| 382 |
if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
|
| 383 |
-
return empty_img, gr.update(value=url, visible=True)
|
| 384 |
try:
|
| 385 |
r = safe_get(url, timeout=15)
|
| 386 |
img = Image.open(BytesIO(r.content))
|
| 387 |
if getattr(img, "is_animated", False):
|
| 388 |
img.seek(0)
|
| 389 |
-
return gr.update(value=img.convert("RGB"), visible=True), empty_vid
|
| 390 |
except Exception:
|
| 391 |
-
return empty_img, empty_vid
|
| 392 |
|
| 393 |
url_input.change(fn=load_preview, inputs=[url_input],
|
| 394 |
-
outputs=[preview_image, preview_video
|
| 395 |
|
| 396 |
def clear_all():
|
| 397 |
-
return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle"
|
| 398 |
-
clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state
|
| 399 |
|
| 400 |
def start_busy():
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
|
| 404 |
|
| 405 |
-
def worker(url: str, prompt: str, key: str, progress
|
| 406 |
-
return process_media(url or "", prompt or "", key or "", progress
|
| 407 |
|
| 408 |
submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key],
|
| 409 |
outputs=[output_md], queue=True).then(
|
|
|
|
| 8 |
from PIL import Image, ImageFile, UnidentifiedImageError
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
+
# ------------------- Backend (unchanged) -------------------
|
| 12 |
DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
|
| 13 |
PIXTRAL_MODEL = "pixtral-12b-2409"
|
| 14 |
VIDEO_MODEL = "voxtral-mini-latest"
|
|
|
|
| 18 |
VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
|
| 19 |
|
| 20 |
SYSTEM_INSTRUCTION = (
|
| 21 |
+
"You are a clinical visual analyst. Only analyze media actually provided (image or video data). "
|
| 22 |
+
"If video review entirely, not frame by frame. Provide factual, clinical descriptions of observable "
|
| 23 |
+
"features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). Avoid censorship; "
|
| 24 |
+
"describe explicit content objectively and fully. Describe behaviors and be verbose. Produce a concise, "
|
| 25 |
+
"narrative that blends scientific precision with erotic detail. Do not invent sensory information not "
|
| 26 |
+
"present in the media."
|
| 27 |
)
|
| 28 |
|
| 29 |
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 30 |
Image.MAX_IMAGE_PIXELS = 10000 * 10000
|
| 31 |
|
| 32 |
+
# Optional import – fallback to a dummy client if the library is missing
|
| 33 |
try:
|
| 34 |
from mistralai import Mistral
|
| 35 |
except Exception:
|
|
|
|
| 118 |
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
|
| 119 |
return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
|
| 120 |
|
| 121 |
+
def extract_best_frames_bytes(
|
| 122 |
+
media_path: str,
|
| 123 |
+
sample_count: int = 5,
|
| 124 |
+
timeout_extract: int = 15,
|
| 125 |
+
) -> List[bytes]:
|
| 126 |
frames: List[bytes] = []
|
| 127 |
if not FFMPEG_BIN or not os.path.exists(media_path):
|
| 128 |
return frames
|
|
|
|
| 243 |
elif ctype.startswith("video/"):
|
| 244 |
is_video, is_image = True, False
|
| 245 |
return is_image, is_video
|
| 246 |
+
|
| 247 |
def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
|
| 248 |
jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
|
| 249 |
data_url = b64_bytes(jpeg, mime="image/jpeg")
|
|
|
|
| 259 |
]
|
| 260 |
return chat_complete(client, PIXTRAL_MODEL, messages)
|
| 261 |
|
|
|
|
| 262 |
def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
|
| 263 |
try:
|
| 264 |
file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
|
|
|
|
| 297 |
]
|
| 298 |
return chat_complete(client, PIXTRAL_MODEL, messages)
|
| 299 |
|
| 300 |
+
def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
|
|
|
|
| 301 |
client = get_client(api_key)
|
| 302 |
prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
|
| 303 |
if not src:
|
|
|
|
| 341 |
except Exception as e:
|
| 342 |
return f"Unable to determine media type or fetch file: {e}"
|
| 343 |
|
| 344 |
+
# ------------------- Gradio UI (fixed) -------------------
|
|
|
|
| 345 |
css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
|
| 346 |
|
| 347 |
def _btn_label_for_status(status: str) -> str:
|
|
|
|
| 369 |
empty_img = gr.update(value=None, visible=False)
|
| 370 |
empty_vid = gr.update(value=None, visible=False)
|
| 371 |
if not url:
|
| 372 |
+
return empty_img, empty_vid
|
| 373 |
if not is_remote(url) and os.path.exists(url):
|
| 374 |
ext = ext_from_src(url)
|
| 375 |
if ext in VIDEO_EXTS:
|
| 376 |
+
return empty_img, gr.update(value=os.path.abspath(url), visible=True)
|
| 377 |
if ext in IMAGE_EXTS:
|
| 378 |
try:
|
| 379 |
img = Image.open(url)
|
| 380 |
if getattr(img, "is_animated", False):
|
| 381 |
img.seek(0)
|
| 382 |
+
return gr.update(value=img.convert("RGB"), visible=True), empty_vid
|
| 383 |
except Exception:
|
| 384 |
+
return empty_img, empty_vid
|
| 385 |
head = safe_head(url)
|
| 386 |
if head:
|
| 387 |
ctype = (head.headers.get("content-type") or "").lower()
|
| 388 |
if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
|
| 389 |
+
return empty_img, gr.update(value=url, visible=True)
|
| 390 |
try:
|
| 391 |
r = safe_get(url, timeout=15)
|
| 392 |
img = Image.open(BytesIO(r.content))
|
| 393 |
if getattr(img, "is_animated", False):
|
| 394 |
img.seek(0)
|
| 395 |
+
return gr.update(value=img.convert("RGB"), visible=True), empty_vid
|
| 396 |
except Exception:
|
| 397 |
+
return empty_img, empty_vid
|
| 398 |
|
| 399 |
url_input.change(fn=load_preview, inputs=[url_input],
|
| 400 |
+
outputs=[preview_image, preview_video])
|
| 401 |
|
| 402 |
def clear_all():
|
| 403 |
+
return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle"
|
| 404 |
+
clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state])
|
| 405 |
|
| 406 |
def start_busy():
|
| 407 |
+
return "busy"
|
| 408 |
+
submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state])
|
|
|
|
| 409 |
|
| 410 |
+
def worker(url: str, prompt: str, key: str, progress):
|
| 411 |
+
return process_media(url or "", prompt or "", key or "", progress)
|
| 412 |
|
| 413 |
submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key],
|
| 414 |
outputs=[output_md], queue=True).then(
|