# eneas/app.py — Gradio Space entry point (commit dcb39c7 "fix", by javipd99)
import json
import logging
import os
import shutil
import subprocess
import sys
import tempfile
import threading
import time

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
# ===========================================
# LOGGING CONFIGURATION
# ===========================================
# Log to stdout so messages appear in the Space's container logs.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)
# Ensure Python sees the local 'eneas' folder
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import spaces
try:
from eneas.segmentation import UniqueInstanceSegmenter, GenericCategorySegmenter
from eneas.segmentation.model_manager import ModelManager
except ImportError as e:
logger.error(f"Error importing ENEAS: {e}")
raise e
# ===========================================
# CONSTANTS
# ===========================================
MAX_FRAMES = 150  # Limit frames to avoid ZeroGPU Timeout (~1s/frame processing)
OLLAMA_HOST = "127.0.0.1:11434"  # Bind address for the local Ollama server
OLLAMA_URL = f"http://{OLLAMA_HOST}"  # Base URL used by the curl helpers below
OLLAMA_BIN = "./bin/ollama"  # Path of the extracted Ollama binary
# VLM models offered in the Generic Category tab (pre-pulled at startup).
VLM_MODELS = [
    "qwen3-vl:4b-instruct-q8_0",
    "qwen3-vl:2b-instruct-q8_0"
]
OUTPUT_BASE_DIR = "gradio_outputs"  # Where result videos are written
os.makedirs(OUTPUT_BASE_DIR, exist_ok=True)
# ===========================================
# OLLAMA FUNCTIONS (FOR USE INSIDE @spaces.GPU)
# ===========================================
def get_ollama_env():
    """Build the environment for launching the Ollama binary.

    Starts from a copy of the current environment and overrides what the
    bundled Ollama needs: bind address, permissive CORS origins, HOME
    (so Ollama stores its data under the working dir), and an
    LD_LIBRARY_PATH entry pointing at the extracted `lib` directory.
    """
    env = dict(os.environ)
    env["OLLAMA_HOST"] = OLLAMA_HOST
    env["OLLAMA_ORIGINS"] = "*"
    env["HOME"] = os.getcwd()
    # The extracted binary ships its shared libraries in ./lib.
    lib_path = f"{os.getcwd()}/lib"
    if "LD_LIBRARY_PATH" in env:
        env["LD_LIBRARY_PATH"] = env["LD_LIBRARY_PATH"] + f":{lib_path}"
    else:
        env["LD_LIBRARY_PATH"] = lib_path
    return env
def is_ollama_server_running() -> bool:
    """Return True when the local Ollama HTTP endpoint answers with 200."""
    probe_cmd = ["curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", OLLAMA_URL]
    try:
        probe = subprocess.run(
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=5
        )
    except Exception:
        # curl missing, timeout, etc. — treat all failures as "not running".
        return False
    return probe.stdout.strip() == "200"
def start_ollama_server_gpu():
    """
    Start Ollama server INSIDE @spaces.GPU context.
    This ensures Ollama detects and uses the GPU.

    Returns:
        bool: True if server started successfully
    """
    if is_ollama_server_running():
        logger.info("Ollama server is already running.")
        return True
    logger.info("Starting Ollama server inside GPU context...")
    try:
        env = get_ollama_env()
        # Start server as background process.
        # DEVNULL instead of PIPE: nothing ever drains the pipes, so a
        # chatty server would eventually fill the pipe buffer and block.
        process = subprocess.Popen(
            [OLLAMA_BIN, "serve"],
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )
        # Wait for server to be ready (max 30 seconds)
        max_retries = 30
        for i in range(max_retries):
            if is_ollama_server_running():
                logger.info(f"Ollama server started successfully in {i+1} seconds.")
                return True
            time.sleep(1)
        # Don't leave a half-started server process behind on failure.
        process.terminate()
        logger.error("Ollama server failed to start within 30 seconds.")
        return False
    except Exception as e:
        logger.error(f"Failed to start Ollama server: {e}")
        return False
def load_model_into_vram(model_name: str) -> bool:
    """
    Load model into VRAM for faster inference.
    Uses keep_alive=-1 to keep model loaded.

    Args:
        model_name: Name of the Ollama model to load

    Returns:
        bool: True if model loaded successfully
    """
    logger.info(f"Loading model {model_name} into VRAM...")
    # Serialize the payloads with json.dumps instead of f-string
    # interpolation so a model name containing quotes or backslashes
    # cannot produce malformed JSON.
    warmup_payload = json.dumps({"model": model_name, "prompt": "hi", "stream": False})
    pin_payload = json.dumps({"model": model_name, "keep_alive": -1})
    try:
        # Send a minimal request to trigger model loading
        result = subprocess.run(
            ["curl", "-s", f"{OLLAMA_URL}/api/generate", "-d", warmup_payload],
            capture_output=True,
            text=True,
            timeout=120  # Model loading can take time
        )
        if "error" in result.stdout.lower():
            logger.error(f"Error loading model: {result.stdout}")
            return False
        # Set keep_alive to -1 to keep model in VRAM
        subprocess.run(
            ["curl", "-s", f"{OLLAMA_URL}/api/generate", "-d", pin_payload],
            capture_output=True,
            timeout=10
        )
        logger.info(f"Model {model_name} loaded into VRAM successfully.")
        return True
    except subprocess.TimeoutExpired:
        logger.error("Timeout while loading model into VRAM.")
        return False
    except Exception as e:
        logger.error(f"Error loading model into VRAM: {e}")
        return False
def log_active_models():
    """Query /api/ps and log which models are resident in VRAM (not just on disk)."""
    try:
        ps = subprocess.run(
            ["curl", "-s", f"{OLLAMA_URL}/api/ps"],
            capture_output=True,
            text=True,
            timeout=5
        )
    except Exception as e:
        logger.warning(f"Could not get active models: {e}")
    else:
        logger.info(f"Active models in VRAM: {ps.stdout}")
def ensure_ollama_ready_gpu(model_name: str) -> bool:
    """
    Bring Ollama fully online with GPU support for the given model.

    MUST be called inside a @spaces.GPU decorated function so the server
    process it spawns inherits GPU visibility.

    Steps:
        1. Start the Ollama server (detects the GPU in this context).
        2. Load the requested model into VRAM.
        3. Log which models are actually resident in VRAM.

    Args:
        model_name: Name of the Ollama model to use

    Returns:
        bool: True if ready

    Raises:
        RuntimeError: If the server or the model fails to come up.
    """
    logger.info(f"Ensuring Ollama is ready with GPU for model: {model_name}")
    if not start_ollama_server_gpu():
        raise RuntimeError("Failed to start Ollama server with GPU")
    if not load_model_into_vram(model_name):
        raise RuntimeError(f"Failed to load model {model_name} into VRAM")
    log_active_models()
    logger.info("Ollama is ready with GPU support!")
    return True
# ===========================================
# STARTUP: DOWNLOAD BINARY AND MODELS (CPU)
# ===========================================
def download_ollama_binary():
    """Fetch and unpack the Ollama binary unless it is already present.

    Returns:
        bool: True on success (or if already downloaded), False otherwise.
    """
    if os.path.exists(OLLAMA_BIN):
        logger.info("Ollama binary already exists.")
        return True
    logger.info("Downloading Ollama binary (ZST)...")
    archive = "ollama.tar.zst"
    try:
        subprocess.run(
            ["curl", "-L", "https://ollama.com/download/ollama-linux-amd64.tar.zst", "-o", archive],
            check=True,
            timeout=300
        )
        subprocess.run(["tar", "--zstd", "-xf", archive], check=True)
        subprocess.run(["chmod", "+x", OLLAMA_BIN], check=True)
        os.remove(archive)  # Cleanup
        logger.info("Ollama binary downloaded and extracted successfully.")
        return True
    except Exception as e:
        logger.error(f"Failed to download Ollama binary: {e}")
        return False
def pull_ollama_models():
    """
    Pull Ollama models at startup (runs on CPU).
    This pre-downloads the models so they're ready when GPU is available.
    """
    logger.info("Pre-downloading Ollama models...")
    # Need to temporarily start server to pull models
    env = get_ollama_env()
    # DEVNULL instead of PIPE: the pipes are never drained here, and a full
    # pipe buffer could block the server process.
    server_process = subprocess.Popen(
        [OLLAMA_BIN, "serve"],
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL
    )
    try:
        # Wait for server
        time.sleep(5)
        for _ in range(20):
            if is_ollama_server_running():
                break
            time.sleep(1)
        else:
            # Keep the original best-effort behavior: still attempt pulls.
            logger.warning("Temporary Ollama server not confirmed ready; attempting pulls anyway.")
        # Pull each model
        for model in VLM_MODELS:
            logger.info(f"Pulling model: {model}")
            try:
                subprocess.run(
                    [OLLAMA_BIN, "pull", model],
                    env=env,
                    timeout=600,
                    capture_output=True
                )
                logger.info(f"Model {model} pulled successfully.")
            except Exception as e:
                logger.warning(f"Failed to pull model {model}: {e}")
    finally:
        # Always stop the temporary server, even if something above raised
        # (we'll restart it inside the GPU context later).
        server_process.terminate()
        try:
            server_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            server_process.kill()
    logger.info("Ollama models pre-download complete.")
def setup_ollama_startup():
    """Setup Ollama at startup: download binary, then pull models.

    Skips the model pull when the binary download failed, since pulling
    runs the binary itself and would only produce cascading errors.
    """
    if download_ollama_binary():
        pull_ollama_models()
    else:
        logger.error("Skipping Ollama model pull: binary is unavailable.")
def setup_hf_models():
    """
    Downloads heavy HuggingFace models to disk at startup.
    This prevents ZeroGPU timeouts during the first inference.
    """
    logger.info("Starting HuggingFace models download (Warm-up)...")
    try:
        manager = ModelManager()
        # Hub repos, in download order: segmentation backbone, grounding
        # model, and the embedding model for Generic Category mode.
        hub_repos = [
            ("SeC-4B", "OpenIXCLab/SeC-4B"),
            ("Florence-2", "microsoft/Florence-2-large"),
            ("SigLIP", "google/siglip2-base-patch16-naflex"),
        ]
        for label, repo_id in hub_repos:
            logger.info(f"Downloading {label}...")
            manager.download(repo_id)
        # SAM2 checkpoint comes from a direct URL, not the Hub.
        logger.info("Downloading SAM2 checkpoint...")
        manager.download_url(
            "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt",
            "sam2.1_hiera_large.pt"
        )
        logger.info("All HuggingFace models downloaded successfully.")
    except Exception as e:
        logger.error(f"Error during HF model download: {e}")
# ===========================================
# STARTUP: PARALLEL MODEL DOWNLOADS
# ===========================================
logger.info("Starting parallel model downloads at startup...")
# Daemon threads so downloads never block app startup or process exit;
# the GPU handlers join() these before running inference.
t_hf = threading.Thread(target=setup_hf_models, daemon=True)
t_ollama = threading.Thread(target=setup_ollama_startup, daemon=True)
t_hf.start()
t_ollama.start()
# NOTE: on ZeroGPU this typically reports "cpu" at startup; CUDA is only
# visible inside @spaces.GPU-decorated functions.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Main process device detection: {DEVICE}")
# ===========================================
# UTILITY FUNCTIONS
# ===========================================
def process_inputs_to_frames(input_data, output_folder: str) -> tuple:
    """
    Extracts frames from video (1 FPS) or copies images to output folder.
    Enforces MAX_FRAMES limit to prevent ZeroGPU timeouts.

    Args:
        input_data: Video file or list of image files
        output_folder: Directory to save extracted frames

    Returns:
        tuple: (output_folder path, list of frame file paths)

    Raises:
        gr.Error: If the video is too long or too many images are uploaded.
    """
    # Start from a clean folder so stale frames never leak into a new run.
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder)
    frame_paths = []
    video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.webm'}
    input_list = input_data if isinstance(input_data, list) else [input_data]
    if not input_list:
        return output_folder, []
    # Gradio file objects expose .name (a temp path); plain paths are str.
    first_file = input_list[0].name if hasattr(input_list[0], 'name') else str(input_list[0])
    ext = os.path.splitext(first_file)[1].lower()
    if ext in video_extensions:
        # Process video file (only the first upload is read in video mode)
        logger.info(f"Processing video: {first_file}...")
        cap = cv2.VideoCapture(first_file)
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames_original = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # Some containers report 0/NaN FPS; fall back to assuming 30.
        if video_fps == 0 or np.isnan(video_fps):
            video_fps = 30
        duration_sec = total_frames_original / video_fps
        # Validate video duration (1 FPS sampling means seconds == frames)
        if duration_sec > MAX_FRAMES:
            cap.release()
            msg = f"Video is too long ({int(duration_sec)}s). Max allowed is {MAX_FRAMES}s to avoid ZeroGPU timeout."
            logger.error(msg)
            raise gr.Error(msg)
        # Sample at 1 FPS: keep one frame per `frame_interval` decoded frames
        frame_interval = max(1, int(video_fps))
        count = 0
        saved_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if count % frame_interval == 0:
                filename = f"frame_{saved_count:05d}.jpg"
                filepath = os.path.join(output_folder, filename)
                cv2.imwrite(filepath, frame)
                frame_paths.append(filepath)
                saved_count += 1
                # Safety net in case the metadata-based duration check was
                # fooled by wrong container metadata.
                if saved_count > MAX_FRAMES:
                    cap.release()
                    raise gr.Error(f"Limit reached: > {MAX_FRAMES} frames extracted.")
            count += 1
        cap.release()
        logger.info(f"Video sampled at 1 FPS. Total frames: {saved_count}")
    else:
        # Process image files
        if len(input_list) > MAX_FRAMES:
            raise gr.Error(f"Too many images! You uploaded {len(input_list)}. Max allowed is {MAX_FRAMES}.")
        logger.info(f"Processing {len(input_list)} images...")
        # Sort by filename so frame ordering is deterministic.
        input_list.sort(key=lambda x: x.name if hasattr(x, 'name') else str(x))
        for i, f in enumerate(input_list):
            path = f.name if hasattr(f, 'name') else str(f)
            try:
                # Re-encode everything as RGB JPEG so downstream code sees
                # uniformly named, uniformly formatted frames.
                img = Image.open(path).convert("RGB")
                filename = f"frame_{i:05d}.jpg"
                filepath = os.path.join(output_folder, filename)
                img.save(filepath)
                frame_paths.append(filepath)
            except Exception as e:
                logger.warning(f"Skipping file {path}: {e}")
    return output_folder, frame_paths
def create_video_overlay(frames_folder: str, masks_dict: dict, output_path: str, fps: int = 5) -> str:
    """
    Render the extracted frames into a video with segmentation masks
    alpha-blended on top.

    Args:
        frames_folder: Directory containing frame images
        masks_dict: Dictionary mapping frame index to mask arrays
        output_path: Output video file path
        fps: Frames per second for output video

    Returns:
        Output video path or None if failed
    """
    logger.info("Generating result video...")
    frame_files = sorted(f for f in os.listdir(frames_folder) if f.endswith(".jpg"))
    if not frame_files:
        return None
    # Derive output dimensions from the first frame.
    sample = cv2.imread(os.path.join(frames_folder, frame_files[0]))
    height, width, _ = sample.shape
    writer = cv2.VideoWriter(
        output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (width, height)
    )
    # NOTE(review): OpenCV frames are BGR, so [255, 100, 0] is a blue-ish
    # tint despite the original "orange/gold" label — confirm intended color.
    overlay_color = np.array([255, 100, 0], dtype=np.uint8)
    for idx, name in enumerate(frame_files):
        frame = cv2.imread(os.path.join(frames_folder, name))
        overlay = np.zeros_like(frame)
        masks_data = masks_dict.get(idx)
        # Accept a single mask array or a list of masks per frame.
        if isinstance(masks_data, np.ndarray):
            frame_masks = [masks_data]
        elif isinstance(masks_data, list):
            frame_masks = masks_data
        else:
            frame_masks = []
        for mask in frame_masks:
            overlay[mask > 0] = overlay_color
        if np.any(overlay):
            frame = cv2.addWeighted(frame, 1, overlay, 0.5, 0)
        writer.write(frame)
    writer.release()
    return output_path
# ===========================================
# UNIQUE INSTANCE SEGMENTATION
# ===========================================
def process_unique_upload(input_files):
    """
    Prepare uploaded media for Unique Instance segmentation.

    Extracts frames into a fresh temp directory and configures the
    reference-frame slider for annotation.

    Returns:
        tuple: (first frame path, frames dir, reset points list,
                status message, slider component update)
    """
    if not input_files:
        return None, None, [], "Please upload files first.", gr.Slider(value=0, maximum=0, visible=False)
    frames_dir, frame_paths = process_inputs_to_frames(input_files, tempfile.mkdtemp())
    total = len(frame_paths)
    if total == 0:
        return None, None, [], "No frames extracted.", gr.Slider(value=0, maximum=0, visible=False)
    # Re-create the slider sized to the extracted frame range.
    selector = gr.Slider(
        value=0,
        minimum=0,
        maximum=total - 1,
        step=1,
        visible=True,
        interactive=True,
        label=f"Select Reference Frame (0 - {total - 1})"
    )
    return frame_paths[0], frames_dir, [], f"Processed {total} frames (1 FPS). Select target.", selector
def update_canvas_from_slider(frame_idx, frames_dir):
    """Return the RGB frame for the slider position, resetting click points."""
    if not frames_dir or not os.path.exists(frames_dir):
        return None, []
    path = os.path.join(frames_dir, f"frame_{int(frame_idx):05d}.jpg")
    if not os.path.exists(path):
        return None, []
    # Frames are stored as BGR JPEGs; the canvas expects RGB.
    return cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB), []
def add_point(img, evt: gr.SelectData, points_state):
    """
    Record a clicked point and redraw markers for all points on the frame.

    Args:
        img: Current RGB frame as a numpy array (from gr.Image).
        evt: Gradio select event carrying the click coordinates.
        points_state: Mutable list of (x, y) points; appended in place.

    Returns:
        tuple: (annotated RGB image, updated points list)
    """
    x, y = evt.index
    points_state.append((x, y))
    # Convert directly to BGR for OpenCV drawing. The previous
    # numpy -> PIL -> numpy round-trip was redundant: img is already
    # a numpy array.
    img_cv = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
    # Draw markers for all points
    for px, py in points_state:
        cv2.drawMarker(
            img_cv, (px, py), (0, 255, 0),
            markerType=cv2.MARKER_TILTED_CROSS,
            markerSize=20,
            thickness=3
        )
    return cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB), points_state
@spaces.GPU(duration=180)
def run_unique_segmentation(input_files, points_state, text_prompt, sam_encoder, offload_gpu, cleanup_interval, frame_idx_slider):
    """
    Run Unique Instance segmentation on the uploaded frames.
    Tracks a specific object identified by points or text description.

    Args:
        input_files: Raw uploads (video or images) from the gr.File widget.
        points_state: List of (x, y) click points on the reference frame.
        text_prompt: Optional grounding text; when non-empty it takes
            priority and the click points are ignored.
        sam_encoder: SAM2 encoder variant name from the dropdown.
        offload_gpu: Whether to enable GPU memory offloading.
        cleanup_interval: Frames between memory cleanups (cast to int).
        frame_idx_slider: Index of the annotated reference frame.

    Returns:
        tuple: (result video path or None, status message)
    """
    if not input_files:
        return None, "Error: Process input first."
    # Wait for HF models to be downloaded
    if t_hf.is_alive():
        logger.info("Waiting for HF models download to finish...")
        t_hf.join()
    try:
        logger.info("Processing inputs on GPU node...")
        temp_dir = tempfile.mkdtemp()
        # Re-extract frames to ensure they exist on GPU ephemeral storage
        frames_dir, _ = process_inputs_to_frames(input_files, temp_dir)
        logger.info("Initializing UniqueInstanceSegmenter...")
        segmenter = UniqueInstanceSegmenter(
            sam_encoder=sam_encoder,
            memory_cleanup_interval=int(cleanup_interval),
            device="cuda"
        )
        if offload_gpu:
            segmenter.optimize_cuda_memory()
        annotation_frame = f"frame_{int(frame_idx_slider):05d}.jpg"
        if not os.path.exists(os.path.join(frames_dir, annotation_frame)):
            return None, f"Error: Frame {annotation_frame} not found."
        # Run segmentation based on input type
        if text_prompt.strip():
            # Text grounding mode: click points are ignored.
            logger.info(f"Mode: Text -> {text_prompt}")
            result = segmenter.segment(
                frames_path=frames_dir,
                text=text_prompt,
                annotation_frame=annotation_frame,
                offload_frames_to_gpu=offload_gpu
            )
        else:
            if not points_state:
                return None, "Please add points or text."
            logger.info(f"Mode: Points -> {points_state}")
            result = segmenter.segment(
                frames_path=frames_dir,
                points=points_state,
                annotation_frame=annotation_frame,
                offload_frames_to_gpu=offload_gpu
            )
        output_vid = os.path.join(OUTPUT_BASE_DIR, "unique_output.mp4")
        return create_video_overlay(frames_dir, result.masks, output_vid), f"Completed. {result.num_frames} frames processed."
    except Exception as e:
        import traceback
        traceback.print_exc()
        logger.error(str(e))
        # Re-raise gr.Error so Gradio shows the message to the user directly.
        if isinstance(e, gr.Error):
            raise e
        return None, f"Error: {str(e)}"
# ===========================================
# GENERIC CATEGORY SEGMENTATION
# ===========================================
@spaces.GPU(duration=180)
def run_generic_segmentation(input_files, category, accept_thresh, reject_thresh, vlm_model_name):
    """
    Run Generic Category segmentation on the uploaded frames.
    Detects all instances of a specified category using VLM + segmentation.

    IMPORTANT: This function starts Ollama server INSIDE the GPU context,
    ensuring that Ollama can detect and use the GPU for inference.

    Args:
        input_files: Raw uploads (video or images) from the gr.File widget.
        category: Text prompt naming the category to detect.
        accept_thresh: Accept-threshold slider value passed to the segmenter.
        reject_thresh: Reject-threshold slider value passed to the segmenter.
        vlm_model_name: Ollama VLM model to load and use.

    Returns:
        tuple: (result video path or None, status/statistics message)
    """
    if not input_files:
        return None, "Error: Upload input."
    if not category.strip():
        return None, "Error: Please specify text."
    # Wait for model downloads to complete
    if t_hf.is_alive():
        logger.info("Waiting for HF models download...")
        t_hf.join()
    if t_ollama.is_alive():
        logger.info("Waiting for Ollama models download...")
        t_ollama.join()
    try:
        # =========================================================
        # CRITICAL: Start Ollama INSIDE @spaces.GPU context
        # This ensures Ollama detects and uses the GPU!
        # =========================================================
        logger.info("=" * 50)
        logger.info("Starting Ollama server with GPU support...")
        logger.info("=" * 50)
        ensure_ollama_ready_gpu(vlm_model_name)
        logger.info("Ollama is running with GPU. Processing inputs...")
        # Process input frames
        temp_dir = tempfile.mkdtemp()
        frames_dir, _ = process_inputs_to_frames(input_files, temp_dir)
        logger.info(f"Initializing GenericCategorySegmenter with VLM: {vlm_model_name}")
        segmenter = GenericCategorySegmenter(
            device="cuda",
            vlm_model=vlm_model_name
        )
        logger.info(f"Detecting category: {category}")
        result = segmenter.segment(
            frames_path=frames_dir,
            category=category,
            accept_threshold=accept_thresh,
            reject_threshold=reject_thresh,
            save_debug=False
        )
        output_vid = os.path.join(OUTPUT_BASE_DIR, "generic_output.mp4")
        # Sum per-frame detection counts for the status message.
        total_detections = sum(len(d) for d in result.metadata['detections'].values())
        return create_video_overlay(frames_dir, result.masks, output_vid), f"Completed! Total detections: {total_detections}"
    except Exception as e:
        import traceback
        traceback.print_exc()
        logger.error(f"Generic segmentation error: {e}")
        # Re-raise gr.Error so Gradio shows the message to the user directly.
        if isinstance(e, gr.Error):
            raise e
        return None, f"Error: {e}"
# ===========================================
# GRADIO UI
# ===========================================
# Build the Gradio UI: two tabs sharing the MAX_FRAMES limits defined above.
with gr.Blocks(title="ENEAS: Embedding-guided Neural Ensemble for Adaptive Segmentation") as demo:
    gr.Markdown(
        f"""
        # ⚡ ENEAS: Embedding-guided Neural Ensemble for Adaptive Segmentation
        **⚠️ IMPORTANT LIMITS:**
        - Maximum **{MAX_FRAMES} FRAMES** to prevent ZeroGPU timeouts
        - Videos are sampled at **1 FPS** → Max **{MAX_FRAMES} seconds** of video
        - Exceeding these limits will stop execution
        """
    )
    with gr.Tabs():
        # ===========================================
        # TAB 1: UNIQUE INSTANCE SEGMENTATION
        # ===========================================
        with gr.Tab("🎯 Unique Instance"):
            gr.Markdown("Track a specific object. Upload Video (1 FPS extraction) OR Images.")
            with gr.Row():
                with gr.Column(scale=1):
                    u_file = gr.File(
                        label="Input (Video or Images)",
                        file_count="multiple",
                        file_types=["video", "image"]
                    )
                    u_btn_proc = gr.Button("▶️ 1. Process Input (Extract 1 FPS)", variant="secondary")
                    # Hidden until frames are extracted, then becomes the
                    # reference-frame picker (reconfigured by the callback).
                    u_slider = gr.Slider(label="Frame Selector", visible=False)
                    with gr.Accordion("Advanced Options", open=False):
                        u_enc = gr.Dropdown(
                            ["long-large", "long-small"],
                            value="long-large",
                            label="SAM2 Encoder"
                        )
                        u_offload = gr.Checkbox(label="GPU Memory Offload", value=False)
                with gr.Column(scale=2):
                    # Hidden textbox carries the frames-dir path between callbacks.
                    u_path_frames_cpu = gr.Textbox(visible=False)
                    points_state = gr.State([])
                    u_img = gr.Image(
                        label="Reference Frame (Click to add points)",
                        interactive=True
                    )
                    u_txt = gr.Textbox(
                        label="Text Description (Grounding)",
                        placeholder="Points are ignored if text is provided."
                    )
                    u_btn_run = gr.Button("🚀 2. Run Segmentation", variant="primary")
                    u_out = gr.Video(label="Result")
                    u_status = gr.Textbox(label="Status", interactive=False)
            # Event handlers
            u_btn_proc.click(
                process_unique_upload,
                [u_file],
                [u_img, u_path_frames_cpu, points_state, u_status, u_slider]
            )
            u_slider.change(
                update_canvas_from_slider,
                inputs=[u_slider, u_path_frames_cpu],
                outputs=[u_img, points_state]
            )
            u_img.select(add_point, [u_img, points_state], [u_img, points_state])
            u_btn_run.click(
                run_unique_segmentation,
                # The hidden gr.Number(10) supplies the memory cleanup interval.
                [u_file, points_state, u_txt, u_enc, u_offload, gr.Number(10, visible=False), u_slider],
                [u_out, u_status]
            )
            # Example for Unique Instance
            gr.Examples(
                examples=[
                    [["examples/reporter.mp4"], "blonde woman with microphone"]
                ],
                inputs=[u_file, u_txt],
                label="Example"
            )
        # ===========================================
        # TAB 2: GENERIC CATEGORY SEGMENTATION
        # ===========================================
        with gr.Tab("🔮 Generic Text"):
            gr.Markdown(
                f"""
                Detect all instances of a text prompt in every frame (Max {MAX_FRAMES} frames).
                **🚀 GPU-Accelerated:** Ollama VLM runs on GPU for fast inference.
                First request includes ~15-20s server startup time.
                """
            )
            with gr.Row():
                g_file = gr.File(
                    label="Input (Video or Images)",
                    file_count="multiple",
                    file_types=["video", "image"]
                )
                g_cat = gr.Textbox(
                    label="Text prompt",
                    placeholder="e.g., person, chair, car, dog"
                )
                g_btn = gr.Button("🔍 Run Detection", variant="primary")
            with gr.Accordion("Detection Settings", open=True):
                g_accept = gr.Slider(
                    0.0, 1.0,
                    value=0.30,
                    label="Accept Threshold",
                    info="Higher = more confident detections only"
                )
                g_reject = gr.Slider(
                    0.0, 1.0,
                    value=0.1,
                    label="Reject Threshold",
                    info="Lower = filter out more false positives"
                )
                g_vlm = gr.Dropdown(
                    choices=VLM_MODELS,
                    value=VLM_MODELS[0],
                    label="VLM Model",
                    info="Larger models are more accurate but slower"
                )
            g_out = gr.Video(label="Result")
            g_stat = gr.Textbox(label="Detection Statistics", interactive=False)
            g_btn.click(
                run_generic_segmentation,
                [g_file, g_cat, g_accept, g_reject, g_vlm],
                [g_out, g_stat]
            )
            # Example for Generic Category
            gr.Examples(
                examples=[
                    [["examples/moving.mp4"], "person", 0.3, 0.1, "qwen3-vl:4b-instruct-q8_0"]
                ],
                inputs=[g_file, g_cat, g_accept, g_reject, g_vlm],
                label="Example"
            )
# ===========================================
# MAIN ENTRY POINT
# ===========================================
if __name__ == "__main__":
    demo.launch()