# NOTE: removed scraped-page residue that was not valid Python. The original
# header read: "Mateo's picture / filter detection by filter / 5bd4325"
# (GitHub commit metadata: commit 5bd4325, "filter detection by filter").
import logging
import os
import shutil
import subprocess
import tempfile
import time
from hashlib import sha1
from collections import deque
from contextlib import contextmanager
import cv2
import numpy as np
import streamlit as st
from PIL import Image, ImageDraw
from vision import Classifier
from utils import box_iou, nms
# Module-level logger for this app.
LOGGER = logging.getLogger(__name__)
# Logo shown at the top of the Streamlit page.
PYRONEAR_LOGO_URL = (
    "https://raw.githubusercontent.com/pyronear/pyro-engine/develop/docs/source/_static/img/pyronear-logo-dark.png"
)
# Default parameters for motion-based video segmentation; consumed by
# split_video_into_stable_segments_fast (see that function for usage).
DEFAULT_SPLIT_CFG = {
    "n_samples": 16,  # number of frames sampled uniformly from the video
    "max_w": 400,  # frames resized to this width before motion analysis
    "crop_y": (0.25, 0.90),  # vertical band (fractions of height) kept for analysis
    "dx_threshold_px": 1.5,  # max smoothed |dx| (px) for a frame pair to count as stable
    "min_inlier_ratio": 0.20,  # min RANSAC inlier ratio for a trustworthy motion estimate
    "min_stable_frames": 2,  # min consecutive stable pairs to form a segment
    "smooth_window": 2,  # moving-average window applied to |dx|
    "orb_nfeatures": 800,  # ORB keypoint budget per frame
    "orb_fast_threshold": 12,  # ORB FAST corner threshold
    "min_matches": 25,  # min descriptor matches before estimating motion
    "keep_ratio": 0.4,  # fraction of best matches kept for the affine fit
    "jump_meanabs_threshold": 18.0,  # mean |pixel diff| above this marks a scene jump
    "progress_every": 0,  # print progress every N pairs (0 disables)
}
# Feature flag: run motion segmentation instead of plain uniform sampling.
ENABLE_MOTION_SEGMENTATION = os.getenv("ENABLE_MOTION_SEGMENTATION", "0").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}
# Tunables read from the environment, clamped to sane minimums.
FAST_N_SAMPLES = max(1, int(os.getenv("FAST_N_SAMPLES", "12")))  # frames sampled when segmentation is off
INFER_BATCH_SIZE = max(1, int(os.getenv("INFER_BATCH_SIZE", "16")))  # model batch size
MODEL_IMGSZ = max(320, int(os.getenv("MODEL_IMGSZ", "1024")))  # model input image size
MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))  # 0 = no cap
MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))  # min frames matching a combined box
MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))  # min fraction of frames matching
MAIN_DET_MATCH_IOU_THRESHOLD = float(os.getenv("MAIN_DET_MATCH_IOU_THRESHOLD", "0.12"))  # IoU used when combining
MIN_COMBINED_MEDIAN_CONF = float(os.getenv("MIN_COMBINED_MEDIAN_CONF", "0.12"))  # median-confidence filter
DISPLAY_DET_MATCH_IOU_THRESHOLD = float(os.getenv("DISPLAY_DET_MATCH_IOU_THRESHOLD", "0.0"))  # IoU for display-frame pick
def _log_timing_summary(label, stats, wall_time=None, max_items=12):
    """Log a one-line summary of the timing dict *stats*, slowest steps first.

    Each step is shown as seconds, plus a percentage of wall time when a
    positive wall time is known. At most *max_items* steps are listed; the
    rest are collapsed into a "+N more" suffix.
    """
    if not stats:
        LOGGER.info("%s timing | no data", label)
        return
    # Order the non-None entries by duration, descending.
    ordered = sorted(
        ((key, float(val)) for key, val in stats.items() if val is not None),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if wall_time is None:
        wall_time = stats.get("wall")
    steps = [(key, sec) for key, sec in ordered if key != "wall"]
    pieces = []
    if wall_time is not None:
        pieces.append(f"wall={float(wall_time):.3f}s")
    for key, sec in steps[:max_items]:
        if wall_time and wall_time > 0:
            pct = 100.0 * sec / float(wall_time)
            pieces.append(f"{key}={sec:.3f}s ({pct:.1f}%)")
        else:
            pieces.append(f"{key}={sec:.3f}s")
    hidden = max(0, len(steps) - max_items)
    if hidden:
        pieces.append(f"+{hidden} more")
    LOGGER.info("%s timing | %s", label, " | ".join(pieces))
def _sample_indices(total, n):
if total <= 0:
return []
if total <= n:
return list(range(total))
return np.linspace(0, total - 1, n).astype(int).tolist()
def _format_idx_list(indices, max_items=40):
if not indices:
return "[]"
values = [int(i) for i in indices]
if len(values) <= max_items:
return str(values)
head = values[: max_items // 2]
tail = values[-(max_items // 2) :]
return f"{head} ... {tail} (len={len(values)})"
def _sample_uniform_items(items, n):
n = max(1, int(n))
if len(items) <= n:
return items
indices = np.linspace(0, len(items) - 1, n).astype(int).tolist()
return [items[i] for i in indices]
def _parse_fraction(value):
if not value:
return None
txt = str(value).strip()
if not txt or txt == "0/0":
return None
if "/" in txt:
num, den = txt.split("/", 1)
try:
den_f = float(den)
if den_f == 0:
return None
return float(num) / den_f
except Exception:
return None
try:
return float(txt)
except Exception:
return None
def _probe_total_frames_ffprobe(video_path):
    """Return the total frame count of the first video stream, or None.

    Tries the stream's nb_frames field first; if that is absent or invalid,
    estimates the count as round(avg_frame_rate * duration). Returns None
    when ffprobe is missing or neither method yields a positive value.
    Timing for each probe is logged via _log_timing_summary.
    """
    ffprobe = shutil.which("ffprobe")
    if ffprobe is None:
        return None
    timing = {}
    wall_t0 = time.perf_counter()
    video_name = os.path.basename(video_path)
    # Try direct frame count first.
    cmd = [
        ffprobe,
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=nb_frames",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        video_path,
    ]
    with timer("ffprobe_nb_frames", timing):
        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
    if proc.returncode == 0:
        raw = proc.stdout.strip()
        if raw.isdigit():
            val = int(raw)
            if val > 0:
                timing["wall"] = time.perf_counter() - wall_t0
                _log_timing_summary(f"ffprobe ({video_name})", timing, wall_time=timing["wall"])
                return val
    # Fallback: estimate from duration * avg frame rate.
    cmd = [
        ffprobe,
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=avg_frame_rate,duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        video_path,
    ]
    with timer("ffprobe_fps_duration", timing):
        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
    if proc.returncode != 0:
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"ffprobe ({video_name})", timing, wall_time=timing["wall"])
        return None
    lines = [line.strip() for line in proc.stdout.splitlines() if line.strip()]
    if len(lines) < 2:
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"ffprobe ({video_name})", timing, wall_time=timing["wall"])
        return None
    # ffprobe prints avg_frame_rate then duration, one value per line;
    # both may be fractions like "30000/1001".
    fps = _parse_fraction(lines[0])
    duration = _parse_fraction(lines[1])
    if fps is None or duration is None:
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"ffprobe ({video_name})", timing, wall_time=timing["wall"])
        return None
    estimate = int(round(fps * duration))
    timing["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary(f"ffprobe ({video_name})", timing, wall_time=timing["wall"])
    return estimate if estimate > 0 else None
def _probe_duration_ffprobe(video_path):
    """Return the container duration in seconds via ffprobe, or None."""
    ffprobe = shutil.which("ffprobe")
    if ffprobe is None:
        return None
    cmd = [
        ffprobe,
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        video_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
    if result.returncode != 0:
        return None
    # The first non-blank stdout line carries the duration field.
    first_line = next((ln.strip() for ln in result.stdout.splitlines() if ln.strip()), None)
    if first_line is None:
        return None
    parsed = _parse_fraction(first_line)
    if parsed is None or parsed <= 0:
        return None
    return float(parsed)
def _probe_video_size_ffprobe(video_path):
    """Return (width, height) of the first video stream via ffprobe, or None."""
    ffprobe = shutil.which("ffprobe")
    if ffprobe is None:
        return None
    cmd = [
        ffprobe,
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=width,height",
        "-of",
        "csv=p=0:s=x",
        video_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
    if result.returncode != 0:
        return None
    # Expected stdout shape: "WIDTHxHEIGHT" on the first non-blank line.
    line = next((part.strip() for part in result.stdout.splitlines() if part.strip()), "")
    if "x" not in line:
        return None
    w_txt, _, h_txt = line.partition("x")
    if not (w_txt.isdigit() and h_txt.isdigit()):
        return None
    w, h = int(w_txt), int(h_txt)
    if w <= 0 or h <= 0:
        return None
    return w, h
def _extract_bgr_with_ffmpeg_disk(video_path, n):
    """Extract up to *n* uniformly sampled BGR frames via JPEGs on disk.

    Slow fallback path: ffmpeg's select filter keeps exactly the sampled
    frame indices and writes them as JPEGs into a temp directory, which are
    then read back with OpenCV.

    Raises:
        RuntimeError: if ffmpeg is unavailable, the total frame count cannot
            be probed, or the ffmpeg extraction itself fails.
    """
    ffmpeg = shutil.which("ffmpeg")
    if ffmpeg is None:
        raise RuntimeError("ffmpeg is not available")
    timing = {}
    wall_t0 = time.perf_counter()
    video_name = os.path.basename(video_path)
    with timer("probe_total_frames", timing):
        total = _probe_total_frames_ffprobe(video_path)
    if total is None or total <= 0:
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
        raise RuntimeError("ffprobe could not determine total frame count")
    with timer("sample_indices", timing):
        indices = _sample_indices(total, int(n))
    if not indices:
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
        return []
    LOGGER.info(
        "Frame extraction | video=%s total_frames=%d n_samples=%d sampled_indices=%s",
        os.path.basename(video_path),
        total,
        len(indices),
        _format_idx_list(indices),
    )
    # Build a select filter matching exactly the sampled frame numbers
    # (the comma must be escaped inside the filter expression).
    select_expr = "+".join(f"eq(n\\,{int(i)})" for i in indices)
    vf = f"select={select_expr}"
    with tempfile.TemporaryDirectory(prefix="ffmpeg_frames_") as tmpdir:
        pattern = os.path.join(tmpdir, "frame_%06d.jpg")
        cmd = [
            ffmpeg,
            "-hide_banner",
            "-loglevel",
            "error",
            "-i",
            video_path,
            "-vf",
            vf,
            "-vsync",
            "vfr",
            "-q:v",
            "2",
            pattern,
        ]
        with timer("ffmpeg_extract", timing):
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
        if proc.returncode != 0:
            timing["wall"] = time.perf_counter() - wall_t0
            _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
            raise RuntimeError(proc.stderr.strip() or "ffmpeg extraction failed")
        frames = []
        with timer("read_extracted_images", timing):
            # Filenames are zero-padded, so lexical sort preserves frame order.
            for name in sorted(os.listdir(tmpdir)):
                if not name.lower().endswith(".jpg"):
                    continue
                frame = cv2.imread(os.path.join(tmpdir, name), cv2.IMREAD_COLOR)
                if frame is not None:
                    frames.append(frame)
    LOGGER.info(
        "Frame extraction done | video=%s extracted=%d requested=%d",
        os.path.basename(video_path),
        len(frames),
        len(indices),
    )
    timing["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
    return frames
def _extract_bgr_with_ffmpeg(video_path, n):
    """Extract ~*n* uniformly sampled BGR frames with one ffmpeg rawvideo pass.

    Fast path: decode once with an `fps=` filter and read raw BGR24 bytes from
    ffmpeg's stdout, then reshape them into numpy frames. Falls back to the
    slower disk-based extraction whenever the duration, the frame size, or the
    rawvideo pass itself is unavailable or fails.

    Raises:
        RuntimeError: if ffmpeg is not installed (propagated from the fallback
            in failure paths as well).
    """
    ffmpeg = shutil.which("ffmpeg")
    if ffmpeg is None:
        raise RuntimeError("ffmpeg is not available")
    n = max(1, int(n))
    timing = {}
    wall_t0 = time.perf_counter()
    video_name = os.path.basename(video_path)
    with timer("probe_duration", timing):
        duration = _probe_duration_ffprobe(video_path)
    if duration is None or duration <= 0:
        LOGGER.warning("Frame extraction | ffprobe duration unavailable, fallback to disk extraction")
        with timer("fallback_disk_extract", timing):
            frames = _extract_bgr_with_ffmpeg_disk(video_path, n)
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
        return frames
    with timer("probe_video_size", timing):
        video_size = _probe_video_size_ffprobe(video_path)
    if video_size is None:
        LOGGER.warning("Frame extraction | ffprobe size unavailable, fallback to disk extraction")
        with timer("fallback_disk_extract", timing):
            frames = _extract_bgr_with_ffmpeg_disk(video_path, n)
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
        return frames
    width, height = video_size
    # Bytes per decoded frame in bgr24 (3 bytes per pixel).
    frame_size = int(width) * int(height) * 3
    if frame_size <= 0:
        LOGGER.warning("Frame extraction | invalid frame size, fallback to disk extraction")
        with timer("fallback_disk_extract", timing):
            frames = _extract_bgr_with_ffmpeg_disk(video_path, n)
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
        return frames
    # Ask ffmpeg for roughly n frames spread over the whole duration.
    sample_fps = max(1e-6, float(n) / float(duration))
    LOGGER.info(
        "Frame extraction (single ffmpeg/rawvideo) | video=%s duration=%.3fs n_samples=%d fps=%.6f size=%dx%d",
        video_name,
        duration,
        n,
        sample_fps,
        width,
        height,
    )
    cmd = [
        ffmpeg,
        "-hide_banner",
        "-loglevel",
        "error",
        "-i",
        video_path,
        "-vf",
        f"fps={sample_fps:.8f}",
        "-frames:v",
        str(n),
        "-f",
        "rawvideo",
        "-pix_fmt",
        "bgr24",
        "-",
    ]
    with timer("ffmpeg_extract_rawvideo", timing):
        # No text=True here: stdout is binary pixel data.
        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False)
    if proc.returncode != 0 or not proc.stdout:
        LOGGER.warning(
            "Frame extraction rawvideo failed | video=%s err=%s",
            video_name,
            (proc.stderr.decode("utf-8", errors="ignore").strip() if proc.stderr else "no stderr"),
        )
        with timer("fallback_disk_extract", timing):
            frames = _extract_bgr_with_ffmpeg_disk(video_path, n)
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
        return frames
    with timer("decode_rawvideo", timing):
        raw = proc.stdout
        # Drop any trailing partial frame before reshaping.
        frame_count = len(raw) // frame_size
        usable_bytes = frame_count * frame_size
        if frame_count > 0 and usable_bytes:
            # Reinterpret the byte stream as (count, H, W, 3) uint8 frames;
            # copy each so frames outlive the parent buffer independently.
            arr = np.frombuffer(raw[:usable_bytes], dtype=np.uint8).reshape(frame_count, height, width, 3)
            frames = [arr[idx].copy() for idx in range(frame_count)]
        else:
            frames = []
    if len(frames) > n:
        frames = _sample_uniform_items(frames, n)
    if not frames:
        LOGGER.warning("Frame extraction | rawvideo mode returned 0 frame, fallback to disk extraction")
        with timer("fallback_disk_extract", timing):
            frames = _extract_bgr_with_ffmpeg_disk(video_path, n)
    LOGGER.info(
        "Frame extraction done | video=%s extracted=%d requested=%d",
        video_name,
        len(frames),
        n,
    )
    timing["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary(f"Frame extraction ({video_name})", timing, wall_time=timing["wall"])
    return frames
def _extract_with_ffmpeg(video_path, n):
    """Sample *n* frames from the video and return them as PIL RGB images."""
    stats = {}
    start = time.perf_counter()
    with timer("extract_bgr", stats):
        bgr_frames = _extract_bgr_with_ffmpeg(video_path, n)
    with timer("bgr_to_pil", stats):
        # Convert OpenCV BGR arrays to PIL images (RGB channel order).
        images = []
        for bgr in bgr_frames:
            images.append(Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)))
    stats["wall"] = time.perf_counter() - start
    _log_timing_summary("Extract with ffmpeg", stats, wall_time=stats["wall"])
    return images
def split_video(video_path, n=8):
    """Uniformly sample *n* PIL frames from the video; [] when the path is missing."""
    if not video_path or not os.path.exists(video_path):
        return []
    stats = {}
    start = time.perf_counter()
    with timer("extract_with_ffmpeg", stats):
        sampled = _extract_with_ffmpeg(video_path, n)
    stats["wall"] = time.perf_counter() - start
    _log_timing_summary("split_video", stats, wall_time=stats["wall"])
    return sampled
@contextmanager
def timer(name, stats):
    """Context manager accumulating elapsed wall time into ``stats[name]``.

    Repeated use with the same name adds up across calls. The elapsed time is
    recorded in a ``finally`` clause so it is captured even when the timed
    body raises (the previous version silently dropped the entry on error,
    which skewed the timing summaries for failing steps such as subprocess
    calls).
    """
    t0 = time.perf_counter()
    try:
        yield
    finally:
        stats[name] = stats.get(name, 0.0) + (time.perf_counter() - t0)
def _iter_sampled_frames(video_path, n_samples, sampled_frames=None):
    """Yield (index, BGR frame) pairs, reusing *sampled_frames* when provided."""
    stats = {}
    start = time.perf_counter()
    if sampled_frames is not None:
        with timer("reuse_sampled_frames", stats):
            source = sampled_frames
    else:
        with timer("extract_bgr_with_ffmpeg", stats):
            source = _extract_bgr_with_ffmpeg(video_path, int(n_samples))
    stats["wall"] = time.perf_counter() - start
    _log_timing_summary("Iter sampled frames", stats, wall_time=stats["wall"])
    yield from enumerate(source)
def iter_frames(video_path, n_samples, max_w, crop_y, sampled_frames=None):
    """Yield (index, frame) pairs resized to *max_w* and cropped to the *crop_y* band.

    Args:
        video_path: source video (ignored when sampled_frames is provided).
        n_samples: number of frames to sample when extracting.
        max_w: target width in pixels (<= 0 disables resizing).
        crop_y: (top, bottom) fractions of the frame height to keep, or None.
        sampled_frames: optional pre-extracted BGR frames to reuse.
    """
    timing = {"resize": 0.0, "crop": 0.0}
    wall_t0 = time.perf_counter()
    frame_count = 0
    try:
        for out_idx, frame in _iter_sampled_frames(video_path, n_samples, sampled_frames=sampled_frames):
            frame_count += 1
            proc = frame
            if max_w > 0 and proc.shape[1] != max_w:
                t_resize = time.perf_counter()
                # Resize preserving aspect ratio to the requested width.
                scale = max_w / float(proc.shape[1])
                proc = cv2.resize(
                    proc,
                    (max_w, int(proc.shape[0] * scale)),
                    interpolation=cv2.INTER_AREA,
                )
                timing["resize"] += time.perf_counter() - t_resize
            if crop_y is not None:
                t_crop = time.perf_counter()
                h = proc.shape[0]
                # Clamp fractions to [0, 1] before converting to pixel rows.
                y0 = int(max(0.0, min(1.0, float(crop_y[0]))) * h)
                y1 = int(max(0.0, min(1.0, float(crop_y[1]))) * h)
                if y1 > y0:
                    proc = proc[y0:y1, :]
                timing["crop"] += time.perf_counter() - t_crop
            yield out_idx, proc
    finally:
        # Log the summary even when the consumer stops iterating early.
        timing["wall"] = time.perf_counter() - wall_t0
        LOGGER.info(
            "iter_frames summary | n_samples=%d yielded=%d max_w=%d crop_y=%s",
            int(n_samples),
            frame_count,
            int(max_w),
            crop_y,
        )
        _log_timing_summary("iter_frames", timing, wall_time=timing["wall"])
def quick_jump_score(prev_gray, gray, small_w=160):
    """Mean absolute pixel difference between two gray frames (downscaled)."""
    h, w = prev_gray.shape[:2]
    if w <= small_w:
        a, b = prev_gray, gray
    else:
        # Shrink both frames to small_w wide before differencing — a cheap
        # proxy that is enough to flag hard scene jumps.
        factor = small_w / float(w)
        target = (small_w, int(h * factor))
        a = cv2.resize(prev_gray, target, interpolation=cv2.INTER_AREA)
        b = cv2.resize(gray, target, interpolation=cv2.INTER_AREA)
    return float(np.mean(cv2.absdiff(a, b)))
def estimate_dx_orb_affine(prev_gray, gray, orb, bf, min_matches, keep_ratio, timing_pair):
    """Estimate inter-frame translation with ORB matching + a partial affine fit.

    Returns None when there are too few keypoints/matches or RANSAC fails;
    otherwise a dict with dx/dy, motion scores, the inlier ratio, the match
    count and the 2x3 affine matrix M. Per-step timings are accumulated into
    *timing_pair*.
    """
    with timer("orb_detect_compute", timing_pair):
        kp1, des1 = orb.detectAndCompute(prev_gray, None)
        kp2, des2 = orb.detectAndCompute(gray, None)
    # Need a minimum of keypoints on both sides for a meaningful fit.
    if des1 is None or des2 is None or len(kp1) < 8 or len(kp2) < 8:
        return None
    with timer("bf_match", timing_pair):
        matches = bf.match(des1, des2)
    if len(matches) < min_matches:
        return None
    with timer("match_sort_filter", timing_pair):
        # Keep only the best (lowest-distance) fraction of matches, but never
        # fewer than 8 points.
        matches = sorted(matches, key=lambda m: m.distance)
        keep_n = max(8, int(len(matches) * keep_ratio))
        matches = matches[:keep_n]
        pts1 = np.float32([kp1[m.queryIdx].pt for m in matches])
        pts2 = np.float32([kp2[m.trainIdx].pt for m in matches])
    with timer("ransac_affine", timing_pair):
        M, inliers = cv2.estimateAffinePartial2D(
            pts1,
            pts2,
            method=cv2.RANSAC,
            ransacReprojThreshold=3.0,
            maxIters=1500,
            confidence=0.99,
        )
    if M is None:
        return None
    # Translation components of the estimated affine transform.
    dx = float(M[0, 2])
    dy = float(M[1, 2])
    inlier_ratio = float(np.mean(inliers)) if inliers is not None else 0.0
    return {
        "dx": dx,
        "dy": dy,
        "score_dx": float(abs(dx)),
        "score_px": float(np.hypot(dx, dy)),
        "inlier_ratio": inlier_ratio,
        "matches": len(matches),
        "M": M,
    }
def split_video_into_stable_segments_fast(
    video_path,
    n_samples=16,
    max_w=400,
    crop_y=(0.25, 0.90),
    dx_threshold_px=1.5,
    min_inlier_ratio=0.20,
    min_stable_frames=2,
    smooth_window=2,
    orb_nfeatures=800,
    orb_fast_threshold=12,
    min_matches=25,
    keep_ratio=0.4,
    jump_meanabs_threshold=18.0,
    progress_every=200,
    sampled_frames=None,
):
    """Find ranges of sampled frames where the camera is (nearly) static.

    For each consecutive frame pair: a cheap mean-absolute-difference check
    flags hard scene jumps; otherwise ORB + RANSAC estimates the horizontal
    shift dx. Pairs whose smoothed |dx| stays under dx_threshold_px with
    enough RANSAC inliers are "stable"; runs of at least min_stable_frames
    stable pairs become segments.

    Returns:
        (segments, metrics, smoothed_dx, timings) where segments is a list of
        (start, end) sample-index ranges, metrics holds one dict per frame
        pair, and timings has "total" and "per_pair" timing dicts.
    """
    wall_t0 = time.perf_counter()
    timing_total = {}
    timing_pair = {}
    with timer("setup", timing_total):
        orb = cv2.ORB_create(nfeatures=orb_nfeatures, fastThreshold=orb_fast_threshold)
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    metrics = []
    prev_gray = None
    frame_count = 0
    with timer("loop_total", timing_total):
        for _, frame in iter_frames(
            video_path,
            n_samples=n_samples,
            max_w=max_w,
            crop_y=crop_y,
            sampled_frames=sampled_frames,
        ):
            frame_count += 1
            with timer("to_gray", timing_total):
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_gray is not None:
                with timer("quick_jump", timing_total):
                    q = quick_jump_score(prev_gray, gray)
                if q >= jump_meanabs_threshold:
                    # Hard scene jump: record an "infinite motion" sentinel so
                    # the pair can never be flagged stable.
                    metrics.append(
                        {
                            "dx": np.nan,
                            "dy": np.nan,
                            "score_dx": 1e9,
                            "score_px": 1e9,
                            "inlier_ratio": 0.0,
                            "matches": 0,
                            "M": None,
                            "quick_jump": q,
                        }
                    )
                else:
                    m = estimate_dx_orb_affine(
                        prev_gray,
                        gray,
                        orb=orb,
                        bf=bf,
                        min_matches=min_matches,
                        keep_ratio=keep_ratio,
                        timing_pair=timing_pair,
                    )
                    if m is None:
                        # Estimation failed: treat the pair as unstable too.
                        metrics.append(
                            {
                                "dx": np.nan,
                                "dy": np.nan,
                                "score_dx": 1e9,
                                "score_px": 1e9,
                                "inlier_ratio": 0.0,
                                "matches": 0,
                                "M": None,
                                "quick_jump": q,
                            }
                        )
                    else:
                        m["quick_jump"] = q
                        metrics.append(m)
                if progress_every and (len(metrics) % progress_every == 0):
                    print(f"processed pairs: {len(metrics)}")
            prev_gray = gray
    if frame_count < 2:
        # Not enough frames to form a single pair; return empty results.
        timing_total["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary("Segmentation total", timing_total, wall_time=timing_total["wall"])
        if timing_pair:
            _log_timing_summary(
                "Segmentation pair internals",
                timing_pair,
                wall_time=max(timing_total.get("loop_total", 0.0), 1e-9),
            )
        return [], metrics, [], {"total": timing_total, "per_pair": timing_pair}
    with timer("post_smooth", timing_total):
        raw_dx = [m["score_dx"] for m in metrics]
        raw_inlier = [m["inlier_ratio"] for m in metrics]
        smoothed_dx = []
        # Moving average of |dx|; a non-finite value resets the window.
        q = deque(maxlen=max(1, int(smooth_window)))
        for v in raw_dx:
            if not np.isfinite(v):
                q.clear()
                smoothed_dx.append(np.nan)
            else:
                q.append(v)
                smoothed_dx.append(float(np.mean(q)))
    with timer("post_segments", timing_total):
        min_len = max(1, int(min_stable_frames))
        stable_flags = []
        for dx_s, r in zip(smoothed_dx, raw_inlier):
            if not np.isfinite(dx_s):
                stable_flags.append(False)
            else:
                stable_flags.append((dx_s < dx_threshold_px) and (r >= min_inlier_ratio))
        # Collect maximal runs of True flags that are at least min_len long.
        segments = []
        start = None
        for i, is_stable in enumerate(stable_flags):
            if is_stable and start is None:
                start = i
            if (not is_stable) and start is not None:
                end = i
                if (end - start) >= min_len:
                    segments.append((start, end))
                start = None
        if start is not None:
            # Close a run that extends to the end of the sequence.
            end = len(stable_flags)
            if (end - start) >= min_len:
                segments.append((start, end))
    LOGGER.info(
        "Segmentation summary | sampled_frames=%d pair_metrics=%d stable_segments=%d",
        frame_count,
        len(metrics),
        len(segments),
    )
    if segments:
        LOGGER.info("Segment ranges (sample indices) | %s", segments)
    timing_total["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary("Segmentation total", timing_total, wall_time=timing_total["wall"])
    if timing_pair:
        _log_timing_summary(
            "Segmentation pair internals",
            timing_pair,
            wall_time=max(timing_total.get("loop_total", 0.0), 1e-9),
        )
    return segments, metrics, smoothed_dx, {"total": timing_total, "per_pair": timing_pair}
def _bgr_to_pil(frame):
    """Convert an OpenCV BGR frame to a PIL image (RGB channel order)."""
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb)
def extract_segment_frames(video_path, segments, n_samples, sampled_frames=None):
    """Group sampled frames (as PIL images) by the stable segment they fall in.

    Args:
        video_path: source video (ignored when sampled_frames is provided).
        segments: list of (start, end) sample-index ranges; end is treated as
            inclusive here.
        n_samples: sampling budget used if frames must be re-extracted.
        sampled_frames: optional pre-extracted BGR frames to reuse.

    Returns:
        A list of non-empty frame groups, one per segment that matched frames.
    """
    timing = {}
    wall_t0 = time.perf_counter()
    if not segments:
        LOGGER.info("Segment frame extraction | no segments found")
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary("Segment frame extraction", timing, wall_time=timing["wall"])
        return []
    with timer("normalize_segments", timing):
        normalized_segments = []
        for start, end in segments:
            # Clamp to non-negative, non-inverted integer ranges.
            s = max(0, int(start))
            e = max(s, int(end))
            normalized_segments.append((s, e))
    with timer("prepare_groups", timing):
        normalized_segments.sort(key=lambda x: x[0])
        grouped_frames = [[] for _ in normalized_segments]
        grouped_indices = [[] for _ in normalized_segments]
    segment_idx = 0
    # Detection runs on original sampled frames (no resize / no crop).
    to_pil_time = 0.0
    with timer("assign_frames_to_segments", timing):
        for frame_idx, frame in _iter_sampled_frames(video_path, n_samples=n_samples, sampled_frames=sampled_frames):
            # Advance past segments that end before this frame index
            # (segments are sorted, frame indices increase monotonically).
            while segment_idx < len(normalized_segments) and frame_idx > normalized_segments[segment_idx][1]:
                segment_idx += 1
            if segment_idx >= len(normalized_segments):
                break
            seg_start, seg_end = normalized_segments[segment_idx]
            if seg_start <= frame_idx <= seg_end:
                t_pil = time.perf_counter()
                grouped_frames[segment_idx].append(_bgr_to_pil(frame))
                to_pil_time += time.perf_counter() - t_pil
                grouped_indices[segment_idx].append(frame_idx)
    timing["to_pil"] = to_pil_time
    LOGGER.info(
        "Segment frame extraction summary | segments=%d n_samples=%d",
        len(normalized_segments),
        n_samples,
    )
    for seg_i, ((seg_start, seg_end), idx_list, frames) in enumerate(
        zip(normalized_segments, grouped_indices, grouped_frames),
        start=1,
    ):
        LOGGER.info(
            "Segment %d | requested_range=[%d,%d] matched_frames=%d matched_indices=%s",
            seg_i,
            seg_start,
            seg_end,
            len(frames),
            _format_idx_list(idx_list),
        )
    timing["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary("Segment frame extraction", timing, wall_time=timing["wall"])
    # Drop segments that matched no frames.
    return [frames for frames in grouped_frames if frames]
def split_video_stable(video_path, split_cfg=None, fallback_n=16):
    """Split the video into stable (low-motion) frame groups for inference.

    Runs motion segmentation with DEFAULT_SPLIT_CFG (optionally overridden by
    *split_cfg*). When no stable segment is found, falls back to uniform
    sampling of *fallback_n* frames returned as a single group.

    Returns:
        A list of PIL-frame groups ([] when the path is missing/invalid).
    """
    if not video_path or not os.path.exists(video_path):
        return []
    timing = {}
    wall_t0 = time.perf_counter()
    cfg = DEFAULT_SPLIT_CFG.copy()
    if split_cfg:
        cfg.update(split_cfg)
    LOGGER.info("Split config | %s", cfg)
    with timer("extract_sampled_frames", timing):
        # Sample frames once and reuse them for both segmentation and grouping.
        sampled_frames = _extract_bgr_with_ffmpeg(video_path, int(cfg["n_samples"]))
    with timer("split_video_into_stable_segments_fast", timing):
        segments, _, _, _ = split_video_into_stable_segments_fast(video_path, sampled_frames=sampled_frames, **cfg)
    with timer("extract_segment_frames", timing):
        frame_groups = extract_segment_frames(
            video_path,
            segments,
            n_samples=cfg["n_samples"],
            sampled_frames=sampled_frames,
        )
    if frame_groups:
        LOGGER.info(
            "Split result | stable_splits=%d split_frame_counts=%s",
            len(frame_groups),
            [len(group) for group in frame_groups],
        )
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary("split_video_stable", timing, wall_time=timing["wall"])
        return frame_groups
    LOGGER.info("Split result | no stable segment, using fallback sampling n=%d", fallback_n)
    if int(fallback_n) == int(cfg["n_samples"]):
        with timer("fallback_reuse_sampled_frames", timing):
            # Same sampling budget: just convert the already-extracted frames.
            fallback_frames = [_bgr_to_pil(frame) for frame in sampled_frames]
    else:
        with timer("fallback_split_video", timing):
            fallback_frames = split_video(video_path, n=fallback_n)
    LOGGER.info("Fallback frame count | %d", len(fallback_frames))
    timing["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary("split_video_stable", timing, wall_time=timing["wall"])
    return [fallback_frames] if fallback_frames else []
@st.cache_resource(show_spinner=False)
def _load_model():
    """Build the ONNX Classifier once; cached across Streamlit reruns.

    NOTE(review): conf=0.05 is the model-side confidence threshold passed to
    Classifier — assumed to be a detection threshold, confirm against vision.Classifier.
    """
    model_t0 = time.perf_counter()
    clf = Classifier(format="onnx", conf=0.05, imgsz=MODEL_IMGSZ)
    LOGGER.info("Model init timing | wall=%.3fs", time.perf_counter() - model_t0)
    LOGGER.info("Model config | imgsz=%d", MODEL_IMGSZ)
    return clf
# Module-level model instance, created at import time (Streamlit caches it).
model = _load_model()
def _resolve_video_path(video_input):
if not video_input:
return None
if isinstance(video_input, str):
return video_input
if isinstance(video_input, dict):
for key in ("name", "path", "data", "video"):
value = video_input.get(key)
if isinstance(value, str) and os.path.exists(value):
return value
if isinstance(video_input, (list, tuple)):
for value in video_input:
if isinstance(value, str) and os.path.exists(value):
return value
return None
def _draw_detections(pil_img, preds, subtitle=None):
    """Render normalized xyxy+conf boxes and a detection counter on a copy of the image."""
    canvas = pil_img.copy()
    painter = ImageDraw.Draw(canvas)
    width, height = canvas.size
    accent = (255, 80, 0)
    boxes = np.asarray(preds)

    def _to_px(value, extent):
        # Clamp the normalized coordinate to [0, 1] then scale to pixels.
        return int(max(0.0, min(1.0, float(value))) * extent)

    for bx1, by1, bx2, by2, conf in boxes:
        px1, py1 = _to_px(bx1, width), _to_px(by1, height)
        px2, py2 = _to_px(bx2, width), _to_px(by2, height)
        painter.rectangle([px1, py1, px2, py2], outline=accent, width=3)
        painter.text((px1 + 4, py1 + 4), f"{conf:.2f}", fill=accent)
    painter.text((6, 6), f"detections : {len(boxes)}", fill=accent)
    if subtitle:
        painter.text((6, 26), subtitle, fill=accent)
    return canvas
def _combine_predictions_per_split(frame_preds):
    """Merge per-frame detections of one split into temporally consistent ones.

    Stacks all frame boxes, runs NMS to get candidate "main" boxes, then
    counts how many frames contain a detection overlapping each candidate
    (IoU >= MAIN_DET_MATCH_IOU_THRESHOLD). Candidates matched by too few
    frames, or with a median matched confidence below
    MIN_COMBINED_MEDIAN_CONF, are dropped.

    Args:
        frame_preds: list of (N_i, 5) float arrays [x1, y1, x2, y2, conf],
            one per frame (possibly empty).

    Returns:
        A list of dicts, one per kept combined detection, with the box, the
        match statistics, and the first matching frame/bbox for display.
    """
    n_frames = len(frame_preds)
    if n_frames == 0:
        return []
    boxes = np.zeros((0, 5), dtype=np.float64)
    for bbox in frame_preds:
        if bbox.size > 0:
            boxes = np.vstack([boxes, bbox])
    if boxes.size == 0:
        return []
    main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
    if main_bboxes.size == 0:
        return []
    n_main = len(main_bboxes)
    # Per-candidate accumulators filled while scanning the frames.
    matches_per_main = np.zeros(n_main, dtype=int)
    conf_max_per_main = np.zeros(n_main, dtype=np.float64)
    matched_conf_values_per_main = [[] for _ in range(n_main)]
    matched_frame_indices_per_main = [[] for _ in range(n_main)]
    first_match_frame_idx_per_main = [None for _ in range(n_main)]
    first_match_bbox_per_main = [None for _ in range(n_main)]
    for frame_idx, bbox in enumerate(frame_preds):
        if bbox.size == 0:
            continue
        # NOTE(review): the axis usage below implies this IoU matrix has one
        # row per main box and one column per frame box — confirm the
        # argument order/orientation against utils.box_iou.
        ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
        match_mask = ious >= MAIN_DET_MATCH_IOU_THRESHOLD
        has_match = match_mask.any(axis=1)
        matches_per_main += has_match.astype(int)
        if np.any(has_match):
            # Keep only one bbox per frame for each main bbox (best IoU among matches).
            masked_ious = np.where(match_mask, ious, -1.0)
            best_idx_per_main = np.argmax(masked_ious, axis=1)
            best_conf_per_main = bbox[best_idx_per_main, 4].astype(np.float64)
            matched_conf = np.where(has_match, best_conf_per_main, 0.0)
            conf_max_per_main = np.maximum(conf_max_per_main, matched_conf)
            for main_idx in np.flatnonzero(has_match):
                matched_conf_values_per_main[main_idx].append(float(best_conf_per_main[main_idx]))
                matched_frame_indices_per_main[main_idx].append(int(frame_idx))
                if first_match_frame_idx_per_main[main_idx] is None:
                    # Remember the earliest matching frame/bbox for display.
                    first_match_frame_idx_per_main[main_idx] = int(frame_idx)
                    first_match_bbox_per_main[main_idx] = np.asarray(
                        bbox[int(best_idx_per_main[main_idx])], dtype=np.float64
                    ).copy()
    # A candidate must be confirmed by enough frames, both in absolute count
    # and as a fraction of the frames in the split.
    required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
    keep_main = matches_per_main >= required_matches
    if not np.any(keep_main):
        return []
    kept = []
    for idx in np.flatnonzero(keep_main):
        match_count = int(matches_per_main[idx])
        matched_conf_values = matched_conf_values_per_main[idx]
        median_conf = (
            float(np.median(np.asarray(matched_conf_values, dtype=np.float64))) if matched_conf_values else 0.0
        )
        if median_conf < MIN_COMBINED_MEDIAN_CONF:
            LOGGER.info(
                (
                    "Combine drop candidate | matches=%d/%d (required=%d) | "
                    "median_conf=%.2f < min_combined_median_conf=%.2f"
                ),
                match_count,
                n_frames,
                required_matches,
                median_conf,
                MIN_COMBINED_MEDIAN_CONF,
            )
            continue
        kept.append(
            {
                "box": main_bboxes[idx],
                "match_count": match_count,
                "n_frames": int(n_frames),
                "required_matches": int(required_matches),
                "match_ratio": float(match_count / max(n_frames, 1)),
                "median_conf": median_conf,
                "max_conf": float(conf_max_per_main[idx]),
                "matched_conf_values": matched_conf_values,
                "matched_frame_indices": matched_frame_indices_per_main[idx],
                "first_match_frame_idx": first_match_frame_idx_per_main[idx],
                "first_match_bbox": first_match_bbox_per_main[idx],
            }
        )
    return kept
def infer(video_file):
    """Run the full detection pipeline on an uploaded video.

    Samples frames (motion-segmented or plain uniform depending on
    ENABLE_MOTION_SEGMENTATION), runs the model per split, combines the
    per-frame detections into temporally consistent ones, and renders
    annotated images for display.

    Returns:
        dict with "detections" (annotated images for combined detections) and
        "all_frame_predictions" (per-frame annotated images with captions).
    """
    timing = {}
    wall_t0 = time.perf_counter()
    with timer("resolve_video_path", timing):
        video_path = _resolve_video_path(video_file)
    LOGGER.info("Inference start | video=%s", video_path)
    LOGGER.info(
        (
            "Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
            "max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f "
            "main_det_match_iou_threshold=%.2f min_combined_median_conf=%.2f "
            "display_det_match_iou_threshold=%.2f"
        ),
        INFER_BATCH_SIZE,
        ENABLE_MOTION_SEGMENTATION,
        FAST_N_SAMPLES,
        MAX_INFER_FRAMES_PER_SPLIT,
        MIN_MAIN_MATCH_ABS,
        MIN_MAIN_MATCH_RATIO,
        MAIN_DET_MATCH_IOU_THRESHOLD,
        MIN_COMBINED_MEDIAN_CONF,
        DISPLAY_DET_MATCH_IOU_THRESHOLD,
    )
    with timer("prepare_splits", timing):
        if ENABLE_MOTION_SEGMENTATION:
            split_frames = split_video_stable(video_path)
        else:
            # Fast mode: one single group of uniformly sampled frames.
            fast_frames = split_video(video_path, n=FAST_N_SAMPLES)
            split_frames = [fast_frames] if fast_frames else []
    total_frames = sum(len(frames) for frames in split_frames)
    LOGGER.info("Inference workload | splits=%d total_frames=%d", len(split_frames), total_frames)
    if not split_frames:
        LOGGER.info("Inference stop | no frames available")
        timing["wall"] = time.perf_counter() - wall_t0
        _log_timing_summary("Inference", timing, wall_time=timing["wall"])
        return {"detections": [], "all_frame_predictions": []}
    outputs = []
    all_frame_predictions = []
    # Accumulators feeding the final timing summary.
    infer_model = 0.0
    combine_time = 0.0
    iou_time = 0.0
    draw_time = 0.0
    draw_all_frames_time = 0.0
    split_loop_time = 0.0
    for split_idx, frames in enumerate(split_frames):
        split_t0 = time.perf_counter()
        original_len = len(frames)
        # Cap the number of frames sent to the model per split (0 = no cap).
        if MAX_INFER_FRAMES_PER_SPLIT > 0 and original_len > MAX_INFER_FRAMES_PER_SPLIT:
            frames_for_infer = _sample_uniform_items(frames, MAX_INFER_FRAMES_PER_SPLIT)
        else:
            frames_for_infer = frames
        LOGGER.info(
            "Inference split %d | frames=%d used_for_infer=%d",
            split_idx + 1,
            original_len,
            len(frames_for_infer),
        )
        t_model = time.perf_counter()
        # Prefer batched inference when the model supports it.
        if hasattr(model, "infer_batch"):
            frame_preds = model.infer_batch(frames_for_infer, batch_size=INFER_BATCH_SIZE)
        else:
            frame_preds = [model(frame) for frame in frames_for_infer]
        # Normalize predictions to (N, 5) float arrays [x1, y1, x2, y2, conf].
        frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
        for frame_idx, bbox in enumerate(frame_preds):
            if bbox.size == 0:
                LOGGER.info(
                    "Inference split %d frame %d | detections=0",
                    split_idx + 1,
                    frame_idx + 1,
                )
                continue
            confs = bbox[:, 4].astype(np.float64)
            conf_list_txt = ", ".join(f"{float(c):.2f}" for c in confs.tolist())
            LOGGER.info(
                (
                    "Inference split %d frame %d | detections=%d | confs=[%s] | "
                    "frame_max_conf=%.2f | frame_mean_conf_all_bboxes=%.2f"
                ),
                split_idx + 1,
                frame_idx + 1,
                len(bbox),
                conf_list_txt,
                float(np.max(confs)),
                float(np.mean(confs)),
            )
        for frame_idx, (frame, bbox) in enumerate(zip(frames_for_infer, frame_preds)):
            subtitle = f"segment {split_idx + 1} / frame {frame_idx + 1}"
            t_draw_all = time.perf_counter()
            all_frame_predictions.append(
                {
                    "image": _draw_detections(frame, bbox, subtitle=subtitle),
                    "caption": f"Segment {split_idx + 1} - Frame {frame_idx + 1}",
                }
            )
            draw_all_frames_time += time.perf_counter() - t_draw_all
        # NOTE(review): split_model is measured from before the per-frame
        # logging/drawing loops above, so it also includes their cost.
        split_model = time.perf_counter() - t_model
        infer_model += split_model
        split_iou = 0.0
        split_draw = 0.0
        t_combine = time.perf_counter()
        kept_main = _combine_predictions_per_split(frame_preds)
        dt_combine = time.perf_counter() - t_combine
        combine_time += dt_combine
        LOGGER.info(
            "Inference split %d | combined_detections=%d",
            split_idx + 1,
            len(kept_main),
        )
        for det_idx, det_info in enumerate(kept_main):
            conf_values_txt = ", ".join(f"{float(c):.2f}" for c in det_info["matched_conf_values"])
            frame_indices_txt = ", ".join(str(int(i) + 1) for i in det_info["matched_frame_indices"])
            LOGGER.info(
                (
                    "Inference split %d combined detection %d | matches=%d/%d "
                    "(required=%d, ratio=%.2f) | combine_median_conf=%.2f | combine_max_conf=%.2f | "
                    "matched_frames=[%s] | matched_confs=[%s]"
                ),
                split_idx + 1,
                det_idx + 1,
                det_info["match_count"],
                det_info["n_frames"],
                det_info["required_matches"],
                det_info["match_ratio"],
                det_info["median_conf"],
                det_info["max_conf"],
                frame_indices_txt,
                conf_values_txt,
            )
        if not kept_main:
            # Nothing survived the combination step: log timing and move on.
            split_elapsed = time.perf_counter() - split_t0
            split_loop_time += split_elapsed
            LOGGER.info(
                (
                    "Inference split %d timing | total=%.3fs | model=%.3fs | combine=%.3fs | "
                    "iou=%.3fs | draw=%.3fs | avg_model_ms=%.1f"
                ),
                split_idx + 1,
                split_elapsed,
                split_model,
                dt_combine,
                split_iou,
                split_draw,
                (1000.0 * split_model / max(len(frames_for_infer), 1)),
            )
            continue
        for det_idx, det_info in enumerate(kept_main):
            main_box = det_info["box"]
            selected_frame_idx = None
            selected_bbox = None
            selection_source = None
            # Prefer the earliest frame that overlaps the combined detection, using a relaxed
            # threshold for display (so we show the first visible appearance of the event).
            for frame_idx, bbox in enumerate(frame_preds):
                if bbox.size == 0:
                    continue
                t_iou = time.perf_counter()
                ious = box_iou(bbox[:, :4], main_box[:4].reshape(1, 4))
                dt_iou = time.perf_counter() - t_iou
                split_iou += dt_iou
                iou_time += dt_iou
                if (ious > DISPLAY_DET_MATCH_IOU_THRESHOLD).any():
                    match_idx = int(np.argmax(ious[0]))
                    selected_frame_idx = int(frame_idx)
                    selected_bbox = np.asarray(bbox[match_idx], dtype=np.float64).reshape(1, 5)
                    selection_source = "display_first_overlap"
                    break
            first_match_frame_idx = det_info.get("first_match_frame_idx")
            first_match_bbox = det_info.get("first_match_bbox")
            if selected_frame_idx is None or selected_bbox is None:
                # Fall back to the first frame that matched during combination.
                if (
                    first_match_frame_idx is None
                    or first_match_bbox is None
                    or int(first_match_frame_idx) < 0
                    or int(first_match_frame_idx) >= len(frames_for_infer)
                ):
                    LOGGER.warning(
                        "Inference split %d detection %d | missing display frame and first matched frame/bbox",
                        split_idx + 1,
                        det_idx + 1,
                    )
                    continue
                selected_frame_idx = int(first_match_frame_idx)
                selected_bbox = np.asarray(first_match_bbox, dtype=np.float64).reshape(1, 5)
                selection_source = "combine_first_match_fallback"
            frame = frames_for_infer[selected_frame_idx]
            LOGGER.info(
                (
                    "Inference split %d detection %d | selected_frame=%d | source=%s | "
                    "selected frame_conf=%.2f | combine_median_conf=%.2f | combine_max_conf=%.2f"
                ),
                split_idx + 1,
                det_idx + 1,
                selected_frame_idx + 1,
                selection_source,
                float(selected_bbox[0, 4]),
                det_info["median_conf"],
                det_info["max_conf"],
            )
            subtitle = (
                f"segment {split_idx + 1} / detection {det_idx + 1} | "
                f"frame {selected_frame_idx + 1} | "
                f"matchs {det_info['match_count']}/{det_info['n_frames']} | "
                f"conf_med {det_info['median_conf']:.2f}"
            )
            t_draw = time.perf_counter()
            outputs.append(_draw_detections(frame, selected_bbox, subtitle=subtitle))
            dt_draw = time.perf_counter() - t_draw
            split_draw += dt_draw
            draw_time += dt_draw
        split_elapsed = time.perf_counter() - split_t0
        split_loop_time += split_elapsed
        LOGGER.info(
            (
                "Inference split %d timing | total=%.3fs | model=%.3fs | combine=%.3fs | "
                "iou=%.3fs | draw=%.3fs | avg_model_ms=%.1f"
            ),
            split_idx + 1,
            split_elapsed,
            split_model,
            dt_combine,
            split_iou,
            split_draw,
            (1000.0 * split_model / max(len(frames_for_infer), 1)),
        )
    timing["split_loop"] = split_loop_time
    timing["model_infer"] = infer_model
    timing["combine_predictions"] = combine_time
    timing["iou_matching"] = iou_time
    timing["draw_detections"] = draw_time
    timing["draw_all_frame_predictions"] = draw_all_frames_time
    timing["wall"] = time.perf_counter() - wall_t0
    _log_timing_summary("Inference", timing, wall_time=timing["wall"])
    LOGGER.info(
        "Inference done | output_images=%d all_frame_prediction_images=%d",
        len(outputs),
        len(all_frame_predictions),
    )
    return {"detections": outputs, "all_frame_predictions": all_frame_predictions}
def _upload_signature(uploaded_file):
buffer = uploaded_file.getbuffer()
size = uploaded_file.size if uploaded_file.size is not None else len(buffer)
digest = sha1(buffer).hexdigest()
return (uploaded_file.name or "uploaded.mp4", int(size), digest)
def _write_uploaded_video(uploaded_file):
ext = os.path.splitext(uploaded_file.name or "")[1] or ".mp4"
with tempfile.NamedTemporaryFile(prefix="upload_", suffix=ext, delete=False) as tmp:
tmp.write(uploaded_file.getbuffer())
return tmp.name
def _render_outputs(outputs):
    """Render inference results in the Streamlit page.

    ``outputs`` is either a plain list of annotated detection images or a dict
    with a ``"detections"`` key (the shape returned by ``infer``). The
    all-frame-predictions gallery was intentionally removed from the UI; only
    the per-detection images are shown.
    """
    detections = outputs.get("detections", []) if isinstance(outputs, dict) else outputs
    if not detections:
        st.warning("Aucune detection d'incendie trouvee dans cette video.")
        return
    st.subheader("Incendies detectes")
    columns = st.columns(2)
    for idx, image in enumerate(detections):
        # Alternate between the two columns to lay detections out as a grid.
        columns[idx % 2].image(image, caption=f"Detection {idx + 1}", use_container_width=True)
def main():
    """Streamlit entry point: upload an MP4 and run fire detection on it.

    Inference runs once per distinct upload (tracked via a content fingerprint
    in ``st.session_state``); results and errors are cached so reruns only
    re-render.
    """
    st.set_page_config(page_title="Detection d'incendies Pyronear", layout="wide")
    st.image(PYRONEAR_LOGO_URL, width=220)
    st.title("Detection d'incendies Pyronear")
    st.write("Televersez un MP4 pour lancer la detection automatiquement.")

    uploaded = st.file_uploader("Televerser un MP4", type=["mp4"])
    if uploaded is None:
        st.info("En attente du televersement d'une video.")
        return

    signature = _upload_signature(uploaded)
    if signature != st.session_state.get("upload_signature"):
        # New upload: run inference once and cache the outcome in session state.
        st.session_state["upload_signature"] = signature
        video_path = None
        with st.spinner("Detection d'incendies en cours..."):
            try:
                video_path = _write_uploaded_video(uploaded)
                st.session_state["output_images"] = infer(video_path)
                st.session_state["inference_error"] = None
            except Exception as exc:
                LOGGER.exception("Inference failed")
                st.session_state["output_images"] = []
                st.session_state["inference_error"] = str(exc)
            finally:
                # Always remove the temporary video, even when inference fails.
                if video_path and os.path.exists(video_path):
                    os.remove(video_path)

    if st.session_state.get("inference_error"):
        st.error(f"Echec de la detection : {st.session_state['inference_error']}")
        return

    _render_outputs(st.session_state.get("output_images", []))
# Run the Streamlit app when this file is executed directly (e.g. via ``streamlit run``).
if __name__ == "__main__":
    main()