Spaces:

pyronear
/

Pyronear-Wildfire-Detection

Build error

App Files Files Community

Mateo committed on Feb 17

Commit

065d662

1 Parent(s): 53889b2

optimize time

Browse files

Files changed (2) hide show

app.py +44 -24
vision.py +67 -24

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ LOGGER = logging.getLogger(__name__)
 DEFAULT_SPLIT_CFG = {
-    "n_samples": 30,
     "max_w": 400,
     "crop_y": (0.25, 0.90),
     "dx_threshold_px": 1.5,
@@ -34,6 +34,7 @@ DEFAULT_SPLIT_CFG = {
     "jump_meanabs_threshold": 18.0,
     "progress_every": 0,
 }
 def _log_timing_summary(label, stats, wall_time=None, max_items=12):
@@ -285,23 +286,27 @@ def timer(name, stats):
     stats[name] = stats.get(name, 0.0) + (time.perf_counter() - t0)
-def _iter_sampled_frames(video_path, n_samples):
     timing = {}
     wall_t0 = time.perf_counter()
-    with timer("extract_bgr_with_ffmpeg", timing):
-        frames = _extract_bgr_with_ffmpeg(video_path, int(n_samples))
     timing["wall"] = time.perf_counter() - wall_t0
     _log_timing_summary("Iter sampled frames", timing, wall_time=timing["wall"])
     for out_idx, frame in enumerate(frames):
         yield out_idx, frame
-def iter_frames(video_path, n_samples, max_w, crop_y):
     timing = {"resize": 0.0, "crop": 0.0}
     wall_t0 = time.perf_counter()
     frame_count = 0
     try:
-        for out_idx, frame in _iter_sampled_frames(video_path, n_samples):
             frame_count += 1
             proc = frame
             if max_w > 0 and proc.shape[1] != max_w:
@@ -402,7 +407,7 @@ def estimate_dx_orb_affine(prev_gray, gray, orb, bf, min_matches, keep_ratio, ti
 def split_video_into_stable_segments_fast(
     video_path,
-    n_samples=30,
     max_w=400,
     crop_y=(0.25, 0.90),
     dx_threshold_px=1.5,
@@ -415,6 +420,7 @@ def split_video_into_stable_segments_fast(
     keep_ratio=0.4,
     jump_meanabs_threshold=18.0,
     progress_every=200,
 ):
     wall_t0 = time.perf_counter()
     timing_total = {}
@@ -429,7 +435,13 @@ def split_video_into_stable_segments_fast(
     frame_count = 0
     with timer("loop_total", timing_total):
-        for _, frame in iter_frames(video_path, n_samples=n_samples, max_w=max_w, crop_y=crop_y):
             frame_count += 1
             with timer("to_gray", timing_total):
@@ -559,7 +571,7 @@ def _bgr_to_pil(frame):
     return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-def extract_segment_frames(video_path, segments, n_samples):
     timing = {}
     wall_t0 = time.perf_counter()
@@ -585,7 +597,7 @@ def extract_segment_frames(video_path, segments, n_samples):
     # Detection runs on original sampled frames (no resize / no crop).
     to_pil_time = 0.0
     with timer("assign_frames_to_segments", timing):
-        for frame_idx, frame in _iter_sampled_frames(video_path, n_samples=n_samples):
             while segment_idx < len(normalized_segments) and frame_idx > normalized_segments[segment_idx][1]:
                 segment_idx += 1
@@ -623,7 +635,7 @@ def extract_segment_frames(video_path, segments, n_samples):
     return [frames for frames in grouped_frames if frames]
-def split_video_stable(video_path, split_cfg=None, fallback_n=30):
     if not video_path or not os.path.exists(video_path):
         return []
@@ -635,13 +647,17 @@ def split_video_stable(video_path, split_cfg=None, fallback_n=30):
     LOGGER.info("Split config | %s", cfg)
     with timer("split_video_into_stable_segments_fast", timing):
-        segments, _, _, _ = split_video_into_stable_segments_fast(video_path, **cfg)
     with timer("extract_segment_frames", timing):
         frame_groups = extract_segment_frames(
             video_path,
             segments,
             n_samples=cfg["n_samples"],
         )
     if frame_groups:
@@ -655,8 +671,12 @@ def split_video_stable(video_path, split_cfg=None, fallback_n=30):
         return frame_groups
     LOGGER.info("Split result | no stable segment, using fallback sampling n=%d", fallback_n)
-    with timer("fallback_split_video", timing):
-        fallback_frames = split_video(video_path, n=fallback_n)
     LOGGER.info("Fallback frame count | %d", len(fallback_frames))
     timing["wall"] = time.perf_counter() - wall_t0
     _log_timing_summary("split_video_stable", timing, wall_time=timing["wall"])
@@ -743,6 +763,7 @@ def infer(video_file):
     with timer("resolve_video_path", timing):
         video_path = _resolve_video_path(video_file)
     LOGGER.info("Inference start | video=%s", video_path)
     with timer("split_video_stable", timing):
         split_frames = split_video_stable(video_path)
     if not split_frames:
@@ -760,19 +781,18 @@ def infer(video_file):
     for split_idx, frames in enumerate(split_frames):
         split_t0 = time.perf_counter()
         LOGGER.info("Inference split %d | frames=%d", split_idx + 1, len(frames))
-        frame_preds = []
-        split_model = 0.0
         split_iou = 0.0
         split_draw = 0.0
-        for frame in frames:
-            t_model = time.perf_counter()
-            bbox = np.asarray(model(frame), dtype=np.float64).reshape(-1, 5)
-            dt_model = time.perf_counter() - t_model
-            split_model += dt_model
-            infer_model += dt_model
-            frame_preds.append(bbox)
         t_combine = time.perf_counter()
         kept_main = _combine_predictions_per_split(frame_preds)
         dt_combine = time.perf_counter() - t_combine

 DEFAULT_SPLIT_CFG = {
+    "n_samples": 16,
     "max_w": 400,
     "crop_y": (0.25, 0.90),
     "dx_threshold_px": 1.5,
     "jump_meanabs_threshold": 18.0,
     "progress_every": 0,
 }
+# Batch size used for model inference. Read from the INFER_BATCH_SIZE
+# environment variable (default "8") and clamped to at least 1; a value that
+# is not an integer literal makes int() raise ValueError when this line runs.
+INFER_BATCH_SIZE = max(1, int(os.getenv("INFER_BATCH_SIZE", "8")))
 def _log_timing_summary(label, stats, wall_time=None, max_items=12):
     stats[name] = stats.get(name, 0.0) + (time.perf_counter() - t0)
+def _iter_sampled_frames(video_path, n_samples, sampled_frames=None):
+    """Yield ``(index, frame)`` pairs over the sampled frames of a video.
+    Frames are extracted with ``_extract_bgr_with_ffmpeg`` only when
+    ``sampled_frames`` is None; otherwise the given frames are reused and
+    ``video_path`` / ``n_samples`` are not consulted.
+    Args:
+        video_path: Passed to ``_extract_bgr_with_ffmpeg`` when extracting.
+        n_samples: Sample count, coerced with ``int()`` before extraction.
+        sampled_frames: Optional pre-extracted frames to iterate instead.
+    Yields:
+        Tuples ``(out_idx, frame)`` with ``out_idx`` counting from 0.
+    """
+    # This is a generator: extraction and the timing log below only run once
+    # the caller starts iterating, not when the function is called.
     timing = {}
     wall_t0 = time.perf_counter()
+    if sampled_frames is None:
+        with timer("extract_bgr_with_ffmpeg", timing):
+            frames = _extract_bgr_with_ffmpeg(video_path, int(n_samples))
+    else:
+        with timer("reuse_sampled_frames", timing):
+            frames = sampled_frames
     timing["wall"] = time.perf_counter() - wall_t0
     _log_timing_summary("Iter sampled frames", timing, wall_time=timing["wall"])
     for out_idx, frame in enumerate(frames):
         yield out_idx, frame
+def iter_frames(video_path, n_samples, max_w, crop_y, sampled_frames=None):
     timing = {"resize": 0.0, "crop": 0.0}
     wall_t0 = time.perf_counter()
     frame_count = 0
     try:
+        for out_idx, frame in _iter_sampled_frames(video_path, n_samples, sampled_frames=sampled_frames):
             frame_count += 1
             proc = frame
             if max_w > 0 and proc.shape[1] != max_w:
 def split_video_into_stable_segments_fast(
     video_path,
+    n_samples=16,
     max_w=400,
     crop_y=(0.25, 0.90),
     dx_threshold_px=1.5,
     keep_ratio=0.4,
     jump_meanabs_threshold=18.0,
     progress_every=200,
+    sampled_frames=None,
 ):
     wall_t0 = time.perf_counter()
     timing_total = {}
     frame_count = 0
     with timer("loop_total", timing_total):
+        for _, frame in iter_frames(
+            video_path,
+            n_samples=n_samples,
+            max_w=max_w,
+            crop_y=crop_y,
+            sampled_frames=sampled_frames,
+        ):
             frame_count += 1
             with timer("to_gray", timing_total):
     return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+def extract_segment_frames(video_path, segments, n_samples, sampled_frames=None):
     timing = {}
     wall_t0 = time.perf_counter()
     # Detection runs on original sampled frames (no resize / no crop).
     to_pil_time = 0.0
     with timer("assign_frames_to_segments", timing):
+        for frame_idx, frame in _iter_sampled_frames(video_path, n_samples=n_samples, sampled_frames=sampled_frames):
             while segment_idx < len(normalized_segments) and frame_idx > normalized_segments[segment_idx][1]:
                 segment_idx += 1
     return [frames for frames in grouped_frames if frames]
+def split_video_stable(video_path, split_cfg=None, fallback_n=16):
     if not video_path or not os.path.exists(video_path):
         return []
     LOGGER.info("Split config | %s", cfg)
+    with timer("extract_sampled_frames", timing):
+        sampled_frames = _extract_bgr_with_ffmpeg(video_path, int(cfg["n_samples"]))
     with timer("split_video_into_stable_segments_fast", timing):
+        segments, _, _, _ = split_video_into_stable_segments_fast(video_path, sampled_frames=sampled_frames, **cfg)
     with timer("extract_segment_frames", timing):
         frame_groups = extract_segment_frames(
             video_path,
             segments,
             n_samples=cfg["n_samples"],
+            sampled_frames=sampled_frames,
         )
     if frame_groups:
         return frame_groups
     LOGGER.info("Split result | no stable segment, using fallback sampling n=%d", fallback_n)
+    if int(fallback_n) == int(cfg["n_samples"]):
+        with timer("fallback_reuse_sampled_frames", timing):
+            fallback_frames = [_bgr_to_pil(frame) for frame in sampled_frames]
+    else:
+        with timer("fallback_split_video", timing):
+            fallback_frames = split_video(video_path, n=fallback_n)
     LOGGER.info("Fallback frame count | %d", len(fallback_frames))
     timing["wall"] = time.perf_counter() - wall_t0
     _log_timing_summary("split_video_stable", timing, wall_time=timing["wall"])
     with timer("resolve_video_path", timing):
         video_path = _resolve_video_path(video_file)
     LOGGER.info("Inference start | video=%s", video_path)
+    LOGGER.info("Inference config | batch_size=%d", INFER_BATCH_SIZE)
     with timer("split_video_stable", timing):
         split_frames = split_video_stable(video_path)
     if not split_frames:
     for split_idx, frames in enumerate(split_frames):
         split_t0 = time.perf_counter()
         LOGGER.info("Inference split %d | frames=%d", split_idx + 1, len(frames))
+        t_model = time.perf_counter()
+        if hasattr(model, "infer_batch"):
+            frame_preds = model.infer_batch(frames, batch_size=INFER_BATCH_SIZE)
+        else:
+            frame_preds = [model(frame) for frame in frames]
+        frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
+        split_model = time.perf_counter() - t_model
+        infer_model += split_model
         split_iou = 0.0
         split_draw = 0.0
         t_combine = time.perf_counter()
         kept_main = _combine_predictions_per_split(frame_preds)
         dt_combine = time.perf_counter() - t_combine

vision.py CHANGED Viewed

@@ -7,7 +7,7 @@ import logging
 import os
 import platform
 import tarfile
-from typing import Tuple
 from urllib.request import urlretrieve
 import numpy as np
@@ -181,28 +181,7 @@ class Classifier:
         return pred
-    def __call__(self, pil_img: Image.Image, occlusion_bboxes: dict = {}) -> np.ndarray:
-        """Run the classifier on an input image.
-        Args:
-            pil_img: The input PIL image.
-            occlusion_mask: Optional occlusion mask to exclude certain areas.
-        Returns:
-            Processed predictions.
-        """
-        np_img, pad = self.prep_process(pil_img)
-        if self.format == "ncnn":
-            extractor = self.model.create_extractor()
-            extractor.set_light_mode(True)
-            extractor.input("in0", np_img)
-            pred = ncnn.Mat()
-            extractor.extract("out0", pred)
-            pred = np.asarray(pred)
-        else:
-            pred = self.ort_session.run(["output0"], {"images": np_img})[0][0]
         # Convert pad to a tuple if required
         if isinstance(pad, list):
             pad = tuple(pad)
@@ -214,7 +193,7 @@ class Classifier:
         pred = pred[(pred[:, 2] - pred[:, 0]) < self.max_bbox_size, :]
         pred = np.reshape(pred, (-1, 5))
-        logging.info(f"Model original pred : {pred}")
         # Remove prediction in bbox occlusion mask
         if len(occlusion_bboxes):
@@ -227,3 +206,67 @@ class Classifier:
             pred = pred[keep]
         return pred

 import os
 import platform
 import tarfile
+from typing import Sequence, Tuple
 from urllib.request import urlretrieve
 import numpy as np
         return pred
+    def _finalize_prediction(self, pred: np.ndarray, pad: Tuple[int, int], occlusion_bboxes: dict) -> np.ndarray:
         # Convert pad to a tuple if required
         if isinstance(pad, list):
             pad = tuple(pad)
         pred = pred[(pred[:, 2] - pred[:, 0]) < self.max_bbox_size, :]
         pred = np.reshape(pred, (-1, 5))
+        logging.debug("Model original pred : %s", pred)
         # Remove prediction in bbox occlusion mask
         if len(occlusion_bboxes):
             pred = pred[keep]
         return pred
+    def infer_batch(self, pil_imgs: Sequence[Image.Image], occlusion_bboxes: dict = None, batch_size: int = 8):
+        if not pil_imgs:
+            return []
+        if occlusion_bboxes is None:
+            occlusion_bboxes = {}
+        # NCNN path stays single-image.
+        if self.format != "onnx":
+            return [self(pil_img, occlusion_bboxes=occlusion_bboxes) for pil_img in pil_imgs]
+        batch_size = max(1, int(batch_size))
+        outputs = []
+        for start in range(0, len(pil_imgs), batch_size):
+            chunk = pil_imgs[start : start + batch_size]
+            batch_imgs = []
+            pads = []
+            for pil_img in chunk:
+                np_img, pad = self.prep_process(pil_img)
+                batch_imgs.append(np_img)
+                pads.append(pad)
+            np_batch = np.concatenate(batch_imgs, axis=0)
+            raw = self.ort_session.run(["output0"], {"images": np_batch})[0]
+            if raw.ndim >= 3 and raw.shape[0] == len(chunk):
+                raw_preds = [raw[i] for i in range(len(chunk))]
+            elif len(chunk) == 1 and raw.ndim >= 3:
+                raw_preds = [raw[0]]
+            elif len(chunk) == 1:
+                raw_preds = [raw]
+            else:
+                # Fallback for unexpected output shapes.
+                raw_preds = [self.ort_session.run(["output0"], {"images": arr})[0][0] for arr in batch_imgs]
+            for raw_pred, pad in zip(raw_preds, pads):
+                outputs.append(self._finalize_prediction(raw_pred, pad, occlusion_bboxes))
+        return outputs
+    def __call__(self, pil_img: Image.Image, occlusion_bboxes: dict = {}) -> np.ndarray:
+        """Run the classifier on an input image.
+        Args:
+            pil_img: The input PIL image.
+            occlusion_mask: Optional occlusion mask to exclude certain areas.
+        Returns:
+            Processed predictions.
+        """
+        np_img, pad = self.prep_process(pil_img)
+        if self.format == "ncnn":
+            extractor = self.model.create_extractor()
+            extractor.set_light_mode(True)
+            extractor.input("in0", np_img)
+            pred = ncnn.Mat()
+            extractor.extract("out0", pred)
+            pred = np.asarray(pred)
+        else:
+            pred = self.ort_session.run(["output0"], {"images": np_img})[0][0]
+        return self._finalize_prediction(pred, pad, occlusion_bboxes)