Spaces:

pyronear
/

Pyronear-Wildfire-Detection

Build error

App Files Files Community

Mateo committed on Feb 22

Commit

5bd4325

1 Parent(s): 44d5cdf

filter detection by filter

Browse files

Files changed (1) hide show

app.py +225 -30

app.py CHANGED Viewed

@@ -52,6 +52,9 @@ MODEL_IMGSZ = max(320, int(os.getenv("MODEL_IMGSZ", "1024")))
 MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))
 MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))
 MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))
 def _log_timing_summary(label, stats, wall_time=None, max_items=12):
@@ -934,7 +937,7 @@ def _draw_detections(pil_img, preds, subtitle=None):
 def _combine_predictions_per_split(frame_preds):
     n_frames = len(frame_preds)
     if n_frames == 0:
-        return np.zeros((0, 5), dtype=np.float64)
     boxes = np.zeros((0, 5), dtype=np.float64)
     for bbox in frame_preds:
@@ -942,24 +945,83 @@ def _combine_predictions_per_split(frame_preds):
             boxes = np.vstack([boxes, bbox])
     if boxes.size == 0:
-        return np.zeros((0, 5), dtype=np.float64)
     main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
     if main_bboxes.size == 0:
-        return np.zeros((0, 5), dtype=np.float64)
-    matches_per_main = np.zeros(len(main_bboxes), dtype=int)
-    for bbox in frame_preds:
         if bbox.size == 0:
             continue
         ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
-        matches_per_main += (ious > 0).any(axis=1).astype(int)
     required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
     keep_main = matches_per_main >= required_matches
-    if np.any(keep_main):
-        return main_bboxes[keep_main]
-    return np.zeros((0, 5), dtype=np.float64)
 def infer(video_file):
@@ -972,7 +1034,9 @@ def infer(video_file):
     LOGGER.info(
         (
             "Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
-            "max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f"
         ),
         INFER_BATCH_SIZE,
         ENABLE_MOTION_SEGMENTATION,
@@ -980,6 +1044,9 @@ def infer(video_file):
         MAX_INFER_FRAMES_PER_SPLIT,
         MIN_MAIN_MATCH_ABS,
         MIN_MAIN_MATCH_RATIO,
     )
     with timer("prepare_splits", timing):
         if ENABLE_MOTION_SEGMENTATION:
@@ -993,13 +1060,15 @@ def infer(video_file):
         LOGGER.info("Inference stop | no frames available")
         timing["wall"] = time.perf_counter() - wall_t0
         _log_timing_summary("Inference", timing, wall_time=timing["wall"])
-        return []
     outputs = []
     infer_model = 0.0
     combine_time = 0.0
     iou_time = 0.0
     draw_time = 0.0
     split_loop_time = 0.0
     for split_idx, frames in enumerate(split_frames):
         split_t0 = time.perf_counter()
@@ -1020,6 +1089,38 @@ def infer(video_file):
         else:
             frame_preds = [model(frame) for frame in frames_for_infer]
         frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
         split_model = time.perf_counter() - t_model
         infer_model += split_model
@@ -1035,7 +1136,27 @@ def infer(video_file):
             split_idx + 1,
             len(kept_main),
         )
-        if kept_main.size == 0:
             split_elapsed = time.perf_counter() - split_t0
             split_loop_time += split_elapsed
             LOGGER.info(
@@ -1053,8 +1174,15 @@ def infer(video_file):
             )
             continue
-        for det_idx, main_box in enumerate(kept_main):
-            for frame, bbox in zip(frames_for_infer, frame_preds):
                 if bbox.size == 0:
                     continue
                 t_iou = time.perf_counter()
@@ -1062,16 +1190,58 @@ def infer(video_file):
                 dt_iou = time.perf_counter() - t_iou
                 split_iou += dt_iou
                 iou_time += dt_iou
-                if (ious > 0).any():
                     match_idx = int(np.argmax(ious[0]))
-                    subtitle = f"segment {split_idx + 1} / detection {det_idx + 1}"
-                    t_draw = time.perf_counter()
-                    outputs.append(_draw_detections(frame, bbox[match_idx : match_idx + 1], subtitle=subtitle))
-                    dt_draw = time.perf_counter() - t_draw
-                    split_draw += dt_draw
-                    draw_time += dt_draw
                     break
         split_elapsed = time.perf_counter() - split_t0
         split_loop_time += split_elapsed
         LOGGER.info(
@@ -1093,10 +1263,15 @@ def infer(video_file):
     timing["combine_predictions"] = combine_time
     timing["iou_matching"] = iou_time
     timing["draw_detections"] = draw_time
     timing["wall"] = time.perf_counter() - wall_t0
     _log_timing_summary("Inference", timing, wall_time=timing["wall"])
-    LOGGER.info("Inference done | output_images=%d", len(outputs))
-    return outputs
 def _upload_signature(uploaded_file):
@@ -1114,14 +1289,34 @@ def _write_uploaded_video(uploaded_file):
 def _render_outputs(outputs):
-    if not outputs:
-        st.warning("Aucune detection d'incendie trouvee dans cette video.")
-        return
-    st.subheader("Incendies detectes")
-    columns = st.columns(2)
-    for idx, image in enumerate(outputs):
-        columns[idx % 2].image(image, caption=f"Detection {idx + 1}", use_container_width=True)
 def main():

 MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))
 MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))
 MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))
+MAIN_DET_MATCH_IOU_THRESHOLD = float(os.getenv("MAIN_DET_MATCH_IOU_THRESHOLD", "0.12"))
+MIN_COMBINED_MEDIAN_CONF = float(os.getenv("MIN_COMBINED_MEDIAN_CONF", "0.12"))
+DISPLAY_DET_MATCH_IOU_THRESHOLD = float(os.getenv("DISPLAY_DET_MATCH_IOU_THRESHOLD", "0.0"))
 def _log_timing_summary(label, stats, wall_time=None, max_items=12):
 def _combine_predictions_per_split(frame_preds):
     n_frames = len(frame_preds)
     if n_frames == 0:
+        return []
     boxes = np.zeros((0, 5), dtype=np.float64)
     for bbox in frame_preds:
             boxes = np.vstack([boxes, bbox])
     if boxes.size == 0:
+        return []
     main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
     if main_bboxes.size == 0:
+        return []
+    n_main = len(main_bboxes)
+    matches_per_main = np.zeros(n_main, dtype=int)
+    conf_max_per_main = np.zeros(n_main, dtype=np.float64)
+    matched_conf_values_per_main = [[] for _ in range(n_main)]
+    matched_frame_indices_per_main = [[] for _ in range(n_main)]
+    first_match_frame_idx_per_main = [None for _ in range(n_main)]
+    first_match_bbox_per_main = [None for _ in range(n_main)]
+    for frame_idx, bbox in enumerate(frame_preds):
         if bbox.size == 0:
             continue
         ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
+        match_mask = ious >= MAIN_DET_MATCH_IOU_THRESHOLD
+        has_match = match_mask.any(axis=1)
+        matches_per_main += has_match.astype(int)
+        if np.any(has_match):
+            # Keep only one bbox per frame for each main bbox (best IoU among matches).
+            masked_ious = np.where(match_mask, ious, -1.0)
+            best_idx_per_main = np.argmax(masked_ious, axis=1)
+            best_conf_per_main = bbox[best_idx_per_main, 4].astype(np.float64)
+            matched_conf = np.where(has_match, best_conf_per_main, 0.0)
+            conf_max_per_main = np.maximum(conf_max_per_main, matched_conf)
+            for main_idx in np.flatnonzero(has_match):
+                matched_conf_values_per_main[main_idx].append(float(best_conf_per_main[main_idx]))
+                matched_frame_indices_per_main[main_idx].append(int(frame_idx))
+                if first_match_frame_idx_per_main[main_idx] is None:
+                    first_match_frame_idx_per_main[main_idx] = int(frame_idx)
+                    first_match_bbox_per_main[main_idx] = np.asarray(
+                        bbox[int(best_idx_per_main[main_idx])], dtype=np.float64
+                    ).copy()
     required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
     keep_main = matches_per_main >= required_matches
+    if not np.any(keep_main):
+        return []
+    kept = []
+    for idx in np.flatnonzero(keep_main):
+        match_count = int(matches_per_main[idx])
+        matched_conf_values = matched_conf_values_per_main[idx]
+        median_conf = (
+            float(np.median(np.asarray(matched_conf_values, dtype=np.float64))) if matched_conf_values else 0.0
+        )
+        if median_conf < MIN_COMBINED_MEDIAN_CONF:
+            LOGGER.info(
+                (
+                    "Combine drop candidate | matches=%d/%d (required=%d) | "
+                    "median_conf=%.2f < min_combined_median_conf=%.2f"
+                ),
+                match_count,
+                n_frames,
+                required_matches,
+                median_conf,
+                MIN_COMBINED_MEDIAN_CONF,
+            )
+            continue
+        kept.append(
+            {
+                "box": main_bboxes[idx],
+                "match_count": match_count,
+                "n_frames": int(n_frames),
+                "required_matches": int(required_matches),
+                "match_ratio": float(match_count / max(n_frames, 1)),
+                "median_conf": median_conf,
+                "max_conf": float(conf_max_per_main[idx]),
+                "matched_conf_values": matched_conf_values,
+                "matched_frame_indices": matched_frame_indices_per_main[idx],
+                "first_match_frame_idx": first_match_frame_idx_per_main[idx],
+                "first_match_bbox": first_match_bbox_per_main[idx],
+            }
+        )
+    return kept
 def infer(video_file):
     LOGGER.info(
         (
             "Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
+            "max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f "
+            "main_det_match_iou_threshold=%.2f min_combined_median_conf=%.2f "
+            "display_det_match_iou_threshold=%.2f"
         ),
         INFER_BATCH_SIZE,
         ENABLE_MOTION_SEGMENTATION,
         MAX_INFER_FRAMES_PER_SPLIT,
         MIN_MAIN_MATCH_ABS,
         MIN_MAIN_MATCH_RATIO,
+        MAIN_DET_MATCH_IOU_THRESHOLD,
+        MIN_COMBINED_MEDIAN_CONF,
+        DISPLAY_DET_MATCH_IOU_THRESHOLD,
     )
     with timer("prepare_splits", timing):
         if ENABLE_MOTION_SEGMENTATION:
         LOGGER.info("Inference stop | no frames available")
         timing["wall"] = time.perf_counter() - wall_t0
         _log_timing_summary("Inference", timing, wall_time=timing["wall"])
+        return {"detections": [], "all_frame_predictions": []}
     outputs = []
+    all_frame_predictions = []
     infer_model = 0.0
     combine_time = 0.0
     iou_time = 0.0
     draw_time = 0.0
+    draw_all_frames_time = 0.0
     split_loop_time = 0.0
     for split_idx, frames in enumerate(split_frames):
         split_t0 = time.perf_counter()
         else:
             frame_preds = [model(frame) for frame in frames_for_infer]
         frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
+        for frame_idx, bbox in enumerate(frame_preds):
+            if bbox.size == 0:
+                LOGGER.info(
+                    "Inference split %d frame %d | detections=0",
+                    split_idx + 1,
+                    frame_idx + 1,
+                )
+                continue
+            confs = bbox[:, 4].astype(np.float64)
+            conf_list_txt = ", ".join(f"{float(c):.2f}" for c in confs.tolist())
+            LOGGER.info(
+                (
+                    "Inference split %d frame %d | detections=%d | confs=[%s] | "
+                    "frame_max_conf=%.2f | frame_mean_conf_all_bboxes=%.2f"
+                ),
+                split_idx + 1,
+                frame_idx + 1,
+                len(bbox),
+                conf_list_txt,
+                float(np.max(confs)),
+                float(np.mean(confs)),
+            )
+        for frame_idx, (frame, bbox) in enumerate(zip(frames_for_infer, frame_preds)):
+            subtitle = f"segment {split_idx + 1} / frame {frame_idx + 1}"
+            t_draw_all = time.perf_counter()
+            all_frame_predictions.append(
+                {
+                    "image": _draw_detections(frame, bbox, subtitle=subtitle),
+                    "caption": f"Segment {split_idx + 1} - Frame {frame_idx + 1}",
+                }
+            )
+            draw_all_frames_time += time.perf_counter() - t_draw_all
         split_model = time.perf_counter() - t_model
         infer_model += split_model
             split_idx + 1,
             len(kept_main),
         )
+        for det_idx, det_info in enumerate(kept_main):
+            conf_values_txt = ", ".join(f"{float(c):.2f}" for c in det_info["matched_conf_values"])
+            frame_indices_txt = ", ".join(str(int(i) + 1) for i in det_info["matched_frame_indices"])
+            LOGGER.info(
+                (
+                    "Inference split %d combined detection %d | matches=%d/%d "
+                    "(required=%d, ratio=%.2f) | combine_median_conf=%.2f | combine_max_conf=%.2f | "
+                    "matched_frames=[%s] | matched_confs=[%s]"
+                ),
+                split_idx + 1,
+                det_idx + 1,
+                det_info["match_count"],
+                det_info["n_frames"],
+                det_info["required_matches"],
+                det_info["match_ratio"],
+                det_info["median_conf"],
+                det_info["max_conf"],
+                frame_indices_txt,
+                conf_values_txt,
+            )
+        if not kept_main:
             split_elapsed = time.perf_counter() - split_t0
             split_loop_time += split_elapsed
             LOGGER.info(
             )
             continue
+        for det_idx, det_info in enumerate(kept_main):
+            main_box = det_info["box"]
+            selected_frame_idx = None
+            selected_bbox = None
+            selection_source = None
+            # Prefer the earliest frame that overlaps the combined detection, using a relaxed
+            # threshold for display (so we show the first visible appearance of the event).
+            for frame_idx, bbox in enumerate(frame_preds):
                 if bbox.size == 0:
                     continue
                 t_iou = time.perf_counter()
                 dt_iou = time.perf_counter() - t_iou
                 split_iou += dt_iou
                 iou_time += dt_iou
+                if (ious > DISPLAY_DET_MATCH_IOU_THRESHOLD).any():
                     match_idx = int(np.argmax(ious[0]))
+                    selected_frame_idx = int(frame_idx)
+                    selected_bbox = np.asarray(bbox[match_idx], dtype=np.float64).reshape(1, 5)
+                    selection_source = "display_first_overlap"
                     break
+            first_match_frame_idx = det_info.get("first_match_frame_idx")
+            first_match_bbox = det_info.get("first_match_bbox")
+            if selected_frame_idx is None or selected_bbox is None:
+                if (
+                    first_match_frame_idx is None
+                    or first_match_bbox is None
+                    or int(first_match_frame_idx) < 0
+                    or int(first_match_frame_idx) >= len(frames_for_infer)
+                ):
+                    LOGGER.warning(
+                        "Inference split %d detection %d | missing display frame and first matched frame/bbox",
+                        split_idx + 1,
+                        det_idx + 1,
+                    )
+                    continue
+                selected_frame_idx = int(first_match_frame_idx)
+                selected_bbox = np.asarray(first_match_bbox, dtype=np.float64).reshape(1, 5)
+                selection_source = "combine_first_match_fallback"
+            frame = frames_for_infer[selected_frame_idx]
+            LOGGER.info(
+                (
+                    "Inference split %d detection %d | selected_frame=%d | source=%s | "
+                    "selected frame_conf=%.2f | combine_median_conf=%.2f | combine_max_conf=%.2f"
+                ),
+                split_idx + 1,
+                det_idx + 1,
+                selected_frame_idx + 1,
+                selection_source,
+                float(selected_bbox[0, 4]),
+                det_info["median_conf"],
+                det_info["max_conf"],
+            )
+            subtitle = (
+                f"segment {split_idx + 1} / detection {det_idx + 1} | "
+                f"frame {selected_frame_idx + 1} | "
+                f"matchs {det_info['match_count']}/{det_info['n_frames']} | "
+                f"conf_med {det_info['median_conf']:.2f}"
+            )
+            t_draw = time.perf_counter()
+            outputs.append(_draw_detections(frame, selected_bbox, subtitle=subtitle))
+            dt_draw = time.perf_counter() - t_draw
+            split_draw += dt_draw
+            draw_time += dt_draw
         split_elapsed = time.perf_counter() - split_t0
         split_loop_time += split_elapsed
         LOGGER.info(
     timing["combine_predictions"] = combine_time
     timing["iou_matching"] = iou_time
     timing["draw_detections"] = draw_time
+    timing["draw_all_frame_predictions"] = draw_all_frames_time
     timing["wall"] = time.perf_counter() - wall_t0
     _log_timing_summary("Inference", timing, wall_time=timing["wall"])
+    LOGGER.info(
+        "Inference done | output_images=%d all_frame_prediction_images=%d",
+        len(outputs),
+        len(all_frame_predictions),
+    )
+    return {"detections": outputs, "all_frame_predictions": all_frame_predictions}
 def _upload_signature(uploaded_file):
 def _render_outputs(outputs):
+    detections = outputs
+    all_frame_predictions = []
+    if isinstance(outputs, dict):
+        detections = outputs.get("detections", [])
+        all_frame_predictions = outputs.get("all_frame_predictions", [])
+    if not detections:
+        st.warning("Aucune detection d'incendie trouvee dans cette video.")
+    else:
+        st.subheader("Incendies detectes")
+        columns = st.columns(2)
+        for idx, image in enumerate(detections):
+            columns[idx % 2].image(image, caption=f"Detection {idx + 1}", use_container_width=True)
+    # if all_frame_predictions:
+    #     with st.expander(
+    #         f"Predictions sur toutes les frames echantillonnees ({len(all_frame_predictions)})",
+    #         expanded=False,
+    #     ):
+    #         columns = st.columns(2)
+    #         for idx, item in enumerate(all_frame_predictions):
+    #             image = item["image"] if isinstance(item, dict) else item
+    #             caption = (
+    #                 item.get("caption", f"Frame {idx + 1}")
+    #                 if isinstance(item, dict)
+    #                 else f"Frame {idx + 1}"
+    #             )
+    #             columns[idx % 2].image(image, caption=caption, use_container_width=True)
 def main():