Mateo committed on
Commit
5bd4325
·
1 Parent(s): 44d5cdf

filter detection by filter

Browse files
Files changed (1) hide show
  1. app.py +225 -30
app.py CHANGED
@@ -52,6 +52,9 @@ MODEL_IMGSZ = max(320, int(os.getenv("MODEL_IMGSZ", "1024")))
52
  MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))
53
  MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))
54
  MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))
 
 
 
55
 
56
 
57
  def _log_timing_summary(label, stats, wall_time=None, max_items=12):
@@ -934,7 +937,7 @@ def _draw_detections(pil_img, preds, subtitle=None):
934
  def _combine_predictions_per_split(frame_preds):
935
  n_frames = len(frame_preds)
936
  if n_frames == 0:
937
- return np.zeros((0, 5), dtype=np.float64)
938
 
939
  boxes = np.zeros((0, 5), dtype=np.float64)
940
  for bbox in frame_preds:
@@ -942,24 +945,83 @@ def _combine_predictions_per_split(frame_preds):
942
  boxes = np.vstack([boxes, bbox])
943
 
944
  if boxes.size == 0:
945
- return np.zeros((0, 5), dtype=np.float64)
946
 
947
  main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
948
  if main_bboxes.size == 0:
949
- return np.zeros((0, 5), dtype=np.float64)
950
 
951
- matches_per_main = np.zeros(len(main_bboxes), dtype=int)
952
- for bbox in frame_preds:
 
 
 
 
 
 
953
  if bbox.size == 0:
954
  continue
955
  ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
956
- matches_per_main += (ious > 0).any(axis=1).astype(int)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
 
958
  required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
959
  keep_main = matches_per_main >= required_matches
960
- if np.any(keep_main):
961
- return main_bboxes[keep_main]
962
- return np.zeros((0, 5), dtype=np.float64)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
963
 
964
 
965
  def infer(video_file):
@@ -972,7 +1034,9 @@ def infer(video_file):
972
  LOGGER.info(
973
  (
974
  "Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
975
- "max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f"
 
 
976
  ),
977
  INFER_BATCH_SIZE,
978
  ENABLE_MOTION_SEGMENTATION,
@@ -980,6 +1044,9 @@ def infer(video_file):
980
  MAX_INFER_FRAMES_PER_SPLIT,
981
  MIN_MAIN_MATCH_ABS,
982
  MIN_MAIN_MATCH_RATIO,
 
 
 
983
  )
984
  with timer("prepare_splits", timing):
985
  if ENABLE_MOTION_SEGMENTATION:
@@ -993,13 +1060,15 @@ def infer(video_file):
993
  LOGGER.info("Inference stop | no frames available")
994
  timing["wall"] = time.perf_counter() - wall_t0
995
  _log_timing_summary("Inference", timing, wall_time=timing["wall"])
996
- return []
997
 
998
  outputs = []
 
999
  infer_model = 0.0
1000
  combine_time = 0.0
1001
  iou_time = 0.0
1002
  draw_time = 0.0
 
1003
  split_loop_time = 0.0
1004
  for split_idx, frames in enumerate(split_frames):
1005
  split_t0 = time.perf_counter()
@@ -1020,6 +1089,38 @@ def infer(video_file):
1020
  else:
1021
  frame_preds = [model(frame) for frame in frames_for_infer]
1022
  frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1023
 
1024
  split_model = time.perf_counter() - t_model
1025
  infer_model += split_model
@@ -1035,7 +1136,27 @@ def infer(video_file):
1035
  split_idx + 1,
1036
  len(kept_main),
1037
  )
1038
- if kept_main.size == 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1039
  split_elapsed = time.perf_counter() - split_t0
1040
  split_loop_time += split_elapsed
1041
  LOGGER.info(
@@ -1053,8 +1174,15 @@ def infer(video_file):
1053
  )
1054
  continue
1055
 
1056
- for det_idx, main_box in enumerate(kept_main):
1057
- for frame, bbox in zip(frames_for_infer, frame_preds):
 
 
 
 
 
 
 
1058
  if bbox.size == 0:
1059
  continue
1060
  t_iou = time.perf_counter()
@@ -1062,16 +1190,58 @@ def infer(video_file):
1062
  dt_iou = time.perf_counter() - t_iou
1063
  split_iou += dt_iou
1064
  iou_time += dt_iou
1065
- if (ious > 0).any():
1066
  match_idx = int(np.argmax(ious[0]))
1067
- subtitle = f"segment {split_idx + 1} / detection {det_idx + 1}"
1068
- t_draw = time.perf_counter()
1069
- outputs.append(_draw_detections(frame, bbox[match_idx : match_idx + 1], subtitle=subtitle))
1070
- dt_draw = time.perf_counter() - t_draw
1071
- split_draw += dt_draw
1072
- draw_time += dt_draw
1073
  break
1074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1075
  split_elapsed = time.perf_counter() - split_t0
1076
  split_loop_time += split_elapsed
1077
  LOGGER.info(
@@ -1093,10 +1263,15 @@ def infer(video_file):
1093
  timing["combine_predictions"] = combine_time
1094
  timing["iou_matching"] = iou_time
1095
  timing["draw_detections"] = draw_time
 
1096
  timing["wall"] = time.perf_counter() - wall_t0
1097
  _log_timing_summary("Inference", timing, wall_time=timing["wall"])
1098
- LOGGER.info("Inference done | output_images=%d", len(outputs))
1099
- return outputs
 
 
 
 
1100
 
1101
 
1102
  def _upload_signature(uploaded_file):
@@ -1114,14 +1289,34 @@ def _write_uploaded_video(uploaded_file):
1114
 
1115
 
1116
  def _render_outputs(outputs):
1117
- if not outputs:
1118
- st.warning("Aucune detection d'incendie trouvee dans cette video.")
1119
- return
 
 
1120
 
1121
- st.subheader("Incendies detectes")
1122
- columns = st.columns(2)
1123
- for idx, image in enumerate(outputs):
1124
- columns[idx % 2].image(image, caption=f"Detection {idx + 1}", use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1125
 
1126
 
1127
  def main():
 
52
  MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))
53
  MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))
54
  MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))
55
+ MAIN_DET_MATCH_IOU_THRESHOLD = float(os.getenv("MAIN_DET_MATCH_IOU_THRESHOLD", "0.12"))
56
+ MIN_COMBINED_MEDIAN_CONF = float(os.getenv("MIN_COMBINED_MEDIAN_CONF", "0.12"))
57
+ DISPLAY_DET_MATCH_IOU_THRESHOLD = float(os.getenv("DISPLAY_DET_MATCH_IOU_THRESHOLD", "0.0"))
58
 
59
 
60
  def _log_timing_summary(label, stats, wall_time=None, max_items=12):
 
937
  def _combine_predictions_per_split(frame_preds):
938
  n_frames = len(frame_preds)
939
  if n_frames == 0:
940
+ return []
941
 
942
  boxes = np.zeros((0, 5), dtype=np.float64)
943
  for bbox in frame_preds:
 
945
  boxes = np.vstack([boxes, bbox])
946
 
947
  if boxes.size == 0:
948
+ return []
949
 
950
  main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
951
  if main_bboxes.size == 0:
952
+ return []
953
 
954
+ n_main = len(main_bboxes)
955
+ matches_per_main = np.zeros(n_main, dtype=int)
956
+ conf_max_per_main = np.zeros(n_main, dtype=np.float64)
957
+ matched_conf_values_per_main = [[] for _ in range(n_main)]
958
+ matched_frame_indices_per_main = [[] for _ in range(n_main)]
959
+ first_match_frame_idx_per_main = [None for _ in range(n_main)]
960
+ first_match_bbox_per_main = [None for _ in range(n_main)]
961
+ for frame_idx, bbox in enumerate(frame_preds):
962
  if bbox.size == 0:
963
  continue
964
  ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
965
+ match_mask = ious >= MAIN_DET_MATCH_IOU_THRESHOLD
966
+ has_match = match_mask.any(axis=1)
967
+ matches_per_main += has_match.astype(int)
968
+ if np.any(has_match):
969
+ # Keep only one bbox per frame for each main bbox (best IoU among matches).
970
+ masked_ious = np.where(match_mask, ious, -1.0)
971
+ best_idx_per_main = np.argmax(masked_ious, axis=1)
972
+ best_conf_per_main = bbox[best_idx_per_main, 4].astype(np.float64)
973
+ matched_conf = np.where(has_match, best_conf_per_main, 0.0)
974
+ conf_max_per_main = np.maximum(conf_max_per_main, matched_conf)
975
+ for main_idx in np.flatnonzero(has_match):
976
+ matched_conf_values_per_main[main_idx].append(float(best_conf_per_main[main_idx]))
977
+ matched_frame_indices_per_main[main_idx].append(int(frame_idx))
978
+ if first_match_frame_idx_per_main[main_idx] is None:
979
+ first_match_frame_idx_per_main[main_idx] = int(frame_idx)
980
+ first_match_bbox_per_main[main_idx] = np.asarray(
981
+ bbox[int(best_idx_per_main[main_idx])], dtype=np.float64
982
+ ).copy()
983
 
984
  required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
985
  keep_main = matches_per_main >= required_matches
986
+ if not np.any(keep_main):
987
+ return []
988
+
989
+ kept = []
990
+ for idx in np.flatnonzero(keep_main):
991
+ match_count = int(matches_per_main[idx])
992
+ matched_conf_values = matched_conf_values_per_main[idx]
993
+ median_conf = (
994
+ float(np.median(np.asarray(matched_conf_values, dtype=np.float64))) if matched_conf_values else 0.0
995
+ )
996
+ if median_conf < MIN_COMBINED_MEDIAN_CONF:
997
+ LOGGER.info(
998
+ (
999
+ "Combine drop candidate | matches=%d/%d (required=%d) | "
1000
+ "median_conf=%.2f < min_combined_median_conf=%.2f"
1001
+ ),
1002
+ match_count,
1003
+ n_frames,
1004
+ required_matches,
1005
+ median_conf,
1006
+ MIN_COMBINED_MEDIAN_CONF,
1007
+ )
1008
+ continue
1009
+ kept.append(
1010
+ {
1011
+ "box": main_bboxes[idx],
1012
+ "match_count": match_count,
1013
+ "n_frames": int(n_frames),
1014
+ "required_matches": int(required_matches),
1015
+ "match_ratio": float(match_count / max(n_frames, 1)),
1016
+ "median_conf": median_conf,
1017
+ "max_conf": float(conf_max_per_main[idx]),
1018
+ "matched_conf_values": matched_conf_values,
1019
+ "matched_frame_indices": matched_frame_indices_per_main[idx],
1020
+ "first_match_frame_idx": first_match_frame_idx_per_main[idx],
1021
+ "first_match_bbox": first_match_bbox_per_main[idx],
1022
+ }
1023
+ )
1024
+ return kept
1025
 
1026
 
1027
  def infer(video_file):
 
1034
  LOGGER.info(
1035
  (
1036
  "Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
1037
+ "max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f "
1038
+ "main_det_match_iou_threshold=%.2f min_combined_median_conf=%.2f "
1039
+ "display_det_match_iou_threshold=%.2f"
1040
  ),
1041
  INFER_BATCH_SIZE,
1042
  ENABLE_MOTION_SEGMENTATION,
 
1044
  MAX_INFER_FRAMES_PER_SPLIT,
1045
  MIN_MAIN_MATCH_ABS,
1046
  MIN_MAIN_MATCH_RATIO,
1047
+ MAIN_DET_MATCH_IOU_THRESHOLD,
1048
+ MIN_COMBINED_MEDIAN_CONF,
1049
+ DISPLAY_DET_MATCH_IOU_THRESHOLD,
1050
  )
1051
  with timer("prepare_splits", timing):
1052
  if ENABLE_MOTION_SEGMENTATION:
 
1060
  LOGGER.info("Inference stop | no frames available")
1061
  timing["wall"] = time.perf_counter() - wall_t0
1062
  _log_timing_summary("Inference", timing, wall_time=timing["wall"])
1063
+ return {"detections": [], "all_frame_predictions": []}
1064
 
1065
  outputs = []
1066
+ all_frame_predictions = []
1067
  infer_model = 0.0
1068
  combine_time = 0.0
1069
  iou_time = 0.0
1070
  draw_time = 0.0
1071
+ draw_all_frames_time = 0.0
1072
  split_loop_time = 0.0
1073
  for split_idx, frames in enumerate(split_frames):
1074
  split_t0 = time.perf_counter()
 
1089
  else:
1090
  frame_preds = [model(frame) for frame in frames_for_infer]
1091
  frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
1092
+ for frame_idx, bbox in enumerate(frame_preds):
1093
+ if bbox.size == 0:
1094
+ LOGGER.info(
1095
+ "Inference split %d frame %d | detections=0",
1096
+ split_idx + 1,
1097
+ frame_idx + 1,
1098
+ )
1099
+ continue
1100
+ confs = bbox[:, 4].astype(np.float64)
1101
+ conf_list_txt = ", ".join(f"{float(c):.2f}" for c in confs.tolist())
1102
+ LOGGER.info(
1103
+ (
1104
+ "Inference split %d frame %d | detections=%d | confs=[%s] | "
1105
+ "frame_max_conf=%.2f | frame_mean_conf_all_bboxes=%.2f"
1106
+ ),
1107
+ split_idx + 1,
1108
+ frame_idx + 1,
1109
+ len(bbox),
1110
+ conf_list_txt,
1111
+ float(np.max(confs)),
1112
+ float(np.mean(confs)),
1113
+ )
1114
+ for frame_idx, (frame, bbox) in enumerate(zip(frames_for_infer, frame_preds)):
1115
+ subtitle = f"segment {split_idx + 1} / frame {frame_idx + 1}"
1116
+ t_draw_all = time.perf_counter()
1117
+ all_frame_predictions.append(
1118
+ {
1119
+ "image": _draw_detections(frame, bbox, subtitle=subtitle),
1120
+ "caption": f"Segment {split_idx + 1} - Frame {frame_idx + 1}",
1121
+ }
1122
+ )
1123
+ draw_all_frames_time += time.perf_counter() - t_draw_all
1124
 
1125
  split_model = time.perf_counter() - t_model
1126
  infer_model += split_model
 
1136
  split_idx + 1,
1137
  len(kept_main),
1138
  )
1139
+ for det_idx, det_info in enumerate(kept_main):
1140
+ conf_values_txt = ", ".join(f"{float(c):.2f}" for c in det_info["matched_conf_values"])
1141
+ frame_indices_txt = ", ".join(str(int(i) + 1) for i in det_info["matched_frame_indices"])
1142
+ LOGGER.info(
1143
+ (
1144
+ "Inference split %d combined detection %d | matches=%d/%d "
1145
+ "(required=%d, ratio=%.2f) | combine_median_conf=%.2f | combine_max_conf=%.2f | "
1146
+ "matched_frames=[%s] | matched_confs=[%s]"
1147
+ ),
1148
+ split_idx + 1,
1149
+ det_idx + 1,
1150
+ det_info["match_count"],
1151
+ det_info["n_frames"],
1152
+ det_info["required_matches"],
1153
+ det_info["match_ratio"],
1154
+ det_info["median_conf"],
1155
+ det_info["max_conf"],
1156
+ frame_indices_txt,
1157
+ conf_values_txt,
1158
+ )
1159
+ if not kept_main:
1160
  split_elapsed = time.perf_counter() - split_t0
1161
  split_loop_time += split_elapsed
1162
  LOGGER.info(
 
1174
  )
1175
  continue
1176
 
1177
+ for det_idx, det_info in enumerate(kept_main):
1178
+ main_box = det_info["box"]
1179
+ selected_frame_idx = None
1180
+ selected_bbox = None
1181
+ selection_source = None
1182
+
1183
+ # Prefer the earliest frame that overlaps the combined detection, using a relaxed
1184
+ # threshold for display (so we show the first visible appearance of the event).
1185
+ for frame_idx, bbox in enumerate(frame_preds):
1186
  if bbox.size == 0:
1187
  continue
1188
  t_iou = time.perf_counter()
 
1190
  dt_iou = time.perf_counter() - t_iou
1191
  split_iou += dt_iou
1192
  iou_time += dt_iou
1193
+ if (ious > DISPLAY_DET_MATCH_IOU_THRESHOLD).any():
1194
  match_idx = int(np.argmax(ious[0]))
1195
+ selected_frame_idx = int(frame_idx)
1196
+ selected_bbox = np.asarray(bbox[match_idx], dtype=np.float64).reshape(1, 5)
1197
+ selection_source = "display_first_overlap"
 
 
 
1198
  break
1199
 
1200
+ first_match_frame_idx = det_info.get("first_match_frame_idx")
1201
+ first_match_bbox = det_info.get("first_match_bbox")
1202
+ if selected_frame_idx is None or selected_bbox is None:
1203
+ if (
1204
+ first_match_frame_idx is None
1205
+ or first_match_bbox is None
1206
+ or int(first_match_frame_idx) < 0
1207
+ or int(first_match_frame_idx) >= len(frames_for_infer)
1208
+ ):
1209
+ LOGGER.warning(
1210
+ "Inference split %d detection %d | missing display frame and first matched frame/bbox",
1211
+ split_idx + 1,
1212
+ det_idx + 1,
1213
+ )
1214
+ continue
1215
+ selected_frame_idx = int(first_match_frame_idx)
1216
+ selected_bbox = np.asarray(first_match_bbox, dtype=np.float64).reshape(1, 5)
1217
+ selection_source = "combine_first_match_fallback"
1218
+
1219
+ frame = frames_for_infer[selected_frame_idx]
1220
+ LOGGER.info(
1221
+ (
1222
+ "Inference split %d detection %d | selected_frame=%d | source=%s | "
1223
+ "selected frame_conf=%.2f | combine_median_conf=%.2f | combine_max_conf=%.2f"
1224
+ ),
1225
+ split_idx + 1,
1226
+ det_idx + 1,
1227
+ selected_frame_idx + 1,
1228
+ selection_source,
1229
+ float(selected_bbox[0, 4]),
1230
+ det_info["median_conf"],
1231
+ det_info["max_conf"],
1232
+ )
1233
+ subtitle = (
1234
+ f"segment {split_idx + 1} / detection {det_idx + 1} | "
1235
+ f"frame {selected_frame_idx + 1} | "
1236
+ f"matchs {det_info['match_count']}/{det_info['n_frames']} | "
1237
+ f"conf_med {det_info['median_conf']:.2f}"
1238
+ )
1239
+ t_draw = time.perf_counter()
1240
+ outputs.append(_draw_detections(frame, selected_bbox, subtitle=subtitle))
1241
+ dt_draw = time.perf_counter() - t_draw
1242
+ split_draw += dt_draw
1243
+ draw_time += dt_draw
1244
+
1245
  split_elapsed = time.perf_counter() - split_t0
1246
  split_loop_time += split_elapsed
1247
  LOGGER.info(
 
1263
  timing["combine_predictions"] = combine_time
1264
  timing["iou_matching"] = iou_time
1265
  timing["draw_detections"] = draw_time
1266
+ timing["draw_all_frame_predictions"] = draw_all_frames_time
1267
  timing["wall"] = time.perf_counter() - wall_t0
1268
  _log_timing_summary("Inference", timing, wall_time=timing["wall"])
1269
+ LOGGER.info(
1270
+ "Inference done | output_images=%d all_frame_prediction_images=%d",
1271
+ len(outputs),
1272
+ len(all_frame_predictions),
1273
+ )
1274
+ return {"detections": outputs, "all_frame_predictions": all_frame_predictions}
1275
 
1276
 
1277
  def _upload_signature(uploaded_file):
 
1289
 
1290
 
1291
  def _render_outputs(outputs):
1292
+ detections = outputs
1293
+ all_frame_predictions = []
1294
+ if isinstance(outputs, dict):
1295
+ detections = outputs.get("detections", [])
1296
+ all_frame_predictions = outputs.get("all_frame_predictions", [])
1297
 
1298
+ if not detections:
1299
+ st.warning("Aucune detection d'incendie trouvee dans cette video.")
1300
+ else:
1301
+ st.subheader("Incendies detectes")
1302
+ columns = st.columns(2)
1303
+ for idx, image in enumerate(detections):
1304
+ columns[idx % 2].image(image, caption=f"Detection {idx + 1}", use_container_width=True)
1305
+
1306
+ # if all_frame_predictions:
1307
+ # with st.expander(
1308
+ # f"Predictions sur toutes les frames echantillonnees ({len(all_frame_predictions)})",
1309
+ # expanded=False,
1310
+ # ):
1311
+ # columns = st.columns(2)
1312
+ # for idx, item in enumerate(all_frame_predictions):
1313
+ # image = item["image"] if isinstance(item, dict) else item
1314
+ # caption = (
1315
+ # item.get("caption", f"Frame {idx + 1}")
1316
+ # if isinstance(item, dict)
1317
+ # else f"Frame {idx + 1}"
1318
+ # )
1319
+ # columns[idx % 2].image(image, caption=caption, use_container_width=True)
1320
 
1321
 
1322
  def main():