Spaces:
Build error
Build error
filter detection by filter
Browse files
app.py
CHANGED
|
@@ -52,6 +52,9 @@ MODEL_IMGSZ = max(320, int(os.getenv("MODEL_IMGSZ", "1024")))
|
|
| 52 |
MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))
|
| 53 |
MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))
|
| 54 |
MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def _log_timing_summary(label, stats, wall_time=None, max_items=12):
|
|
@@ -934,7 +937,7 @@ def _draw_detections(pil_img, preds, subtitle=None):
|
|
| 934 |
def _combine_predictions_per_split(frame_preds):
|
| 935 |
n_frames = len(frame_preds)
|
| 936 |
if n_frames == 0:
|
| 937 |
-
return
|
| 938 |
|
| 939 |
boxes = np.zeros((0, 5), dtype=np.float64)
|
| 940 |
for bbox in frame_preds:
|
|
@@ -942,24 +945,83 @@ def _combine_predictions_per_split(frame_preds):
|
|
| 942 |
boxes = np.vstack([boxes, bbox])
|
| 943 |
|
| 944 |
if boxes.size == 0:
|
| 945 |
-
return
|
| 946 |
|
| 947 |
main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
|
| 948 |
if main_bboxes.size == 0:
|
| 949 |
-
return
|
| 950 |
|
| 951 |
-
|
| 952 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 953 |
if bbox.size == 0:
|
| 954 |
continue
|
| 955 |
ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
|
| 956 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 957 |
|
| 958 |
required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
|
| 959 |
keep_main = matches_per_main >= required_matches
|
| 960 |
-
if np.any(keep_main):
|
| 961 |
-
return
|
| 962 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 963 |
|
| 964 |
|
| 965 |
def infer(video_file):
|
|
@@ -972,7 +1034,9 @@ def infer(video_file):
|
|
| 972 |
LOGGER.info(
|
| 973 |
(
|
| 974 |
"Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
|
| 975 |
-
"max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f"
|
|
|
|
|
|
|
| 976 |
),
|
| 977 |
INFER_BATCH_SIZE,
|
| 978 |
ENABLE_MOTION_SEGMENTATION,
|
|
@@ -980,6 +1044,9 @@ def infer(video_file):
|
|
| 980 |
MAX_INFER_FRAMES_PER_SPLIT,
|
| 981 |
MIN_MAIN_MATCH_ABS,
|
| 982 |
MIN_MAIN_MATCH_RATIO,
|
|
|
|
|
|
|
|
|
|
| 983 |
)
|
| 984 |
with timer("prepare_splits", timing):
|
| 985 |
if ENABLE_MOTION_SEGMENTATION:
|
|
@@ -993,13 +1060,15 @@ def infer(video_file):
|
|
| 993 |
LOGGER.info("Inference stop | no frames available")
|
| 994 |
timing["wall"] = time.perf_counter() - wall_t0
|
| 995 |
_log_timing_summary("Inference", timing, wall_time=timing["wall"])
|
| 996 |
-
return []
|
| 997 |
|
| 998 |
outputs = []
|
|
|
|
| 999 |
infer_model = 0.0
|
| 1000 |
combine_time = 0.0
|
| 1001 |
iou_time = 0.0
|
| 1002 |
draw_time = 0.0
|
|
|
|
| 1003 |
split_loop_time = 0.0
|
| 1004 |
for split_idx, frames in enumerate(split_frames):
|
| 1005 |
split_t0 = time.perf_counter()
|
|
@@ -1020,6 +1089,38 @@ def infer(video_file):
|
|
| 1020 |
else:
|
| 1021 |
frame_preds = [model(frame) for frame in frames_for_infer]
|
| 1022 |
frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1023 |
|
| 1024 |
split_model = time.perf_counter() - t_model
|
| 1025 |
infer_model += split_model
|
|
@@ -1035,7 +1136,27 @@ def infer(video_file):
|
|
| 1035 |
split_idx + 1,
|
| 1036 |
len(kept_main),
|
| 1037 |
)
|
| 1038 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1039 |
split_elapsed = time.perf_counter() - split_t0
|
| 1040 |
split_loop_time += split_elapsed
|
| 1041 |
LOGGER.info(
|
|
@@ -1053,8 +1174,15 @@ def infer(video_file):
|
|
| 1053 |
)
|
| 1054 |
continue
|
| 1055 |
|
| 1056 |
-
for det_idx,
|
| 1057 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1058 |
if bbox.size == 0:
|
| 1059 |
continue
|
| 1060 |
t_iou = time.perf_counter()
|
|
@@ -1062,16 +1190,58 @@ def infer(video_file):
|
|
| 1062 |
dt_iou = time.perf_counter() - t_iou
|
| 1063 |
split_iou += dt_iou
|
| 1064 |
iou_time += dt_iou
|
| 1065 |
-
if (ious >
|
| 1066 |
match_idx = int(np.argmax(ious[0]))
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
dt_draw = time.perf_counter() - t_draw
|
| 1071 |
-
split_draw += dt_draw
|
| 1072 |
-
draw_time += dt_draw
|
| 1073 |
break
|
| 1074 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1075 |
split_elapsed = time.perf_counter() - split_t0
|
| 1076 |
split_loop_time += split_elapsed
|
| 1077 |
LOGGER.info(
|
|
@@ -1093,10 +1263,15 @@ def infer(video_file):
|
|
| 1093 |
timing["combine_predictions"] = combine_time
|
| 1094 |
timing["iou_matching"] = iou_time
|
| 1095 |
timing["draw_detections"] = draw_time
|
|
|
|
| 1096 |
timing["wall"] = time.perf_counter() - wall_t0
|
| 1097 |
_log_timing_summary("Inference", timing, wall_time=timing["wall"])
|
| 1098 |
-
LOGGER.info(
|
| 1099 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1100 |
|
| 1101 |
|
| 1102 |
def _upload_signature(uploaded_file):
|
|
@@ -1114,14 +1289,34 @@ def _write_uploaded_video(uploaded_file):
|
|
| 1114 |
|
| 1115 |
|
| 1116 |
def _render_outputs(outputs):
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
|
|
|
|
|
|
| 1120 |
|
| 1121 |
-
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1125 |
|
| 1126 |
|
| 1127 |
def main():
|
|
|
|
| 52 |
MAX_INFER_FRAMES_PER_SPLIT = max(0, int(os.getenv("MAX_INFER_FRAMES_PER_SPLIT", "12")))
|
| 53 |
MIN_MAIN_MATCH_ABS = max(1, int(os.getenv("MIN_MAIN_MATCH_ABS", "3")))
|
| 54 |
MIN_MAIN_MATCH_RATIO = float(os.getenv("MIN_MAIN_MATCH_RATIO", "0.20"))
|
| 55 |
+
MAIN_DET_MATCH_IOU_THRESHOLD = float(os.getenv("MAIN_DET_MATCH_IOU_THRESHOLD", "0.12"))
|
| 56 |
+
MIN_COMBINED_MEDIAN_CONF = float(os.getenv("MIN_COMBINED_MEDIAN_CONF", "0.12"))
|
| 57 |
+
DISPLAY_DET_MATCH_IOU_THRESHOLD = float(os.getenv("DISPLAY_DET_MATCH_IOU_THRESHOLD", "0.0"))
|
| 58 |
|
| 59 |
|
| 60 |
def _log_timing_summary(label, stats, wall_time=None, max_items=12):
|
|
|
|
| 937 |
def _combine_predictions_per_split(frame_preds):
|
| 938 |
n_frames = len(frame_preds)
|
| 939 |
if n_frames == 0:
|
| 940 |
+
return []
|
| 941 |
|
| 942 |
boxes = np.zeros((0, 5), dtype=np.float64)
|
| 943 |
for bbox in frame_preds:
|
|
|
|
| 945 |
boxes = np.vstack([boxes, bbox])
|
| 946 |
|
| 947 |
if boxes.size == 0:
|
| 948 |
+
return []
|
| 949 |
|
| 950 |
main_bboxes = np.asarray(nms(boxes), dtype=np.float64)
|
| 951 |
if main_bboxes.size == 0:
|
| 952 |
+
return []
|
| 953 |
|
| 954 |
+
n_main = len(main_bboxes)
|
| 955 |
+
matches_per_main = np.zeros(n_main, dtype=int)
|
| 956 |
+
conf_max_per_main = np.zeros(n_main, dtype=np.float64)
|
| 957 |
+
matched_conf_values_per_main = [[] for _ in range(n_main)]
|
| 958 |
+
matched_frame_indices_per_main = [[] for _ in range(n_main)]
|
| 959 |
+
first_match_frame_idx_per_main = [None for _ in range(n_main)]
|
| 960 |
+
first_match_bbox_per_main = [None for _ in range(n_main)]
|
| 961 |
+
for frame_idx, bbox in enumerate(frame_preds):
|
| 962 |
if bbox.size == 0:
|
| 963 |
continue
|
| 964 |
ious = box_iou(bbox[:, :4], main_bboxes[:, :4])
|
| 965 |
+
match_mask = ious >= MAIN_DET_MATCH_IOU_THRESHOLD
|
| 966 |
+
has_match = match_mask.any(axis=1)
|
| 967 |
+
matches_per_main += has_match.astype(int)
|
| 968 |
+
if np.any(has_match):
|
| 969 |
+
# Keep only one bbox per frame for each main bbox (best IoU among matches).
|
| 970 |
+
masked_ious = np.where(match_mask, ious, -1.0)
|
| 971 |
+
best_idx_per_main = np.argmax(masked_ious, axis=1)
|
| 972 |
+
best_conf_per_main = bbox[best_idx_per_main, 4].astype(np.float64)
|
| 973 |
+
matched_conf = np.where(has_match, best_conf_per_main, 0.0)
|
| 974 |
+
conf_max_per_main = np.maximum(conf_max_per_main, matched_conf)
|
| 975 |
+
for main_idx in np.flatnonzero(has_match):
|
| 976 |
+
matched_conf_values_per_main[main_idx].append(float(best_conf_per_main[main_idx]))
|
| 977 |
+
matched_frame_indices_per_main[main_idx].append(int(frame_idx))
|
| 978 |
+
if first_match_frame_idx_per_main[main_idx] is None:
|
| 979 |
+
first_match_frame_idx_per_main[main_idx] = int(frame_idx)
|
| 980 |
+
first_match_bbox_per_main[main_idx] = np.asarray(
|
| 981 |
+
bbox[int(best_idx_per_main[main_idx])], dtype=np.float64
|
| 982 |
+
).copy()
|
| 983 |
|
| 984 |
required_matches = max(MIN_MAIN_MATCH_ABS, int(np.ceil(float(MIN_MAIN_MATCH_RATIO) * n_frames)))
|
| 985 |
keep_main = matches_per_main >= required_matches
|
| 986 |
+
if not np.any(keep_main):
|
| 987 |
+
return []
|
| 988 |
+
|
| 989 |
+
kept = []
|
| 990 |
+
for idx in np.flatnonzero(keep_main):
|
| 991 |
+
match_count = int(matches_per_main[idx])
|
| 992 |
+
matched_conf_values = matched_conf_values_per_main[idx]
|
| 993 |
+
median_conf = (
|
| 994 |
+
float(np.median(np.asarray(matched_conf_values, dtype=np.float64))) if matched_conf_values else 0.0
|
| 995 |
+
)
|
| 996 |
+
if median_conf < MIN_COMBINED_MEDIAN_CONF:
|
| 997 |
+
LOGGER.info(
|
| 998 |
+
(
|
| 999 |
+
"Combine drop candidate | matches=%d/%d (required=%d) | "
|
| 1000 |
+
"median_conf=%.2f < min_combined_median_conf=%.2f"
|
| 1001 |
+
),
|
| 1002 |
+
match_count,
|
| 1003 |
+
n_frames,
|
| 1004 |
+
required_matches,
|
| 1005 |
+
median_conf,
|
| 1006 |
+
MIN_COMBINED_MEDIAN_CONF,
|
| 1007 |
+
)
|
| 1008 |
+
continue
|
| 1009 |
+
kept.append(
|
| 1010 |
+
{
|
| 1011 |
+
"box": main_bboxes[idx],
|
| 1012 |
+
"match_count": match_count,
|
| 1013 |
+
"n_frames": int(n_frames),
|
| 1014 |
+
"required_matches": int(required_matches),
|
| 1015 |
+
"match_ratio": float(match_count / max(n_frames, 1)),
|
| 1016 |
+
"median_conf": median_conf,
|
| 1017 |
+
"max_conf": float(conf_max_per_main[idx]),
|
| 1018 |
+
"matched_conf_values": matched_conf_values,
|
| 1019 |
+
"matched_frame_indices": matched_frame_indices_per_main[idx],
|
| 1020 |
+
"first_match_frame_idx": first_match_frame_idx_per_main[idx],
|
| 1021 |
+
"first_match_bbox": first_match_bbox_per_main[idx],
|
| 1022 |
+
}
|
| 1023 |
+
)
|
| 1024 |
+
return kept
|
| 1025 |
|
| 1026 |
|
| 1027 |
def infer(video_file):
|
|
|
|
| 1034 |
LOGGER.info(
|
| 1035 |
(
|
| 1036 |
"Inference config | batch_size=%d motion_segmentation=%s fast_n_samples=%d "
|
| 1037 |
+
"max_infer_frames_per_split=%d min_main_match_abs=%d min_main_match_ratio=%.2f "
|
| 1038 |
+
"main_det_match_iou_threshold=%.2f min_combined_median_conf=%.2f "
|
| 1039 |
+
"display_det_match_iou_threshold=%.2f"
|
| 1040 |
),
|
| 1041 |
INFER_BATCH_SIZE,
|
| 1042 |
ENABLE_MOTION_SEGMENTATION,
|
|
|
|
| 1044 |
MAX_INFER_FRAMES_PER_SPLIT,
|
| 1045 |
MIN_MAIN_MATCH_ABS,
|
| 1046 |
MIN_MAIN_MATCH_RATIO,
|
| 1047 |
+
MAIN_DET_MATCH_IOU_THRESHOLD,
|
| 1048 |
+
MIN_COMBINED_MEDIAN_CONF,
|
| 1049 |
+
DISPLAY_DET_MATCH_IOU_THRESHOLD,
|
| 1050 |
)
|
| 1051 |
with timer("prepare_splits", timing):
|
| 1052 |
if ENABLE_MOTION_SEGMENTATION:
|
|
|
|
| 1060 |
LOGGER.info("Inference stop | no frames available")
|
| 1061 |
timing["wall"] = time.perf_counter() - wall_t0
|
| 1062 |
_log_timing_summary("Inference", timing, wall_time=timing["wall"])
|
| 1063 |
+
return {"detections": [], "all_frame_predictions": []}
|
| 1064 |
|
| 1065 |
outputs = []
|
| 1066 |
+
all_frame_predictions = []
|
| 1067 |
infer_model = 0.0
|
| 1068 |
combine_time = 0.0
|
| 1069 |
iou_time = 0.0
|
| 1070 |
draw_time = 0.0
|
| 1071 |
+
draw_all_frames_time = 0.0
|
| 1072 |
split_loop_time = 0.0
|
| 1073 |
for split_idx, frames in enumerate(split_frames):
|
| 1074 |
split_t0 = time.perf_counter()
|
|
|
|
| 1089 |
else:
|
| 1090 |
frame_preds = [model(frame) for frame in frames_for_infer]
|
| 1091 |
frame_preds = [np.asarray(bbox, dtype=np.float64).reshape(-1, 5) for bbox in frame_preds]
|
| 1092 |
+
for frame_idx, bbox in enumerate(frame_preds):
|
| 1093 |
+
if bbox.size == 0:
|
| 1094 |
+
LOGGER.info(
|
| 1095 |
+
"Inference split %d frame %d | detections=0",
|
| 1096 |
+
split_idx + 1,
|
| 1097 |
+
frame_idx + 1,
|
| 1098 |
+
)
|
| 1099 |
+
continue
|
| 1100 |
+
confs = bbox[:, 4].astype(np.float64)
|
| 1101 |
+
conf_list_txt = ", ".join(f"{float(c):.2f}" for c in confs.tolist())
|
| 1102 |
+
LOGGER.info(
|
| 1103 |
+
(
|
| 1104 |
+
"Inference split %d frame %d | detections=%d | confs=[%s] | "
|
| 1105 |
+
"frame_max_conf=%.2f | frame_mean_conf_all_bboxes=%.2f"
|
| 1106 |
+
),
|
| 1107 |
+
split_idx + 1,
|
| 1108 |
+
frame_idx + 1,
|
| 1109 |
+
len(bbox),
|
| 1110 |
+
conf_list_txt,
|
| 1111 |
+
float(np.max(confs)),
|
| 1112 |
+
float(np.mean(confs)),
|
| 1113 |
+
)
|
| 1114 |
+
for frame_idx, (frame, bbox) in enumerate(zip(frames_for_infer, frame_preds)):
|
| 1115 |
+
subtitle = f"segment {split_idx + 1} / frame {frame_idx + 1}"
|
| 1116 |
+
t_draw_all = time.perf_counter()
|
| 1117 |
+
all_frame_predictions.append(
|
| 1118 |
+
{
|
| 1119 |
+
"image": _draw_detections(frame, bbox, subtitle=subtitle),
|
| 1120 |
+
"caption": f"Segment {split_idx + 1} - Frame {frame_idx + 1}",
|
| 1121 |
+
}
|
| 1122 |
+
)
|
| 1123 |
+
draw_all_frames_time += time.perf_counter() - t_draw_all
|
| 1124 |
|
| 1125 |
split_model = time.perf_counter() - t_model
|
| 1126 |
infer_model += split_model
|
|
|
|
| 1136 |
split_idx + 1,
|
| 1137 |
len(kept_main),
|
| 1138 |
)
|
| 1139 |
+
for det_idx, det_info in enumerate(kept_main):
|
| 1140 |
+
conf_values_txt = ", ".join(f"{float(c):.2f}" for c in det_info["matched_conf_values"])
|
| 1141 |
+
frame_indices_txt = ", ".join(str(int(i) + 1) for i in det_info["matched_frame_indices"])
|
| 1142 |
+
LOGGER.info(
|
| 1143 |
+
(
|
| 1144 |
+
"Inference split %d combined detection %d | matches=%d/%d "
|
| 1145 |
+
"(required=%d, ratio=%.2f) | combine_median_conf=%.2f | combine_max_conf=%.2f | "
|
| 1146 |
+
"matched_frames=[%s] | matched_confs=[%s]"
|
| 1147 |
+
),
|
| 1148 |
+
split_idx + 1,
|
| 1149 |
+
det_idx + 1,
|
| 1150 |
+
det_info["match_count"],
|
| 1151 |
+
det_info["n_frames"],
|
| 1152 |
+
det_info["required_matches"],
|
| 1153 |
+
det_info["match_ratio"],
|
| 1154 |
+
det_info["median_conf"],
|
| 1155 |
+
det_info["max_conf"],
|
| 1156 |
+
frame_indices_txt,
|
| 1157 |
+
conf_values_txt,
|
| 1158 |
+
)
|
| 1159 |
+
if not kept_main:
|
| 1160 |
split_elapsed = time.perf_counter() - split_t0
|
| 1161 |
split_loop_time += split_elapsed
|
| 1162 |
LOGGER.info(
|
|
|
|
| 1174 |
)
|
| 1175 |
continue
|
| 1176 |
|
| 1177 |
+
for det_idx, det_info in enumerate(kept_main):
|
| 1178 |
+
main_box = det_info["box"]
|
| 1179 |
+
selected_frame_idx = None
|
| 1180 |
+
selected_bbox = None
|
| 1181 |
+
selection_source = None
|
| 1182 |
+
|
| 1183 |
+
# Prefer the earliest frame that overlaps the combined detection, using a relaxed
|
| 1184 |
+
# threshold for display (so we show the first visible appearance of the event).
|
| 1185 |
+
for frame_idx, bbox in enumerate(frame_preds):
|
| 1186 |
if bbox.size == 0:
|
| 1187 |
continue
|
| 1188 |
t_iou = time.perf_counter()
|
|
|
|
| 1190 |
dt_iou = time.perf_counter() - t_iou
|
| 1191 |
split_iou += dt_iou
|
| 1192 |
iou_time += dt_iou
|
| 1193 |
+
if (ious > DISPLAY_DET_MATCH_IOU_THRESHOLD).any():
|
| 1194 |
match_idx = int(np.argmax(ious[0]))
|
| 1195 |
+
selected_frame_idx = int(frame_idx)
|
| 1196 |
+
selected_bbox = np.asarray(bbox[match_idx], dtype=np.float64).reshape(1, 5)
|
| 1197 |
+
selection_source = "display_first_overlap"
|
|
|
|
|
|
|
|
|
|
| 1198 |
break
|
| 1199 |
|
| 1200 |
+
first_match_frame_idx = det_info.get("first_match_frame_idx")
|
| 1201 |
+
first_match_bbox = det_info.get("first_match_bbox")
|
| 1202 |
+
if selected_frame_idx is None or selected_bbox is None:
|
| 1203 |
+
if (
|
| 1204 |
+
first_match_frame_idx is None
|
| 1205 |
+
or first_match_bbox is None
|
| 1206 |
+
or int(first_match_frame_idx) < 0
|
| 1207 |
+
or int(first_match_frame_idx) >= len(frames_for_infer)
|
| 1208 |
+
):
|
| 1209 |
+
LOGGER.warning(
|
| 1210 |
+
"Inference split %d detection %d | missing display frame and first matched frame/bbox",
|
| 1211 |
+
split_idx + 1,
|
| 1212 |
+
det_idx + 1,
|
| 1213 |
+
)
|
| 1214 |
+
continue
|
| 1215 |
+
selected_frame_idx = int(first_match_frame_idx)
|
| 1216 |
+
selected_bbox = np.asarray(first_match_bbox, dtype=np.float64).reshape(1, 5)
|
| 1217 |
+
selection_source = "combine_first_match_fallback"
|
| 1218 |
+
|
| 1219 |
+
frame = frames_for_infer[selected_frame_idx]
|
| 1220 |
+
LOGGER.info(
|
| 1221 |
+
(
|
| 1222 |
+
"Inference split %d detection %d | selected_frame=%d | source=%s | "
|
| 1223 |
+
"selected frame_conf=%.2f | combine_median_conf=%.2f | combine_max_conf=%.2f"
|
| 1224 |
+
),
|
| 1225 |
+
split_idx + 1,
|
| 1226 |
+
det_idx + 1,
|
| 1227 |
+
selected_frame_idx + 1,
|
| 1228 |
+
selection_source,
|
| 1229 |
+
float(selected_bbox[0, 4]),
|
| 1230 |
+
det_info["median_conf"],
|
| 1231 |
+
det_info["max_conf"],
|
| 1232 |
+
)
|
| 1233 |
+
subtitle = (
|
| 1234 |
+
f"segment {split_idx + 1} / detection {det_idx + 1} | "
|
| 1235 |
+
f"frame {selected_frame_idx + 1} | "
|
| 1236 |
+
f"matchs {det_info['match_count']}/{det_info['n_frames']} | "
|
| 1237 |
+
f"conf_med {det_info['median_conf']:.2f}"
|
| 1238 |
+
)
|
| 1239 |
+
t_draw = time.perf_counter()
|
| 1240 |
+
outputs.append(_draw_detections(frame, selected_bbox, subtitle=subtitle))
|
| 1241 |
+
dt_draw = time.perf_counter() - t_draw
|
| 1242 |
+
split_draw += dt_draw
|
| 1243 |
+
draw_time += dt_draw
|
| 1244 |
+
|
| 1245 |
split_elapsed = time.perf_counter() - split_t0
|
| 1246 |
split_loop_time += split_elapsed
|
| 1247 |
LOGGER.info(
|
|
|
|
| 1263 |
timing["combine_predictions"] = combine_time
|
| 1264 |
timing["iou_matching"] = iou_time
|
| 1265 |
timing["draw_detections"] = draw_time
|
| 1266 |
+
timing["draw_all_frame_predictions"] = draw_all_frames_time
|
| 1267 |
timing["wall"] = time.perf_counter() - wall_t0
|
| 1268 |
_log_timing_summary("Inference", timing, wall_time=timing["wall"])
|
| 1269 |
+
LOGGER.info(
|
| 1270 |
+
"Inference done | output_images=%d all_frame_prediction_images=%d",
|
| 1271 |
+
len(outputs),
|
| 1272 |
+
len(all_frame_predictions),
|
| 1273 |
+
)
|
| 1274 |
+
return {"detections": outputs, "all_frame_predictions": all_frame_predictions}
|
| 1275 |
|
| 1276 |
|
| 1277 |
def _upload_signature(uploaded_file):
|
|
|
|
| 1289 |
|
| 1290 |
|
| 1291 |
def _render_outputs(outputs):
    """Display the detection images, or a warning when there are none.

    ``outputs`` may be a dict with the keys ``"detections"`` and
    ``"all_frame_predictions"`` (the shape ``infer`` returns in this file),
    or any other object, which is then used directly as the collection of
    detection images.

    Rendering goes through ``st`` (presumably the Streamlit module imported
    elsewhere in the file -- confirm): detections are laid out alternately
    in two columns, each captioned with its 1-based position.
    """
    if isinstance(outputs, dict):
        detections = outputs.get("detections", [])
        # Read for the per-frame gallery described in the note below; that
        # gallery is disabled, so the value is not used at the moment.
        all_frame_predictions = outputs.get("all_frame_predictions", [])  # noqa: F841
    else:
        detections = outputs
        all_frame_predictions = []  # noqa: F841

    # Nothing to show: warn the user and stop here.
    if not detections:
        st.warning("Aucune detection d'incendie trouvee dans cette video.")
        return

    st.subheader("Incendies detectes")
    column_pair = st.columns(2)
    for position, picture in enumerate(detections):
        # Even positions go to the first column, odd ones to the second.
        target_column = column_pair[position % 2]
        target_column.image(
            picture,
            caption=f"Detection {position + 1}",
            use_container_width=True,
        )

    # NOTE: a gallery of every sampled frame's prediction image is disabled.
    # It was a collapsed expander holding two columns, where each entry of
    # ``all_frame_predictions`` was either a dict with "image" and an
    # optional "caption" (defaulting to "Frame <n>") or a bare image.
|
| 1320 |
|
| 1321 |
|
| 1322 |
def main():
|