Spaces:
Build error
Build error
Commit ·
f906054
1
Parent(s): 0f291a2
improve video generation and logging tqdm
Browse files
- app.py +17 -6
- requirements.txt +1 -1
- tools/video_readers.py +26 -21
- tracking/utils.py +17 -18
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from typing import Dict, List, Tuple
|
|
| 8 |
|
| 9 |
import datetime
|
| 10 |
import logging
|
|
|
|
| 11 |
|
| 12 |
# imports for tracking
|
| 13 |
import torch
|
|
@@ -70,11 +71,14 @@ config_track = DotDict({
|
|
| 70 |
|
| 71 |
|
| 72 |
logger.info('---Yolo model...')
|
|
|
|
|
|
|
| 73 |
URL_MODEL = "https://github.com/surfriderfoundationeurope/IA_Pau/releases/download/v0.1/yolov5.pt"
|
| 74 |
FILE_MODEL = "yolov5.pt"
|
| 75 |
model_path = download_from_url(URL_MODEL, FILE_MODEL, logger, "./models")
|
| 76 |
model_yolo = load_model(model_path, config_track.device)
|
| 77 |
|
|
|
|
| 78 |
logger.info('---Centernet model...')
|
| 79 |
URL_MODEL = "https://partage.imt.fr/index.php/s/sJi22N6gedN6T4q/download"
|
| 80 |
FILE_MODEL = "mobilenet_v3_pretrained.pth"
|
|
@@ -132,8 +136,11 @@ def track(args):
|
|
| 132 |
detections = []
|
| 133 |
logger.info('---Detecting...')
|
| 134 |
if args.model_type == "yolo":
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
| 137 |
elif args.model_type == "centernet":
|
| 138 |
detections = get_detections_for_video(reader, detector, batch_size=args.detection_batch_size, device=device)
|
| 139 |
|
|
@@ -141,7 +148,7 @@ def track(args):
|
|
| 141 |
display = None
|
| 142 |
results = track_video(reader, iter(detections), args, engine, transition_variance, observation_variance, display, is_yolo=args.model_type=="yolo")
|
| 143 |
reader.video.release()
|
| 144 |
-
|
| 145 |
# store unfiltered results
|
| 146 |
datestr = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
|
| 147 |
output_filename = op.splitext(args.video_path)[0] + "_" + datestr + '_unfiltered.txt'
|
|
@@ -179,11 +186,15 @@ def run_model(video_path, model_type, seconds, skip, tau, kappa):
|
|
| 179 |
with open(output_json_path, 'w') as f_out:
|
| 180 |
json.dump(output_json, f_out)
|
| 181 |
|
| 182 |
-
#output_json={"detected_trash":[]}
|
| 183 |
# build video output
|
| 184 |
logger.info('---Generating new video...')
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
config_track.skip_frames, config_track.max_length,
|
| 188 |
config_track.downscale_output, logger)
|
| 189 |
|
|
|
|
| 8 |
|
| 9 |
import datetime
|
| 10 |
import logging
|
| 11 |
+
import warnings
|
| 12 |
|
| 13 |
# imports for tracking
|
| 14 |
import torch
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
logger.info('---Yolo model...')
|
| 74 |
+
# Yolo has warning problems, so we set an env variable to remove it
|
| 75 |
+
os.environ["VERBOSE"] = "False"
|
| 76 |
URL_MODEL = "https://github.com/surfriderfoundationeurope/IA_Pau/releases/download/v0.1/yolov5.pt"
|
| 77 |
FILE_MODEL = "yolov5.pt"
|
| 78 |
model_path = download_from_url(URL_MODEL, FILE_MODEL, logger, "./models")
|
| 79 |
model_yolo = load_model(model_path, config_track.device)
|
| 80 |
|
| 81 |
+
|
| 82 |
logger.info('---Centernet model...')
|
| 83 |
URL_MODEL = "https://partage.imt.fr/index.php/s/sJi22N6gedN6T4q/download"
|
| 84 |
FILE_MODEL = "mobilenet_v3_pretrained.pth"
|
|
|
|
| 136 |
detections = []
|
| 137 |
logger.info('---Detecting...')
|
| 138 |
if args.model_type == "yolo":
|
| 139 |
+
with warnings.catch_warnings():
|
| 140 |
+
warnings.filterwarnings("ignore")
|
| 141 |
+
|
| 142 |
+
for frame in reader:
|
| 143 |
+
detections.append(detector(frame))
|
| 144 |
elif args.model_type == "centernet":
|
| 145 |
detections = get_detections_for_video(reader, detector, batch_size=args.detection_batch_size, device=device)
|
| 146 |
|
|
|
|
| 148 |
display = None
|
| 149 |
results = track_video(reader, iter(detections), args, engine, transition_variance, observation_variance, display, is_yolo=args.model_type=="yolo")
|
| 150 |
reader.video.release()
|
| 151 |
+
|
| 152 |
# store unfiltered results
|
| 153 |
datestr = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
|
| 154 |
output_filename = op.splitext(args.video_path)[0] + "_" + datestr + '_unfiltered.txt'
|
|
|
|
| 186 |
with open(output_json_path, 'w') as f_out:
|
| 187 |
json.dump(output_json, f_out)
|
| 188 |
|
|
|
|
| 189 |
# build video output
|
| 190 |
logger.info('---Generating new video...')
|
| 191 |
+
reader = IterableFrameReader(video_filename=config_track.video_path,
|
| 192 |
+
skip_frames=0,
|
| 193 |
+
progress_bar=True,
|
| 194 |
+
preload=False,
|
| 195 |
+
max_frame=config_track.max_length)
|
| 196 |
+
|
| 197 |
+
generate_video_with_annotations(reader, output_json, output_path,
|
| 198 |
config_track.skip_frames, config_track.max_length,
|
| 199 |
config_track.downscale_output, logger)
|
| 200 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
scikit-image
|
|
|
|
| 2 |
opencv-python
|
| 3 |
torch
|
| 4 |
torchvision
|
|
@@ -9,7 +10,6 @@ tqdm
|
|
| 9 |
tensorboard
|
| 10 |
imgaug
|
| 11 |
psycopg2-binary
|
| 12 |
-
moviepy
|
| 13 |
git+https://github.com/pykalman/pykalman.git
|
| 14 |
yolov5
|
| 15 |
gradio
|
|
|
|
| 1 |
scikit-image
|
| 2 |
+
scikit-video
|
| 3 |
opencv-python
|
| 4 |
torch
|
| 5 |
torchvision
|
|
|
|
| 10 |
tensorboard
|
| 11 |
imgaug
|
| 12 |
psycopg2-binary
|
|
|
|
| 13 |
git+https://github.com/pykalman/pykalman.git
|
| 14 |
yolov5
|
| 15 |
gradio
|
tools/video_readers.py
CHANGED
|
@@ -3,6 +3,7 @@ import torch
|
|
| 3 |
from tqdm import tqdm
|
| 4 |
from itertools import cycle
|
| 5 |
|
|
|
|
| 6 |
class AdvancedFrameReader:
|
| 7 |
def __init__(self, video_name, read_every, rescale_factor, init_time_min, init_time_s):
|
| 8 |
|
|
@@ -18,7 +19,6 @@ class AdvancedFrameReader:
|
|
| 18 |
|
| 19 |
self.init_rescale_factor = rescale_factor
|
| 20 |
|
| 21 |
-
|
| 22 |
self.frame_skip = read_every - 1
|
| 23 |
self.fps = self.cap.get(cv2.CAP_PROP_FPS)/read_every
|
| 24 |
print(f'Reading at {self.fps:.2f} fps')
|
|
@@ -27,7 +27,6 @@ class AdvancedFrameReader:
|
|
| 27 |
self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
|
| 28 |
self.total_num_frames = self.cap.get(cv2.CAP_PROP_FRAME_COUNT)
|
| 29 |
|
| 30 |
-
|
| 31 |
def post_process(self, ret, frame):
|
| 32 |
if ret:
|
| 33 |
if self.original_shape_mode:
|
|
@@ -55,7 +54,7 @@ class AdvancedFrameReader:
|
|
| 55 |
time = 60 * time_min + time_s
|
| 56 |
self.cap.set(cv2.CAP_PROP_POS_MSEC, 1000 * time)
|
| 57 |
self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
|
| 58 |
-
print('Reading from {}min{}sec'.format(time_min,time_s))
|
| 59 |
self.nb_frames_read = 0
|
| 60 |
|
| 61 |
def reset_init_frame(self):
|
|
@@ -72,7 +71,7 @@ class AdvancedFrameReader:
|
|
| 72 |
height = int(self.original_height/rescale_factor)
|
| 73 |
self.new_shape = (width, height)
|
| 74 |
self.original_shape_mode = False
|
| 75 |
-
print('Reading in {}x{}'.format(width, height))
|
| 76 |
|
| 77 |
def set_original_shape_mode(self, mode):
|
| 78 |
self.original_shape_mode = mode
|
|
@@ -83,17 +82,21 @@ class AdvancedFrameReader:
|
|
| 83 |
|
| 84 |
class IterableFrameReader:
|
| 85 |
def __init__(self, video_filename, skip_frames=0, output_shape=None, progress_bar=False, preload=False, max_frame=0):
|
|
|
|
| 86 |
self.video_filename = video_filename
|
| 87 |
self.max_frame_arg = max_frame
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
self.video = cv2.VideoCapture(video_filename)
|
| 90 |
-
self.input_shape = (self.video.get(cv2.CAP_PROP_FRAME_WIDTH)
|
| 91 |
-
|
| 92 |
-
self.
|
| 93 |
-
self.total_num_frames = self.video.get(cv2.CAP_PROP_FRAME_COUNT)
|
| 94 |
|
| 95 |
self.max_num_frames = min(max_frame, self.total_num_frames) if max_frame!=0 else self.total_num_frames
|
| 96 |
self.counter = 0
|
|
|
|
| 97 |
|
| 98 |
if output_shape is None:
|
| 99 |
w, h = self.input_shape
|
|
@@ -105,24 +108,27 @@ class IterableFrameReader:
|
|
| 105 |
|
| 106 |
self.fps = self.video.get(cv2.CAP_PROP_FPS) / (self.skip_frames+1)
|
| 107 |
|
| 108 |
-
print(f'Reading video at {self.fps}fps.')
|
| 109 |
-
if progress_bar:
|
| 110 |
-
self.progress_bar = tqdm(total=int(self.max_num_frames/(self.skip_frames+1)), leave=True)
|
| 111 |
-
self.progress_bar_update = self.progress_bar.update
|
| 112 |
-
else:
|
| 113 |
-
self.progress_bar_update = lambda: None
|
| 114 |
-
|
| 115 |
if self.preload:
|
| 116 |
-
print('Preloading frames in RAM...')
|
| 117 |
self.frames = self._load_all_frames()
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
def reset_video(self):
|
| 120 |
""" This method is needed as cv2.CAP_PROP_POS_FRAMES
|
| 121 |
does not work on all backends
|
| 122 |
"""
|
| 123 |
self.video.release()
|
|
|
|
| 124 |
self.__init__(self.video_filename, self.skip_frames, self.output_shape,
|
| 125 |
-
self.
|
| 126 |
|
| 127 |
def _load_all_frames(self):
|
| 128 |
frames = []
|
|
@@ -139,7 +145,7 @@ class IterableFrameReader:
|
|
| 139 |
if self.preload:
|
| 140 |
if self.counter < len(self.frames):
|
| 141 |
frame = self.frames[self.counter]
|
| 142 |
-
self.
|
| 143 |
return frame
|
| 144 |
else:
|
| 145 |
if self.counter < self.max_num_frames:
|
|
@@ -150,12 +156,11 @@ class IterableFrameReader:
|
|
| 150 |
self.reset_video()
|
| 151 |
raise StopIteration
|
| 152 |
|
| 153 |
-
|
| 154 |
def _read_frame(self):
|
| 155 |
ret, frame = self.video.read()
|
| 156 |
self._skip_frames()
|
| 157 |
if ret:
|
| 158 |
-
self.
|
| 159 |
frame = cv2.resize(frame, self.output_shape)
|
| 160 |
return ret, frame
|
| 161 |
|
|
@@ -191,8 +196,8 @@ class SimpleVideoReader:
|
|
| 191 |
for _ in range(self.skip_frames):
|
| 192 |
self.video.read()
|
| 193 |
|
| 194 |
-
class TorchIterableFromReader(torch.utils.data.IterableDataset):
|
| 195 |
|
|
|
|
| 196 |
def __init__(self, reader, transforms):
|
| 197 |
self.transforms = transforms
|
| 198 |
self.reader = reader
|
|
|
|
| 3 |
from tqdm import tqdm
|
| 4 |
from itertools import cycle
|
| 5 |
|
| 6 |
+
|
| 7 |
class AdvancedFrameReader:
|
| 8 |
def __init__(self, video_name, read_every, rescale_factor, init_time_min, init_time_s):
|
| 9 |
|
|
|
|
| 19 |
|
| 20 |
self.init_rescale_factor = rescale_factor
|
| 21 |
|
|
|
|
| 22 |
self.frame_skip = read_every - 1
|
| 23 |
self.fps = self.cap.get(cv2.CAP_PROP_FPS)/read_every
|
| 24 |
print(f'Reading at {self.fps:.2f} fps')
|
|
|
|
| 27 |
self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
|
| 28 |
self.total_num_frames = self.cap.get(cv2.CAP_PROP_FRAME_COUNT)
|
| 29 |
|
|
|
|
| 30 |
def post_process(self, ret, frame):
|
| 31 |
if ret:
|
| 32 |
if self.original_shape_mode:
|
|
|
|
| 54 |
time = 60 * time_min + time_s
|
| 55 |
self.cap.set(cv2.CAP_PROP_POS_MSEC, 1000 * time)
|
| 56 |
self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
|
| 57 |
+
#print('Reading from {}min{}sec'.format(time_min,time_s))
|
| 58 |
self.nb_frames_read = 0
|
| 59 |
|
| 60 |
def reset_init_frame(self):
|
|
|
|
| 71 |
height = int(self.original_height/rescale_factor)
|
| 72 |
self.new_shape = (width, height)
|
| 73 |
self.original_shape_mode = False
|
| 74 |
+
#print('Reading in {}x{}'.format(width, height))
|
| 75 |
|
| 76 |
def set_original_shape_mode(self, mode):
|
| 77 |
self.original_shape_mode = mode
|
|
|
|
| 82 |
|
| 83 |
class IterableFrameReader:
|
| 84 |
def __init__(self, video_filename, skip_frames=0, output_shape=None, progress_bar=False, preload=False, max_frame=0):
|
| 85 |
+
# store arguments for reset
|
| 86 |
self.video_filename = video_filename
|
| 87 |
self.max_frame_arg = max_frame
|
| 88 |
+
self.progress_bar_arg = progress_bar
|
| 89 |
+
self.preload = preload
|
| 90 |
+
self.skip_frames = skip_frames
|
| 91 |
|
| 92 |
self.video = cv2.VideoCapture(video_filename)
|
| 93 |
+
self.input_shape = (int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
| 94 |
+
int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
| 95 |
+
self.total_num_frames = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
|
|
| 96 |
|
| 97 |
self.max_num_frames = min(max_frame, self.total_num_frames) if max_frame!=0 else self.total_num_frames
|
| 98 |
self.counter = 0
|
| 99 |
+
self.progress_bar = None
|
| 100 |
|
| 101 |
if output_shape is None:
|
| 102 |
w, h = self.input_shape
|
|
|
|
| 108 |
|
| 109 |
self.fps = self.video.get(cv2.CAP_PROP_FPS) / (self.skip_frames+1)
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
if self.preload:
|
|
|
|
| 112 |
self.frames = self._load_all_frames()
|
| 113 |
|
| 114 |
+
def update_progress_bar(self):
    """Advance the tqdm progress bar by one frame, creating it on first use.

    Does nothing when the reader was built with ``progress_bar=False``
    (stored as ``self.progress_bar_arg``).
    """
    if not self.progress_bar_arg:
        return
    # Compare against None instead of relying on truthiness: a tqdm bar whose
    # total is 0 evaluates as falsy, which would create a new bar on every call.
    if self.progress_bar is None:
        self.progress_bar = tqdm(total=int(self.max_num_frames/(self.skip_frames+1)),
                                 position=1, leave=True)
    # Count the current frame too, including the one that triggered the
    # creation of the bar, so the bar can actually reach its total.
    self.progress_bar.update()
|
| 123 |
+
|
| 124 |
def reset_video(self):
    """Rewind the reader by releasing the capture and re-running ``__init__``.

    This method is needed as cv2.CAP_PROP_POS_FRAMES
    does not work on all backends.
    """
    self.video.release()
    # The bar is created lazily, so it is still None when the progress bar is
    # disabled or when no frame has been counted yet; closing None would raise.
    if self.progress_bar is not None:
        self.progress_bar.close()
    # Re-initialise with the arguments stored by the constructor.
    self.__init__(self.video_filename, self.skip_frames, self.output_shape,
                  self.progress_bar_arg, self.preload, self.max_frame_arg)
|
| 132 |
|
| 133 |
def _load_all_frames(self):
|
| 134 |
frames = []
|
|
|
|
| 145 |
if self.preload:
|
| 146 |
if self.counter < len(self.frames):
|
| 147 |
frame = self.frames[self.counter]
|
| 148 |
+
self.update_progress_bar()
|
| 149 |
return frame
|
| 150 |
else:
|
| 151 |
if self.counter < self.max_num_frames:
|
|
|
|
| 156 |
self.reset_video()
|
| 157 |
raise StopIteration
|
| 158 |
|
|
|
|
| 159 |
def _read_frame(self):
|
| 160 |
ret, frame = self.video.read()
|
| 161 |
self._skip_frames()
|
| 162 |
if ret:
|
| 163 |
+
self.update_progress_bar()
|
| 164 |
frame = cv2.resize(frame, self.output_shape)
|
| 165 |
return ret, frame
|
| 166 |
|
|
|
|
| 196 |
for _ in range(self.skip_frames):
|
| 197 |
self.video.read()
|
| 198 |
|
|
|
|
| 199 |
|
| 200 |
+
class TorchIterableFromReader(torch.utils.data.IterableDataset):
|
| 201 |
def __init__(self, reader, transforms):
|
| 202 |
self.transforms = transforms
|
| 203 |
self.reader = reader
|
tracking/utils.py
CHANGED
|
@@ -8,8 +8,8 @@ from tools.video_readers import TorchIterableFromReader
|
|
| 8 |
from time import time
|
| 9 |
from detection.transforms import TransformFrames
|
| 10 |
from collections import defaultdict
|
| 11 |
-
from moviepy.editor import ImageSequenceClip
|
| 12 |
from skimage.transform import downscale_local_mean
|
|
|
|
| 13 |
|
| 14 |
class GaussianMixture(object):
|
| 15 |
def __init__(self, means, covariance, weights):
|
|
@@ -82,7 +82,7 @@ def get_detections_for_video(reader, detector, batch_size=16, device=None):
|
|
| 82 |
return detections
|
| 83 |
|
| 84 |
|
| 85 |
-
def generate_video_with_annotations(
|
| 86 |
fps = 24
|
| 87 |
logger.info("---intepreting json")
|
| 88 |
results = defaultdict(list)
|
|
@@ -105,31 +105,30 @@ def generate_video_with_annotations(video, output_detected, output_filename, ski
|
|
| 105 |
results[frame_nb * (skip_frames+1) + i].append((object_nb, new_x, new_y, object_class))
|
| 106 |
logger.info("---writing video")
|
| 107 |
|
| 108 |
-
|
| 109 |
-
# writer = cv2.VideoWriter(filename=output_filename,
|
| 110 |
-
#apiPreference=cv2.CAP_FFMPEG,
|
| 111 |
-
# fourcc=fourcc,
|
| 112 |
-
# fps=fps,
|
| 113 |
-
# frameSize=video.shape)
|
| 114 |
|
| 115 |
font = cv2.FONT_HERSHEY_COMPLEX
|
| 116 |
-
|
| 117 |
-
frames = []
|
| 118 |
-
while ret:
|
| 119 |
detections_for_frame = results[frame_nb]
|
| 120 |
for detection in detections_for_frame:
|
| 121 |
cv2.putText(frame, f'{detection[0]}/{detection[3]}', (int(detection[1]), int(detection[2])+5), font, 2, (0, 0, 255), 3, cv2.LINE_AA)
|
| 122 |
|
| 123 |
frame = downscale_local_mean(frame, (downscale,downscale,1)).astype(np.uint8)
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
ret, frame, frame_nb = video.read()
|
| 127 |
-
if frame_nb > maxframes:
|
| 128 |
-
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
logger.info("---finished writing video")
|
| 135 |
|
|
|
|
| 8 |
from time import time
|
| 9 |
from detection.transforms import TransformFrames
|
| 10 |
from collections import defaultdict
|
|
|
|
| 11 |
from skimage.transform import downscale_local_mean
|
| 12 |
+
from skvideo.io import FFmpegWriter
|
| 13 |
|
| 14 |
class GaussianMixture(object):
|
| 15 |
def __init__(self, means, covariance, weights):
|
|
|
|
| 82 |
return detections
|
| 83 |
|
| 84 |
|
| 85 |
+
def generate_video_with_annotations(reader, output_detected, output_filename, skip_frames, maxframes, downscale, logger):
|
| 86 |
fps = 24
|
| 87 |
logger.info("---intepreting json")
|
| 88 |
results = defaultdict(list)
|
|
|
|
| 105 |
results[frame_nb * (skip_frames+1) + i].append((object_nb, new_x, new_y, object_class))
|
| 106 |
logger.info("---writing video")
|
| 107 |
|
| 108 |
+
writer = FFmpegWriter(filename = output_filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
font = cv2.FONT_HERSHEY_COMPLEX
|
| 111 |
+
for frame_nb, frame in enumerate(reader):
|
|
|
|
|
|
|
| 112 |
detections_for_frame = results[frame_nb]
|
| 113 |
for detection in detections_for_frame:
|
| 114 |
cv2.putText(frame, f'{detection[0]}/{detection[3]}', (int(detection[1]), int(detection[2])+5), font, 2, (0, 0, 255), 3, cv2.LINE_AA)
|
| 115 |
|
| 116 |
frame = downscale_local_mean(frame, (downscale,downscale,1)).astype(np.uint8)
|
| 117 |
+
writer.writeFrame(frame[:,:,::-1])
|
| 118 |
+
# moviepy version
|
| 119 |
+
# frames.append(frame[:,:,::-1])
|
| 120 |
|
| 121 |
+
#ret, frame, frame_nb = video.read()
|
| 122 |
+
#if frame_nb > maxframes:
|
| 123 |
+
# break
|
| 124 |
|
| 125 |
+
writer.close()
|
| 126 |
+
reader.video.release()
|
| 127 |
+
|
| 128 |
+
# version with moviepy
|
| 129 |
+
#clip = ImageSequenceClip(sequence=frames, fps=fps)
|
| 130 |
+
#clip.write_videofile(output_filename, fps=fps)
|
| 131 |
+
#del frames
|
| 132 |
|
| 133 |
logger.info("---finished writing video")
|
| 134 |
|