charlesollion committed on
Commit
f906054
·
1 Parent(s): 0f291a2

improve video generation and logging tqdm

Browse files
Files changed (4) hide show
  1. app.py +17 -6
  2. requirements.txt +1 -1
  3. tools/video_readers.py +26 -21
  4. tracking/utils.py +17 -18
app.py CHANGED
@@ -8,6 +8,7 @@ from typing import Dict, List, Tuple
8
 
9
  import datetime
10
  import logging
 
11
 
12
  # imports for tracking
13
  import torch
@@ -70,11 +71,14 @@ config_track = DotDict({
70
 
71
 
72
  logger.info('---Yolo model...')
 
 
73
  URL_MODEL = "https://github.com/surfriderfoundationeurope/IA_Pau/releases/download/v0.1/yolov5.pt"
74
  FILE_MODEL = "yolov5.pt"
75
  model_path = download_from_url(URL_MODEL, FILE_MODEL, logger, "./models")
76
  model_yolo = load_model(model_path, config_track.device)
77
 
 
78
  logger.info('---Centernet model...')
79
  URL_MODEL = "https://partage.imt.fr/index.php/s/sJi22N6gedN6T4q/download"
80
  FILE_MODEL = "mobilenet_v3_pretrained.pth"
@@ -132,8 +136,11 @@ def track(args):
132
  detections = []
133
  logger.info('---Detecting...')
134
  if args.model_type == "yolo":
135
- for frame in reader:
136
- detections.append(detector(frame))
 
 
 
137
  elif args.model_type == "centernet":
138
  detections = get_detections_for_video(reader, detector, batch_size=args.detection_batch_size, device=device)
139
 
@@ -141,7 +148,7 @@ def track(args):
141
  display = None
142
  results = track_video(reader, iter(detections), args, engine, transition_variance, observation_variance, display, is_yolo=args.model_type=="yolo")
143
  reader.video.release()
144
-
145
  # store unfiltered results
146
  datestr = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
147
  output_filename = op.splitext(args.video_path)[0] + "_" + datestr + '_unfiltered.txt'
@@ -179,11 +186,15 @@ def run_model(video_path, model_type, seconds, skip, tau, kappa):
179
  with open(output_json_path, 'w') as f_out:
180
  json.dump(output_json, f_out)
181
 
182
- #output_json={"detected_trash":[]}
183
  # build video output
184
  logger.info('---Generating new video...')
185
- video = SimpleVideoReader(video_path, skip_frames=0)
186
- generate_video_with_annotations(video, output_json, output_path,
 
 
 
 
 
187
  config_track.skip_frames, config_track.max_length,
188
  config_track.downscale_output, logger)
189
 
 
8
 
9
  import datetime
10
  import logging
11
+ import warnings
12
 
13
  # imports for tracking
14
  import torch
 
71
 
72
 
73
  logger.info('---Yolo model...')
74
+ # Yolo has warning problems, so we set an env variable to remove it
75
+ os.environ["VERBOSE"] = "False"
76
  URL_MODEL = "https://github.com/surfriderfoundationeurope/IA_Pau/releases/download/v0.1/yolov5.pt"
77
  FILE_MODEL = "yolov5.pt"
78
  model_path = download_from_url(URL_MODEL, FILE_MODEL, logger, "./models")
79
  model_yolo = load_model(model_path, config_track.device)
80
 
81
+
82
  logger.info('---Centernet model...')
83
  URL_MODEL = "https://partage.imt.fr/index.php/s/sJi22N6gedN6T4q/download"
84
  FILE_MODEL = "mobilenet_v3_pretrained.pth"
 
136
  detections = []
137
  logger.info('---Detecting...')
138
  if args.model_type == "yolo":
139
+ with warnings.catch_warnings():
140
+ warnings.filterwarnings("ignore")
141
+
142
+ for frame in reader:
143
+ detections.append(detector(frame))
144
  elif args.model_type == "centernet":
145
  detections = get_detections_for_video(reader, detector, batch_size=args.detection_batch_size, device=device)
146
 
 
148
  display = None
149
  results = track_video(reader, iter(detections), args, engine, transition_variance, observation_variance, display, is_yolo=args.model_type=="yolo")
150
  reader.video.release()
151
+
152
  # store unfiltered results
153
  datestr = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
154
  output_filename = op.splitext(args.video_path)[0] + "_" + datestr + '_unfiltered.txt'
 
186
  with open(output_json_path, 'w') as f_out:
187
  json.dump(output_json, f_out)
188
 
 
189
  # build video output
190
  logger.info('---Generating new video...')
191
+ reader = IterableFrameReader(video_filename=config_track.video_path,
192
+ skip_frames=0,
193
+ progress_bar=True,
194
+ preload=False,
195
+ max_frame=config_track.max_length)
196
+
197
+ generate_video_with_annotations(reader, output_json, output_path,
198
  config_track.skip_frames, config_track.max_length,
199
  config_track.downscale_output, logger)
200
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  scikit-image
 
2
  opencv-python
3
  torch
4
  torchvision
@@ -9,7 +10,6 @@ tqdm
9
  tensorboard
10
  imgaug
11
  psycopg2-binary
12
- moviepy
13
  git+https://github.com/pykalman/pykalman.git
14
  yolov5
15
  gradio
 
1
  scikit-image
2
+ scikit-video
3
  opencv-python
4
  torch
5
  torchvision
 
10
  tensorboard
11
  imgaug
12
  psycopg2-binary
 
13
  git+https://github.com/pykalman/pykalman.git
14
  yolov5
15
  gradio
tools/video_readers.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  from tqdm import tqdm
4
  from itertools import cycle
5
 
 
6
  class AdvancedFrameReader:
7
  def __init__(self, video_name, read_every, rescale_factor, init_time_min, init_time_s):
8
 
@@ -18,7 +19,6 @@ class AdvancedFrameReader:
18
 
19
  self.init_rescale_factor = rescale_factor
20
 
21
-
22
  self.frame_skip = read_every - 1
23
  self.fps = self.cap.get(cv2.CAP_PROP_FPS)/read_every
24
  print(f'Reading at {self.fps:.2f} fps')
@@ -27,7 +27,6 @@ class AdvancedFrameReader:
27
  self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
28
  self.total_num_frames = self.cap.get(cv2.CAP_PROP_FRAME_COUNT)
29
 
30
-
31
  def post_process(self, ret, frame):
32
  if ret:
33
  if self.original_shape_mode:
@@ -55,7 +54,7 @@ class AdvancedFrameReader:
55
  time = 60 * time_min + time_s
56
  self.cap.set(cv2.CAP_PROP_POS_MSEC, 1000 * time)
57
  self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
58
- print('Reading from {}min{}sec'.format(time_min,time_s))
59
  self.nb_frames_read = 0
60
 
61
  def reset_init_frame(self):
@@ -72,7 +71,7 @@ class AdvancedFrameReader:
72
  height = int(self.original_height/rescale_factor)
73
  self.new_shape = (width, height)
74
  self.original_shape_mode = False
75
- print('Reading in {}x{}'.format(width, height))
76
 
77
  def set_original_shape_mode(self, mode):
78
  self.original_shape_mode = mode
@@ -83,17 +82,21 @@ class AdvancedFrameReader:
83
 
84
  class IterableFrameReader:
85
  def __init__(self, video_filename, skip_frames=0, output_shape=None, progress_bar=False, preload=False, max_frame=0):
 
86
  self.video_filename = video_filename
87
  self.max_frame_arg = max_frame
 
 
 
88
 
89
  self.video = cv2.VideoCapture(video_filename)
90
- self.input_shape = (self.video.get(cv2.CAP_PROP_FRAME_WIDTH), self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
91
- self.skip_frames = skip_frames
92
- self.preload = preload
93
- self.total_num_frames = self.video.get(cv2.CAP_PROP_FRAME_COUNT)
94
 
95
  self.max_num_frames = min(max_frame, self.total_num_frames) if max_frame!=0 else self.total_num_frames
96
  self.counter = 0
 
97
 
98
  if output_shape is None:
99
  w, h = self.input_shape
@@ -105,24 +108,27 @@ class IterableFrameReader:
105
 
106
  self.fps = self.video.get(cv2.CAP_PROP_FPS) / (self.skip_frames+1)
107
 
108
- print(f'Reading video at {self.fps}fps.')
109
- if progress_bar:
110
- self.progress_bar = tqdm(total=int(self.max_num_frames/(self.skip_frames+1)), leave=True)
111
- self.progress_bar_update = self.progress_bar.update
112
- else:
113
- self.progress_bar_update = lambda: None
114
-
115
  if self.preload:
116
- print('Preloading frames in RAM...')
117
  self.frames = self._load_all_frames()
118
 
 
 
 
 
 
 
 
 
 
 
119
  def reset_video(self):
120
  """ This method is needed as cv2.CAP_PROP_POS_FRAMES
121
  does not work on all backends
122
  """
123
  self.video.release()
 
124
  self.__init__(self.video_filename, self.skip_frames, self.output_shape,
125
- self.progress_bar is not None, self.preload, self.max_frame_arg)
126
 
127
  def _load_all_frames(self):
128
  frames = []
@@ -139,7 +145,7 @@ class IterableFrameReader:
139
  if self.preload:
140
  if self.counter < len(self.frames):
141
  frame = self.frames[self.counter]
142
- self.progress_bar_update()
143
  return frame
144
  else:
145
  if self.counter < self.max_num_frames:
@@ -150,12 +156,11 @@ class IterableFrameReader:
150
  self.reset_video()
151
  raise StopIteration
152
 
153
-
154
  def _read_frame(self):
155
  ret, frame = self.video.read()
156
  self._skip_frames()
157
  if ret:
158
- self.progress_bar_update()
159
  frame = cv2.resize(frame, self.output_shape)
160
  return ret, frame
161
 
@@ -191,8 +196,8 @@ class SimpleVideoReader:
191
  for _ in range(self.skip_frames):
192
  self.video.read()
193
 
194
- class TorchIterableFromReader(torch.utils.data.IterableDataset):
195
 
 
196
  def __init__(self, reader, transforms):
197
  self.transforms = transforms
198
  self.reader = reader
 
3
  from tqdm import tqdm
4
  from itertools import cycle
5
 
6
+
7
  class AdvancedFrameReader:
8
  def __init__(self, video_name, read_every, rescale_factor, init_time_min, init_time_s):
9
 
 
19
 
20
  self.init_rescale_factor = rescale_factor
21
 
 
22
  self.frame_skip = read_every - 1
23
  self.fps = self.cap.get(cv2.CAP_PROP_FPS)/read_every
24
  print(f'Reading at {self.fps:.2f} fps')
 
27
  self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
28
  self.total_num_frames = self.cap.get(cv2.CAP_PROP_FRAME_COUNT)
29
 
 
30
  def post_process(self, ret, frame):
31
  if ret:
32
  if self.original_shape_mode:
 
54
  time = 60 * time_min + time_s
55
  self.cap.set(cv2.CAP_PROP_POS_MSEC, 1000 * time)
56
  self.init_frame = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
57
+ #print('Reading from {}min{}sec'.format(time_min,time_s))
58
  self.nb_frames_read = 0
59
 
60
  def reset_init_frame(self):
 
71
  height = int(self.original_height/rescale_factor)
72
  self.new_shape = (width, height)
73
  self.original_shape_mode = False
74
+ #print('Reading in {}x{}'.format(width, height))
75
 
76
  def set_original_shape_mode(self, mode):
77
  self.original_shape_mode = mode
 
82
 
83
  class IterableFrameReader:
84
  def __init__(self, video_filename, skip_frames=0, output_shape=None, progress_bar=False, preload=False, max_frame=0):
85
+ # store arguments for reset
86
  self.video_filename = video_filename
87
  self.max_frame_arg = max_frame
88
+ self.progress_bar_arg = progress_bar
89
+ self.preload = preload
90
+ self.skip_frames = skip_frames
91
 
92
  self.video = cv2.VideoCapture(video_filename)
93
+ self.input_shape = (int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH)),
94
+ int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
95
+ self.total_num_frames = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
 
96
 
97
  self.max_num_frames = min(max_frame, self.total_num_frames) if max_frame!=0 else self.total_num_frames
98
  self.counter = 0
99
+ self.progress_bar = None
100
 
101
  if output_shape is None:
102
  w, h = self.input_shape
 
108
 
109
  self.fps = self.video.get(cv2.CAP_PROP_FPS) / (self.skip_frames+1)
110
 
 
 
 
 
 
 
 
111
  if self.preload:
 
112
  self.frames = self._load_all_frames()
113
 
114
+ def update_progress_bar(self):
115
+ if self.progress_bar_arg:
116
+ if self.progress_bar:
117
+ # update_progress_bar
118
+ self.progress_bar.update()
119
+ else:
120
+ # create progress bar
121
+ self.progress_bar = tqdm(total=int(self.max_num_frames/(self.skip_frames+1)),
122
+ position=1, leave=True)
123
+
124
  def reset_video(self):
125
  """ This method is needed as cv2.CAP_PROP_POS_FRAMES
126
  does not work on all backends
127
  """
128
  self.video.release()
129
+ self.progress_bar.close()
130
  self.__init__(self.video_filename, self.skip_frames, self.output_shape,
131
+ self.progress_bar_arg, self.preload, self.max_frame_arg)
132
 
133
  def _load_all_frames(self):
134
  frames = []
 
145
  if self.preload:
146
  if self.counter < len(self.frames):
147
  frame = self.frames[self.counter]
148
+ self.update_progress_bar()
149
  return frame
150
  else:
151
  if self.counter < self.max_num_frames:
 
156
  self.reset_video()
157
  raise StopIteration
158
 
 
159
  def _read_frame(self):
160
  ret, frame = self.video.read()
161
  self._skip_frames()
162
  if ret:
163
+ self.update_progress_bar()
164
  frame = cv2.resize(frame, self.output_shape)
165
  return ret, frame
166
 
 
196
  for _ in range(self.skip_frames):
197
  self.video.read()
198
 
 
199
 
200
+ class TorchIterableFromReader(torch.utils.data.IterableDataset):
201
  def __init__(self, reader, transforms):
202
  self.transforms = transforms
203
  self.reader = reader
tracking/utils.py CHANGED
@@ -8,8 +8,8 @@ from tools.video_readers import TorchIterableFromReader
8
  from time import time
9
  from detection.transforms import TransformFrames
10
  from collections import defaultdict
11
- from moviepy.editor import ImageSequenceClip
12
  from skimage.transform import downscale_local_mean
 
13
 
14
  class GaussianMixture(object):
15
  def __init__(self, means, covariance, weights):
@@ -82,7 +82,7 @@ def get_detections_for_video(reader, detector, batch_size=16, device=None):
82
  return detections
83
 
84
 
85
- def generate_video_with_annotations(video, output_detected, output_filename, skip_frames, maxframes, downscale, logger):
86
  fps = 24
87
  logger.info("---intepreting json")
88
  results = defaultdict(list)
@@ -105,31 +105,30 @@ def generate_video_with_annotations(video, output_detected, output_filename, ski
105
  results[frame_nb * (skip_frames+1) + i].append((object_nb, new_x, new_y, object_class))
106
  logger.info("---writing video")
107
 
108
- #fourcc = cv2.VideoWriter_fourcc(*'mp4v')
109
- # writer = cv2.VideoWriter(filename=output_filename,
110
- #apiPreference=cv2.CAP_FFMPEG,
111
- # fourcc=fourcc,
112
- # fps=fps,
113
- # frameSize=video.shape)
114
 
115
  font = cv2.FONT_HERSHEY_COMPLEX
116
- ret, frame, frame_nb = video.read()
117
- frames = []
118
- while ret:
119
  detections_for_frame = results[frame_nb]
120
  for detection in detections_for_frame:
121
  cv2.putText(frame, f'{detection[0]}/{detection[3]}', (int(detection[1]), int(detection[2])+5), font, 2, (0, 0, 255), 3, cv2.LINE_AA)
122
 
123
  frame = downscale_local_mean(frame, (downscale,downscale,1)).astype(np.uint8)
124
- frames.append(frame[:,:,::-1])
 
 
125
 
126
- ret, frame, frame_nb = video.read()
127
- if frame_nb > maxframes:
128
- break
129
 
130
- clip = ImageSequenceClip(sequence=frames, fps=fps)
131
- clip.write_videofile(output_filename, fps=fps)
132
- del frames
 
 
 
 
133
 
134
  logger.info("---finished writing video")
135
 
 
8
  from time import time
9
  from detection.transforms import TransformFrames
10
  from collections import defaultdict
 
11
  from skimage.transform import downscale_local_mean
12
+ from skvideo.io import FFmpegWriter
13
 
14
  class GaussianMixture(object):
15
  def __init__(self, means, covariance, weights):
 
82
  return detections
83
 
84
 
85
+ def generate_video_with_annotations(reader, output_detected, output_filename, skip_frames, maxframes, downscale, logger):
86
  fps = 24
87
  logger.info("---intepreting json")
88
  results = defaultdict(list)
 
105
  results[frame_nb * (skip_frames+1) + i].append((object_nb, new_x, new_y, object_class))
106
  logger.info("---writing video")
107
 
108
+ writer = FFmpegWriter(filename = output_filename)
 
 
 
 
 
109
 
110
  font = cv2.FONT_HERSHEY_COMPLEX
111
+ for frame_nb, frame in enumerate(reader):
 
 
112
  detections_for_frame = results[frame_nb]
113
  for detection in detections_for_frame:
114
  cv2.putText(frame, f'{detection[0]}/{detection[3]}', (int(detection[1]), int(detection[2])+5), font, 2, (0, 0, 255), 3, cv2.LINE_AA)
115
 
116
  frame = downscale_local_mean(frame, (downscale,downscale,1)).astype(np.uint8)
117
+ writer.writeFrame(frame[:,:,::-1])
118
+ # moviepy version
119
+ # frames.append(frame[:,:,::-1])
120
 
121
+ #ret, frame, frame_nb = video.read()
122
+ #if frame_nb > maxframes:
123
+ # break
124
 
125
+ writer.close()
126
+ reader.video.release()
127
+
128
+ # version with moviepy
129
+ #clip = ImageSequenceClip(sequence=frames, fps=fps)
130
+ #clip.write_videofile(output_filename, fps=fps)
131
+ #del frames
132
 
133
  logger.info("---finished writing video")
134