TYTTYTTYT committed on
Commit
12c3a2e
·
verified ·
1 Parent(s): 693246c

remove useless logs

Browse files
processing_qwen3_vl.py CHANGED
@@ -195,10 +195,8 @@ class ZFQwen3VLProcessor(ProcessorMixin):
195
  self.video_processor.focus_size, # type: ignore
196
  )
197
 
198
- print(len(curr_timestamp), curr_timestamp)
199
  video_placeholder = ""
200
  frame_seqlen = video_grid_thw[index][1:].prod() // merge_length
201
- print(video_grid_thw)
202
  for frame_idx in range(video_grid_thw[index][0]):
203
  curr_time = curr_timestamp[frame_idx]
204
  video_placeholder += f"<{curr_time:.1f} seconds>"
@@ -303,11 +301,9 @@ class ZFQwen3VLProcessor(ProcessorMixin):
303
  ):
304
  if not isinstance(indices, list):
305
  indices = indices.tolist()
306
- print(len(indices), indices)
307
  b_size = merge_size * focus_size
308
  if len(indices) % b_size != 0:
309
  indices.extend(indices[-1] for _ in range(b_size - len(indices) % b_size)) # type: ignore
310
- print(len(indices), indices)
311
  timestamps = [idx / video_fps for idx in indices]
312
  # @JJJYmmm frames are merged by self.merge_size, \
313
  # so we need to average the timestamps between the first/last frame within the temporal patch
 
195
  self.video_processor.focus_size, # type: ignore
196
  )
197
 
 
198
  video_placeholder = ""
199
  frame_seqlen = video_grid_thw[index][1:].prod() // merge_length
 
200
  for frame_idx in range(video_grid_thw[index][0]):
201
  curr_time = curr_timestamp[frame_idx]
202
  video_placeholder += f"<{curr_time:.1f} seconds>"
 
301
  ):
302
  if not isinstance(indices, list):
303
  indices = indices.tolist()
 
304
  b_size = merge_size * focus_size
305
  if len(indices) % b_size != 0:
306
  indices.extend(indices[-1] for _ in range(b_size - len(indices) % b_size)) # type: ignore
 
307
  timestamps = [idx / video_fps for idx in indices]
308
  # @JJJYmmm frames are merged by self.merge_size, \
309
  # so we need to average the timestamps between the first/last frame within the temporal patch
video_processing_qwen3_vl.py CHANGED
@@ -186,9 +186,6 @@ class ZFQwen3VLVideoProcessor(BaseVideoProcessor):
186
  grouped_videos, grouped_videos_index = group_videos_by_shape(videos)
187
  resized_videos_grouped = {}
188
 
189
- for vid in videos:
190
- print(f'vid type: {type(vid)}, vid shape: {vid.shape}')
191
-
192
  for shape, stacked_videos in grouped_videos.items():
193
  B, T, C, H, W = stacked_videos.shape
194
  num_frames, height, width = T, H, W
@@ -282,14 +279,12 @@ class ZFQwen3VLVideoProcessor(BaseVideoProcessor):
282
  if isinstance(video_url_or_urls, list):
283
  return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
284
  else:
285
- video, metadata = load_video(
286
  video_url_or_urls, # type: ignore
287
  backend="torchcodec",
288
  sample_indices_fn=sample_indices_fn,
289
  device=self.processor_device
290
  ) # type: ignore
291
- print(f'Loaded video shape: {video.shape}, dtype: {video.dtype}, device: {video.device}')
292
- return video, metadata
293
 
294
  def normalize(
295
  self,
 
186
  grouped_videos, grouped_videos_index = group_videos_by_shape(videos)
187
  resized_videos_grouped = {}
188
 
 
 
 
189
  for shape, stacked_videos in grouped_videos.items():
190
  B, T, C, H, W = stacked_videos.shape
191
  num_frames, height, width = T, H, W
 
279
  if isinstance(video_url_or_urls, list):
280
  return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
281
  else:
282
+ return load_video(
283
  video_url_or_urls, # type: ignore
284
  backend="torchcodec",
285
  sample_indices_fn=sample_indices_fn,
286
  device=self.processor_device
287
  ) # type: ignore
 
 
288
 
289
  def normalize(
290
  self,