TYTTYTTYT
/

zf_qwen3_vl_processor

TYTTYTTYT committed 28 days ago

Commit

12c3a2e

verified ·

1 Parent(s): 693246c

remove useless logs

Files changed (2) hide show

processing_qwen3_vl.py CHANGED Viewed

@@ -195,10 +195,8 @@ class ZFQwen3VLProcessor(ProcessorMixin):
                         self.video_processor.focus_size, # type: ignore
                     )
-                    print(len(curr_timestamp), curr_timestamp)
                     video_placeholder = ""
                     frame_seqlen = video_grid_thw[index][1:].prod() // merge_length
-                    print(video_grid_thw)
                     for frame_idx in range(video_grid_thw[index][0]):
                         curr_time = curr_timestamp[frame_idx]
                         video_placeholder += f"<{curr_time:.1f} seconds>"
@@ -303,11 +301,9 @@ class ZFQwen3VLProcessor(ProcessorMixin):
     ):
         if not isinstance(indices, list):
             indices = indices.tolist()
-        print(len(indices), indices)
         b_size = merge_size * focus_size
         if len(indices) % b_size != 0:
             indices.extend(indices[-1] for _ in range(b_size - len(indices) % b_size)) # type: ignore
-        print(len(indices), indices)
         timestamps = [idx / video_fps for idx in indices]
         # @JJJYmmm frames are merged by self.merge_size, \
         # so we need to average the timestamps between the first/last frame within the temporal patch

                         self.video_processor.focus_size, # type: ignore
                     )
                     video_placeholder = ""
                     frame_seqlen = video_grid_thw[index][1:].prod() // merge_length
                     for frame_idx in range(video_grid_thw[index][0]):
                         curr_time = curr_timestamp[frame_idx]
                         video_placeholder += f"<{curr_time:.1f} seconds>"
     ):
         if not isinstance(indices, list):
             indices = indices.tolist()
         b_size = merge_size * focus_size
         if len(indices) % b_size != 0:
             indices.extend(indices[-1] for _ in range(b_size - len(indices) % b_size)) # type: ignore
         timestamps = [idx / video_fps for idx in indices]
         # @JJJYmmm frames are merged by self.merge_size, \
         # so we need to average the timestamps between the first/last frame within the temporal patch

video_processing_qwen3_vl.py CHANGED Viewed

@@ -186,9 +186,6 @@ class ZFQwen3VLVideoProcessor(BaseVideoProcessor):
         grouped_videos, grouped_videos_index = group_videos_by_shape(videos)
         resized_videos_grouped = {}
-        for vid in videos:
-            print(f'vid type: {type(vid)}, vid shape: {vid.shape}')
         for shape, stacked_videos in grouped_videos.items():
             B, T, C, H, W = stacked_videos.shape
             num_frames, height, width = T, H, W
@@ -282,14 +279,12 @@ class ZFQwen3VLVideoProcessor(BaseVideoProcessor):
         if isinstance(video_url_or_urls, list):
             return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
         else:
-            video, metadata = load_video(
                 video_url_or_urls, # type: ignore
                 backend="torchcodec",
                 sample_indices_fn=sample_indices_fn,
                 device=self.processor_device
             ) # type: ignore
-            print(f'Loaded video shape: {video.shape}, dtype: {video.dtype}, device: {video.device}')
-            return video, metadata
     def normalize(
         self,

         grouped_videos, grouped_videos_index = group_videos_by_shape(videos)
         resized_videos_grouped = {}
         for shape, stacked_videos in grouped_videos.items():
             B, T, C, H, W = stacked_videos.shape
             num_frames, height, width = T, H, W
         if isinstance(video_url_or_urls, list):
             return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
         else:
+            return load_video(
                 video_url_or_urls, # type: ignore
                 backend="torchcodec",
                 sample_indices_fn=sample_indices_fn,
                 device=self.processor_device
             ) # type: ignore
     def normalize(
         self,