remove useless logs
Browse files
- processing_qwen3_vl.py +0 -4
- video_processing_qwen3_vl.py +1 -6
processing_qwen3_vl.py
CHANGED
|
@@ -195,10 +195,8 @@ class ZFQwen3VLProcessor(ProcessorMixin):
|
|
| 195 |
self.video_processor.focus_size, # type: ignore
|
| 196 |
)
|
| 197 |
|
| 198 |
-
print(len(curr_timestamp), curr_timestamp)
|
| 199 |
video_placeholder = ""
|
| 200 |
frame_seqlen = video_grid_thw[index][1:].prod() // merge_length
|
| 201 |
-
print(video_grid_thw)
|
| 202 |
for frame_idx in range(video_grid_thw[index][0]):
|
| 203 |
curr_time = curr_timestamp[frame_idx]
|
| 204 |
video_placeholder += f"<{curr_time:.1f} seconds>"
|
|
@@ -303,11 +301,9 @@ class ZFQwen3VLProcessor(ProcessorMixin):
|
|
| 303 |
):
|
| 304 |
if not isinstance(indices, list):
|
| 305 |
indices = indices.tolist()
|
| 306 |
-
print(len(indices), indices)
|
| 307 |
b_size = merge_size * focus_size
|
| 308 |
if len(indices) % b_size != 0:
|
| 309 |
indices.extend(indices[-1] for _ in range(b_size - len(indices) % b_size)) # type: ignore
|
| 310 |
-
print(len(indices), indices)
|
| 311 |
timestamps = [idx / video_fps for idx in indices]
|
| 312 |
# @JJJYmmm frames are merged by self.merge_size, \
|
| 313 |
# so we need to average the timestamps between the first/last frame within the temporal patch
|
|
|
|
| 195 |
self.video_processor.focus_size, # type: ignore
|
| 196 |
)
|
| 197 |
|
|
|
|
| 198 |
video_placeholder = ""
|
| 199 |
frame_seqlen = video_grid_thw[index][1:].prod() // merge_length
|
|
|
|
| 200 |
for frame_idx in range(video_grid_thw[index][0]):
|
| 201 |
curr_time = curr_timestamp[frame_idx]
|
| 202 |
video_placeholder += f"<{curr_time:.1f} seconds>"
|
|
|
|
| 301 |
):
|
| 302 |
if not isinstance(indices, list):
|
| 303 |
indices = indices.tolist()
|
|
|
|
| 304 |
b_size = merge_size * focus_size
|
| 305 |
if len(indices) % b_size != 0:
|
| 306 |
indices.extend(indices[-1] for _ in range(b_size - len(indices) % b_size)) # type: ignore
|
|
|
|
| 307 |
timestamps = [idx / video_fps for idx in indices]
|
| 308 |
# @JJJYmmm frames are merged by self.merge_size, \
|
| 309 |
# so we need to average the timestamps between the first/last frame within the temporal patch
|
video_processing_qwen3_vl.py
CHANGED
|
@@ -186,9 +186,6 @@ class ZFQwen3VLVideoProcessor(BaseVideoProcessor):
|
|
| 186 |
grouped_videos, grouped_videos_index = group_videos_by_shape(videos)
|
| 187 |
resized_videos_grouped = {}
|
| 188 |
|
| 189 |
-
for vid in videos:
|
| 190 |
-
print(f'vid type: {type(vid)}, vid shape: {vid.shape}')
|
| 191 |
-
|
| 192 |
for shape, stacked_videos in grouped_videos.items():
|
| 193 |
B, T, C, H, W = stacked_videos.shape
|
| 194 |
num_frames, height, width = T, H, W
|
|
@@ -282,14 +279,12 @@ class ZFQwen3VLVideoProcessor(BaseVideoProcessor):
|
|
| 282 |
if isinstance(video_url_or_urls, list):
|
| 283 |
return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
|
| 284 |
else:
|
| 285 |
-
video, metadata = load_video(
|
| 286 |
video_url_or_urls, # type: ignore
|
| 287 |
backend="torchcodec",
|
| 288 |
sample_indices_fn=sample_indices_fn,
|
| 289 |
device=self.processor_device
|
| 290 |
) # type: ignore
|
| 291 |
-
print(f'Loaded video shape: {video.shape}, dtype: {video.dtype}, device: {video.device}')
|
| 292 |
-
return video, metadata
|
| 293 |
|
| 294 |
def normalize(
|
| 295 |
self,
|
|
|
|
| 186 |
grouped_videos, grouped_videos_index = group_videos_by_shape(videos)
|
| 187 |
resized_videos_grouped = {}
|
| 188 |
|
|
|
|
|
|
|
|
|
|
| 189 |
for shape, stacked_videos in grouped_videos.items():
|
| 190 |
B, T, C, H, W = stacked_videos.shape
|
| 191 |
num_frames, height, width = T, H, W
|
|
|
|
| 279 |
if isinstance(video_url_or_urls, list):
|
| 280 |
return list(zip(*[self.fetch_videos(x, sample_indices_fn=sample_indices_fn) for x in video_url_or_urls]))
|
| 281 |
else:
|
| 282 |
+
return load_video(
|
| 283 |
video_url_or_urls, # type: ignore
|
| 284 |
backend="torchcodec",
|
| 285 |
sample_indices_fn=sample_indices_fn,
|
| 286 |
device=self.processor_device
|
| 287 |
) # type: ignore
|
|
|
|
|
|
|
| 288 |
|
| 289 |
def normalize(
|
| 290 |
self,
|