update files
Browse files- processing_logics.py +0 -12
processing_logics.py
CHANGED
|
@@ -57,7 +57,6 @@ def process_video_with_decord(video_file, data_args):
|
|
| 57 |
video = torch.zeros((10, 720, 720, 3)).numpy()
|
| 58 |
print(f"load {video_file} error, use empty tensor instead.")
|
| 59 |
frame_time = ",".join([f"{i:.2f}s" for i in frame_time])
|
| 60 |
-
print("load video : \t", video.shape)
|
| 61 |
num_frames_to_sample = num_frames = len(frame_idx)
|
| 62 |
# https://github.com/dmlc/decord/issues/208
|
| 63 |
vr.seek(0)
|
|
@@ -231,17 +230,9 @@ def preprocess_qwen(sources, tokenizer: PreTrainedTokenizer,enable_thinking: boo
|
|
| 231 |
conv = {"role" : role, "content" : content}
|
| 232 |
messages.append(conv)
|
| 233 |
|
| 234 |
-
|
| 235 |
-
print(f"enable_thinking: {enable_thinking}")
|
| 236 |
input_id = tokenizer.apply_chat_template(messages, add_generation_prompt=True, enable_thinking=enable_thinking)
|
| 237 |
|
| 238 |
|
| 239 |
-
print(f"---------------------------------")
|
| 240 |
-
print(f"****COMPETE_CONV****\n{messages}")
|
| 241 |
-
print(f"****TOKENIZE_RESULTS****\n{tokenizer.apply_chat_template(messages,tokenize=False,add_generation_prompt=True)}")
|
| 242 |
-
print(f"****INPUT_IDS****\n{input_id}")
|
| 243 |
-
print(f"---------------------------------")
|
| 244 |
-
|
| 245 |
for idx, encode_id in enumerate(input_id):
|
| 246 |
if encode_id == image_token_index:
|
| 247 |
input_id[idx] = IMAGE_TOKEN_INDEX
|
|
@@ -325,7 +316,6 @@ class LogicsProcessor(ProcessorMixin):
|
|
| 325 |
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
|
| 326 |
**kwargs
|
| 327 |
) -> BatchFeature:
|
| 328 |
-
print(text)
|
| 329 |
device='cuda'
|
| 330 |
if not isinstance(images, list):
|
| 331 |
images = [images]
|
|
@@ -364,14 +354,12 @@ class LogicsProcessor(ProcessorMixin):
|
|
| 364 |
image_inputs["image_sizes"] = [video_frames[0].size]
|
| 365 |
image_inputs["modalities"] = ["video"]
|
| 366 |
|
| 367 |
-
print(image_inputs)
|
| 368 |
|
| 369 |
|
| 370 |
# ---------------- text ----------------
|
| 371 |
text_ids = self.text_tokenizer(question=text, img_path=images, tokenizer=self.tokenizer, enable_thinking=self.config.enable_thinking)
|
| 372 |
text_inputs = {}
|
| 373 |
text_inputs["text_inputs"] = text_ids
|
| 374 |
-
print(text_inputs)
|
| 375 |
|
| 376 |
if images is not None:
|
| 377 |
return BatchFeature(data={**text_inputs, **image_inputs})
|
|
|
|
| 57 |
video = torch.zeros((10, 720, 720, 3)).numpy()
|
| 58 |
print(f"load {video_file} error, use empty tensor instead.")
|
| 59 |
frame_time = ",".join([f"{i:.2f}s" for i in frame_time])
|
|
|
|
| 60 |
num_frames_to_sample = num_frames = len(frame_idx)
|
| 61 |
# https://github.com/dmlc/decord/issues/208
|
| 62 |
vr.seek(0)
|
|
|
|
| 230 |
conv = {"role" : role, "content" : content}
|
| 231 |
messages.append(conv)
|
| 232 |
|
|
|
|
|
|
|
| 233 |
input_id = tokenizer.apply_chat_template(messages, add_generation_prompt=True, enable_thinking=enable_thinking)
|
| 234 |
|
| 235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
for idx, encode_id in enumerate(input_id):
|
| 237 |
if encode_id == image_token_index:
|
| 238 |
input_id[idx] = IMAGE_TOKEN_INDEX
|
|
|
|
| 316 |
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
|
| 317 |
**kwargs
|
| 318 |
) -> BatchFeature:
|
|
|
|
| 319 |
device='cuda'
|
| 320 |
if not isinstance(images, list):
|
| 321 |
images = [images]
|
|
|
|
| 354 |
image_inputs["image_sizes"] = [video_frames[0].size]
|
| 355 |
image_inputs["modalities"] = ["video"]
|
| 356 |
|
|
|
|
| 357 |
|
| 358 |
|
| 359 |
# ---------------- text ----------------
|
| 360 |
text_ids = self.text_tokenizer(question=text, img_path=images, tokenizer=self.tokenizer, enable_thinking=self.config.enable_thinking)
|
| 361 |
text_inputs = {}
|
| 362 |
text_inputs["text_inputs"] = text_ids
|
|
|
|
| 363 |
|
| 364 |
if images is not None:
|
| 365 |
return BatchFeature(data={**text_inputs, **image_inputs})
|