Fix: `cont` is not defined, and the wrong argument is passed to `modalities` in `model.generate`
#2
by
snowleopard-mllm
- opened
README.md
CHANGED
|
@@ -63,8 +63,8 @@ device = "cuda"
|
|
| 63 |
device_map = "auto"
|
| 64 |
tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map)
|
| 65 |
model.eval()
|
| 66 |
-
video_path = ""
|
| 67 |
-
max_frames_num =
|
| 68 |
video,frame_time,video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
|
| 69 |
video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda().bfloat16()
|
| 70 |
video = [video]
|
|
@@ -77,11 +77,11 @@ conv.append_message(conv.roles[0], question)
|
|
| 77 |
conv.append_message(conv.roles[1], None)
|
| 78 |
prompt_question = conv.get_prompt()
|
| 79 |
input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
|
| 80 |
-
|
| 81 |
inputs=input_ids,
|
| 82 |
images=input_data[0][0],
|
| 83 |
images_highres=input_data[0][1],
|
| 84 |
-
modalities=
|
| 85 |
do_sample=False,
|
| 86 |
temperature=0,
|
| 87 |
max_new_tokens=128,
|
|
|
|
| 63 |
device_map = "auto"
|
| 64 |
tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map)
|
| 65 |
model.eval()
|
| 66 |
+
video_path = "your_path_to_a_video_file"
|
| 67 |
+
max_frames_num = 64
|
| 68 |
video,frame_time,video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
|
| 69 |
video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda().bfloat16()
|
| 70 |
video = [video]
|
|
|
|
| 77 |
conv.append_message(conv.roles[1], None)
|
| 78 |
prompt_question = conv.get_prompt()
|
| 79 |
input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
|
| 80 |
+
cont = model.generate(
|
| 81 |
inputs=input_ids,
|
| 82 |
images=input_data[0][0],
|
| 83 |
images_highres=input_data[0][1],
|
| 84 |
+
modalities=input_data[2],
|
| 85 |
do_sample=False,
|
| 86 |
temperature=0,
|
| 87 |
max_new_tokens=128,
|