Improve: Add descriptive tags and clean up README structure
#1
by
nielsr
HF Staff
- opened
README.md
CHANGED
|
@@ -5,9 +5,11 @@ library_name: transformers
|
|
| 5 |
license: apache-2.0
|
| 6 |
metrics:
|
| 7 |
- accuracy
|
|
|
|
| 8 |
tags:
|
| 9 |
- multimodal
|
| 10 |
-
|
|
|
|
| 11 |
model-index:
|
| 12 |
- name: InternVL2.5_HiCo_R16
|
| 13 |
results:
|
|
@@ -61,7 +63,6 @@ model-index:
|
|
| 61 |
value: 64.9
|
| 62 |
name: accuracy
|
| 63 |
verified: true
|
| 64 |
-
|
| 65 |
---
|
| 66 |
|
| 67 |
# 📕InternVL2.5_HiCo_R16⚡
|
|
@@ -233,7 +234,8 @@ with torch.no_grad():
|
|
| 233 |
|
| 234 |
pixel_values, num_patches_list = load_video(video_path, num_segments=num_segments, max_num=1, get_frame_by_duration=False)
|
| 235 |
pixel_values = pixel_values.to(torch.bfloat16).to(model.device)
|
| 236 |
-
video_prefix = "".join([f"Frame{i+1}: <image>\n" for i in range(len(num_patches_list))])
|
|
|
|
| 237 |
# single-turn conversation
|
| 238 |
question1 = "Describe this video in detail."
|
| 239 |
question = video_prefix + question1
|
|
|
|
| 5 |
license: apache-2.0
|
| 6 |
metrics:
|
| 7 |
- accuracy
|
| 8 |
+
pipeline_tag: video-text-to-text
|
| 9 |
tags:
|
| 10 |
- multimodal
|
| 11 |
+
- video-understanding
|
| 12 |
+
- long-context
|
| 13 |
model-index:
|
| 14 |
- name: InternVL2.5_HiCo_R16
|
| 15 |
results:
|
|
|
|
| 63 |
value: 64.9
|
| 64 |
name: accuracy
|
| 65 |
verified: true
|
|
|
|
| 66 |
---
|
| 67 |
|
| 68 |
# 📕InternVL2.5_HiCo_R16⚡
|
|
|
|
| 234 |
|
| 235 |
pixel_values, num_patches_list = load_video(video_path, num_segments=num_segments, max_num=1, get_frame_by_duration=False)
|
| 236 |
pixel_values = pixel_values.to(torch.bfloat16).to(model.device)
|
| 237 |
+
video_prefix = "".join([f"Frame{i+1}: <image>
|
| 238 |
+
" for i in range(len(num_patches_list))])
|
| 239 |
# single-turn conversation
|
| 240 |
question1 = "Describe this video in detail."
|
| 241 |
question = video_prefix + question1
|