Improve: Add descriptive tags and clean up README structure

#1
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +5 -3
README.md CHANGED
@@ -5,9 +5,11 @@ library_name: transformers
5
  license: apache-2.0
6
  metrics:
7
  - accuracy
 
8
  tags:
9
  - multimodal
10
- pipeline_tag: video-text-to-text
 
11
  model-index:
12
  - name: InternVL2.5_HiCo_R16
13
  results:
@@ -61,7 +63,6 @@ model-index:
61
  value: 64.9
62
  name: accuracy
63
  verified: true
64
-
65
  ---
66
 
67
  # 📕InternVL2.5_HiCo_R16⚡
@@ -233,7 +234,8 @@ with torch.no_grad():
233
 
234
  pixel_values, num_patches_list = load_video(video_path, num_segments=num_segments, max_num=1, get_frame_by_duration=False)
235
  pixel_values = pixel_values.to(torch.bfloat16).to(model.device)
236
- video_prefix = "".join([f"Frame{i+1}: <image>\n" for i in range(len(num_patches_list))])
 
237
  # single-turn conversation
238
  question1 = "Describe this video in detail."
239
  question = video_prefix + question1
 
5
  license: apache-2.0
6
  metrics:
7
  - accuracy
8
+ pipeline_tag: video-text-to-text
9
  tags:
10
  - multimodal
11
+ - video-understanding
12
+ - long-context
13
  model-index:
14
  - name: InternVL2.5_HiCo_R16
15
  results:
 
63
  value: 64.9
64
  name: accuracy
65
  verified: true
 
66
  ---
67
 
68
  # 📕InternVL2.5_HiCo_R16⚡
 
234
 
235
  pixel_values, num_patches_list = load_video(video_path, num_segments=num_segments, max_num=1, get_frame_by_duration=False)
236
  pixel_values = pixel_values.to(torch.bfloat16).to(model.device)
237
+ video_prefix = "".join([f"Frame{i+1}: <image>\n"
238
+     for i in range(len(num_patches_list))])
239
  # single-turn conversation
240
  question1 = "Describe this video in detail."
241
  question = video_prefix + question1