wentao committed on
Commit
62ef3b8
·
1 Parent(s): e55e1be
Files changed (2) hide show
  1. app.py +2 -2
  2. static/eval_results/result.json +17 -102
app.py CHANGED
@@ -34,10 +34,10 @@ def render_html_leaderboard(df):
34
  def build_interface():
35
  with gr.Blocks() as demo:
36
  gr.Markdown("## 🎥 LVU VLM Leaderboard")
37
- gr.Markdown("Benchmarks: Video-MME, MLVU, LongVideoBench")
38
 
39
  with gr.Row():
40
- sort_metric = gr.Dropdown(choices=["Video-MME", "MLVU", "LongVideoBench"], label="Sort by", value="Video-MME")
41
  descending = gr.Checkbox(label="Descending", value=True)
42
 
43
  with gr.Row():
 
34
  def build_interface():
35
  with gr.Blocks() as demo:
36
  gr.Markdown("## 🎥 LVU VLM Leaderboard")
37
+ gr.Markdown("Benchmarks: Video-MME, MLVU, LVBench, LongVideoBench")
38
 
39
  with gr.Row():
40
+ sort_metric = gr.Dropdown(choices=["Video-MME", "MLVU", "LVBench", "LongVideoBench"], label="Sort by", value="Video-MME")
41
  descending = gr.Checkbox(label="Descending", value=True)
42
 
43
  with gr.Row():
static/eval_results/result.json CHANGED
@@ -1,104 +1,19 @@
1
  [
2
- {
3
- "Model": "GPT-4V",
4
- "Video-MME": 59.9,
5
- "MLVU": 49.2,
6
- "LongVideoBench": 59.1
7
- },
8
- {
9
- "Model": "GPT-4o",
10
- "Video-MME": 71.9,
11
- "MLVU": 64.6,
12
- "LongVideoBench": 66.7
13
- },
14
- {
15
- "Model": "Gemini-1.5-Pro",
16
- "Video-MME": 75.0,
17
- "MLVU": 62.9,
18
- "LongVideoBench": 64.0
19
- },
20
- {
21
- "Model": "VideoChat2",
22
- "Video-MME": 39.5,
23
- "MLVU": 47.9,
24
- "LongVideoBench": 39.3
25
- },
26
- {
27
- "Model": "ShareGPT4Video",
28
- "Video-MME": 39.9,
29
- "MLVU": 46.4,
30
- "LongVideoBench": 39.7
31
- },
32
- {
33
- "Model": "LongVA",
34
- "Video-MME": 52.4,
35
- "MLVU": 56.3,
36
- "LongVideoBench": 51.8
37
- },
38
- {
39
- "Model": "Video-CCAM",
40
- "Video-MME": 50.3,
41
- "MLVU": 58.5,
42
- "LongVideoBench": null
43
- },
44
- {
45
- "Model": "Kangaroo",
46
- "Video-MME": 56.0,
47
- "MLVU": 61.0,
48
- "LongVideoBench": 54.8
49
- },
50
- {
51
- "Model": "InternVL2",
52
- "Video-MME": 58.2,
53
- "MLVU": 48.1,
54
- "LongVideoBench": 51.8
55
- },
56
- {
57
- "Model": "LLaVA-OneVision",
58
- "Video-MME": 58.2,
59
- "MLVU": 62.6,
60
- "LongVideoBench": 56.4
61
- },
62
- {
63
- "Model": "Qwen2-VL",
64
- "Video-MME": 63.3,
65
- "MLVU": 64.2,
66
- "LongVideoBench": 52.4
67
- },
68
- {
69
- "Model": "Phi-4-Mini",
70
- "Video-MME": 55.0,
71
- "MLVU": 60.1,
72
- "LongVideoBench": 46.7
73
- },
74
- {
75
- "Model": "LLaVA-Mini",
76
- "Video-MME": 40.3,
77
- "MLVU": 44.3,
78
- "LongVideoBench": 19.3
79
- },
80
- {
81
- "Model": "LongLLaVA",
82
- "Video-MME": 51.6,
83
- "MLVU": 53.3,
84
- "LongVideoBench": 42.1
85
- },
86
- {
87
- "Model": "LongVU",
88
- "Video-MME": 55.3,
89
- "MLVU": 65.4,
90
- "LongVideoBench": 53.5
91
- },
92
- {
93
- "Model": "Video-XL",
94
- "Video-MME": 55.5,
95
- "MLVU": 64.9,
96
- "LongVideoBench": 50.7
97
- },
98
- {
99
- "Model": "VAMBA",
100
- "Video-MME": 57.8,
101
- "MLVU": 65.9,
102
- "LongVideoBench": 55.9
103
- }
104
  ]
 
1
  [
2
+ { "Model": "GPT-4V", "Video-MME": 59.9, "MLVU": 49.2, "LongVideoBench": 59.1, "LVBench": null },
3
+ { "Model": "GPT-4o", "Video-MME": 71.9, "MLVU": 64.6, "LongVideoBench": 66.7, "LVBench": 34.7 },
4
+ { "Model": "Gemini-1.5-Pro", "Video-MME": 75.0, "MLVU": 62.9, "LongVideoBench": 64.0, "LVBench": 33.1 },
5
+ { "Model": "VideoChat2", "Video-MME": 39.5, "MLVU": 47.9, "LongVideoBench": 39.3, "LVBench": null },
6
+ { "Model": "ShareGPT4Video", "Video-MME": 39.9, "MLVU": 46.4, "LongVideoBench": 39.7, "LVBench": null },
7
+ { "Model": "LongVA", "Video-MME": 52.4, "MLVU": 56.3, "LongVideoBench": 51.8, "LVBench": null },
8
+ { "Model": "Video-CCAM", "Video-MME": 50.3, "MLVU": 58.5, "LongVideoBench": null, "LVBench": null },
9
+ { "Model": "Kangaroo", "Video-MME": 56.0, "MLVU": 61.0, "LongVideoBench": 54.8, "LVBench": null },
10
+ { "Model": "InternVL2", "Video-MME": 58.2, "MLVU": 48.1, "LongVideoBench": 51.8, "LVBench": null },
11
+ { "Model": "LLaVA-OneVision", "Video-MME": 58.2, "MLVU": 62.6, "LongVideoBench": 56.4, "LVBench": null },
12
+ { "Model": "Qwen2-VL", "Video-MME": 63.3, "MLVU": 64.2, "LongVideoBench": 52.4, "LVBench": 42.0 },
13
+ { "Model": "Phi-4-Mini", "Video-MME": 55.0, "MLVU": 60.1, "LongVideoBench": 46.7, "LVBench": null },
14
+ { "Model": "LLaVA-Mini", "Video-MME": 40.3, "MLVU": 44.3, "LongVideoBench": 19.3, "LVBench": 17.6 },
15
+ { "Model": "LongLLaVA", "Video-MME": 51.6, "MLVU": 53.3, "LongVideoBench": 42.1, "LVBench": 31.2 },
16
+ { "Model": "LongVU", "Video-MME": 55.3, "MLVU": 65.4, "LongVideoBench": 53.5, "LVBench": 37.8 },
17
+ { "Model": "Video-XL", "Video-MME": 55.5, "MLVU": 64.9, "LongVideoBench": 50.7, "LVBench": 36.8 },
18
+ { "Model": "VAMBA", "Video-MME": 57.8, "MLVU": 65.9, "LongVideoBench": 55.9, "LVBench": 42.1 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ]