commandeaw committed on
Commit
3081b91
·
verified ·
1 Parent(s): c04d819

eval_videomme: rename --mode wild → mcq (wild kept as deprecated alias); update docstrings to use MCQ-mode terminology

Browse files
Files changed (1) hide show
  1. eval_videomme.py +20 -13
eval_videomme.py CHANGED
@@ -1,24 +1,27 @@
1
  """Standalone Video-MME mini eval for DW-KhotTaeVL-2B-QueryFrames.
2
 
3
- This script reproduces the wild-mode QA-frame numbers reported in the
4
- model card. It is fully self-contained — only depends on the
5
- `dw_queryframes.py` module shipped in this same directory plus
6
- publicly-available datasets / models from Hugging Face.
7
 
8
  Usage::
9
 
10
  pip install torch transformers pillow decord huggingface_hub pandas pyarrow
11
 
12
- # Wild mode (query-aware frame selection)
13
- python eval_videomme.py --mode wild --n-questions 50
14
 
15
  # Stock baseline (uniform 8 frames; matches the stock numbers
16
  # in the model card)
17
  python eval_videomme.py --mode stock-uniform --n-questions 50
18
 
19
- For benchmark-mode evaluation (uses Video-MME's own task_type label
20
- to pick uniform-fallback for Object/Temporal Reasoning), run both
21
- modes above then combine via ``build_hybrid.py``.
 
 
 
22
 
23
  Outputs JSON with ``summary`` + ``results`` keys.
24
  """
@@ -134,9 +137,10 @@ def main() -> int:
134
  ap = argparse.ArgumentParser()
135
  ap.add_argument("--base", default="Qwen/Qwen3-VL-2B-Instruct")
136
  ap.add_argument("--clip-model", default="openai/clip-vit-large-patch14")
137
- ap.add_argument("--mode", choices=["wild", "stock-uniform"],
138
- default="wild",
139
- help="'wild' = query-aware (top-K of N candidates); "
 
140
  "'stock-uniform' = stock baseline (uniform 8 frames)")
141
  ap.add_argument("--tag", default="")
142
  ap.add_argument("--n-questions", type=int, default=50)
@@ -148,6 +152,9 @@ def main() -> int:
148
  help="output JSON path (auto-named if omitted)")
149
  ap.add_argument("--chunks", nargs="+", default=DEFAULT_CHUNKS)
150
  args = ap.parse_args()
 
 
 
151
 
152
  pq_path, zip_paths = download_assets(args.chunks)
153
  video_dir = unzip_chunks(zip_paths)
@@ -171,7 +178,7 @@ def main() -> int:
171
  for i, row in df.iterrows():
172
  video_path = video_dir / f"{row['videoID']}.mp4"
173
 
174
- # Wild mode = query-aware (task_type=None lets QA path run).
175
  # Stock-uniform = pass a known no-frame-gain task name to force
176
  # the uniform-fallback path (matches stock 8f
177
  # baseline behavior).
 
1
  """Standalone Video-MME mini eval for DW-KhotTaeVL-2B-QueryFrames.
2
 
3
+ This script reproduces the MCQ-mode (no task_type) QA-frame numbers
4
+ reported in the model card. It is fully self-contained — only
5
+ depends on the `dw_queryframes.py` module shipped in this same
6
+ directory plus publicly-available datasets / models from Hugging Face.
7
 
8
  Usage::
9
 
10
  pip install torch transformers pillow decord huggingface_hub pandas pyarrow
11
 
12
+ # MCQ mode (query-aware frame selection, no task_type)
13
+ python eval_videomme.py --mode mcq --n-questions 50
14
 
15
  # Stock baseline (uniform 8 frames; matches the stock numbers
16
  # in the model card)
17
  python eval_videomme.py --mode stock-uniform --n-questions 50
18
 
19
+ For task-aware MCQ mode (uses Video-MME's own task_type label to
20
+ route Object/Temporal Reasoning questions to uniform sampling),
21
+ run both modes above, then combine via ``build_hybrid.py``.
22
+
23
+ The legacy CLI value ``--mode wild`` is accepted as a deprecated
24
+ alias for ``--mode mcq``.
25
 
26
  Outputs JSON with ``summary`` + ``results`` keys.
27
  """
 
137
  ap = argparse.ArgumentParser()
138
  ap.add_argument("--base", default="Qwen/Qwen3-VL-2B-Instruct")
139
  ap.add_argument("--clip-model", default="openai/clip-vit-large-patch14")
140
+ ap.add_argument("--mode", choices=["mcq", "wild", "stock-uniform"],
141
+ default="mcq",
142
+ help="'mcq' = query-aware MCQ mode (default); "
143
+ "'wild' = deprecated alias for 'mcq'; "
144
  "'stock-uniform' = stock baseline (uniform 8 frames)")
145
  ap.add_argument("--tag", default="")
146
  ap.add_argument("--n-questions", type=int, default=50)
 
152
  help="output JSON path (auto-named if omitted)")
153
  ap.add_argument("--chunks", nargs="+", default=DEFAULT_CHUNKS)
154
  args = ap.parse_args()
155
+ # Legacy alias: 'wild' → 'mcq' (deprecated).
156
+ if args.mode == "wild":
157
+ args.mode = "mcq"
158
 
159
  pq_path, zip_paths = download_assets(args.chunks)
160
  video_dir = unzip_chunks(zip_paths)
 
178
  for i, row in df.iterrows():
179
  video_path = video_dir / f"{row['videoID']}.mp4"
180
 
181
+ # MCQ mode = query-aware (task_type=None lets QA path run).
182
  # Stock-uniform = pass a known no-frame-gain task name to force
183
  # the uniform-fallback path (matches stock 8f
184
  # baseline behavior).