commandeaw
/

DW-KhotTaeVL-2B-QueryFrames

@@ -1,24 +1,27 @@
 """Standalone Video-MME mini eval for DW-KhotTaeVL-2B-QueryFrames.
-This script reproduces the wild-mode QA-frame numbers reported in the
-model card. It is fully self-contained — only depends on the
-`dw_queryframes.py` module shipped in this same directory plus
-publicly-available datasets / models from Hugging Face.
 Usage::
     pip install torch transformers pillow decord huggingface_hub pandas pyarrow
-    # Wild mode (query-aware frame selection)
-    python eval_videomme.py --mode wild --n-questions 50
     # Stock baseline (uniform 8 frames; matches the stock numbers
     # in the model card)
     python eval_videomme.py --mode stock-uniform --n-questions 50
-For benchmark-mode evaluation (uses Video-MME's own task_type label
-to pick uniform-fallback for Object/Temporal Reasoning), run both
-modes above then combine via ``build_hybrid.py``.
 Outputs JSON with ``summary`` + ``results`` keys.
 """
@@ -134,9 +137,10 @@ def main() -> int:
     ap = argparse.ArgumentParser()
     ap.add_argument("--base", default="Qwen/Qwen3-VL-2B-Instruct")
     ap.add_argument("--clip-model", default="openai/clip-vit-large-patch14")
-    ap.add_argument("--mode", choices=["wild", "stock-uniform"],
-                    default="wild",
-                    help="'wild' = query-aware (top-K of N candidates); "
                          "'stock-uniform' = stock baseline (uniform 8 frames)")
     ap.add_argument("--tag", default="")
     ap.add_argument("--n-questions", type=int, default=50)
@@ -148,6 +152,9 @@ def main() -> int:
                     help="output JSON path (auto-named if omitted)")
     ap.add_argument("--chunks", nargs="+", default=DEFAULT_CHUNKS)
     args = ap.parse_args()
     pq_path, zip_paths = download_assets(args.chunks)
     video_dir = unzip_chunks(zip_paths)
@@ -171,7 +178,7 @@ def main() -> int:
     for i, row in df.iterrows():
         video_path = video_dir / f"{row['videoID']}.mp4"
-        # Wild mode  = query-aware (task_type=None lets QA path run).
         # Stock-uniform = pass a known no-frame-gain task name to force
         #                 the uniform-fallback path (matches stock 8f
         #                 baseline behavior).

 """Standalone Video-MME mini eval for DW-KhotTaeVL-2B-QueryFrames.
+This script reproduces the MCQ-mode (no task_type) QA-frame numbers
+reported in the model card. It is fully self-contained: it depends
+only on the `dw_queryframes.py` module shipped in this same
+directory plus publicly available datasets / models from Hugging Face.
 Usage::
     pip install torch transformers pillow decord huggingface_hub pandas pyarrow
+    # MCQ mode (query-aware frame selection, no task_type)
+    python eval_videomme.py --mode mcq --n-questions 50
     # Stock baseline (uniform 8 frames; matches the stock numbers
     # in the model card)
     python eval_videomme.py --mode stock-uniform --n-questions 50
+For task-aware MCQ mode (uses Video-MME's own task_type label to
+route Object/Temporal Reasoning questions to uniform sampling),
+run both modes above, then combine the results via ``build_hybrid.py``.
+The legacy CLI value ``--mode wild`` is accepted as a deprecated
+alias for ``--mode mcq``.
 Outputs JSON with ``summary`` + ``results`` keys.
 """
     ap = argparse.ArgumentParser()
     ap.add_argument("--base", default="Qwen/Qwen3-VL-2B-Instruct")
     ap.add_argument("--clip-model", default="openai/clip-vit-large-patch14")
+    ap.add_argument("--mode", choices=["mcq", "wild", "stock-uniform"],
+                    default="mcq",
+                    help="'mcq' = query-aware MCQ mode (default); "
+                         "'wild' = deprecated alias for 'mcq'; "
                          "'stock-uniform' = stock baseline (uniform 8 frames)")
     ap.add_argument("--tag", default="")
     ap.add_argument("--n-questions", type=int, default=50)
                     help="output JSON path (auto-named if omitted)")
     ap.add_argument("--chunks", nargs="+", default=DEFAULT_CHUNKS)
     args = ap.parse_args()
+    # Legacy alias: 'wild' → 'mcq' (deprecated).
+    if args.mode == "wild":
+        args.mode = "mcq"
     pq_path, zip_paths = download_assets(args.chunks)
     video_dir = unzip_chunks(zip_paths)
     for i, row in df.iterrows():
         video_path = video_dir / f"{row['videoID']}.mp4"
+        # MCQ mode = query-aware (task_type=None lets the QA path run).
         # Stock-uniform = pass a known no-frame-gain task name to force
         #                 the uniform-fallback path (matches the stock
         #                 8-frame baseline behavior).