"""Exploration probe: does the v10_r2 model EVER recover the missed (shorter) segment on under-counted multi-seg videos if we sample at high temperature? Distinguishes policy/exploration bottleneck (model CAN find it, just doesn't at temp=0) from a perception wall (never finds it across N samples). Reads /tmp/undercounted.json (sample_id, generator, gt, t0_pred, missed). For each video: N high-temp samples; record every sample's parsed segments. Output: /rank_.jsonl one record per video with all N sample preds. """ import argparse, json, os, sys, time import torch from transformers import AutoProcessor, GenerationConfig, Qwen2_5_VLForConditionalGeneration sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from src.open_r1.reward import parse_segments from src.open_r1.trainer.grpo_trainer_video_GT_soft import SYSTEM_PROMPT, get_question_template CACHE = "/mnt/local-fast/zhangt/forensics_grpo_cache_uniform3584_fps2.0" def get_args(): p = argparse.ArgumentParser() p.add_argument("--model_path", required=True) p.add_argument("--rank", type=int, default=0) p.add_argument("--world_size", type=int, default=1) p.add_argument("--device", type=int, default=0) p.add_argument("--out_dir", default="probe_explore_v10r2") p.add_argument("--n_samples", type=int, default=8) p.add_argument("--temperature", type=float, default=1.0) p.add_argument("--max_new_tokens", type=int, default=64) return p.parse_args() def main(): args = get_args() device = f"cuda:{args.device}" os.makedirs(args.out_dir, exist_ok=True) os.environ["FORENSICS_COT"] = "false" videos = json.load(open("/tmp/undercounted.json")) videos = [v for i, v in enumerate(videos) if i % args.world_size == args.rank] print(f"[rank {args.rank}] {len(videos)} videos N={args.n_samples} temp={args.temperature}", flush=True) model = Qwen2_5_VLForConditionalGeneration.from_pretrained( args.model_path, torch_dtype=torch.bfloat16, use_sliding_window=True, attn_implementation="flash_attention_2", device_map=device) model.eval() model.config.use_cache = True if hasattr(model, "generation_config"): model.generation_config.use_cache = True processor = AutoProcessor.from_pretrained(args.model_path) question = get_question_template() gen_cfg = GenerationConfig( max_new_tokens=args.max_new_tokens, do_sample=True, temperature=args.temperature, top_p=1.0, pad_token_id=processor.tokenizer.pad_token_id, use_cache=True) fout = open(os.path.join(args.out_dir, f"rank_{args.rank}.jsonl"), "w") t0 = time.time(); done = 0 for v in videos: sd = os.path.join(CACHE, "test", v["generator"], v["sample_id"]) vp = os.path.join(sd, "video_inputs.pt") if not os.path.exists(vp): continue try: video_inputs = torch.load(vp, weights_only=False) video_kwargs = json.load(open(os.path.join(sd, "video_kwargs.json"))) chat = [ {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]}, {"role": "user", "content": [ {"type": "video", "video": "placeholder"}, {"type": "text", "text": question}]}, ] text = processor.apply_chat_template(chat, tokenize=False, add_generation_prompt=True) inputs = processor(text=[text], videos=[video_inputs[0]], fps=[video_kwargs["fps"][0]], padding=True, return_tensors="pt", padding_side="left", add_special_tokens=False) inputs = {k: (val.to(device) if hasattr(val, "to") else val) for k, val in inputs.items()} sample_preds = [] for _ in range(args.n_samples): with torch.no_grad(): out_ids = model.generate(**inputs, generation_config=gen_cfg, use_cache=True) gen_ids = out_ids[0][inputs["input_ids"].shape[1]:] txt = processor.tokenizer.decode(gen_ids, skip_special_tokens=True) sample_preds.append(parse_segments(txt)) except Exception as e: print(f" [skip] {v['sample_id']}: {type(e).__name__}: {e}", flush=True) continue rec = {"sample_id": v["sample_id"], "generator": v["generator"], "gt": v["gt"], "t0_pred": v["t0_pred"], "missed": v["missed"], "sample_preds": sample_preds} fout.write(json.dumps(rec) + "\n"); fout.flush() done += 1 if done % 5 == 0: r = done / max(1e-6, time.time() - t0) print(f" rank={args.rank} done={done}/{len(videos)} rate={r:.2f}/s", flush=True) fout.close() print(f"[rank {args.rank}] DONE done={done} elapsed={time.time()-t0:.0f}s", flush=True) if __name__ == "__main__": main()