"""Run greedy-decoding inference with a Qwen2.5-VL checkpoint over a JSONL test set.

Usage:
    python <this_script> MODEL_PATH OUTPUT_NAME

Reads samples from ``TEST_FILE``, generates one response per sample, and
writes each sample (with an added ``"model_output"`` field) as one JSON line
to ``EVAL_DIR/inference_results_<OUTPUT_NAME>.jsonl``.
"""

import json
import os
import sys
import time

import torch

EVAL_DIR = "/workspace/rl4phyx/RL4Phyx/SFT/sft_eval_footprint"
TEST_FILE = os.path.join(EVAL_DIR, "test_1533_openended.jsonl")

# Instruction appended to every prompt; the literal text is ``\boxed{}``.
ANSWER_INSTRUCTION = "Please reason step by step, and put your final answer within \\boxed{}."
MAX_NEW_TOKENS = 2048
PROGRESS_EVERY = 50


def load_samples(path):
    """Return the JSON objects stored one per line in *path*.

    Blank (whitespace-only) lines are skipped. The file is read as UTF-8 so
    that it round-trips with the UTF-8 results file written by ``main``.

    Raises:
        OSError: if *path* cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


def build_prompt(sample):
    """Build the text prompt for *sample*.

    The non-empty ``"description"`` and ``"question"`` fields (in that order)
    and the fixed answer instruction are joined with blank lines.
    """
    fields = (sample.get("description", ""), sample.get("question", ""))
    parts = [p for p in fields if p]
    parts.append(ANSWER_INSTRUCTION)
    return "\n\n".join(parts)


def build_content(sample, prompt_text):
    """Build the chat ``content`` list (optional image entry, then text) for *sample*.

    The image entry is included only when ``sample["image_path"]`` is
    non-empty and exists on disk. A path that is given but missing is
    reported on stderr and the sample falls back to text-only, so the run
    continues but the degradation is visible in the log.
    """
    img = sample.get("image_path", "")
    content = []
    if img:
        if os.path.exists(img):
            content.append({"type": "image", "image": f"file://{img}"})
        else:
            print(f"WARNING: image not found, using text only: {img}",
                  file=sys.stderr, flush=True)
    content.append({"type": "text", "text": prompt_text})
    return content


def main(argv=None):
    """Run inference for the model and output name given on the command line.

    Args:
        argv: argument vector in ``sys.argv`` layout; defaults to ``sys.argv``.
            ``argv[1]`` is the model path, ``argv[2]`` the output name.
            Additional arguments are ignored.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else "script"
        print(f"Usage: {prog} MODEL_PATH OUTPUT_NAME", file=sys.stderr)
        return 2
    model_path, output_name = argv[1], argv[2]

    print(f"Model: {model_path}")
    print(f"Output: {output_name}")

    # Heavy third-party imports are deferred until the arguments have been
    # validated and echoed, so a bad invocation fails fast.
    from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
    from qwen_vl_utils import process_vision_info

    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_path, torch_dtype=torch.bfloat16, device_map="cuda",
        attn_implementation="sdpa",
    )
    processor = AutoProcessor.from_pretrained(model_path)
    model.eval()

    samples = load_samples(TEST_FILE)
    print(f"Loaded {len(samples)} samples")

    output_file = os.path.join(EVAL_DIR, f"inference_results_{output_name}.jsonl")
    # Stream results to a side file and rename it on success: a crash midway
    # keeps every finished generation in the ".partial" file, and a previous
    # complete results file is never clobbered by an unfinished run.
    partial_file = output_file + ".partial"

    written = 0
    t0 = time.time()
    with open(partial_file, "w", encoding="utf-8") as out:
        for idx, sample in enumerate(samples):
            prompt_text = build_prompt(sample)
            messages = [{"role": "user", "content": build_content(sample, prompt_text)}]

            text = processor.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            image_inputs, video_inputs = process_vision_info(messages)
            inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                               padding=True, return_tensors="pt").to("cuda")

            with torch.no_grad():
                ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS,
                                     do_sample=False)

            # generate() returns prompt + continuation; keep only the new tokens.
            prompt_len = inputs.input_ids.shape[1]
            response = processor.decode(ids[0][prompt_len:], skip_special_tokens=True)
            sample["model_output"] = response

            out.write(json.dumps(sample, ensure_ascii=False) + "\n")
            # One flush per sample is negligible next to generation time and
            # guarantees the record is on disk if the next sample crashes.
            out.flush()
            written += 1

            if (idx + 1) % PROGRESS_EVERY == 0:
                elapsed = time.time() - t0
                rate = (idx + 1) / elapsed
                eta = (len(samples) - idx - 1) / rate / 60
                print(f" {idx+1}/{len(samples)} ({rate:.1f}/s, ETA {eta:.0f}min)",
                      flush=True)

    os.replace(partial_file, output_file)
    print(f"\nDone: {written} -> {output_file}")
    return 0


if __name__ == "__main__":
    sys.exit(main())