rl4phyx-backup / ZeroSearch /One-Shot-RLVR /verify_vlm_processor.py

Upload folder using huggingface_hub

9a71cb6 verified 2 months ago

2.25 kB

	"""
	Standalone check of the VLM processor and image loading.
	Does not depend on the full veRL environment.
	"""
	import pandas as pd
	from pathlib import Path
	from PIL import Image

	# Defaults reproduce the dataset, image directory and model this script
	# was originally written against; pass other values to main() to reuse it.
	PARQUET_PATH = 'data/train/physics_vlm/metaphyx_all.parquet'
	IMAGE_DIR = "d:/Research/Rl4Phyx/MetaPhyX/data/images"
	MODEL_NAME = 'Qwen/Qwen2.5-VL-7B-Instruct'


	def extract_prompt_text(prompt):
	    """Return the text of the first chat message in *prompt*.

	    *prompt* is one cell of the ``prompt`` column. When it is a sequence
	    of chat messages (dicts carrying a ``content`` key), the ``content``
	    of the first message is returned unchanged. Anything else is
	    returned as ``str(prompt)``.

	    With the pyarrow engine, list-typed parquet cells typically come back
	    from pandas as ``numpy.ndarray`` rather than ``list``; a plain
	    ``isinstance(prompt, list)`` test misses them and would stringify the
	    whole array, so array-likes are converted to a list first.
	    """
	    if isinstance(prompt, str):
	        return prompt
	    if hasattr(prompt, "tolist"):
	        # numpy arrays (and other array-likes) -> plain Python list
	        prompt = prompt.tolist()
	    if isinstance(prompt, (list, tuple)) and prompt:
	        first_message = prompt[0]
	        if isinstance(first_message, dict) and "content" in first_message:
	            return first_message["content"]
	    return str(prompt)


	def main(parquet_path=PARQUET_PATH, image_dir=IMAGE_DIR, model_name=MODEL_NAME) -> bool:
	    """Run the five verification steps on the first row of the dataset.

	    Steps: read the parquet file, open the first row's image, load the
	    processor, build a single-turn image+text message, and encode it.

	    Args:
	        parquet_path: Parquet file with ``image_path`` and ``prompt`` columns.
	        image_dir: Directory that ``image_path`` values are joined onto.
	        model_name: Identifier passed to ``AutoProcessor.from_pretrained``.

	    Returns:
	        True when encoding succeeded and produced ``pixel_values``,
	        False otherwise. Failures in steps 1-4 propagate as exceptions.
	    """
	    banner = "=" * 50
	    print(banner)
	    print("多模态 VLM 验证脚本")
	    print(banner)

	    # Step 1: load the dataset
	    print("\n[1] 加载 Parquet 数据...")
	    df = pd.read_parquet(parquet_path)
	    first_row = df.iloc[0]
	    print(f" 总行数: {len(df)}")
	    print(f" 列名: {df.columns.tolist()}")
	    print(f" image_path 示例: {first_row['image_path']}")

	    # Step 2: load the image referenced by the first row
	    print("\n[2] 加载示例图像...")
	    image_path = Path(image_dir) / first_row['image_path']
	    print(f" 完整路径: {image_path}")
	    print(f" 文件存在: {image_path.exists()}")
	    # convert() returns a new, fully loaded image, so the file handle can
	    # be released as soon as the conversion is done.
	    with Image.open(image_path) as raw_image:
	        image = raw_image.convert('RGB')
	    print(f" 图像尺寸: {image.size}")

	    # Step 3: load the VLM processor
	    print("\n[3] 加载 Qwen2.5-VL Processor...")
	    # Imported here so steps 1-2 can run without transformers installed.
	    from transformers import AutoProcessor
	    # NOTE(review): trust_remote_code=True allows the hub repository to run
	    # its own Python code. Recent transformers releases appear to support
	    # Qwen2.5-VL natively, so it may be unnecessary - confirm before dropping.
	    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
	    print(f" Processor 类型: {type(processor).__name__}")

	    # Step 4: build the multimodal chat message
	    print("\n[4] 构建多模态消息...")
	    text_content = extract_prompt_text(first_row['prompt'])
	    print(f" 文本长度: {len(text_content)}")

	    messages = [{
	        "role": "user",
	        "content": [
	            {"type": "image", "image": image},
	            {"type": "text", "text": text_content},
	        ],
	    }]

	    # Step 5: encode text + image with the processor
	    print("\n[5] 使用 Processor 编码...")
	    succeeded = False
	    try:
	        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	        inputs = processor(
	            text=[text],
	            images=[image],
	            return_tensors="pt",
	            padding=True,
	        )
	        print(f" input_ids shape: {inputs['input_ids'].shape}")
	        print(f" attention_mask shape: {inputs['attention_mask'].shape}")
	        if 'pixel_values' in inputs:
	            print(f" pixel_values shape: {inputs['pixel_values'].shape}")
	            print("\n✅ 多模态编码成功！")
	            succeeded = True
	        else:
	            print("\n❌ 没有 pixel_values 输出")
	    except Exception as e:
	        # Top-level boundary of a diagnostic script: report the failure
	        # with its traceback instead of aborting before the closing banner.
	        print(f"\n❌ 编码失败: {e}")
	        import traceback
	        traceback.print_exc()

	    print("\n" + banner)
	    return succeeded


	if __name__ == "__main__":
	    main()