"""
Standalone check of the VLM processor and image loading.

Does not depend on a full veRL environment.
"""
import sys
import traceback
from pathlib import Path

import pandas as pd
from PIL import Image


# Defaults used when the script is run without arguments.
PARQUET_PATH = "data/train/physics_vlm/metaphyx_all.parquet"
IMAGE_DIR = Path("d:/Research/Rl4Phyx/MetaPhyX/data/images")
MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"
BANNER = "=" * 50


def extract_prompt_text(prompt: object) -> str:
    """Return the text of the first chat message stored in a ``prompt`` cell.

    Args:
        prompt: Either a sequence of chat-message dicts (each carrying a
            ``"content"`` key) or any other value, typically a plain string.

    Returns:
        The ``content`` of the first message when ``prompt`` is a non-empty
        message sequence, ``""`` when the sequence is empty, and
        ``str(prompt)`` for anything else.
    """
    # List-typed Parquet cells come back from pandas as numpy arrays rather
    # than Python lists, so a bare isinstance(prompt, list) check misses them.
    if hasattr(prompt, "tolist"):
        prompt = prompt.tolist()

    if isinstance(prompt, (list, tuple)):
        if not prompt:
            return ""
        first = prompt[0]
        if isinstance(first, dict) and "content" in first:
            content = first["content"]
            return content if isinstance(content, str) else str(content)

    return str(prompt)


def load_rgb_image(image_path: Path) -> "Image.Image":
    """Open ``image_path`` and return an RGB copy held fully in memory.

    The ``with`` block closes the underlying file as soon as the converted
    copy exists, instead of leaving the handle open until garbage collection.
    """
    with Image.open(image_path) as raw:
        return raw.convert("RGB")


def main(
    parquet_path: str = PARQUET_PATH,
    image_dir: Path = IMAGE_DIR,
    model_name: str = MODEL_NAME,
) -> int:
    """Run the five verification steps on the first dataset row.

    Args:
        parquet_path: Parquet file providing ``image_path`` and ``prompt``.
        image_dir: Directory that the ``image_path`` values are relative to.
        model_name: Model id or local path handed to ``AutoProcessor``.

    Returns:
        ``0`` when the processor output contains ``pixel_values``, ``1`` when
        the dataset is empty, the image is missing, encoding raises, or
        ``pixel_values`` is absent.
    """
    print(BANNER)
    print("多模态 VLM 验证脚本")
    print(BANNER)

    print("\n[1] 加载 Parquet 数据...")
    df = pd.read_parquet(parquet_path)
    print(f" 总行数: {len(df)}")
    print(f" 列名: {df.columns.tolist()}")
    if df.empty:
        # Without this guard the .iloc[0] below raises a bare IndexError.
        print("\n❌ Parquet 数据为空")
        return 1
    print(f" image_path 示例: {df['image_path'].iloc[0]}")

    print("\n[2] 加载示例图像...")
    image_path = Path(image_dir) / df['image_path'].iloc[0]
    print(f" 完整路径: {image_path}")
    exists = image_path.exists()
    print(f" 文件存在: {exists}")
    if not exists:
        print(f"\n❌ 图像文件不存在: {image_path}")
        return 1
    image = load_rgb_image(image_path)
    print(f" 图像尺寸: {image.size}")

    print("\n[3] 加载 Qwen2.5-VL Processor...")
    # Imported here rather than at file level, as in the original ordering, so
    # data and image problems surface before transformers is loaded.
    from transformers import AutoProcessor
    # NOTE(review): trust_remote_code=True lets code shipped in the model repo
    # run locally; confirm it is still required for this checkpoint.
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
    print(f" Processor 类型: {type(processor).__name__}")

    print("\n[4] 构建多模态消息...")
    text_content = extract_prompt_text(df['prompt'].iloc[0])
    print(f" 文本长度: {len(text_content)}")
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text_content},
        ],
    }]

    print("\n[5] 使用 Processor 编码...")
    succeeded = False
    try:
        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = processor(
            text=[text],
            images=[image],
            return_tensors="pt",
            padding=True,
        )
        print(f" input_ids shape: {inputs['input_ids'].shape}")
        print(f" attention_mask shape: {inputs['attention_mask'].shape}")
        if 'pixel_values' in inputs:
            print(f" pixel_values shape: {inputs['pixel_values'].shape}")
            print("\n✅ 多模态编码成功!")
            succeeded = True
        else:
            print("\n❌ 没有 pixel_values 输出")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure with a
        # full traceback, then still print the closing banner.
        print(f"\n❌ 编码失败: {e}")
        traceback.print_exc()

    print("\n" + BANNER)
    return 0 if succeeded else 1


if __name__ == "__main__":
    sys.exit(main())