#!/usr/bin/env python3
"""Smoke-test the dataset loading logic.

Loads the ``train`` split of ``WenjiaWang/videoforuser``, prints the number
of rows, and prints the structure of the ``video`` field of the first row.

If any step raises, the error and its traceback are printed and the script
exits with status 1 so that shells / CI jobs can detect the failure.
"""
import sys
import traceback

from datasets import load_dataset

print("正在加载数据集...")

try:
    # NOTE(review): the original comment here said the dataset is loaded
    # "without decoding the videos", but no decoding-related option is passed
    # to load_dataset. Confirm whether the video column needs to be cast to a
    # non-decoding feature before rows are accessed.
    dataset = load_dataset("WenjiaWang/videoforuser", split="train")
    print(f"✅ 成功加载数据集,共 {len(dataset)} 个视频")

    # Access the first row and report which keys it exposes.
    print("\n测试访问第一个元素...")
    item = dataset[0]
    print(f"Keys: {item.keys()}")

    if 'video' in item:
        video_data = item['video']
        print(f"Video type: {type(video_data)}")

        if isinstance(video_data, dict):
            print(f"Video dict keys: {video_data.keys()}")
            if 'path' in video_data:
                print(f"Video path: {video_data['path']}")
            if 'bytes' in video_data:
                # 'bytes' may be present but empty/None; report 0 in that case.
                raw_bytes = video_data['bytes']
                byte_count = len(raw_bytes) if raw_bytes else 0
                print(f"Video bytes length: {byte_count}")
        else:
            # Not a plain dict: just print whatever object was returned.
            print(f"Video data: {video_data}")

    print("\n✅ 数据集结构测试成功!")
except Exception as e:
    # Top-level boundary of a diagnostic script: report everything, then
    # signal failure through the exit status instead of exiting with 0.
    print(f"❌ 错误: {e}")
    traceback.print_exc()
    sys.exit(1)