import os import copy import json import random import logging import re import time import math import itertools import ast from dataclasses import dataclass from typing import Dict, Optional, Sequence, List, Tuple from io import BytesIO import base64 from collections.abc import Sequence from types import SimpleNamespace import numpy as np import torch from torch.utils.data import Dataset from PIL import Image from decord import VideoReader import transformers from omegaconf import OmegaConf from starVLA.dataloader.qwenvl_llavajson.qwen_data_config import data_list from starVLA.dataloader.qwenvl_llavajson.rope2d import get_rope_index_25, get_rope_index_2 IGNORE_INDEX = -100 IMAGE_TOKEN_INDEX = 151655 VIDEO_TOKEN_INDEX = 151656 DEFAULT_IMAGE_TOKEN = "\n" DEFAULT_VIDEO_TOKEN = "