# Copyright 2025 starVLA community. All rights reserved.
# Licensed under the MIT License, Version 1.0 (the "License");
# Implemented by [Jinhui YE / HKUST University] in [2025].

from typing import Dict, List, Optional, Union

import torch
from accelerate.logging import get_logger
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
from transformers.modeling_outputs import CausalLMOutputWithPast

# Module-level logger obtained through accelerate's logging helper.
logger = get_logger(__name__)

# NOTE(review): the constants below are commented out in the original source
# and are reproduced unchanged; the last one is cut off in the reviewed chunk.
# IGNORE_INDEX = -100
# IMAGE_TOKEN_INDEX = 151655
# VIDEO_TOKEN_INDEX = 151656
# DEFAULT_IMAGE_TOKEN = ""
# DEFAULT_VIDEO_TOKEN = "