Update README.md
565c028 verified 4 months ago
1.44 kB
Using 🤗 Transformers to Import the Model

import torch
from transformers import AutoProcessor

from modeling_contextvla import ContextVLA_Qwen3VL

# Single source of truth for the checkpoint id, so the model and the
# processor are always loaded from the same repository.
MODEL_ID = "huiwon/roboalign_contextvla_oxe_sft_1epoch"

# Load the ContextVLA model weights in bfloat16, requesting the
# FlashAttention-2 attention implementation.
# NOTE(review): "flash_attention_2" presumably requires the flash-attn
# package and a compatible GPU -- confirm for the target environment.
model = ContextVLA_Qwen3VL.from_pretrained(
    MODEL_ID,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)

# Load the matching processor from the same checkpoint.
processor = AutoProcessor.from_pretrained(MODEL_ID)

# NOTE(review): `training_args` is not defined in this snippet. It must be
# supplied by the surrounding script (an object exposing `model_max_length`);
# otherwise this line raises NameError. Presumably it should match the
# maximum sequence length used during training -- confirm.
processor.tokenizer.model_max_length = training_args.model_max_length
# Pad on the left side of each sequence.
processor.tokenizer.padding_side = "left"

# Conversation template as ordered (role, text) pairs: a system prompt, the
# human request, and the expected model ("gpt") reply.
_TURNS = (
    (
        "system",
        "You are an embodied vision-language robotic assistant for multi-object manipulation.",
    ),
    (
        "human",
        "<video>\nCurrent task is <instruction>. <embodiment_tag>. <state>. Output the robot\u2019s actions to perform this task through Fast tokens.",
    ),
    (
        "gpt",
        "<fast_token>",
    ),
)

# Expand each pair into the {"from": ..., "value": ...} record format.
conversations = [{"from": role, "value": text} for role, text in _TURNS]
# The <instruction>, <embodiment_tag>, <state>, and <fast_token> placeholders
# above still need to be filled in with the actual instruction, embodiment
# tag, state, and FAST tokens.