Instructions to use mjf-su/AutoVLA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use mjf-su/AutoVLA with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="mjf-su/AutoVLA")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("mjf-su/AutoVLA")
model = AutoModelForMultimodalLM.from_pretrained("mjf-su/AutoVLA")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use mjf-su/AutoVLA with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "mjf-su/AutoVLA"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/AutoVLA",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker
docker model run hf.co/mjf-su/AutoVLA
- SGLang
How to use mjf-su/AutoVLA with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "mjf-su/AutoVLA" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/AutoVLA",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "mjf-su/AutoVLA" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/AutoVLA",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
- Docker Model Runner
How to use mjf-su/AutoVLA with Docker Model Runner:
docker model run hf.co/mjf-su/AutoVLA
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.48, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 0.43850863675276436, | |
| "epoch": 0.0096, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.5373, | |
| "mean_token_accuracy": 0.8229872425397237, | |
| "num_tokens": 3432459.0, | |
| "step": 10 | |
| }, | |
| { | |
| "entropy": 0.43693508704503375, | |
| "epoch": 0.0192, | |
| "grad_norm": 2.8125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.506, | |
| "mean_token_accuracy": 0.8290777782599131, | |
| "num_tokens": 6859685.0, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 0.44068048397699994, | |
| "epoch": 0.0288, | |
| "grad_norm": 2.421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4912, | |
| "mean_token_accuracy": 0.8306132018566131, | |
| "num_tokens": 10280810.0, | |
| "step": 30 | |
| }, | |
| { | |
| "entropy": 0.4448391616344452, | |
| "epoch": 0.0384, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4852, | |
| "mean_token_accuracy": 0.8322072466214497, | |
| "num_tokens": 13708851.0, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 0.4470527251561483, | |
| "epoch": 0.048, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4798, | |
| "mean_token_accuracy": 0.8338870048522949, | |
| "num_tokens": 17136948.0, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 0.44311814606189726, | |
| "epoch": 0.0576, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4701, | |
| "mean_token_accuracy": 0.8359324594338735, | |
| "num_tokens": 20560658.0, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 0.4470181296269099, | |
| "epoch": 0.0672, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4702, | |
| "mean_token_accuracy": 0.8356486141681672, | |
| "num_tokens": 23987248.0, | |
| "step": 70 | |
| }, | |
| { | |
| "entropy": 0.449398942788442, | |
| "epoch": 0.0768, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.47, | |
| "mean_token_accuracy": 0.8356277287006378, | |
| "num_tokens": 27418078.0, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 0.4466124544541041, | |
| "epoch": 0.0864, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4647, | |
| "mean_token_accuracy": 0.8362693071365357, | |
| "num_tokens": 30842206.0, | |
| "step": 90 | |
| }, | |
| { | |
| "entropy": 0.44751456181208293, | |
| "epoch": 0.096, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.465, | |
| "mean_token_accuracy": 0.8364668329556783, | |
| "num_tokens": 34270915.0, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 0.44619213143984476, | |
| "epoch": 0.1056, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.462, | |
| "mean_token_accuracy": 0.8371777753035228, | |
| "num_tokens": 37699757.0, | |
| "step": 110 | |
| }, | |
| { | |
| "entropy": 0.4475706567366918, | |
| "epoch": 0.1152, | |
| "grad_norm": 1.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4618, | |
| "mean_token_accuracy": 0.8366606632868449, | |
| "num_tokens": 41127680.0, | |
| "step": 120 | |
| }, | |
| { | |
| "entropy": 0.44157906572024025, | |
| "epoch": 0.1248, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4539, | |
| "mean_token_accuracy": 0.8387815574804942, | |
| "num_tokens": 44550205.0, | |
| "step": 130 | |
| }, | |
| { | |
| "entropy": 0.44636616806189217, | |
| "epoch": 0.1344, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4597, | |
| "mean_token_accuracy": 0.8369020839532216, | |
| "num_tokens": 47978530.0, | |
| "step": 140 | |
| }, | |
| { | |
| "entropy": 0.45083456734816235, | |
| "epoch": 0.144, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4615, | |
| "mean_token_accuracy": 0.8364426136016846, | |
| "num_tokens": 51415497.0, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 0.4423963377873103, | |
| "epoch": 0.1536, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4531, | |
| "mean_token_accuracy": 0.8387805402278901, | |
| "num_tokens": 54843112.0, | |
| "step": 160 | |
| }, | |
| { | |
| "entropy": 0.44047041336695353, | |
| "epoch": 0.1632, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4509, | |
| "mean_token_accuracy": 0.8391756375630697, | |
| "num_tokens": 58269937.0, | |
| "step": 170 | |
| }, | |
| { | |
| "entropy": 0.44535795946915946, | |
| "epoch": 0.1728, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4549, | |
| "mean_token_accuracy": 0.8376253386338551, | |
| "num_tokens": 61698122.0, | |
| "step": 180 | |
| }, | |
| { | |
| "entropy": 0.4441406190395355, | |
| "epoch": 0.1824, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4523, | |
| "mean_token_accuracy": 0.8387903730074565, | |
| "num_tokens": 65129853.0, | |
| "step": 190 | |
| }, | |
| { | |
| "entropy": 0.4387974033753077, | |
| "epoch": 0.192, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4475, | |
| "mean_token_accuracy": 0.8403141776720683, | |
| "num_tokens": 68554676.0, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 0.43539145588874817, | |
| "epoch": 0.2016, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4451, | |
| "mean_token_accuracy": 0.8406339287757874, | |
| "num_tokens": 71978060.0, | |
| "step": 210 | |
| }, | |
| { | |
| "entropy": 0.4371789425611496, | |
| "epoch": 0.2112, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4457, | |
| "mean_token_accuracy": 0.8400953491528829, | |
| "num_tokens": 75401498.0, | |
| "step": 220 | |
| }, | |
| { | |
| "entropy": 0.4436704327662786, | |
| "epoch": 0.2208, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4512, | |
| "mean_token_accuracy": 0.8383757730325063, | |
| "num_tokens": 78829143.0, | |
| "step": 230 | |
| }, | |
| { | |
| "entropy": 0.43721583088239035, | |
| "epoch": 0.2304, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4437, | |
| "mean_token_accuracy": 0.8410045862197876, | |
| "num_tokens": 82255415.0, | |
| "step": 240 | |
| }, | |
| { | |
| "entropy": 0.43779849211374916, | |
| "epoch": 0.24, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4449, | |
| "mean_token_accuracy": 0.840403014421463, | |
| "num_tokens": 85678092.0, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 0.4423576871554057, | |
| "epoch": 0.2496, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4496, | |
| "mean_token_accuracy": 0.8392611801624298, | |
| "num_tokens": 89108181.0, | |
| "step": 260 | |
| }, | |
| { | |
| "entropy": 0.44207868178685505, | |
| "epoch": 0.2592, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.449, | |
| "mean_token_accuracy": 0.8386734426021576, | |
| "num_tokens": 92534098.0, | |
| "step": 270 | |
| }, | |
| { | |
| "entropy": 0.43932537039120995, | |
| "epoch": 0.2688, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4453, | |
| "mean_token_accuracy": 0.8403779149055481, | |
| "num_tokens": 95965587.0, | |
| "step": 280 | |
| }, | |
| { | |
| "entropy": 0.44096320470174155, | |
| "epoch": 0.2784, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.447, | |
| "mean_token_accuracy": 0.8391348044077556, | |
| "num_tokens": 99394676.0, | |
| "step": 290 | |
| }, | |
| { | |
| "entropy": 0.4383016347885132, | |
| "epoch": 0.288, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4447, | |
| "mean_token_accuracy": 0.840146021048228, | |
| "num_tokens": 102821016.0, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 0.4389273832241694, | |
| "epoch": 0.2976, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4452, | |
| "mean_token_accuracy": 0.8401629229386648, | |
| "num_tokens": 106250675.0, | |
| "step": 310 | |
| }, | |
| { | |
| "entropy": 0.43793109953403475, | |
| "epoch": 0.3072, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4423, | |
| "mean_token_accuracy": 0.8408861041069031, | |
| "num_tokens": 109676233.0, | |
| "step": 320 | |
| }, | |
| { | |
| "entropy": 0.4382920225461324, | |
| "epoch": 0.3168, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4438, | |
| "mean_token_accuracy": 0.840533846616745, | |
| "num_tokens": 113104836.0, | |
| "step": 330 | |
| }, | |
| { | |
| "entropy": 0.4332722157239914, | |
| "epoch": 0.3264, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4384, | |
| "mean_token_accuracy": 0.8416322549184163, | |
| "num_tokens": 116528992.0, | |
| "step": 340 | |
| }, | |
| { | |
| "entropy": 0.43754682640234627, | |
| "epoch": 0.336, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.443, | |
| "mean_token_accuracy": 0.8403245389461518, | |
| "num_tokens": 119955123.0, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 0.42634722888469695, | |
| "epoch": 0.3456, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4304, | |
| "mean_token_accuracy": 0.844380776087443, | |
| "num_tokens": 123374023.0, | |
| "step": 360 | |
| }, | |
| { | |
| "entropy": 0.43683901329835256, | |
| "epoch": 0.3552, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4426, | |
| "mean_token_accuracy": 0.8407382468382517, | |
| "num_tokens": 126801597.0, | |
| "step": 370 | |
| }, | |
| { | |
| "entropy": 0.4341103653113047, | |
| "epoch": 0.3648, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4394, | |
| "mean_token_accuracy": 0.8416337351004283, | |
| "num_tokens": 130226608.0, | |
| "step": 380 | |
| }, | |
| { | |
| "entropy": 0.43337511718273164, | |
| "epoch": 0.3744, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4371, | |
| "mean_token_accuracy": 0.8418285946051279, | |
| "num_tokens": 133650147.0, | |
| "step": 390 | |
| }, | |
| { | |
| "entropy": 0.43345692853132883, | |
| "epoch": 0.384, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4371, | |
| "mean_token_accuracy": 0.8420754154523213, | |
| "num_tokens": 137072559.0, | |
| "step": 400 | |
| }, | |
| { | |
| "entropy": 0.43710677921772, | |
| "epoch": 0.3936, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4418, | |
| "mean_token_accuracy": 0.8403918604056041, | |
| "num_tokens": 140502777.0, | |
| "step": 410 | |
| }, | |
| { | |
| "entropy": 0.4345085640748342, | |
| "epoch": 0.4032, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4382, | |
| "mean_token_accuracy": 0.8418921589851379, | |
| "num_tokens": 143932092.0, | |
| "step": 420 | |
| }, | |
| { | |
| "entropy": 0.43460349341233573, | |
| "epoch": 0.4128, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4391, | |
| "mean_token_accuracy": 0.8413500209649404, | |
| "num_tokens": 147358021.0, | |
| "step": 430 | |
| }, | |
| { | |
| "entropy": 0.4344378610452016, | |
| "epoch": 0.4224, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4384, | |
| "mean_token_accuracy": 0.8416643917560578, | |
| "num_tokens": 150785208.0, | |
| "step": 440 | |
| }, | |
| { | |
| "entropy": 0.43424378136793773, | |
| "epoch": 0.432, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4378, | |
| "mean_token_accuracy": 0.8417747735977172, | |
| "num_tokens": 154214667.0, | |
| "step": 450 | |
| }, | |
| { | |
| "entropy": 0.43412678241729735, | |
| "epoch": 0.4416, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.439, | |
| "mean_token_accuracy": 0.8415433506170908, | |
| "num_tokens": 157644805.0, | |
| "step": 460 | |
| }, | |
| { | |
| "entropy": 0.4340047796567281, | |
| "epoch": 0.4512, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4372, | |
| "mean_token_accuracy": 0.8419170657793681, | |
| "num_tokens": 161074326.0, | |
| "step": 470 | |
| }, | |
| { | |
| "entropy": 0.4271635631720225, | |
| "epoch": 0.4608, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4295, | |
| "mean_token_accuracy": 0.8433916787306468, | |
| "num_tokens": 164493009.0, | |
| "step": 480 | |
| }, | |
| { | |
| "entropy": 0.4347446064154307, | |
| "epoch": 0.4704, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4385, | |
| "mean_token_accuracy": 0.8410797854264577, | |
| "num_tokens": 167922637.0, | |
| "step": 490 | |
| }, | |
| { | |
| "entropy": 0.43026507596174873, | |
| "epoch": 0.48, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.433, | |
| "mean_token_accuracy": 0.8427020668983459, | |
| "num_tokens": 171349238.0, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1042, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.381015024450142e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |