Instructions to use mjf-su/AutoVLA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use mjf-su/AutoVLA with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="mjf-su/AutoVLA")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("mjf-su/AutoVLA")
model = AutoModelForMultimodalLM.from_pretrained("mjf-su/AutoVLA")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use mjf-su/AutoVLA with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "mjf-su/AutoVLA"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/AutoVLA",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
```
Use Docker
docker model run hf.co/mjf-su/AutoVLA
- SGLang
How to use mjf-su/AutoVLA with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "mjf-su/AutoVLA" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/AutoVLA",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "mjf-su/AutoVLA" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/AutoVLA",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
```
- Docker Model Runner
How to use mjf-su/AutoVLA with Docker Model Runner:
docker model run hf.co/mjf-su/AutoVLA
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.96, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 0.43850863675276436, | |
| "epoch": 0.0096, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.5373, | |
| "mean_token_accuracy": 0.8229872425397237, | |
| "num_tokens": 3432459.0, | |
| "step": 10 | |
| }, | |
| { | |
| "entropy": 0.43693508704503375, | |
| "epoch": 0.0192, | |
| "grad_norm": 2.8125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.506, | |
| "mean_token_accuracy": 0.8290777782599131, | |
| "num_tokens": 6859685.0, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 0.44068048397699994, | |
| "epoch": 0.0288, | |
| "grad_norm": 2.421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4912, | |
| "mean_token_accuracy": 0.8306132018566131, | |
| "num_tokens": 10280810.0, | |
| "step": 30 | |
| }, | |
| { | |
| "entropy": 0.4448391616344452, | |
| "epoch": 0.0384, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4852, | |
| "mean_token_accuracy": 0.8322072466214497, | |
| "num_tokens": 13708851.0, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 0.4470527251561483, | |
| "epoch": 0.048, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4798, | |
| "mean_token_accuracy": 0.8338870048522949, | |
| "num_tokens": 17136948.0, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 0.44311814606189726, | |
| "epoch": 0.0576, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4701, | |
| "mean_token_accuracy": 0.8359324594338735, | |
| "num_tokens": 20560658.0, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 0.4470181296269099, | |
| "epoch": 0.0672, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4702, | |
| "mean_token_accuracy": 0.8356486141681672, | |
| "num_tokens": 23987248.0, | |
| "step": 70 | |
| }, | |
| { | |
| "entropy": 0.449398942788442, | |
| "epoch": 0.0768, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.47, | |
| "mean_token_accuracy": 0.8356277287006378, | |
| "num_tokens": 27418078.0, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 0.4466124544541041, | |
| "epoch": 0.0864, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4647, | |
| "mean_token_accuracy": 0.8362693071365357, | |
| "num_tokens": 30842206.0, | |
| "step": 90 | |
| }, | |
| { | |
| "entropy": 0.44751456181208293, | |
| "epoch": 0.096, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.465, | |
| "mean_token_accuracy": 0.8364668329556783, | |
| "num_tokens": 34270915.0, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 0.44619213143984476, | |
| "epoch": 0.1056, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.462, | |
| "mean_token_accuracy": 0.8371777753035228, | |
| "num_tokens": 37699757.0, | |
| "step": 110 | |
| }, | |
| { | |
| "entropy": 0.4475706567366918, | |
| "epoch": 0.1152, | |
| "grad_norm": 1.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4618, | |
| "mean_token_accuracy": 0.8366606632868449, | |
| "num_tokens": 41127680.0, | |
| "step": 120 | |
| }, | |
| { | |
| "entropy": 0.44157906572024025, | |
| "epoch": 0.1248, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4539, | |
| "mean_token_accuracy": 0.8387815574804942, | |
| "num_tokens": 44550205.0, | |
| "step": 130 | |
| }, | |
| { | |
| "entropy": 0.44636616806189217, | |
| "epoch": 0.1344, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4597, | |
| "mean_token_accuracy": 0.8369020839532216, | |
| "num_tokens": 47978530.0, | |
| "step": 140 | |
| }, | |
| { | |
| "entropy": 0.45083456734816235, | |
| "epoch": 0.144, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4615, | |
| "mean_token_accuracy": 0.8364426136016846, | |
| "num_tokens": 51415497.0, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 0.4423963377873103, | |
| "epoch": 0.1536, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4531, | |
| "mean_token_accuracy": 0.8387805402278901, | |
| "num_tokens": 54843112.0, | |
| "step": 160 | |
| }, | |
| { | |
| "entropy": 0.44047041336695353, | |
| "epoch": 0.1632, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4509, | |
| "mean_token_accuracy": 0.8391756375630697, | |
| "num_tokens": 58269937.0, | |
| "step": 170 | |
| }, | |
| { | |
| "entropy": 0.44535795946915946, | |
| "epoch": 0.1728, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4549, | |
| "mean_token_accuracy": 0.8376253386338551, | |
| "num_tokens": 61698122.0, | |
| "step": 180 | |
| }, | |
| { | |
| "entropy": 0.4441406190395355, | |
| "epoch": 0.1824, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4523, | |
| "mean_token_accuracy": 0.8387903730074565, | |
| "num_tokens": 65129853.0, | |
| "step": 190 | |
| }, | |
| { | |
| "entropy": 0.4387974033753077, | |
| "epoch": 0.192, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4475, | |
| "mean_token_accuracy": 0.8403141776720683, | |
| "num_tokens": 68554676.0, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 0.43539145588874817, | |
| "epoch": 0.2016, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4451, | |
| "mean_token_accuracy": 0.8406339287757874, | |
| "num_tokens": 71978060.0, | |
| "step": 210 | |
| }, | |
| { | |
| "entropy": 0.4371789425611496, | |
| "epoch": 0.2112, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4457, | |
| "mean_token_accuracy": 0.8400953491528829, | |
| "num_tokens": 75401498.0, | |
| "step": 220 | |
| }, | |
| { | |
| "entropy": 0.4436704327662786, | |
| "epoch": 0.2208, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4512, | |
| "mean_token_accuracy": 0.8383757730325063, | |
| "num_tokens": 78829143.0, | |
| "step": 230 | |
| }, | |
| { | |
| "entropy": 0.43721583088239035, | |
| "epoch": 0.2304, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4437, | |
| "mean_token_accuracy": 0.8410045862197876, | |
| "num_tokens": 82255415.0, | |
| "step": 240 | |
| }, | |
| { | |
| "entropy": 0.43779849211374916, | |
| "epoch": 0.24, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4449, | |
| "mean_token_accuracy": 0.840403014421463, | |
| "num_tokens": 85678092.0, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 0.4423576871554057, | |
| "epoch": 0.2496, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4496, | |
| "mean_token_accuracy": 0.8392611801624298, | |
| "num_tokens": 89108181.0, | |
| "step": 260 | |
| }, | |
| { | |
| "entropy": 0.44207868178685505, | |
| "epoch": 0.2592, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.449, | |
| "mean_token_accuracy": 0.8386734426021576, | |
| "num_tokens": 92534098.0, | |
| "step": 270 | |
| }, | |
| { | |
| "entropy": 0.43932537039120995, | |
| "epoch": 0.2688, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4453, | |
| "mean_token_accuracy": 0.8403779149055481, | |
| "num_tokens": 95965587.0, | |
| "step": 280 | |
| }, | |
| { | |
| "entropy": 0.44096320470174155, | |
| "epoch": 0.2784, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.447, | |
| "mean_token_accuracy": 0.8391348044077556, | |
| "num_tokens": 99394676.0, | |
| "step": 290 | |
| }, | |
| { | |
| "entropy": 0.4383016347885132, | |
| "epoch": 0.288, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4447, | |
| "mean_token_accuracy": 0.840146021048228, | |
| "num_tokens": 102821016.0, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 0.4389273832241694, | |
| "epoch": 0.2976, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4452, | |
| "mean_token_accuracy": 0.8401629229386648, | |
| "num_tokens": 106250675.0, | |
| "step": 310 | |
| }, | |
| { | |
| "entropy": 0.43793109953403475, | |
| "epoch": 0.3072, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4423, | |
| "mean_token_accuracy": 0.8408861041069031, | |
| "num_tokens": 109676233.0, | |
| "step": 320 | |
| }, | |
| { | |
| "entropy": 0.4382920225461324, | |
| "epoch": 0.3168, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4438, | |
| "mean_token_accuracy": 0.840533846616745, | |
| "num_tokens": 113104836.0, | |
| "step": 330 | |
| }, | |
| { | |
| "entropy": 0.4332722157239914, | |
| "epoch": 0.3264, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4384, | |
| "mean_token_accuracy": 0.8416322549184163, | |
| "num_tokens": 116528992.0, | |
| "step": 340 | |
| }, | |
| { | |
| "entropy": 0.43754682640234627, | |
| "epoch": 0.336, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.443, | |
| "mean_token_accuracy": 0.8403245389461518, | |
| "num_tokens": 119955123.0, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 0.42634722888469695, | |
| "epoch": 0.3456, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4304, | |
| "mean_token_accuracy": 0.844380776087443, | |
| "num_tokens": 123374023.0, | |
| "step": 360 | |
| }, | |
| { | |
| "entropy": 0.43683901329835256, | |
| "epoch": 0.3552, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4426, | |
| "mean_token_accuracy": 0.8407382468382517, | |
| "num_tokens": 126801597.0, | |
| "step": 370 | |
| }, | |
| { | |
| "entropy": 0.4341103653113047, | |
| "epoch": 0.3648, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4394, | |
| "mean_token_accuracy": 0.8416337351004283, | |
| "num_tokens": 130226608.0, | |
| "step": 380 | |
| }, | |
| { | |
| "entropy": 0.43337511718273164, | |
| "epoch": 0.3744, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4371, | |
| "mean_token_accuracy": 0.8418285946051279, | |
| "num_tokens": 133650147.0, | |
| "step": 390 | |
| }, | |
| { | |
| "entropy": 0.43345692853132883, | |
| "epoch": 0.384, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4371, | |
| "mean_token_accuracy": 0.8420754154523213, | |
| "num_tokens": 137072559.0, | |
| "step": 400 | |
| }, | |
| { | |
| "entropy": 0.43710677921772, | |
| "epoch": 0.3936, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4418, | |
| "mean_token_accuracy": 0.8403918604056041, | |
| "num_tokens": 140502777.0, | |
| "step": 410 | |
| }, | |
| { | |
| "entropy": 0.4345085640748342, | |
| "epoch": 0.4032, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4382, | |
| "mean_token_accuracy": 0.8418921589851379, | |
| "num_tokens": 143932092.0, | |
| "step": 420 | |
| }, | |
| { | |
| "entropy": 0.43460349341233573, | |
| "epoch": 0.4128, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4391, | |
| "mean_token_accuracy": 0.8413500209649404, | |
| "num_tokens": 147358021.0, | |
| "step": 430 | |
| }, | |
| { | |
| "entropy": 0.4344378610452016, | |
| "epoch": 0.4224, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4384, | |
| "mean_token_accuracy": 0.8416643917560578, | |
| "num_tokens": 150785208.0, | |
| "step": 440 | |
| }, | |
| { | |
| "entropy": 0.43424378136793773, | |
| "epoch": 0.432, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4378, | |
| "mean_token_accuracy": 0.8417747735977172, | |
| "num_tokens": 154214667.0, | |
| "step": 450 | |
| }, | |
| { | |
| "entropy": 0.43412678241729735, | |
| "epoch": 0.4416, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.439, | |
| "mean_token_accuracy": 0.8415433506170908, | |
| "num_tokens": 157644805.0, | |
| "step": 460 | |
| }, | |
| { | |
| "entropy": 0.4340047796567281, | |
| "epoch": 0.4512, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4372, | |
| "mean_token_accuracy": 0.8419170657793681, | |
| "num_tokens": 161074326.0, | |
| "step": 470 | |
| }, | |
| { | |
| "entropy": 0.4271635631720225, | |
| "epoch": 0.4608, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4295, | |
| "mean_token_accuracy": 0.8433916787306468, | |
| "num_tokens": 164493009.0, | |
| "step": 480 | |
| }, | |
| { | |
| "entropy": 0.4347446064154307, | |
| "epoch": 0.4704, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4385, | |
| "mean_token_accuracy": 0.8410797854264577, | |
| "num_tokens": 167922637.0, | |
| "step": 490 | |
| }, | |
| { | |
| "entropy": 0.43026507596174873, | |
| "epoch": 0.48, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.433, | |
| "mean_token_accuracy": 0.8427020668983459, | |
| "num_tokens": 171349238.0, | |
| "step": 500 | |
| }, | |
| { | |
| "entropy": 0.43025335570176443, | |
| "epoch": 0.4896, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4332, | |
| "mean_token_accuracy": 0.8427554865678152, | |
| "num_tokens": 174774665.0, | |
| "step": 510 | |
| }, | |
| { | |
| "entropy": 0.43068666458129884, | |
| "epoch": 0.4992, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4343, | |
| "mean_token_accuracy": 0.8422266582647959, | |
| "num_tokens": 178199588.0, | |
| "step": 520 | |
| }, | |
| { | |
| "entropy": 0.4314758092164993, | |
| "epoch": 0.5088, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4341, | |
| "mean_token_accuracy": 0.8423931996027628, | |
| "num_tokens": 181622373.0, | |
| "step": 530 | |
| }, | |
| { | |
| "entropy": 0.43214026689529417, | |
| "epoch": 0.5184, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4352, | |
| "mean_token_accuracy": 0.8417826076348622, | |
| "num_tokens": 185046141.0, | |
| "step": 540 | |
| }, | |
| { | |
| "entropy": 0.42929191191991173, | |
| "epoch": 0.528, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4315, | |
| "mean_token_accuracy": 0.8429112037022909, | |
| "num_tokens": 188471598.0, | |
| "step": 550 | |
| }, | |
| { | |
| "entropy": 0.43347863058249153, | |
| "epoch": 0.5376, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4362, | |
| "mean_token_accuracy": 0.8421526908874511, | |
| "num_tokens": 191901663.0, | |
| "step": 560 | |
| }, | |
| { | |
| "entropy": 0.4340634206930796, | |
| "epoch": 0.5472, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4385, | |
| "mean_token_accuracy": 0.8407556653022766, | |
| "num_tokens": 195330705.0, | |
| "step": 570 | |
| }, | |
| { | |
| "entropy": 0.4317493091026942, | |
| "epoch": 0.5568, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4341, | |
| "mean_token_accuracy": 0.8426395455996195, | |
| "num_tokens": 198759569.0, | |
| "step": 580 | |
| }, | |
| { | |
| "entropy": 0.4264296998580297, | |
| "epoch": 0.5664, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4288, | |
| "mean_token_accuracy": 0.843881368637085, | |
| "num_tokens": 202180759.0, | |
| "step": 590 | |
| }, | |
| { | |
| "entropy": 0.42934685150782265, | |
| "epoch": 0.576, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4319, | |
| "mean_token_accuracy": 0.8431523183981577, | |
| "num_tokens": 205609433.0, | |
| "step": 600 | |
| }, | |
| { | |
| "entropy": 0.43238858282566073, | |
| "epoch": 0.5856, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4351, | |
| "mean_token_accuracy": 0.8418097396691641, | |
| "num_tokens": 209040734.0, | |
| "step": 610 | |
| }, | |
| { | |
| "entropy": 0.4318026860555013, | |
| "epoch": 0.5952, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4355, | |
| "mean_token_accuracy": 0.8424915373325348, | |
| "num_tokens": 212473401.0, | |
| "step": 620 | |
| }, | |
| { | |
| "entropy": 0.4235608865817388, | |
| "epoch": 0.6048, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4256, | |
| "mean_token_accuracy": 0.8451377809047699, | |
| "num_tokens": 215896965.0, | |
| "step": 630 | |
| }, | |
| { | |
| "entropy": 0.42819432020187376, | |
| "epoch": 0.6144, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4307, | |
| "mean_token_accuracy": 0.8432986001173656, | |
| "num_tokens": 219323070.0, | |
| "step": 640 | |
| }, | |
| { | |
| "entropy": 0.43294126689434054, | |
| "epoch": 0.624, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4357, | |
| "mean_token_accuracy": 0.8416835029919942, | |
| "num_tokens": 222753922.0, | |
| "step": 650 | |
| }, | |
| { | |
| "entropy": 0.42949473758538564, | |
| "epoch": 0.6336, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4329, | |
| "mean_token_accuracy": 0.8425247291723887, | |
| "num_tokens": 226181564.0, | |
| "step": 660 | |
| }, | |
| { | |
| "entropy": 0.431626628835996, | |
| "epoch": 0.6432, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4342, | |
| "mean_token_accuracy": 0.8420288483301799, | |
| "num_tokens": 229609454.0, | |
| "step": 670 | |
| }, | |
| { | |
| "entropy": 0.42529774804910025, | |
| "epoch": 0.6528, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4283, | |
| "mean_token_accuracy": 0.8438542902469635, | |
| "num_tokens": 233030434.0, | |
| "step": 680 | |
| }, | |
| { | |
| "entropy": 0.4295430819193522, | |
| "epoch": 0.6624, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4319, | |
| "mean_token_accuracy": 0.8431335310141246, | |
| "num_tokens": 236458053.0, | |
| "step": 690 | |
| }, | |
| { | |
| "entropy": 0.4359436571598053, | |
| "epoch": 0.672, | |
| "grad_norm": 0.875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4387, | |
| "mean_token_accuracy": 0.8408014853795369, | |
| "num_tokens": 239892329.0, | |
| "step": 700 | |
| }, | |
| { | |
| "entropy": 0.43046695590019224, | |
| "epoch": 0.6816, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4329, | |
| "mean_token_accuracy": 0.8423032621542613, | |
| "num_tokens": 243321186.0, | |
| "step": 710 | |
| }, | |
| { | |
| "entropy": 0.427196944753329, | |
| "epoch": 0.6912, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4296, | |
| "mean_token_accuracy": 0.8436582644780477, | |
| "num_tokens": 246748277.0, | |
| "step": 720 | |
| }, | |
| { | |
| "entropy": 0.4301902174949646, | |
| "epoch": 0.7008, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4319, | |
| "mean_token_accuracy": 0.8430530607700348, | |
| "num_tokens": 250176097.0, | |
| "step": 730 | |
| }, | |
| { | |
| "entropy": 0.42852813402811685, | |
| "epoch": 0.7104, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4308, | |
| "mean_token_accuracy": 0.8430961946646373, | |
| "num_tokens": 253603786.0, | |
| "step": 740 | |
| }, | |
| { | |
| "entropy": 0.4267232229312261, | |
| "epoch": 0.72, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4294, | |
| "mean_token_accuracy": 0.8436786532402039, | |
| "num_tokens": 257030085.0, | |
| "step": 750 | |
| }, | |
| { | |
| "entropy": 0.42847318251927696, | |
| "epoch": 0.7296, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4313, | |
| "mean_token_accuracy": 0.8432875255743663, | |
| "num_tokens": 260459662.0, | |
| "step": 760 | |
| }, | |
| { | |
| "entropy": 0.42785159647464754, | |
| "epoch": 0.7392, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4296, | |
| "mean_token_accuracy": 0.8437132080396016, | |
| "num_tokens": 263886799.0, | |
| "step": 770 | |
| }, | |
| { | |
| "entropy": 0.42436840136845905, | |
| "epoch": 0.7488, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4271, | |
| "mean_token_accuracy": 0.8438882827758789, | |
| "num_tokens": 267311788.0, | |
| "step": 780 | |
| }, | |
| { | |
| "entropy": 0.42350135842959086, | |
| "epoch": 0.7584, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4244, | |
| "mean_token_accuracy": 0.8450363477071127, | |
| "num_tokens": 270733591.0, | |
| "step": 790 | |
| }, | |
| { | |
| "entropy": 0.42643423279126486, | |
| "epoch": 0.768, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4289, | |
| "mean_token_accuracy": 0.8439076920350392, | |
| "num_tokens": 274158074.0, | |
| "step": 800 | |
| }, | |
| { | |
| "entropy": 0.4293264577786128, | |
| "epoch": 0.7776, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4324, | |
| "mean_token_accuracy": 0.8420809169610342, | |
| "num_tokens": 277581909.0, | |
| "step": 810 | |
| }, | |
| { | |
| "entropy": 0.42917039692401887, | |
| "epoch": 0.7872, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4308, | |
| "mean_token_accuracy": 0.842545215288798, | |
| "num_tokens": 281007702.0, | |
| "step": 820 | |
| }, | |
| { | |
| "entropy": 0.4337611397107442, | |
| "epoch": 0.7968, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4356, | |
| "mean_token_accuracy": 0.8417014559110005, | |
| "num_tokens": 284442105.0, | |
| "step": 830 | |
| }, | |
| { | |
| "entropy": 0.42585750023523966, | |
| "epoch": 0.8064, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4281, | |
| "mean_token_accuracy": 0.8437936822573344, | |
| "num_tokens": 287868793.0, | |
| "step": 840 | |
| }, | |
| { | |
| "entropy": 0.4264136056105296, | |
| "epoch": 0.816, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4291, | |
| "mean_token_accuracy": 0.8439127763112386, | |
| "num_tokens": 291295752.0, | |
| "step": 850 | |
| }, | |
| { | |
| "entropy": 0.4276336113611857, | |
| "epoch": 0.8256, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4294, | |
| "mean_token_accuracy": 0.8433744112650553, | |
| "num_tokens": 294724526.0, | |
| "step": 860 | |
| }, | |
| { | |
| "entropy": 0.4295493682225545, | |
| "epoch": 0.8352, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4314, | |
| "mean_token_accuracy": 0.8426620582739512, | |
| "num_tokens": 298153923.0, | |
| "step": 870 | |
| }, | |
| { | |
| "entropy": 0.42792819142341615, | |
| "epoch": 0.8448, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4304, | |
| "mean_token_accuracy": 0.8432036856810252, | |
| "num_tokens": 301584253.0, | |
| "step": 880 | |
| }, | |
| { | |
| "entropy": 0.42749512096246084, | |
| "epoch": 0.8544, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4299, | |
| "mean_token_accuracy": 0.8432926038901011, | |
| "num_tokens": 305013666.0, | |
| "step": 890 | |
| }, | |
| { | |
| "entropy": 0.4297857642173767, | |
| "epoch": 0.864, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4318, | |
| "mean_token_accuracy": 0.8426412324110667, | |
| "num_tokens": 308440989.0, | |
| "step": 900 | |
| }, | |
| { | |
| "entropy": 0.42787257631619774, | |
| "epoch": 0.8736, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4303, | |
| "mean_token_accuracy": 0.8433160742123922, | |
| "num_tokens": 311870076.0, | |
| "step": 910 | |
| }, | |
| { | |
| "entropy": 0.42855414350827536, | |
| "epoch": 0.8832, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4303, | |
| "mean_token_accuracy": 0.8432195166746775, | |
| "num_tokens": 315296851.0, | |
| "step": 920 | |
| }, | |
| { | |
| "entropy": 0.42333943645159405, | |
| "epoch": 0.8928, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4242, | |
| "mean_token_accuracy": 0.8448341071605683, | |
| "num_tokens": 318720357.0, | |
| "step": 930 | |
| }, | |
| { | |
| "entropy": 0.4254674275716146, | |
| "epoch": 0.9024, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4272, | |
| "mean_token_accuracy": 0.844523819287618, | |
| "num_tokens": 322150685.0, | |
| "step": 940 | |
| }, | |
| { | |
| "entropy": 0.4280929406483968, | |
| "epoch": 0.912, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4305, | |
| "mean_token_accuracy": 0.8432438095410665, | |
| "num_tokens": 325578800.0, | |
| "step": 950 | |
| }, | |
| { | |
| "entropy": 0.4216255853573481, | |
| "epoch": 0.9216, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4233, | |
| "mean_token_accuracy": 0.8448065340518951, | |
| "num_tokens": 328999744.0, | |
| "step": 960 | |
| }, | |
| { | |
| "entropy": 0.42576794425646464, | |
| "epoch": 0.9312, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4274, | |
| "mean_token_accuracy": 0.8436802566051483, | |
| "num_tokens": 332424272.0, | |
| "step": 970 | |
| }, | |
| { | |
| "entropy": 0.41938706636428835, | |
| "epoch": 0.9408, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4206, | |
| "mean_token_accuracy": 0.8458850880463918, | |
| "num_tokens": 335842047.0, | |
| "step": 980 | |
| }, | |
| { | |
| "entropy": 0.4206093430519104, | |
| "epoch": 0.9504, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4237, | |
| "mean_token_accuracy": 0.8452507853507996, | |
| "num_tokens": 339266824.0, | |
| "step": 990 | |
| }, | |
| { | |
| "entropy": 0.42829200327396394, | |
| "epoch": 0.96, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4304, | |
| "mean_token_accuracy": 0.842687439918518, | |
| "num_tokens": 342692717.0, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1042, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.762403376047587e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |