Image-Text-to-Text
Transformers
Safetensors
qwen3_vl
qwen3-vl
video-language-model
egocentric-video
ms-swift
sft
conversational
Instructions to use egotools-dev/egotools-8b-v3_3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use egotools-dev/egotools-8b-v3_3 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="egotools-dev/egotools-8b-v3_3")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("egotools-dev/egotools-8b-v3_3")
model = AutoModelForImageTextToText.from_pretrained("egotools-dev/egotools-8b-v3_3")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use egotools-dev/egotools-8b-v3_3 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "egotools-dev/egotools-8b-v3_3"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "egotools-dev/egotools-8b-v3_3",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/egotools-dev/egotools-8b-v3_3
- SGLang
How to use egotools-dev/egotools-8b-v3_3 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "egotools-dev/egotools-8b-v3_3" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "egotools-dev/egotools-8b-v3_3",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "egotools-dev/egotools-8b-v3_3" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "egotools-dev/egotools-8b-v3_3",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use egotools-dev/egotools-8b-v3_3 with Docker Model Runner:
docker model run hf.co/egotools-dev/egotools-8b-v3_3
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.6618863761720905, | |
| "eval_steps": 300.0, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0011031439602868175, | |
| "grad_norm": 80.42967325823248, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.6655548810958862, | |
| "step": 1, | |
| "token_acc": 0.6208588957055214 | |
| }, | |
| { | |
| "epoch": 0.005515719801434087, | |
| "grad_norm": 12.936447236966638, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.6539018154144287, | |
| "step": 5, | |
| "token_acc": 0.6644312612844984 | |
| }, | |
| { | |
| "epoch": 0.011031439602868174, | |
| "grad_norm": 7.485145348211075, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.1399970054626465, | |
| "step": 10, | |
| "token_acc": 0.716547901821061 | |
| }, | |
| { | |
| "epoch": 0.01654715940430226, | |
| "grad_norm": 7.10761756326815, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.1828346252441406, | |
| "step": 15, | |
| "token_acc": 0.6928796755295178 | |
| }, | |
| { | |
| "epoch": 0.02206287920573635, | |
| "grad_norm": 5.659684098393193, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.0604573249816895, | |
| "step": 20, | |
| "token_acc": 0.7371653156472611 | |
| }, | |
| { | |
| "epoch": 0.027578599007170437, | |
| "grad_norm": 6.432203040875114, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.0374173164367675, | |
| "step": 25, | |
| "token_acc": 0.7369089984748348 | |
| }, | |
| { | |
| "epoch": 0.03309431880860452, | |
| "grad_norm": 6.0637637354614125, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9596109390258789, | |
| "step": 30, | |
| "token_acc": 0.7449127906976745 | |
| }, | |
| { | |
| "epoch": 0.03861003861003861, | |
| "grad_norm": 5.9792048911606335, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9912214279174805, | |
| "step": 35, | |
| "token_acc": 0.7413617886178862 | |
| }, | |
| { | |
| "epoch": 0.0441257584114727, | |
| "grad_norm": 6.031366828095079, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.0473726272583008, | |
| "step": 40, | |
| "token_acc": 0.727144535840188 | |
| }, | |
| { | |
| "epoch": 0.049641478212906785, | |
| "grad_norm": 6.186527950833231, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9992570877075195, | |
| "step": 45, | |
| "token_acc": 0.7360217714002969 | |
| }, | |
| { | |
| "epoch": 0.05515719801434087, | |
| "grad_norm": 5.923085345386395, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.002680206298828, | |
| "step": 50, | |
| "token_acc": 0.7391618497109826 | |
| }, | |
| { | |
| "epoch": 0.06067291781577496, | |
| "grad_norm": 4.9260097226563255, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8863202095031738, | |
| "step": 55, | |
| "token_acc": 0.7527737578388809 | |
| }, | |
| { | |
| "epoch": 0.06618863761720904, | |
| "grad_norm": 5.560254131314214, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9248697280883789, | |
| "step": 60, | |
| "token_acc": 0.7579394848712178 | |
| }, | |
| { | |
| "epoch": 0.07170435741864313, | |
| "grad_norm": 5.180260599645047, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.004658317565918, | |
| "step": 65, | |
| "token_acc": 0.7360208062418726 | |
| }, | |
| { | |
| "epoch": 0.07722007722007722, | |
| "grad_norm": 6.299333554712354, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9439926147460938, | |
| "step": 70, | |
| "token_acc": 0.751812046988253 | |
| }, | |
| { | |
| "epoch": 0.0827357970215113, | |
| "grad_norm": 5.622631000909693, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8676441192626954, | |
| "step": 75, | |
| "token_acc": 0.760814889336016 | |
| }, | |
| { | |
| "epoch": 0.0882515168229454, | |
| "grad_norm": 5.613397002114218, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8931824684143066, | |
| "step": 80, | |
| "token_acc": 0.7616038882138517 | |
| }, | |
| { | |
| "epoch": 0.09376723662437948, | |
| "grad_norm": 5.897499862699952, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.0003661155700683, | |
| "step": 85, | |
| "token_acc": 0.7361477572559367 | |
| }, | |
| { | |
| "epoch": 0.09928295642581357, | |
| "grad_norm": 5.838989842949948, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8919829368591309, | |
| "step": 90, | |
| "token_acc": 0.7569311663479924 | |
| }, | |
| { | |
| "epoch": 0.10479867622724766, | |
| "grad_norm": 5.732772113211347, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9857375144958496, | |
| "step": 95, | |
| "token_acc": 0.7319040543409042 | |
| }, | |
| { | |
| "epoch": 0.11031439602868175, | |
| "grad_norm": 5.887155914575355, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9643180847167969, | |
| "step": 100, | |
| "token_acc": 0.7468728678644531 | |
| }, | |
| { | |
| "epoch": 0.11583011583011583, | |
| "grad_norm": 5.409153898947996, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9775169372558594, | |
| "step": 105, | |
| "token_acc": 0.7443522360534809 | |
| }, | |
| { | |
| "epoch": 0.12134583563154992, | |
| "grad_norm": 5.738347427048291, | |
| "learning_rate": 2.3e-06, | |
| "loss": 1.0265106201171874, | |
| "step": 110, | |
| "token_acc": 0.7265641025641025 | |
| }, | |
| { | |
| "epoch": 0.126861555432984, | |
| "grad_norm": 5.38681474950728, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9541057586669922, | |
| "step": 115, | |
| "token_acc": 0.747302805082714 | |
| }, | |
| { | |
| "epoch": 0.13237727523441808, | |
| "grad_norm": 5.856303394018206, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8228609085083007, | |
| "step": 120, | |
| "token_acc": 0.7779299014238773 | |
| }, | |
| { | |
| "epoch": 0.13789299503585217, | |
| "grad_norm": 6.568516319919617, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9721288681030273, | |
| "step": 125, | |
| "token_acc": 0.7395016151361329 | |
| }, | |
| { | |
| "epoch": 0.14340871483728626, | |
| "grad_norm": 5.54045872938136, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9882600784301758, | |
| "step": 130, | |
| "token_acc": 0.7350409836065573 | |
| }, | |
| { | |
| "epoch": 0.14892443463872035, | |
| "grad_norm": 5.612320181227532, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9342703819274902, | |
| "step": 135, | |
| "token_acc": 0.7464104263309035 | |
| }, | |
| { | |
| "epoch": 0.15444015444015444, | |
| "grad_norm": 5.925987985108847, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9266027450561524, | |
| "step": 140, | |
| "token_acc": 0.7487753673897831 | |
| }, | |
| { | |
| "epoch": 0.15995587424158852, | |
| "grad_norm": 5.59653045493395, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8937458038330078, | |
| "step": 145, | |
| "token_acc": 0.7624970664163342 | |
| }, | |
| { | |
| "epoch": 0.1654715940430226, | |
| "grad_norm": 5.374039667260155, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8537099838256836, | |
| "step": 150, | |
| "token_acc": 0.7640309304065852 | |
| }, | |
| { | |
| "epoch": 0.1709873138444567, | |
| "grad_norm": 5.376937826461383, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8238465309143066, | |
| "step": 155, | |
| "token_acc": 0.7761599210266535 | |
| }, | |
| { | |
| "epoch": 0.1765030336458908, | |
| "grad_norm": 6.0245180163205285, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8189101219177246, | |
| "step": 160, | |
| "token_acc": 0.7774302840761012 | |
| }, | |
| { | |
| "epoch": 0.18201875344732488, | |
| "grad_norm": 5.571460118689288, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8788368225097656, | |
| "step": 165, | |
| "token_acc": 0.760459995106435 | |
| }, | |
| { | |
| "epoch": 0.18753447324875896, | |
| "grad_norm": 5.088793082374291, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8897226333618165, | |
| "step": 170, | |
| "token_acc": 0.7589285714285714 | |
| }, | |
| { | |
| "epoch": 0.19305019305019305, | |
| "grad_norm": 5.578076711526945, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8561611175537109, | |
| "step": 175, | |
| "token_acc": 0.7607285429141717 | |
| }, | |
| { | |
| "epoch": 0.19856591285162714, | |
| "grad_norm": 5.81713562547314, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8790461540222168, | |
| "step": 180, | |
| "token_acc": 0.753576372865713 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 5.254765297480428, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7894742965698243, | |
| "step": 185, | |
| "token_acc": 0.7727930535455861 | |
| }, | |
| { | |
| "epoch": 0.20959735245449532, | |
| "grad_norm": 5.076375360468776, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8446966171264648, | |
| "step": 190, | |
| "token_acc": 0.7672064777327935 | |
| }, | |
| { | |
| "epoch": 0.2151130722559294, | |
| "grad_norm": 5.763028979469674, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.961794662475586, | |
| "step": 195, | |
| "token_acc": 0.7398701589433624 | |
| }, | |
| { | |
| "epoch": 0.2206287920573635, | |
| "grad_norm": 5.351427081900536, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9339935302734375, | |
| "step": 200, | |
| "token_acc": 0.7431607506217499 | |
| }, | |
| { | |
| "epoch": 0.22614451185879758, | |
| "grad_norm": 5.713651299205875, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.918415641784668, | |
| "step": 205, | |
| "token_acc": 0.7481903926299627 | |
| }, | |
| { | |
| "epoch": 0.23166023166023167, | |
| "grad_norm": 5.8891737450130535, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9010303497314454, | |
| "step": 210, | |
| "token_acc": 0.7571428571428571 | |
| }, | |
| { | |
| "epoch": 0.23717595146166576, | |
| "grad_norm": 5.4902683255452915, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8169702529907227, | |
| "step": 215, | |
| "token_acc": 0.7799597180261832 | |
| }, | |
| { | |
| "epoch": 0.24269167126309985, | |
| "grad_norm": 5.777757326601303, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9132566452026367, | |
| "step": 220, | |
| "token_acc": 0.7451829400303096 | |
| }, | |
| { | |
| "epoch": 0.24820739106453393, | |
| "grad_norm": 5.440176086672954, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9655065536499023, | |
| "step": 225, | |
| "token_acc": 0.7356965174129353 | |
| }, | |
| { | |
| "epoch": 0.253723110865968, | |
| "grad_norm": 5.65109550423067, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9338722229003906, | |
| "step": 230, | |
| "token_acc": 0.740830755634114 | |
| }, | |
| { | |
| "epoch": 0.2592388306674021, | |
| "grad_norm": 5.558767495868624, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9302779197692871, | |
| "step": 235, | |
| "token_acc": 0.7472228044435129 | |
| }, | |
| { | |
| "epoch": 0.26475455046883617, | |
| "grad_norm": 5.0652164247691, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8661215782165528, | |
| "step": 240, | |
| "token_acc": 0.7648968235566891 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 5.301144006085399, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9218810081481934, | |
| "step": 245, | |
| "token_acc": 0.7493386243386243 | |
| }, | |
| { | |
| "epoch": 0.27578599007170435, | |
| "grad_norm": 5.6495062752144625, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7810210227966309, | |
| "step": 250, | |
| "token_acc": 0.7860394537177542 | |
| }, | |
| { | |
| "epoch": 0.28130170987313846, | |
| "grad_norm": 5.522127957882286, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8038553237915039, | |
| "step": 255, | |
| "token_acc": 0.7727514635444386 | |
| }, | |
| { | |
| "epoch": 0.2868174296745725, | |
| "grad_norm": 5.2610053652109405, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8923781394958497, | |
| "step": 260, | |
| "token_acc": 0.7542778918548939 | |
| }, | |
| { | |
| "epoch": 0.29233314947600664, | |
| "grad_norm": 5.613909953834712, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8282554626464844, | |
| "step": 265, | |
| "token_acc": 0.7708383377372088 | |
| }, | |
| { | |
| "epoch": 0.2978488692774407, | |
| "grad_norm": 4.960174308621677, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.939533805847168, | |
| "step": 270, | |
| "token_acc": 0.745945945945946 | |
| }, | |
| { | |
| "epoch": 0.3033645890788748, | |
| "grad_norm": 5.338393166552948, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7566696166992187, | |
| "step": 275, | |
| "token_acc": 0.7783882783882784 | |
| }, | |
| { | |
| "epoch": 0.3088803088803089, | |
| "grad_norm": 5.85539301408898, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7774906158447266, | |
| "step": 280, | |
| "token_acc": 0.778774673160728 | |
| }, | |
| { | |
| "epoch": 0.314396028681743, | |
| "grad_norm": 5.5554378762103305, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8302905082702636, | |
| "step": 285, | |
| "token_acc": 0.7736331119059785 | |
| }, | |
| { | |
| "epoch": 0.31991174848317705, | |
| "grad_norm": 5.64607545742485, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8949955940246582, | |
| "step": 290, | |
| "token_acc": 0.7522200563136235 | |
| }, | |
| { | |
| "epoch": 0.32542746828461117, | |
| "grad_norm": 5.545223764081705, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9496533393859863, | |
| "step": 295, | |
| "token_acc": 0.739816799844085 | |
| }, | |
| { | |
| "epoch": 0.3309431880860452, | |
| "grad_norm": 5.102348588101583, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8521288871765137, | |
| "step": 300, | |
| "token_acc": 0.7637759710930443 | |
| }, | |
| { | |
| "epoch": 0.33645890788747934, | |
| "grad_norm": 5.799379698214005, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8561582565307617, | |
| "step": 305, | |
| "token_acc": 0.7558797909407665 | |
| }, | |
| { | |
| "epoch": 0.3419746276889134, | |
| "grad_norm": 5.245842611420519, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.855079460144043, | |
| "step": 310, | |
| "token_acc": 0.7621547591874002 | |
| }, | |
| { | |
| "epoch": 0.3474903474903475, | |
| "grad_norm": 5.339700948065717, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8615094184875488, | |
| "step": 315, | |
| "token_acc": 0.7575689616505943 | |
| }, | |
| { | |
| "epoch": 0.3530060672917816, | |
| "grad_norm": 5.608312178321082, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8664688110351563, | |
| "step": 320, | |
| "token_acc": 0.7657678244972578 | |
| }, | |
| { | |
| "epoch": 0.35852178709321564, | |
| "grad_norm": 5.41193963454708, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.83935546875, | |
| "step": 325, | |
| "token_acc": 0.7757989753598439 | |
| }, | |
| { | |
| "epoch": 0.36403750689464975, | |
| "grad_norm": 5.220504527588121, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8449893951416015, | |
| "step": 330, | |
| "token_acc": 0.772705078125 | |
| }, | |
| { | |
| "epoch": 0.3695532266960838, | |
| "grad_norm": 5.4190449827537135, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8168981552124024, | |
| "step": 335, | |
| "token_acc": 0.7692491630798661 | |
| }, | |
| { | |
| "epoch": 0.37506894649751793, | |
| "grad_norm": 5.590435008250206, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8648731231689453, | |
| "step": 340, | |
| "token_acc": 0.7575426322693485 | |
| }, | |
| { | |
| "epoch": 0.380584666298952, | |
| "grad_norm": 5.174005161957214, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9896360397338867, | |
| "step": 345, | |
| "token_acc": 0.7302983932670237 | |
| }, | |
| { | |
| "epoch": 0.3861003861003861, | |
| "grad_norm": 5.511715259575185, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8663737297058105, | |
| "step": 350, | |
| "token_acc": 0.7572226268511775 | |
| }, | |
| { | |
| "epoch": 0.39161610590182017, | |
| "grad_norm": 5.295113853870287, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8019681930541992, | |
| "step": 355, | |
| "token_acc": 0.7747044917257683 | |
| }, | |
| { | |
| "epoch": 0.3971318257032543, | |
| "grad_norm": 5.2190425564256415, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8304034233093261, | |
| "step": 360, | |
| "token_acc": 0.7567389875082183 | |
| }, | |
| { | |
| "epoch": 0.40264754550468834, | |
| "grad_norm": 5.594923827868745, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8835367202758789, | |
| "step": 365, | |
| "token_acc": 0.7492839832562238 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 5.399822330777811, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8703033447265625, | |
| "step": 370, | |
| "token_acc": 0.7619047619047619 | |
| }, | |
| { | |
| "epoch": 0.4136789851075565, | |
| "grad_norm": 5.718517969581787, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7746540069580078, | |
| "step": 375, | |
| "token_acc": 0.7788509162951956 | |
| }, | |
| { | |
| "epoch": 0.41919470490899063, | |
| "grad_norm": 5.358207310874379, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7805415630340576, | |
| "step": 380, | |
| "token_acc": 0.7823351786612984 | |
| }, | |
| { | |
| "epoch": 0.4247104247104247, | |
| "grad_norm": 5.240031526062139, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.892514419555664, | |
| "step": 385, | |
| "token_acc": 0.7574438845625286 | |
| }, | |
| { | |
| "epoch": 0.4302261445118588, | |
| "grad_norm": 5.090203604118294, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7661482810974121, | |
| "step": 390, | |
| "token_acc": 0.7836170737725142 | |
| }, | |
| { | |
| "epoch": 0.43574186431329287, | |
| "grad_norm": 5.270675577305129, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7520760059356689, | |
| "step": 395, | |
| "token_acc": 0.7893167028199566 | |
| }, | |
| { | |
| "epoch": 0.441257584114727, | |
| "grad_norm": 5.4625302980431165, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8551373481750488, | |
| "step": 400, | |
| "token_acc": 0.7630287450338864 | |
| }, | |
| { | |
| "epoch": 0.44677330391616105, | |
| "grad_norm": 5.249069454600804, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.910746955871582, | |
| "step": 405, | |
| "token_acc": 0.7516659262549977 | |
| }, | |
| { | |
| "epoch": 0.45228902371759516, | |
| "grad_norm": 5.236854652942137, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8199291229248047, | |
| "step": 410, | |
| "token_acc": 0.7682183767974653 | |
| }, | |
| { | |
| "epoch": 0.4578047435190292, | |
| "grad_norm": 6.027185729170618, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8450562477111816, | |
| "step": 415, | |
| "token_acc": 0.7607934655775963 | |
| }, | |
| { | |
| "epoch": 0.46332046332046334, | |
| "grad_norm": 4.870989267188852, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9044759750366211, | |
| "step": 420, | |
| "token_acc": 0.7498462169366413 | |
| }, | |
| { | |
| "epoch": 0.4688361831218974, | |
| "grad_norm": 5.291951695042281, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8467486381530762, | |
| "step": 425, | |
| "token_acc": 0.7576541612764123 | |
| }, | |
| { | |
| "epoch": 0.4743519029233315, | |
| "grad_norm": 5.62593903563829, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8833629608154296, | |
| "step": 430, | |
| "token_acc": 0.7509285051067781 | |
| }, | |
| { | |
| "epoch": 0.4798676227247656, | |
| "grad_norm": 5.128799172526207, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8988676071166992, | |
| "step": 435, | |
| "token_acc": 0.7490849938999593 | |
| }, | |
| { | |
| "epoch": 0.4853833425261997, | |
| "grad_norm": 5.239699648307152, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7166192054748535, | |
| "step": 440, | |
| "token_acc": 0.7969401947148818 | |
| }, | |
| { | |
| "epoch": 0.49089906232763375, | |
| "grad_norm": 5.4681083200632905, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9467007637023925, | |
| "step": 445, | |
| "token_acc": 0.7396415494314897 | |
| }, | |
| { | |
| "epoch": 0.49641478212906787, | |
| "grad_norm": 4.685989886939226, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7727291584014893, | |
| "step": 450, | |
| "token_acc": 0.7793472966390648 | |
| }, | |
| { | |
| "epoch": 0.5019305019305019, | |
| "grad_norm": 5.538988406196133, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8206657409667969, | |
| "step": 455, | |
| "token_acc": 0.7731393129770993 | |
| }, | |
| { | |
| "epoch": 0.507446221731936, | |
| "grad_norm": 4.63939429197835, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8562620162963868, | |
| "step": 460, | |
| "token_acc": 0.7662946428571429 | |
| }, | |
| { | |
| "epoch": 0.5129619415333702, | |
| "grad_norm": 5.1240560151191445, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7872621536254882, | |
| "step": 465, | |
| "token_acc": 0.7787717601547389 | |
| }, | |
| { | |
| "epoch": 0.5184776613348042, | |
| "grad_norm": 5.189600840792027, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8223024368286133, | |
| "step": 470, | |
| "token_acc": 0.7725631768953068 | |
| }, | |
| { | |
| "epoch": 0.5239933811362383, | |
| "grad_norm": 4.625349854858132, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7645779609680176, | |
| "step": 475, | |
| "token_acc": 0.7779833487511564 | |
| }, | |
| { | |
| "epoch": 0.5295091009376723, | |
| "grad_norm": 5.392461676265602, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.863577938079834, | |
| "step": 480, | |
| "token_acc": 0.7583280288563548 | |
| }, | |
| { | |
| "epoch": 0.5350248207391064, | |
| "grad_norm": 5.809387834675999, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7966668128967285, | |
| "step": 485, | |
| "token_acc": 0.7756534365924492 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 5.304450067691333, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9444230079650879, | |
| "step": 490, | |
| "token_acc": 0.7450345423143351 | |
| }, | |
| { | |
| "epoch": 0.5460562603419746, | |
| "grad_norm": 5.583804788469441, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9105037689208985, | |
| "step": 495, | |
| "token_acc": 0.7439771335238873 | |
| }, | |
| { | |
| "epoch": 0.5515719801434087, | |
| "grad_norm": 6.054938244093431, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9297395706176758, | |
| "step": 500, | |
| "token_acc": 0.7482532288799492 | |
| }, | |
| { | |
| "epoch": 0.5570876999448428, | |
| "grad_norm": 5.391503075184599, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.846977424621582, | |
| "step": 505, | |
| "token_acc": 0.7518848526387937 | |
| }, | |
| { | |
| "epoch": 0.5626034197462769, | |
| "grad_norm": 5.030658472043065, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8991237640380859, | |
| "step": 510, | |
| "token_acc": 0.7539121114683816 | |
| }, | |
| { | |
| "epoch": 0.568119139547711, | |
| "grad_norm": 5.104958695838692, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8426759719848633, | |
| "step": 515, | |
| "token_acc": 0.7644656228727025 | |
| }, | |
| { | |
| "epoch": 0.573634859349145, | |
| "grad_norm": 6.009882223493915, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7240866661071778, | |
| "step": 520, | |
| "token_acc": 0.7948649371489703 | |
| }, | |
| { | |
| "epoch": 0.5791505791505791, | |
| "grad_norm": 5.637457445639113, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8636373519897461, | |
| "step": 525, | |
| "token_acc": 0.7649465787679018 | |
| }, | |
| { | |
| "epoch": 0.5846662989520133, | |
| "grad_norm": 5.3912830874814555, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8545609474182129, | |
| "step": 530, | |
| "token_acc": 0.7600548446069469 | |
| }, | |
| { | |
| "epoch": 0.5901820187534473, | |
| "grad_norm": 5.223533669476021, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7941525936126709, | |
| "step": 535, | |
| "token_acc": 0.7761698022190062 | |
| }, | |
| { | |
| "epoch": 0.5956977385548814, | |
| "grad_norm": 5.115121860566828, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8650318145751953, | |
| "step": 540, | |
| "token_acc": 0.7646005817856344 | |
| }, | |
| { | |
| "epoch": 0.6012134583563155, | |
| "grad_norm": 4.715835836641467, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8254315376281738, | |
| "step": 545, | |
| "token_acc": 0.7698187377156585 | |
| }, | |
| { | |
| "epoch": 0.6067291781577496, | |
| "grad_norm": 5.200096789536537, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7865041255950928, | |
| "step": 550, | |
| "token_acc": 0.7788415509813308 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 5.62598549880936, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.9002869606018067, | |
| "step": 555, | |
| "token_acc": 0.7615146147032772 | |
| }, | |
| { | |
| "epoch": 0.6177606177606177, | |
| "grad_norm": 6.376332687516541, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8603617668151855, | |
| "step": 560, | |
| "token_acc": 0.758905299739357 | |
| }, | |
| { | |
| "epoch": 0.6232763375620518, | |
| "grad_norm": 5.370356585251875, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8701972961425781, | |
| "step": 565, | |
| "token_acc": 0.7542935653766888 | |
| }, | |
| { | |
| "epoch": 0.628792057363486, | |
| "grad_norm": 4.884591590707582, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.840721321105957, | |
| "step": 570, | |
| "token_acc": 0.7689443714410863 | |
| }, | |
| { | |
| "epoch": 0.63430777716492, | |
| "grad_norm": 5.138799986301503, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.7376739501953125, | |
| "step": 575, | |
| "token_acc": 0.7904834996162702 | |
| }, | |
| { | |
| "epoch": 0.6398234969663541, | |
| "grad_norm": 5.3450150515914885, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.755347728729248, | |
| "step": 580, | |
| "token_acc": 0.7790931989924433 | |
| }, | |
| { | |
| "epoch": 0.6453392167677882, | |
| "grad_norm": 4.895436117128604, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8265247344970703, | |
| "step": 585, | |
| "token_acc": 0.7729456991830851 | |
| }, | |
| { | |
| "epoch": 0.6508549365692223, | |
| "grad_norm": 5.143509534036041, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8215790748596191, | |
| "step": 590, | |
| "token_acc": 0.7608391608391608 | |
| }, | |
| { | |
| "epoch": 0.6563706563706564, | |
| "grad_norm": 5.082286518166302, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8152825355529785, | |
| "step": 595, | |
| "token_acc": 0.7685413399058929 | |
| }, | |
| { | |
| "epoch": 0.6618863761720905, | |
| "grad_norm": 5.326424315651092, | |
| "learning_rate": 2.3e-06, | |
| "loss": 0.8500395774841308, | |
| "step": 600, | |
| "token_acc": 0.7704613957801993 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 907, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1514130748997632.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |