Instructions to use roonbug/2b63aec8 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use roonbug/2b63aec8 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="roonbug/2b63aec8")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("roonbug/2b63aec8")
model = AutoModelForImageTextToText.from_pretrained("roonbug/2b63aec8")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use roonbug/2b63aec8 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "roonbug/2b63aec8"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "roonbug/2b63aec8",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/roonbug/2b63aec8
- SGLang
How to use roonbug/2b63aec8 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "roonbug/2b63aec8" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "roonbug/2b63aec8",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "roonbug/2b63aec8" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "roonbug/2b63aec8",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use roonbug/2b63aec8 with Docker Model Runner:
docker model run hf.co/roonbug/2b63aec8
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.8, | |
| "eval_steps": 100, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 1.142920307815075, | |
| "epoch": 0.016, | |
| "grad_norm": 290.0, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 42.6658, | |
| "mean_token_accuracy": 0.5620782226324081, | |
| "num_tokens": 195524.0, | |
| "step": 10 | |
| }, | |
| { | |
| "entropy": 1.148210159689188, | |
| "epoch": 0.032, | |
| "grad_norm": 239.0, | |
| "learning_rate": 1.2666666666666669e-06, | |
| "loss": 41.9984, | |
| "mean_token_accuracy": 0.5613080382347106, | |
| "num_tokens": 390903.0, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 1.1933260083198547, | |
| "epoch": 0.048, | |
| "grad_norm": 249.0, | |
| "learning_rate": 1.9333333333333336e-06, | |
| "loss": 40.6208, | |
| "mean_token_accuracy": 0.5657517908141017, | |
| "num_tokens": 589868.0, | |
| "step": 30 | |
| }, | |
| { | |
| "entropy": 1.2957281917333603, | |
| "epoch": 0.064, | |
| "grad_norm": 139.0, | |
| "learning_rate": 2.6e-06, | |
| "loss": 37.9032, | |
| "mean_token_accuracy": 0.5714796105399728, | |
| "num_tokens": 791190.0, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 1.5075685508549213, | |
| "epoch": 0.08, | |
| "grad_norm": 94.0, | |
| "learning_rate": 3.266666666666667e-06, | |
| "loss": 35.7561, | |
| "mean_token_accuracy": 0.5766569443047047, | |
| "num_tokens": 989860.0, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 1.7984249681234359, | |
| "epoch": 0.096, | |
| "grad_norm": 50.75, | |
| "learning_rate": 3.9333333333333335e-06, | |
| "loss": 33.4379, | |
| "mean_token_accuracy": 0.5814697606489062, | |
| "num_tokens": 1181777.0, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 1.8387351341545581, | |
| "epoch": 0.112, | |
| "grad_norm": 43.0, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 30.4219, | |
| "mean_token_accuracy": 0.5971228444948793, | |
| "num_tokens": 1385513.0, | |
| "step": 70 | |
| }, | |
| { | |
| "entropy": 1.7275233700871468, | |
| "epoch": 0.128, | |
| "grad_norm": 33.5, | |
| "learning_rate": 5.2666666666666665e-06, | |
| "loss": 28.4703, | |
| "mean_token_accuracy": 0.6095364252105355, | |
| "num_tokens": 1582368.0, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 1.7214979872107505, | |
| "epoch": 0.144, | |
| "grad_norm": 27.0, | |
| "learning_rate": 5.933333333333335e-06, | |
| "loss": 26.677, | |
| "mean_token_accuracy": 0.6243448719382286, | |
| "num_tokens": 1773764.0, | |
| "step": 90 | |
| }, | |
| { | |
| "entropy": 1.6311134904623033, | |
| "epoch": 0.16, | |
| "grad_norm": 22.0, | |
| "learning_rate": 6.600000000000001e-06, | |
| "loss": 25.7683, | |
| "mean_token_accuracy": 0.6301404371857643, | |
| "num_tokens": 1970077.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_biology_entropy": 1.5580159120559693, | |
| "eval_biology_loss": 1.5081593990325928, | |
| "eval_biology_mean_token_accuracy": 0.6457349667549134, | |
| "eval_biology_num_tokens": 1970077.0, | |
| "eval_biology_runtime": 48.7413, | |
| "eval_biology_samples_per_second": 10.258, | |
| "eval_biology_steps_per_second": 2.565, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_chemistry_entropy": 1.206756212234497, | |
| "eval_chemistry_loss": 1.1218774318695068, | |
| "eval_chemistry_mean_token_accuracy": 0.7205783066749573, | |
| "eval_chemistry_num_tokens": 1970077.0, | |
| "eval_chemistry_runtime": 60.3159, | |
| "eval_chemistry_samples_per_second": 8.29, | |
| "eval_chemistry_steps_per_second": 2.072, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_math_entropy": 0.9672308325767517, | |
| "eval_math_loss": 1.159799337387085, | |
| "eval_math_mean_token_accuracy": 0.7189845342636109, | |
| "eval_math_num_tokens": 1970077.0, | |
| "eval_math_runtime": 61.8237, | |
| "eval_math_samples_per_second": 8.088, | |
| "eval_math_steps_per_second": 2.022, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_physics_entropy": 1.1670387201309205, | |
| "eval_physics_loss": 1.1291608810424805, | |
| "eval_physics_mean_token_accuracy": 0.7211072521209717, | |
| "eval_physics_num_tokens": 1970077.0, | |
| "eval_physics_runtime": 70.4586, | |
| "eval_physics_samples_per_second": 7.096, | |
| "eval_physics_steps_per_second": 1.774, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 1.5482715763151647, | |
| "epoch": 0.176, | |
| "grad_norm": 21.125, | |
| "learning_rate": 7.266666666666668e-06, | |
| "loss": 24.5868, | |
| "mean_token_accuracy": 0.6385629490017891, | |
| "num_tokens": 2168354.0, | |
| "step": 110 | |
| }, | |
| { | |
| "entropy": 1.5266574397683144, | |
| "epoch": 0.192, | |
| "grad_norm": 22.875, | |
| "learning_rate": 7.933333333333334e-06, | |
| "loss": 24.2707, | |
| "mean_token_accuracy": 0.6432460084557533, | |
| "num_tokens": 2365822.0, | |
| "step": 120 | |
| }, | |
| { | |
| "entropy": 1.5192069873213767, | |
| "epoch": 0.208, | |
| "grad_norm": 20.875, | |
| "learning_rate": 8.6e-06, | |
| "loss": 24.1355, | |
| "mean_token_accuracy": 0.6436416517943144, | |
| "num_tokens": 2558762.0, | |
| "step": 130 | |
| }, | |
| { | |
| "entropy": 1.4698147468268872, | |
| "epoch": 0.224, | |
| "grad_norm": 20.125, | |
| "learning_rate": 9.266666666666667e-06, | |
| "loss": 23.5154, | |
| "mean_token_accuracy": 0.6499760080128908, | |
| "num_tokens": 2755347.0, | |
| "step": 140 | |
| }, | |
| { | |
| "entropy": 1.4506230603903532, | |
| "epoch": 0.24, | |
| "grad_norm": 19.625, | |
| "learning_rate": 9.933333333333334e-06, | |
| "loss": 23.2013, | |
| "mean_token_accuracy": 0.6523264441639185, | |
| "num_tokens": 2947346.0, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 1.4590953961014748, | |
| "epoch": 0.256, | |
| "grad_norm": 18.5, | |
| "learning_rate": 1.0600000000000002e-05, | |
| "loss": 23.3227, | |
| "mean_token_accuracy": 0.6508617259562015, | |
| "num_tokens": 3139957.0, | |
| "step": 160 | |
| }, | |
| { | |
| "entropy": 1.419396448880434, | |
| "epoch": 0.272, | |
| "grad_norm": 19.75, | |
| "learning_rate": 1.1266666666666668e-05, | |
| "loss": 22.7352, | |
| "mean_token_accuracy": 0.6572458431124687, | |
| "num_tokens": 3335951.0, | |
| "step": 170 | |
| }, | |
| { | |
| "entropy": 1.4005608204752207, | |
| "epoch": 0.288, | |
| "grad_norm": 19.75, | |
| "learning_rate": 1.1933333333333335e-05, | |
| "loss": 22.3969, | |
| "mean_token_accuracy": 0.6585959013551473, | |
| "num_tokens": 3539731.0, | |
| "step": 180 | |
| }, | |
| { | |
| "entropy": 1.391934547200799, | |
| "epoch": 0.304, | |
| "grad_norm": 18.75, | |
| "learning_rate": 1.2600000000000001e-05, | |
| "loss": 22.31, | |
| "mean_token_accuracy": 0.6621056370437145, | |
| "num_tokens": 3733488.0, | |
| "step": 190 | |
| }, | |
| { | |
| "entropy": 1.4028674490749835, | |
| "epoch": 0.32, | |
| "grad_norm": 22.25, | |
| "learning_rate": 1.3266666666666668e-05, | |
| "loss": 22.5559, | |
| "mean_token_accuracy": 0.6576981086283922, | |
| "num_tokens": 3920545.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_biology_entropy": 1.3209806289672852, | |
| "eval_biology_loss": 1.338399887084961, | |
| "eval_biology_mean_token_accuracy": 0.6720403518676757, | |
| "eval_biology_num_tokens": 3920545.0, | |
| "eval_biology_runtime": 48.5853, | |
| "eval_biology_samples_per_second": 10.291, | |
| "eval_biology_steps_per_second": 2.573, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_chemistry_entropy": 1.0033348879814148, | |
| "eval_chemistry_loss": 0.9935092926025391, | |
| "eval_chemistry_mean_token_accuracy": 0.7448974308967591, | |
| "eval_chemistry_num_tokens": 3920545.0, | |
| "eval_chemistry_runtime": 60.24, | |
| "eval_chemistry_samples_per_second": 8.3, | |
| "eval_chemistry_steps_per_second": 2.075, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_math_entropy": 0.8341804294586181, | |
| "eval_math_loss": 1.0635857582092285, | |
| "eval_math_mean_token_accuracy": 0.7432106451988221, | |
| "eval_math_num_tokens": 3920545.0, | |
| "eval_math_runtime": 61.8174, | |
| "eval_math_samples_per_second": 8.088, | |
| "eval_math_steps_per_second": 2.022, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_physics_entropy": 0.9652358031272888, | |
| "eval_physics_loss": 0.9950281977653503, | |
| "eval_physics_mean_token_accuracy": 0.7510108857154846, | |
| "eval_physics_num_tokens": 3920545.0, | |
| "eval_physics_runtime": 70.411, | |
| "eval_physics_samples_per_second": 7.101, | |
| "eval_physics_steps_per_second": 1.775, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 1.3548175282776356, | |
| "epoch": 0.336, | |
| "grad_norm": 19.625, | |
| "learning_rate": 1.3933333333333334e-05, | |
| "loss": 21.7763, | |
| "mean_token_accuracy": 0.6656343434005976, | |
| "num_tokens": 4114077.0, | |
| "step": 210 | |
| }, | |
| { | |
| "entropy": 1.3656601022928954, | |
| "epoch": 0.352, | |
| "grad_norm": 20.625, | |
| "learning_rate": 1.46e-05, | |
| "loss": 22.0972, | |
| "mean_token_accuracy": 0.6638848338276148, | |
| "num_tokens": 4306949.0, | |
| "step": 220 | |
| }, | |
| { | |
| "entropy": 1.3525194190442562, | |
| "epoch": 0.368, | |
| "grad_norm": 18.125, | |
| "learning_rate": 1.5266666666666667e-05, | |
| "loss": 21.7293, | |
| "mean_token_accuracy": 0.6680811226367951, | |
| "num_tokens": 4504001.0, | |
| "step": 230 | |
| }, | |
| { | |
| "entropy": 1.3454820621758699, | |
| "epoch": 0.384, | |
| "grad_norm": 21.25, | |
| "learning_rate": 1.5933333333333336e-05, | |
| "loss": 21.7032, | |
| "mean_token_accuracy": 0.6671383358538151, | |
| "num_tokens": 4693812.0, | |
| "step": 240 | |
| }, | |
| { | |
| "entropy": 1.3525703553110362, | |
| "epoch": 0.4, | |
| "grad_norm": 17.5, | |
| "learning_rate": 1.66e-05, | |
| "loss": 21.7856, | |
| "mean_token_accuracy": 0.666401931643486, | |
| "num_tokens": 4887094.0, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 1.351718918606639, | |
| "epoch": 0.416, | |
| "grad_norm": 19.0, | |
| "learning_rate": 1.726666666666667e-05, | |
| "loss": 21.9058, | |
| "mean_token_accuracy": 0.6651136819273233, | |
| "num_tokens": 5085369.0, | |
| "step": 260 | |
| }, | |
| { | |
| "entropy": 1.3526419658213853, | |
| "epoch": 0.432, | |
| "grad_norm": 20.875, | |
| "learning_rate": 1.7933333333333333e-05, | |
| "loss": 21.7813, | |
| "mean_token_accuracy": 0.6668458927422762, | |
| "num_tokens": 5271275.0, | |
| "step": 270 | |
| }, | |
| { | |
| "entropy": 1.3480545241385697, | |
| "epoch": 0.448, | |
| "grad_norm": 22.875, | |
| "learning_rate": 1.86e-05, | |
| "loss": 21.627, | |
| "mean_token_accuracy": 0.6677324704825878, | |
| "num_tokens": 5460559.0, | |
| "step": 280 | |
| }, | |
| { | |
| "entropy": 1.301166184991598, | |
| "epoch": 0.464, | |
| "grad_norm": 21.25, | |
| "learning_rate": 1.926666666666667e-05, | |
| "loss": 20.889, | |
| "mean_token_accuracy": 0.676617132872343, | |
| "num_tokens": 5653809.0, | |
| "step": 290 | |
| }, | |
| { | |
| "entropy": 1.318466317281127, | |
| "epoch": 0.48, | |
| "grad_norm": 17.125, | |
| "learning_rate": 1.9933333333333334e-05, | |
| "loss": 21.2936, | |
| "mean_token_accuracy": 0.6712827417999506, | |
| "num_tokens": 5850176.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_biology_entropy": 1.2827796216011047, | |
| "eval_biology_loss": 1.275201678276062, | |
| "eval_biology_mean_token_accuracy": 0.6830832781791687, | |
| "eval_biology_num_tokens": 5850176.0, | |
| "eval_biology_runtime": 48.4915, | |
| "eval_biology_samples_per_second": 10.311, | |
| "eval_biology_steps_per_second": 2.578, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_chemistry_entropy": 0.983495129108429, | |
| "eval_chemistry_loss": 0.9488818645477295, | |
| "eval_chemistry_mean_token_accuracy": 0.7523409638404847, | |
| "eval_chemistry_num_tokens": 5850176.0, | |
| "eval_chemistry_runtime": 60.1707, | |
| "eval_chemistry_samples_per_second": 8.31, | |
| "eval_chemistry_steps_per_second": 2.077, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_math_entropy": 0.8216862387657166, | |
| "eval_math_loss": 1.0297818183898926, | |
| "eval_math_mean_token_accuracy": 0.7488151121139527, | |
| "eval_math_num_tokens": 5850176.0, | |
| "eval_math_runtime": 61.6905, | |
| "eval_math_samples_per_second": 8.105, | |
| "eval_math_steps_per_second": 2.026, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_physics_entropy": 0.9433758721351624, | |
| "eval_physics_loss": 0.9520999193191528, | |
| "eval_physics_mean_token_accuracy": 0.7585058889389038, | |
| "eval_physics_num_tokens": 5850176.0, | |
| "eval_physics_runtime": 70.301, | |
| "eval_physics_samples_per_second": 7.112, | |
| "eval_physics_steps_per_second": 1.778, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 1.2579400472342968, | |
| "epoch": 0.496, | |
| "grad_norm": 17.75, | |
| "learning_rate": 1.9933333333333334e-05, | |
| "loss": 20.2011, | |
| "mean_token_accuracy": 0.6842056062072516, | |
| "num_tokens": 6046503.0, | |
| "step": 310 | |
| }, | |
| { | |
| "entropy": 1.3082518883049488, | |
| "epoch": 0.512, | |
| "grad_norm": 18.125, | |
| "learning_rate": 1.985925925925926e-05, | |
| "loss": 21.0658, | |
| "mean_token_accuracy": 0.6749501373618841, | |
| "num_tokens": 6240456.0, | |
| "step": 320 | |
| }, | |
| { | |
| "entropy": 1.3003981616348028, | |
| "epoch": 0.528, | |
| "grad_norm": 18.125, | |
| "learning_rate": 1.9785185185185187e-05, | |
| "loss": 20.9809, | |
| "mean_token_accuracy": 0.6757604543119669, | |
| "num_tokens": 6430555.0, | |
| "step": 330 | |
| }, | |
| { | |
| "entropy": 1.2986273631453513, | |
| "epoch": 0.544, | |
| "grad_norm": 17.0, | |
| "learning_rate": 1.971111111111111e-05, | |
| "loss": 20.8809, | |
| "mean_token_accuracy": 0.6782271713018417, | |
| "num_tokens": 6626006.0, | |
| "step": 340 | |
| }, | |
| { | |
| "entropy": 1.284830729290843, | |
| "epoch": 0.56, | |
| "grad_norm": 17.25, | |
| "learning_rate": 1.963703703703704e-05, | |
| "loss": 20.8197, | |
| "mean_token_accuracy": 0.6767117112874985, | |
| "num_tokens": 6820754.0, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 1.2683125745505095, | |
| "epoch": 0.576, | |
| "grad_norm": 17.0, | |
| "learning_rate": 1.9562962962962964e-05, | |
| "loss": 20.4541, | |
| "mean_token_accuracy": 0.6809794403612613, | |
| "num_tokens": 7021844.0, | |
| "step": 360 | |
| }, | |
| { | |
| "entropy": 1.2863252360373736, | |
| "epoch": 0.592, | |
| "grad_norm": 18.875, | |
| "learning_rate": 1.948888888888889e-05, | |
| "loss": 20.8043, | |
| "mean_token_accuracy": 0.676701345667243, | |
| "num_tokens": 7213951.0, | |
| "step": 370 | |
| }, | |
| { | |
| "entropy": 1.2630502216517925, | |
| "epoch": 0.608, | |
| "grad_norm": 18.75, | |
| "learning_rate": 1.9414814814814817e-05, | |
| "loss": 20.4041, | |
| "mean_token_accuracy": 0.6803740747272968, | |
| "num_tokens": 7416773.0, | |
| "step": 380 | |
| }, | |
| { | |
| "entropy": 1.2804703898727894, | |
| "epoch": 0.624, | |
| "grad_norm": 19.25, | |
| "learning_rate": 1.9340740740740743e-05, | |
| "loss": 20.6218, | |
| "mean_token_accuracy": 0.6788272958248853, | |
| "num_tokens": 7612843.0, | |
| "step": 390 | |
| }, | |
| { | |
| "entropy": 1.2843346055597067, | |
| "epoch": 0.64, | |
| "grad_norm": 18.0, | |
| "learning_rate": 1.926666666666667e-05, | |
| "loss": 20.7171, | |
| "mean_token_accuracy": 0.6782444745302201, | |
| "num_tokens": 7801633.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_biology_entropy": 1.226506398677826, | |
| "eval_biology_loss": 1.2382104396820068, | |
| "eval_biology_mean_token_accuracy": 0.6894095778465271, | |
| "eval_biology_num_tokens": 7801633.0, | |
| "eval_biology_runtime": 48.5507, | |
| "eval_biology_samples_per_second": 10.299, | |
| "eval_biology_steps_per_second": 2.575, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_chemistry_entropy": 0.9317227191925049, | |
| "eval_chemistry_loss": 0.9207452535629272, | |
| "eval_chemistry_mean_token_accuracy": 0.7581370029449462, | |
| "eval_chemistry_num_tokens": 7801633.0, | |
| "eval_chemistry_runtime": 60.2113, | |
| "eval_chemistry_samples_per_second": 8.304, | |
| "eval_chemistry_steps_per_second": 2.076, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_math_entropy": 0.7863595089912414, | |
| "eval_math_loss": 1.010460376739502, | |
| "eval_math_mean_token_accuracy": 0.7535392093658447, | |
| "eval_math_num_tokens": 7801633.0, | |
| "eval_math_runtime": 61.807, | |
| "eval_math_samples_per_second": 8.09, | |
| "eval_math_steps_per_second": 2.022, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_physics_entropy": 0.8958085932731629, | |
| "eval_physics_loss": 0.9257401823997498, | |
| "eval_physics_mean_token_accuracy": 0.7637984156608582, | |
| "eval_physics_num_tokens": 7801633.0, | |
| "eval_physics_runtime": 70.3663, | |
| "eval_physics_samples_per_second": 7.106, | |
| "eval_physics_steps_per_second": 1.776, | |
| "step": 400 | |
| }, | |
| { | |
| "entropy": 1.278659427165985, | |
| "epoch": 0.656, | |
| "grad_norm": 18.25, | |
| "learning_rate": 1.9192592592592593e-05, | |
| "loss": 20.6682, | |
| "mean_token_accuracy": 0.6772829819470644, | |
| "num_tokens": 7995843.0, | |
| "step": 410 | |
| }, | |
| { | |
| "entropy": 1.2931427203118802, | |
| "epoch": 0.672, | |
| "grad_norm": 18.625, | |
| "learning_rate": 1.911851851851852e-05, | |
| "loss": 20.8656, | |
| "mean_token_accuracy": 0.6753748003393412, | |
| "num_tokens": 8183103.0, | |
| "step": 420 | |
| }, | |
| { | |
| "entropy": 1.2739692747592926, | |
| "epoch": 0.688, | |
| "grad_norm": 16.75, | |
| "learning_rate": 1.9044444444444446e-05, | |
| "loss": 20.5407, | |
| "mean_token_accuracy": 0.6812681049108505, | |
| "num_tokens": 8385976.0, | |
| "step": 430 | |
| }, | |
| { | |
| "entropy": 1.2659825466573238, | |
| "epoch": 0.704, | |
| "grad_norm": 16.25, | |
| "learning_rate": 1.8970370370370372e-05, | |
| "loss": 20.4243, | |
| "mean_token_accuracy": 0.6820976916700602, | |
| "num_tokens": 8578431.0, | |
| "step": 440 | |
| }, | |
| { | |
| "entropy": 1.220404140278697, | |
| "epoch": 0.72, | |
| "grad_norm": 16.75, | |
| "learning_rate": 1.8896296296296295e-05, | |
| "loss": 19.6546, | |
| "mean_token_accuracy": 0.6908745598047972, | |
| "num_tokens": 8781342.0, | |
| "step": 450 | |
| }, | |
| { | |
| "entropy": 1.2406103231012822, | |
| "epoch": 0.736, | |
| "grad_norm": 16.75, | |
| "learning_rate": 1.8822222222222225e-05, | |
| "loss": 19.9745, | |
| "mean_token_accuracy": 0.6853331789374352, | |
| "num_tokens": 8977918.0, | |
| "step": 460 | |
| }, | |
| { | |
| "entropy": 1.2618801843374967, | |
| "epoch": 0.752, | |
| "grad_norm": 17.125, | |
| "learning_rate": 1.874814814814815e-05, | |
| "loss": 20.4041, | |
| "mean_token_accuracy": 0.6825968738645315, | |
| "num_tokens": 9169322.0, | |
| "step": 470 | |
| }, | |
| { | |
| "entropy": 1.2232345014810562, | |
| "epoch": 0.768, | |
| "grad_norm": 19.25, | |
| "learning_rate": 1.8674074074074075e-05, | |
| "loss": 19.7045, | |
| "mean_token_accuracy": 0.6888250291347504, | |
| "num_tokens": 9368141.0, | |
| "step": 480 | |
| }, | |
| { | |
| "entropy": 1.25159954726696, | |
| "epoch": 0.784, | |
| "grad_norm": 18.25, | |
| "learning_rate": 1.86e-05, | |
| "loss": 20.2036, | |
| "mean_token_accuracy": 0.6849453710019588, | |
| "num_tokens": 9565236.0, | |
| "step": 490 | |
| }, | |
| { | |
| "entropy": 1.264250884205103, | |
| "epoch": 0.8, | |
| "grad_norm": 19.25, | |
| "learning_rate": 1.8525925925925928e-05, | |
| "loss": 20.5299, | |
| "mean_token_accuracy": 0.6811827480792999, | |
| "num_tokens": 9761227.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_biology_entropy": 1.2163097896575927, | |
| "eval_biology_loss": 1.2177292108535767, | |
| "eval_biology_mean_token_accuracy": 0.6932459664344788, | |
| "eval_biology_num_tokens": 9761227.0, | |
| "eval_biology_runtime": 48.5438, | |
| "eval_biology_samples_per_second": 10.3, | |
| "eval_biology_steps_per_second": 2.575, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_chemistry_entropy": 0.9239063205718994, | |
| "eval_chemistry_loss": 0.9047155380249023, | |
| "eval_chemistry_mean_token_accuracy": 0.761792631149292, | |
| "eval_chemistry_num_tokens": 9761227.0, | |
| "eval_chemistry_runtime": 59.9546, | |
| "eval_chemistry_samples_per_second": 8.34, | |
| "eval_chemistry_steps_per_second": 2.085, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_math_entropy": 0.7864464523792267, | |
| "eval_math_loss": 0.9939978122711182, | |
| "eval_math_mean_token_accuracy": 0.7574145245552063, | |
| "eval_math_num_tokens": 9761227.0, | |
| "eval_math_runtime": 61.7812, | |
| "eval_math_samples_per_second": 8.093, | |
| "eval_math_steps_per_second": 2.023, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_physics_entropy": 0.889360978603363, | |
| "eval_physics_loss": 0.9096766710281372, | |
| "eval_physics_mean_token_accuracy": 0.7674052910804748, | |
| "eval_physics_num_tokens": 9761227.0, | |
| "eval_physics_runtime": 70.5356, | |
| "eval_physics_samples_per_second": 7.089, | |
| "eval_physics_steps_per_second": 1.772, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0976936565348991e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |