Audio-Text-to-Text
Transformers
Safetensors
gemma4
image-text-to-text
audio
multimodal
gemma
gemma-4
speech-qa
ugandan-languages
low-resource-languages
sunbird
Instructions to use Sunbird/sunbirdtutor-gemma-4-e2b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Sunbird/sunbirdtutor-gemma-4-e2b with Transformers:
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("Sunbird/sunbirdtutor-gemma-4-e2b")
model = AutoModelForImageTextToText.from_pretrained("Sunbird/sunbirdtutor-gemma-4-e2b")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 53, | |
| "global_step": 528, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.018957345971563982, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 4.5e-06, | |
| "loss": 0.37855618000030516, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.037914691943127965, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 9.5e-06, | |
| "loss": 0.374945330619812, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05687203791469194, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.45e-05, | |
| "loss": 0.362351393699646, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07582938388625593, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 1.9500000000000003e-05, | |
| "loss": 0.3529045104980469, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0947867298578199, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.45e-05, | |
| "loss": 0.33787591457366944, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1004739336492891, | |
| "eval_text_loss": 1.4636504650115967, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.8317, | |
| "eval_text_samples_per_second": 12.418, | |
| "eval_text_steps_per_second": 12.418, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.1004739336492891, | |
| "eval_audio_loss": 0.7069099545478821, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 21.032, | |
| "eval_audio_samples_per_second": 6.324, | |
| "eval_audio_steps_per_second": 6.324, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11374407582938388, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.95e-05, | |
| "loss": 0.33856768608093263, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13270142180094788, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 3.45e-05, | |
| "loss": 0.35529475212097167, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15165876777251186, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.9500000000000005e-05, | |
| "loss": 0.2936519384384155, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17061611374407584, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 4.4500000000000004e-05, | |
| "loss": 0.318320107460022, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1895734597156398, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.9500000000000004e-05, | |
| "loss": 0.31594276428222656, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2009478672985782, | |
| "eval_text_loss": 1.329001545906067, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.4604, | |
| "eval_text_samples_per_second": 13.452, | |
| "eval_text_steps_per_second": 13.452, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2009478672985782, | |
| "eval_audio_loss": 0.7277711629867554, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 20.1176, | |
| "eval_audio_samples_per_second": 6.611, | |
| "eval_audio_steps_per_second": 6.611, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.20853080568720378, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.9945468268142656e-05, | |
| "loss": 0.31812191009521484, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22748815165876776, | |
| "grad_norm": 1.0, | |
| "learning_rate": 4.975726891929585e-05, | |
| "loss": 0.29132957458496095, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.24644549763033174, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 4.943574200733625e-05, | |
| "loss": 0.3463184595108032, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.26540284360189575, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 4.89826190802705e-05, | |
| "loss": 0.3099159479141235, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2843601895734597, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 4.8400340382046866e-05, | |
| "loss": 0.29767818450927735, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3014218009478673, | |
| "eval_text_loss": 1.2909588813781738, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.8675, | |
| "eval_text_samples_per_second": 12.327, | |
| "eval_text_steps_per_second": 12.327, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3014218009478673, | |
| "eval_audio_loss": 0.7182490825653076, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 19.9605, | |
| "eval_audio_samples_per_second": 6.663, | |
| "eval_audio_steps_per_second": 6.663, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3033175355450237, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.769204171088849e-05, | |
| "loss": 0.3072782039642334, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3222748815165877, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.6861537531795094e-05, | |
| "loss": 0.30300824642181395, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3412322274881517, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.591330043415909e-05, | |
| "loss": 0.3216289758682251, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.36018957345971564, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 4.485243704512474e-05, | |
| "loss": 0.3087095499038696, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3791469194312796, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.368466052840636e-05, | |
| "loss": 0.3263149976730347, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3981042654028436, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 4.2416259816670235e-05, | |
| "loss": 0.29808921813964845, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4018957345971564, | |
| "eval_text_loss": 1.2724202871322632, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.8821, | |
| "eval_text_samples_per_second": 12.29, | |
| "eval_text_steps_per_second": 12.29, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4018957345971564, | |
| "eval_audio_loss": 0.7229443788528442, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 20.5668, | |
| "eval_audio_samples_per_second": 6.467, | |
| "eval_audio_steps_per_second": 6.467, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.41706161137440756, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.1054065743175786e-05, | |
| "loss": 0.313119101524353, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.43601895734597157, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.9605414255070396e-05, | |
| "loss": 0.2832280874252319, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4549763033175355, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 3.8078106906448683e-05, | |
| "loss": 0.29762136936187744, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.47393364928909953, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.648036884393646e-05, | |
| "loss": 0.3122119665145874, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4928909952606635, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.4820804511063496e-05, | |
| "loss": 0.2904823780059814, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5023696682464455, | |
| "eval_text_loss": 1.224345088005066, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.548, | |
| "eval_text_samples_per_second": 13.193, | |
| "eval_text_steps_per_second": 13.193, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5023696682464455, | |
| "eval_audio_loss": 0.7233917713165283, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 20.862, | |
| "eval_audio_samples_per_second": 6.375, | |
| "eval_audio_steps_per_second": 6.375, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5118483412322274, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 3.3108351309974284e-05, | |
| "loss": 0.3033822298049927, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5308056872037915, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 3.1352231470026584e-05, | |
| "loss": 0.3055255651473999, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5497630331753555, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.956190238248425e-05, | |
| "loss": 0.28981497287750246, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5687203791469194, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 2.7747005668771293e-05, | |
| "loss": 0.32351953983306886, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5876777251184834, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 2.591731525657454e-05, | |
| "loss": 0.2949329614639282, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6028436018957346, | |
| "eval_text_loss": 1.2085834741592407, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 5.1662, | |
| "eval_text_samples_per_second": 11.614, | |
| "eval_text_steps_per_second": 11.614, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6028436018957346, | |
| "eval_audio_loss": 0.7195463180541992, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 20.8146, | |
| "eval_audio_samples_per_second": 6.39, | |
| "eval_audio_steps_per_second": 6.39, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6066350710900474, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 2.4082684743425458e-05, | |
| "loss": 0.28318300247192385, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6255924170616114, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 2.2252994331228713e-05, | |
| "loss": 0.286310601234436, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6445497630331753, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.0438097617515745e-05, | |
| "loss": 0.28706789016723633, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6635071090047393, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.8647768529973425e-05, | |
| "loss": 0.31264851093292234, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6824644549763034, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.6891648690025718e-05, | |
| "loss": 0.2824721813201904, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7014218009478673, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.5179195488936504e-05, | |
| "loss": 0.2849812269210815, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7033175355450237, | |
| "eval_text_loss": 1.1984789371490479, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.7136, | |
| "eval_text_samples_per_second": 12.729, | |
| "eval_text_steps_per_second": 12.729, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7033175355450237, | |
| "eval_audio_loss": 0.719244122505188, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 19.3073, | |
| "eval_audio_samples_per_second": 6.889, | |
| "eval_audio_steps_per_second": 6.889, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7203791469194313, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 1.3519631156063539e-05, | |
| "loss": 0.30515313148498535, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7393364928909952, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.1921893093551322e-05, | |
| "loss": 0.298003101348877, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7582938388625592, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 1.0394585744929605e-05, | |
| "loss": 0.29361729621887206, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7772511848341233, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8.945934256824218e-06, | |
| "loss": 0.28779690265655516, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7962085308056872, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 7.583740183329768e-06, | |
| "loss": 0.27545139789581297, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8037914691943128, | |
| "eval_text_loss": 1.195946455001831, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 5.0743, | |
| "eval_text_samples_per_second": 11.824, | |
| "eval_text_steps_per_second": 11.824, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8037914691943128, | |
| "eval_audio_loss": 0.7187640070915222, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 20.0259, | |
| "eval_audio_samples_per_second": 6.641, | |
| "eval_audio_steps_per_second": 6.641, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8151658767772512, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 6.315339471593646e-06, | |
| "loss": 0.2728050947189331, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8341232227488151, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 5.147562954875268e-06, | |
| "loss": 0.3111764907836914, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8530805687203792, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.086699565840915e-06, | |
| "loss": 0.26120471954345703, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8720379146919431, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.138462468204914e-06, | |
| "loss": 0.2936476469039917, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8909952606635071, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 2.3079582891115144e-06, | |
| "loss": 0.2918030023574829, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9042654028436019, | |
| "eval_text_loss": 1.1954913139343262, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 5.6311, | |
| "eval_text_samples_per_second": 10.655, | |
| "eval_text_steps_per_second": 10.655, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.9042654028436019, | |
| "eval_audio_loss": 0.7186709642410278, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 21.3981, | |
| "eval_audio_samples_per_second": 6.216, | |
| "eval_audio_steps_per_second": 6.216, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.909952606635071, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.5996596179531364e-06, | |
| "loss": 0.29052374362945554, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9289099526066351, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.0173809197295075e-06, | |
| "loss": 0.27139732837677, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9478672985781991, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 5.642579926637554e-07, | |
| "loss": 0.29912357330322265, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.966824644549763, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 2.4273108070415607e-07, | |
| "loss": 0.2772815465927124, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.985781990521327, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 5.453173185734073e-08, | |
| "loss": 0.29728262424468993, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_text_loss": 1.196468710899353, | |
| "eval_text_model_preparation_time": 0.0155, | |
| "eval_text_runtime": 4.6934, | |
| "eval_text_samples_per_second": 12.784, | |
| "eval_text_steps_per_second": 12.784, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_audio_loss": 0.7178842425346375, | |
| "eval_audio_model_preparation_time": 0.0155, | |
| "eval_audio_runtime": 20.4901, | |
| "eval_audio_samples_per_second": 6.491, | |
| "eval_audio_steps_per_second": 6.491, | |
| "step": 528 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 528, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 11, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.645359034142144e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |