Instructions to use WhirlwindAI/Translate-25T with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WhirlwindAI/Translate-25T with Transformers:
```python
# Use a pipeline as a high-level helper
# Warning: Pipeline type "translation" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0"'
from transformers import pipeline

pipe = pipeline("translation", model="WhirlwindAI/Translate-25T")
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("WhirlwindAI/Translate-25T")
model = AutoModelForSeq2SeqLM.from_pretrained("WhirlwindAI/Translate-25T")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 4455, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06734006734006734, | |
| "grad_norm": 1277.759765625, | |
| "learning_rate": 0.00014849999999999998, | |
| "loss": 306.280546875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13468013468013468, | |
| "grad_norm": 45.35179138183594, | |
| "learning_rate": 0.0002985, | |
| "loss": 116.630830078125, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20202020202020202, | |
| "grad_norm": 38.45261764526367, | |
| "learning_rate": 0.00029301997649823735, | |
| "loss": 81.141572265625, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.26936026936026936, | |
| "grad_norm": 30.12755584716797, | |
| "learning_rate": 0.0002859694477085781, | |
| "loss": 71.679775390625, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3367003367003367, | |
| "grad_norm": 26.099262237548828, | |
| "learning_rate": 0.0002789189189189189, | |
| "loss": 67.6156201171875, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3367003367003367, | |
| "eval_loss": 6.426710605621338, | |
| "eval_runtime": 71.9526, | |
| "eval_samples_per_second": 13.342, | |
| "eval_steps_per_second": 3.336, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.40404040404040403, | |
| "grad_norm": 25.531190872192383, | |
| "learning_rate": 0.00027186839012925966, | |
| "loss": 65.3996484375, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4713804713804714, | |
| "grad_norm": 25.456987380981445, | |
| "learning_rate": 0.00026481786133960043, | |
| "loss": 63.7711328125, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5387205387205387, | |
| "grad_norm": 24.871572494506836, | |
| "learning_rate": 0.00025776733254994125, | |
| "loss": 62.25517578125, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 29.645320892333984, | |
| "learning_rate": 0.000250716803760282, | |
| "loss": 60.900390625, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6734006734006734, | |
| "grad_norm": 25.431486129760742, | |
| "learning_rate": 0.00024366627497062276, | |
| "loss": 60.542490234375, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6734006734006734, | |
| "eval_loss": 5.928141117095947, | |
| "eval_runtime": 71.83, | |
| "eval_samples_per_second": 13.365, | |
| "eval_steps_per_second": 3.341, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 23.62899398803711, | |
| "learning_rate": 0.00023661574618096356, | |
| "loss": 59.750263671875, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 18.71799087524414, | |
| "learning_rate": 0.00022956521739130433, | |
| "loss": 58.8070458984375, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8754208754208754, | |
| "grad_norm": 21.216167449951172, | |
| "learning_rate": 0.00022251468860164512, | |
| "loss": 58.904775390625, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9427609427609428, | |
| "grad_norm": 25.631328582763672, | |
| "learning_rate": 0.00021546415981198587, | |
| "loss": 57.6883837890625, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0101010101010102, | |
| "grad_norm": 24.354522705078125, | |
| "learning_rate": 0.00020841363102232666, | |
| "loss": 57.590849609375, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0101010101010102, | |
| "eval_loss": 5.705260276794434, | |
| "eval_runtime": 72.7006, | |
| "eval_samples_per_second": 13.205, | |
| "eval_steps_per_second": 3.301, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0774410774410774, | |
| "grad_norm": 28.926172256469727, | |
| "learning_rate": 0.00020136310223266743, | |
| "loss": 54.9418896484375, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.144781144781145, | |
| "grad_norm": 23.21477508544922, | |
| "learning_rate": 0.00019431257344300823, | |
| "loss": 55.0524072265625, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.2121212121212122, | |
| "grad_norm": 22.93968391418457, | |
| "learning_rate": 0.00018726204465334897, | |
| "loss": 54.60087890625, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.2794612794612794, | |
| "grad_norm": 21.029342651367188, | |
| "learning_rate": 0.00018021151586368976, | |
| "loss": 53.961689453125, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.3468013468013469, | |
| "grad_norm": 22.02585220336914, | |
| "learning_rate": 0.00017316098707403053, | |
| "loss": 54.352119140625, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3468013468013469, | |
| "eval_loss": 5.576821804046631, | |
| "eval_runtime": 70.7614, | |
| "eval_samples_per_second": 13.567, | |
| "eval_steps_per_second": 3.392, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.4141414141414141, | |
| "grad_norm": 22.914306640625, | |
| "learning_rate": 0.00016611045828437133, | |
| "loss": 53.9832177734375, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 22.693891525268555, | |
| "learning_rate": 0.00015905992949471207, | |
| "loss": 53.846845703125, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.5488215488215489, | |
| "grad_norm": 22.133243560791016, | |
| "learning_rate": 0.00015200940070505287, | |
| "loss": 54.00986328125, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.6161616161616161, | |
| "grad_norm": 23.607688903808594, | |
| "learning_rate": 0.00014495887191539364, | |
| "loss": 53.8654638671875, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "grad_norm": 22.71261215209961, | |
| "learning_rate": 0.0001379083431257344, | |
| "loss": 53.5878173828125, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "eval_loss": 5.490618705749512, | |
| "eval_runtime": 72.5632, | |
| "eval_samples_per_second": 13.23, | |
| "eval_steps_per_second": 3.307, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.7508417508417509, | |
| "grad_norm": 19.942703247070312, | |
| "learning_rate": 0.0001308578143360752, | |
| "loss": 52.879384765625, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 20.36492919921875, | |
| "learning_rate": 0.00012380728554641597, | |
| "loss": 52.678017578125, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.8855218855218854, | |
| "grad_norm": 20.429353713989258, | |
| "learning_rate": 0.00011675675675675675, | |
| "loss": 53.1516796875, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.9528619528619529, | |
| "grad_norm": 20.908979415893555, | |
| "learning_rate": 0.00010970622796709752, | |
| "loss": 52.3822607421875, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.0202020202020203, | |
| "grad_norm": 24.01288604736328, | |
| "learning_rate": 0.0001026556991774383, | |
| "loss": 51.8819384765625, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.0202020202020203, | |
| "eval_loss": 5.448428153991699, | |
| "eval_runtime": 70.4982, | |
| "eval_samples_per_second": 13.617, | |
| "eval_steps_per_second": 3.404, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.0875420875420874, | |
| "grad_norm": 19.735408782958984, | |
| "learning_rate": 9.560517038777907e-05, | |
| "loss": 51.3095654296875, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.154882154882155, | |
| "grad_norm": 23.40346336364746, | |
| "learning_rate": 8.855464159811985e-05, | |
| "loss": 51.257080078125, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 19.5986385345459, | |
| "learning_rate": 8.150411280846062e-05, | |
| "loss": 50.9421484375, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.28956228956229, | |
| "grad_norm": 22.559755325317383, | |
| "learning_rate": 7.44535840188014e-05, | |
| "loss": 50.7065576171875, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.356902356902357, | |
| "grad_norm": 21.577171325683594, | |
| "learning_rate": 6.740305522914217e-05, | |
| "loss": 51.1507177734375, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.356902356902357, | |
| "eval_loss": 5.395638942718506, | |
| "eval_runtime": 73.4284, | |
| "eval_samples_per_second": 13.074, | |
| "eval_steps_per_second": 3.268, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "grad_norm": 29.17755889892578, | |
| "learning_rate": 6.0352526439482956e-05, | |
| "loss": 50.8910009765625, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.4915824915824913, | |
| "grad_norm": 20.923023223876953, | |
| "learning_rate": 5.330199764982373e-05, | |
| "loss": 50.6894189453125, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.558922558922559, | |
| "grad_norm": 18.506322860717773, | |
| "learning_rate": 4.625146886016451e-05, | |
| "loss": 51.0062451171875, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.6262626262626263, | |
| "grad_norm": 22.548017501831055, | |
| "learning_rate": 3.920094007050528e-05, | |
| "loss": 50.7424365234375, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.6936026936026938, | |
| "grad_norm": 22.250715255737305, | |
| "learning_rate": 3.2150411280846066e-05, | |
| "loss": 50.8699951171875, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.6936026936026938, | |
| "eval_loss": 5.3642354011535645, | |
| "eval_runtime": 71.4784, | |
| "eval_samples_per_second": 13.431, | |
| "eval_steps_per_second": 3.358, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.760942760942761, | |
| "grad_norm": 21.673240661621094, | |
| "learning_rate": 2.5099882491186838e-05, | |
| "loss": 50.46708984375, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.8282828282828283, | |
| "grad_norm": 21.410390853881836, | |
| "learning_rate": 1.8049353701527613e-05, | |
| "loss": 50.4184521484375, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.8956228956228958, | |
| "grad_norm": 21.75411033630371, | |
| "learning_rate": 1.0998824911868389e-05, | |
| "loss": 50.68787109375, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 21.319902420043945, | |
| "learning_rate": 3.948296122209165e-06, | |
| "loss": 50.4838671875, | |
| "step": 4400 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 4455, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.714151970521088e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |