Instructions to use WhirlwindAI/Translate-25T with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WhirlwindAI/Translate-25T with Transformers:
# Use a pipeline as a high-level helper
# Warning: Pipeline type "translation" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0"'
from transformers import pipeline
pipe = pipeline("translation", model="WhirlwindAI/Translate-25T")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("WhirlwindAI/Translate-25T")
model = AutoModelForSeq2SeqLM.from_pretrained("WhirlwindAI/Translate-25T")
- Notebooks
- Google Colab
- Kaggle
checkpoint-4455
Browse files
- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scaler.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +32 -4
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2225188480
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfe87e2f6d6074d8d86b668507a39edce3e7e72b9348c1748c61c17b5edcdb2c
|
| 3 |
size 2225188480
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4450498267
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ffeb7ace51550edaa0d04986ab4cb9c1fcaed2503bc0e748aaf54df166ad8e6
|
| 3 |
size 4450498267
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6538b00e91d4c4de4a0c17d2634d57eabc754fc7df85477b8c6ecad7f0cf41a3
|
| 3 |
size 14645
|
scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4bcb7279143a7253dbdae3618e4ac776d6e4a1395b9ed9fcb9bc00d72d1520b
|
| 3 |
size 1383
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a24e34950d39b6dd56b12cce4107aa557148473c4972111b61f5d0bfe94716d
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -352,6 +352,34 @@
|
|
| 352 |
"eval_samples_per_second": 13.431,
|
| 353 |
"eval_steps_per_second": 3.358,
|
| 354 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
}
|
| 356 |
],
|
| 357 |
"logging_steps": 100,
|
|
@@ -366,12 +394,12 @@
|
|
| 366 |
"should_evaluate": false,
|
| 367 |
"should_log": false,
|
| 368 |
"should_save": true,
|
| 369 |
-
"should_training_stop":
|
| 370 |
},
|
| 371 |
"attributes": {}
|
| 372 |
}
|
| 373 |
},
|
| 374 |
-
"total_flos": 1.
|
| 375 |
"train_batch_size": 8,
|
| 376 |
"trial_name": null,
|
| 377 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 4455,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 352 |
"eval_samples_per_second": 13.431,
|
| 353 |
"eval_steps_per_second": 3.358,
|
| 354 |
"step": 4000
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"epoch": 2.760942760942761,
|
| 358 |
+
"grad_norm": 21.673240661621094,
|
| 359 |
+
"learning_rate": 2.5099882491186838e-05,
|
| 360 |
+
"loss": 50.46708984375,
|
| 361 |
+
"step": 4100
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"epoch": 2.8282828282828283,
|
| 365 |
+
"grad_norm": 21.410390853881836,
|
| 366 |
+
"learning_rate": 1.8049353701527613e-05,
|
| 367 |
+
"loss": 50.4184521484375,
|
| 368 |
+
"step": 4200
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"epoch": 2.8956228956228958,
|
| 372 |
+
"grad_norm": 21.75411033630371,
|
| 373 |
+
"learning_rate": 1.0998824911868389e-05,
|
| 374 |
+
"loss": 50.68787109375,
|
| 375 |
+
"step": 4300
|
| 376 |
+
},
|
| 377 |
+
{
|
| 378 |
+
"epoch": 2.962962962962963,
|
| 379 |
+
"grad_norm": 21.319902420043945,
|
| 380 |
+
"learning_rate": 3.948296122209165e-06,
|
| 381 |
+
"loss": 50.4838671875,
|
| 382 |
+
"step": 4400
|
| 383 |
}
|
| 384 |
],
|
| 385 |
"logging_steps": 100,
|
|
|
|
| 394 |
"should_evaluate": false,
|
| 395 |
"should_log": false,
|
| 396 |
"should_save": true,
|
| 397 |
+
"should_training_stop": true
|
| 398 |
},
|
| 399 |
"attributes": {}
|
| 400 |
}
|
| 401 |
},
|
| 402 |
+
"total_flos": 1.714151970521088e+16,
|
| 403 |
"train_batch_size": 8,
|
| 404 |
"trial_name": null,
|
| 405 |
"trial_params": null
|