diff --git a/byt5_v2_6gb/README.md b/byt5_v2_6gb/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a085edb5dadbe42257056337b27aa2e1fb5dd5 --- /dev/null +++ b/byt5_v2_6gb/README.md @@ -0,0 +1,206 @@ +--- +base_model: google/byt5-small +library_name: peft +tags: +- base_model:adapter:google/byt5-small +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/byt5_v2_6gb/adapter_config.json b/byt5_v2_6gb/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6bfa1bc7d6e17490e6535bad18b0eb9abf2414 --- /dev/null +++ b/byt5_v2_6gb/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/byt5-small", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v", + "q" + ], + "target_parameters": null, + "task_type": "SEQ_2_SEQ_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/byt5_v2_6gb/adapter_model.safetensors b/byt5_v2_6gb/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6994cb0da316798fc781930d5505c1139f9d4c4 --- /dev/null +++ b/byt5_v2_6gb/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0864f9e7b1f1f4f31a0fe3d19c160d5d275db4255a42506fcd4aaf9a9dd0ac95 +size 2386768 diff --git a/byt5_v2_6gb/added_tokens.json b/byt5_v2_6gb/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..93c190b5690dd55aac16723222a9909e2be0faec --- /dev/null +++ b/byt5_v2_6gb/added_tokens.json @@ -0,0 +1,127 @@ +{ + "": 259, + "": 359, + "": 360, + "": 361, + "": 362, + "": 363, + "": 364, + "": 365, + "": 366, + "": 367, + "": 368, + "": 269, + "": 369, + "": 370, + "": 371, + "": 372, + "": 373, + "": 374, + "": 375, + "": 376, + "": 377, + "": 378, + "": 270, + "": 379, + "": 380, + "": 381, + "": 382, + "": 383, + "": 271, + "": 272, + "": 273, + "": 274, + "": 275, + "": 276, + "": 277, + "": 278, + "": 260, + "": 279, + "": 280, + "": 281, + "": 282, + "": 283, + "": 284, + "": 285, + "": 286, + "": 287, + "": 288, + "": 261, + "": 289, + "": 290, + "": 291, + "": 292, + "": 293, + "": 294, + "": 295, + "": 296, + "": 297, + "": 298, + "": 262, + "": 299, + "": 300, + "": 301, + "": 302, + "": 303, + "": 304, + "": 305, + "": 306, + "": 307, + "": 308, + "": 263, + "": 309, + "": 310, + "": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 264, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 265, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 266, + "": 339, + "": 340, + "": 341, + "": 342, + "": 343, + "": 344, + "": 345, + "": 346, + "": 347, + "": 348, + "": 267, + "": 349, + "": 350, + "": 351, + "": 352, + "": 353, + "": 354, + "": 355, + "": 356, + "": 357, + "": 358, + "": 268 +} diff --git a/byt5_v2_6gb/checkpoint-172/README.md b/byt5_v2_6gb/checkpoint-172/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a085edb5dadbe42257056337b27aa2e1fb5dd5 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/README.md @@ -0,0 +1,206 @@ +--- +base_model: google/byt5-small +library_name: peft +tags: +- base_model:adapter:google/byt5-small +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-172/adapter_config.json b/byt5_v2_6gb/checkpoint-172/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6bfa1bc7d6e17490e6535bad18b0eb9abf2414 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/byt5-small", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v", + "q" + ], + "target_parameters": null, + "task_type": "SEQ_2_SEQ_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-172/adapter_model.safetensors b/byt5_v2_6gb/checkpoint-172/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6994cb0da316798fc781930d5505c1139f9d4c4 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0864f9e7b1f1f4f31a0fe3d19c160d5d275db4255a42506fcd4aaf9a9dd0ac95 +size 2386768 diff --git a/byt5_v2_6gb/checkpoint-172/added_tokens.json b/byt5_v2_6gb/checkpoint-172/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..93c190b5690dd55aac16723222a9909e2be0faec --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/added_tokens.json @@ -0,0 +1,127 @@ +{ + "": 259, + "": 359, + "": 360, + "": 361, + "": 362, + "": 363, + "": 364, + "": 365, + "": 366, + "": 367, + "": 368, + "": 269, + "": 369, + "": 370, + "": 371, + "": 372, + "": 373, + "": 374, + "": 375, + "": 376, + "": 377, + "": 378, + "": 270, + "": 379, + "": 380, + "": 381, + "": 382, + "": 383, + "": 271, + "": 272, + "": 273, + "": 274, + "": 275, + "": 276, + "": 277, + "": 278, + "": 260, + "": 279, + "": 280, + "": 281, + "": 282, + "": 283, + "": 284, + "": 285, + "": 286, + "": 287, + "": 288, + "": 261, + "": 289, + "": 290, + "": 291, + "": 292, + "": 293, + "": 294, + "": 295, + "": 296, + "": 297, + "": 298, + "": 262, + "": 299, + "": 300, + "": 301, + "": 302, + "": 303, + "": 304, + "": 305, + "": 306, + "": 307, + "": 308, + "": 263, + "": 309, + "": 310, + "": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 264, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 265, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 266, + "": 339, + "": 340, + "": 341, + "": 342, + "": 343, + "": 344, + "": 345, + "": 346, + "": 347, + "": 348, + "": 267, + "": 349, + "": 350, + "": 351, + "": 352, + "": 353, + "": 354, + "": 355, + "": 356, + "": 357, + "": 358, + "": 268 +} diff --git a/byt5_v2_6gb/checkpoint-172/optimizer.pt b/byt5_v2_6gb/checkpoint-172/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0748001d6364b1b99c821965a299d078951b8564 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd8491034c96271e9a0214d88de4bfd4f52ffca9060ea578fb896e949c7b816a +size 4820363 diff --git a/byt5_v2_6gb/checkpoint-172/rng_state.pth b/byt5_v2_6gb/checkpoint-172/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6184acff858d414f6f5518910bd501f2a82a0d4 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a170183755ef8df761929183d87f5a3de5376c6ff8316dacd621f11549d8ae2a +size 14645 diff --git a/byt5_v2_6gb/checkpoint-172/scaler.pt b/byt5_v2_6gb/checkpoint-172/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac4d0604c3a7ed5ca82722253e93067dca857bd --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67 +size 1383 diff --git a/byt5_v2_6gb/checkpoint-172/scheduler.pt b/byt5_v2_6gb/checkpoint-172/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..441b5bb2996efcfc62095a43be1058f1c4fad54e --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2af583046aea4350b7b39c8c9cedd88ab619b78382a20d4dd98dabac4eaeaf +size 1465 diff --git a/byt5_v2_6gb/checkpoint-172/tokenizer_config.json b/byt5_v2_6gb/checkpoint-172/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96da6801779a232712edf0b6760664675cffbc24 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/tokenizer_config.json @@ -0,0 +1,1290 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "backend": "custom", + "eos_token": "", + "extra_ids": 0, + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "ByT5Tokenizer", + "unk_token": "" +} diff --git a/byt5_v2_6gb/checkpoint-172/trainer_state.json b/byt5_v2_6gb/checkpoint-172/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f0007f42403eb7975fb5c9261332c394d7f289da --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/trainer_state.json @@ -0,0 +1,161 @@ +{ + "best_global_step": null, + "best_metric": Infinity, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 172, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.058351568198395334, + "grad_norm": NaN, + "learning_rate": 0.00019790697674418605, + "loss": 891140376730009.6, + "step": 10 + }, + { + "epoch": 0.11670313639679067, + "grad_norm": NaN, + "learning_rate": 0.00019558139534883723, + "loss": 4850718867456.0, + "step": 20 + }, + { + "epoch": 0.175054704595186, + "grad_norm": NaN, + "learning_rate": 0.00019325581395348838, + "loss": 3252219491123.2, + "step": 30 + }, + { + "epoch": 0.23340627279358134, + "grad_norm": NaN, + "learning_rate": 0.00019093023255813956, + "loss": 21253524684.8, + "step": 40 + }, + { + "epoch": 0.29175784099197666, + "grad_norm": NaN, + "learning_rate": 0.00018860465116279072, + "loss": 1242686510071808.0, + "step": 50 + }, + { + "epoch": 0.350109409190372, + "grad_norm": NaN, + "learning_rate": 0.00018627906976744187, + "loss": 87104688226304.0, + "step": 60 + }, + { + "epoch": 0.4084609773887673, + "grad_norm": NaN, + "learning_rate": 0.00018395348837209303, + "loss": 1.3086946598531892e+16, + "step": 70 + }, + { + "epoch": 0.4668125455871627, + "grad_norm": NaN, + "learning_rate": 0.00018162790697674418, + "loss": 14105561792512.0, + "step": 80 + }, + { + "epoch": 0.5251641137855579, + "grad_norm": NaN, + "learning_rate": 0.00017930232558139534, + "loss": 204642777.6, + "step": 90 + }, + { + "epoch": 0.5835156819839533, + "grad_norm": NaN, + "learning_rate": 0.00017697674418604652, + "loss": 268920008841625.6, + "step": 100 + }, + { + "epoch": 0.6418672501823487, + "grad_norm": NaN, + "learning_rate": 0.00017465116279069768, + "loss": 165014200437964.8, + "step": 110 + }, + { + "epoch": 0.700218818380744, + "grad_norm": NaN, + "learning_rate": 0.00017232558139534886, + "loss": 341466650404454.4, + "step": 120 + }, + { + "epoch": 0.7585703865791393, + "grad_norm": NaN, + "learning_rate": 0.00017, + "loss": 1138850092233523.2, + "step": 130 + }, + { + "epoch": 0.8169219547775346, + "grad_norm": NaN, + "learning_rate": 0.00016767441860465117, + "loss": 92024340480.0, + "step": 140 + }, + { + "epoch": 0.87527352297593, + "grad_norm": NaN, + "learning_rate": 0.00016534883720930235, + "loss": 78592881773772.8, + "step": 150 + }, + { + "epoch": 0.9336250911743253, + "grad_norm": NaN, + "learning_rate": 0.0001630232558139535, + "loss": 12534172509798.4, + "step": 160 + }, + { + "epoch": 0.9919766593727206, + "grad_norm": NaN, + "learning_rate": 0.00016069767441860466, + "loss": 186510766768128.0, + "step": 170 + }, + { + "epoch": 1.0, + "eval_loss": NaN, + "eval_runtime": 4.1714, + "eval_samples_per_second": 146.234, + "eval_steps_per_second": 18.459, + "step": 172 + } + ], + "logging_steps": 10, + "max_steps": 860, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1262107090354176.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/byt5_v2_6gb/checkpoint-172/training_args.bin b/byt5_v2_6gb/checkpoint-172/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c157e8fe71b3035765545ee7a917ab96243a14 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-172/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128ab4c2243d9abce6d1e9c5e2a60d990f973437b7aff444644a9237e7479025 +size 5329 diff --git a/byt5_v2_6gb/checkpoint-344/README.md b/byt5_v2_6gb/checkpoint-344/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a085edb5dadbe42257056337b27aa2e1fb5dd5 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/README.md @@ -0,0 +1,206 @@ +--- +base_model: google/byt5-small +library_name: peft +tags: +- base_model:adapter:google/byt5-small +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-344/adapter_config.json b/byt5_v2_6gb/checkpoint-344/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6bfa1bc7d6e17490e6535bad18b0eb9abf2414 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/byt5-small", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v", + "q" + ], + "target_parameters": null, + "task_type": "SEQ_2_SEQ_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-344/adapter_model.safetensors b/byt5_v2_6gb/checkpoint-344/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6994cb0da316798fc781930d5505c1139f9d4c4 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0864f9e7b1f1f4f31a0fe3d19c160d5d275db4255a42506fcd4aaf9a9dd0ac95 +size 2386768 diff --git a/byt5_v2_6gb/checkpoint-344/added_tokens.json b/byt5_v2_6gb/checkpoint-344/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..93c190b5690dd55aac16723222a9909e2be0faec --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/added_tokens.json @@ -0,0 +1,127 @@ +{ + "": 259, + "": 359, + "": 360, + "": 361, + "": 362, + "": 363, + "": 364, + "": 365, + "": 366, + "": 367, + "": 368, + "": 269, + "": 369, + "": 370, + "": 371, + "": 372, + "": 373, + "": 374, + "": 375, + "": 376, + "": 377, + "": 378, + "": 270, + "": 379, + "": 380, + "": 381, + "": 382, + "": 383, + "": 271, + "": 272, + "": 273, + "": 274, + "": 275, + "": 276, + "": 277, + "": 278, + "": 260, + "": 279, + "": 280, + "": 281, + "": 282, + "": 283, + "": 284, + "": 285, + "": 286, + "": 287, + "": 288, + "": 261, + "": 289, + "": 290, + "": 291, + "": 292, + "": 293, + "": 294, + "": 295, + "": 296, + "": 297, + "": 298, + "": 262, + "": 299, + "": 300, + "": 301, + "": 302, + "": 303, + "": 304, + "": 305, + "": 306, + "": 307, + "": 308, + "": 263, + "": 309, + "": 310, + "": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 264, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 265, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 266, + "": 339, + "": 340, + "": 341, + "": 342, + "": 343, + "": 344, + "": 345, + "": 346, + "": 347, + "": 348, + "": 267, + "": 349, + "": 350, + "": 351, + "": 352, + "": 353, + "": 354, + "": 355, + "": 356, + "": 357, + "": 358, + "": 268 +} diff --git a/byt5_v2_6gb/checkpoint-344/optimizer.pt b/byt5_v2_6gb/checkpoint-344/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f2db1901b314f5b5d2640b98fc240957ef24697 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9ebdbc9ce8212d4591746a93b259aa7f3a5df9ca3c69ae85ad09b6ceedad5e +size 4820363 diff --git a/byt5_v2_6gb/checkpoint-344/rng_state.pth b/byt5_v2_6gb/checkpoint-344/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bca5017b37f94551eef6c6d3d5d4ea43ae8256e3 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa476bb470aade528c0a25b0ea6aa58e4c3f7e528c42ca564850414c43c2d74 +size 14645 diff --git a/byt5_v2_6gb/checkpoint-344/scaler.pt b/byt5_v2_6gb/checkpoint-344/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac4d0604c3a7ed5ca82722253e93067dca857bd --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67 +size 1383 diff --git a/byt5_v2_6gb/checkpoint-344/scheduler.pt b/byt5_v2_6gb/checkpoint-344/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47306ddc0541f2760e0c85ec395754598c3d7695 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25dd443bcbe48cbdf285ad1effe513b1f92290a721e041dbff56d727d54f4d1 +size 1465 diff --git a/byt5_v2_6gb/checkpoint-344/tokenizer_config.json b/byt5_v2_6gb/checkpoint-344/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96da6801779a232712edf0b6760664675cffbc24 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/tokenizer_config.json @@ -0,0 +1,1290 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "backend": "custom", + "eos_token": "", + "extra_ids": 0, + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "ByT5Tokenizer", + "unk_token": "" +} diff --git a/byt5_v2_6gb/checkpoint-344/trainer_state.json b/byt5_v2_6gb/checkpoint-344/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..04581b69a73582541ed1296d2264dac2172b5cd2 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/trainer_state.json @@ -0,0 +1,288 @@ +{ + "best_global_step": null, + "best_metric": Infinity, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 344, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.058351568198395334, + "grad_norm": NaN, + "learning_rate": 0.00019790697674418605, + "loss": 891140376730009.6, + "step": 10 + }, + { + "epoch": 0.11670313639679067, + "grad_norm": NaN, + "learning_rate": 0.00019558139534883723, + "loss": 4850718867456.0, + "step": 20 + }, + { + "epoch": 0.175054704595186, + "grad_norm": NaN, + "learning_rate": 0.00019325581395348838, + "loss": 3252219491123.2, + "step": 30 + }, + { + "epoch": 0.23340627279358134, + "grad_norm": NaN, + "learning_rate": 0.00019093023255813956, + "loss": 21253524684.8, + "step": 40 + }, + { + "epoch": 0.29175784099197666, + "grad_norm": NaN, + "learning_rate": 0.00018860465116279072, + "loss": 1242686510071808.0, + "step": 50 + }, + { + "epoch": 0.350109409190372, + "grad_norm": NaN, + "learning_rate": 0.00018627906976744187, + "loss": 87104688226304.0, + "step": 60 + }, + { + "epoch": 0.4084609773887673, + "grad_norm": NaN, + "learning_rate": 0.00018395348837209303, + "loss": 1.3086946598531892e+16, + "step": 70 + }, + { + "epoch": 0.4668125455871627, + "grad_norm": NaN, + "learning_rate": 0.00018162790697674418, + "loss": 14105561792512.0, + "step": 80 + }, + { + "epoch": 0.5251641137855579, + "grad_norm": NaN, + "learning_rate": 0.00017930232558139534, + "loss": 204642777.6, + "step": 90 + }, + { + "epoch": 0.5835156819839533, + "grad_norm": NaN, + "learning_rate": 0.00017697674418604652, + "loss": 268920008841625.6, + "step": 100 + }, + { + "epoch": 0.6418672501823487, + "grad_norm": NaN, + "learning_rate": 0.00017465116279069768, + "loss": 165014200437964.8, + "step": 110 + }, + { + "epoch": 0.700218818380744, + "grad_norm": NaN, + "learning_rate": 0.00017232558139534886, + "loss": 341466650404454.4, + "step": 120 + }, + { + "epoch": 0.7585703865791393, + "grad_norm": NaN, + "learning_rate": 0.00017, + "loss": 1138850092233523.2, + "step": 130 + }, + { + "epoch": 0.8169219547775346, + "grad_norm": NaN, + "learning_rate": 0.00016767441860465117, + "loss": 92024340480.0, + "step": 140 + }, + { + "epoch": 0.87527352297593, + "grad_norm": NaN, + "learning_rate": 0.00016534883720930235, + "loss": 78592881773772.8, + "step": 150 + }, + { + "epoch": 0.9336250911743253, + "grad_norm": NaN, + "learning_rate": 0.0001630232558139535, + "loss": 12534172509798.4, + "step": 160 + }, + { + "epoch": 0.9919766593727206, + "grad_norm": NaN, + "learning_rate": 0.00016069767441860466, + "loss": 186510766768128.0, + "step": 170 + }, + { + "epoch": 1.0, + "eval_loss": NaN, + "eval_runtime": 4.1714, + "eval_samples_per_second": 146.234, + "eval_steps_per_second": 18.459, + "step": 172 + }, + { + "epoch": 1.0466812545587163, + "grad_norm": NaN, + "learning_rate": 0.0001583720930232558, + "loss": 1850764532121.6, + "step": 180 + }, + { + "epoch": 1.1050328227571116, + "grad_norm": NaN, + "learning_rate": 0.00015604651162790697, + "loss": 235584067993.6, + "step": 190 + }, + { + "epoch": 1.1633843909555068, + "grad_norm": NaN, + "learning_rate": 0.00015372093023255815, + "loss": 81904845140787.2, + "step": 200 + }, + { + "epoch": 1.2217359591539023, + "grad_norm": NaN, + "learning_rate": 0.0001513953488372093, + "loss": 192153186100838.4, + "step": 210 + }, + { + "epoch": 1.2800875273522976, + "grad_norm": NaN, + "learning_rate": 0.0001490697674418605, + "loss": 1055250916048896.0, + "step": 220 + }, + { + "epoch": 1.3384390955506928, + "grad_norm": NaN, + "learning_rate": 0.00014674418604651164, + "loss": 110138201892454.4, + "step": 230 + }, + { + "epoch": 1.3967906637490883, + "grad_norm": NaN, + "learning_rate": 0.0001444186046511628, + "loss": 32757802598.4, + "step": 240 + }, + { + "epoch": 1.4551422319474836, + "grad_norm": NaN, + "learning_rate": 0.00014209302325581395, + "loss": 4169762986367385.5, + "step": 250 + }, + { + "epoch": 1.5134938001458789, + "grad_norm": NaN, + "learning_rate": 0.00013976744186046513, + "loss": 15430475776.0, + "step": 260 + }, + { + "epoch": 1.5718453683442744, + "grad_norm": NaN, + "learning_rate": 0.0001374418604651163, + "loss": 3311464452875878.5, + "step": 270 + }, + { + "epoch": 1.6301969365426696, + "grad_norm": NaN, + "learning_rate": 0.00013511627906976744, + "loss": 1962224958151065.5, + "step": 280 + }, + { + "epoch": 1.6885485047410649, + "grad_norm": NaN, + "learning_rate": 0.0001327906976744186, + "loss": 73588399302246.4, + "step": 290 + }, + { + "epoch": 1.7469000729394604, + "grad_norm": NaN, + "learning_rate": 0.00013046511627906975, + "loss": 156729173606.4, + "step": 300 + }, + { + "epoch": 1.8052516411378556, + "grad_norm": NaN, + "learning_rate": 0.00012813953488372093, + "loss": 63342667366.4, + "step": 310 + }, + { + "epoch": 1.863603209336251, + "grad_norm": NaN, + "learning_rate": 0.00012581395348837212, + "loss": 40657972363264.0, + "step": 320 + }, + { + "epoch": 1.9219547775346464, + "grad_norm": NaN, + "learning_rate": 0.00012348837209302327, + "loss": 3779488592691.2, + "step": 330 + }, + { + "epoch": 1.9803063457330414, + "grad_norm": NaN, + "learning_rate": 0.00012116279069767443, + "loss": 4767075637657.6, + "step": 340 + }, + { + "epoch": 2.0, + "eval_loss": NaN, + "eval_runtime": 4.0671, + "eval_samples_per_second": 149.985, + "eval_steps_per_second": 18.933, + "step": 344 + } + ], + "logging_steps": 10, + "max_steps": 860, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2524214180708352.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/byt5_v2_6gb/checkpoint-344/training_args.bin b/byt5_v2_6gb/checkpoint-344/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c157e8fe71b3035765545ee7a917ab96243a14 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-344/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128ab4c2243d9abce6d1e9c5e2a60d990f973437b7aff444644a9237e7479025 +size 5329 diff --git a/byt5_v2_6gb/checkpoint-516/README.md b/byt5_v2_6gb/checkpoint-516/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a085edb5dadbe42257056337b27aa2e1fb5dd5 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/README.md @@ -0,0 +1,206 @@ +--- +base_model: google/byt5-small +library_name: peft +tags: +- base_model:adapter:google/byt5-small +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-516/adapter_config.json b/byt5_v2_6gb/checkpoint-516/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6bfa1bc7d6e17490e6535bad18b0eb9abf2414 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/byt5-small", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v", + "q" + ], + "target_parameters": null, + "task_type": "SEQ_2_SEQ_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-516/adapter_model.safetensors b/byt5_v2_6gb/checkpoint-516/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6994cb0da316798fc781930d5505c1139f9d4c4 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0864f9e7b1f1f4f31a0fe3d19c160d5d275db4255a42506fcd4aaf9a9dd0ac95 +size 2386768 diff --git a/byt5_v2_6gb/checkpoint-516/added_tokens.json b/byt5_v2_6gb/checkpoint-516/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..93c190b5690dd55aac16723222a9909e2be0faec --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/added_tokens.json @@ -0,0 +1,127 @@ +{ + "": 259, + "": 359, + "": 360, + "": 361, + "": 362, + "": 363, + "": 364, + "": 365, + "": 366, + "": 367, + "": 368, + "": 269, + "": 369, + "": 370, + "": 371, + "": 372, + "": 373, + "": 374, + "": 375, + "": 376, + "": 377, + "": 378, + "": 270, + "": 379, + "": 380, + "": 381, + "": 382, + "": 383, + "": 271, + "": 272, + "": 273, + "": 274, + "": 275, + "": 276, + "": 277, + "": 278, + "": 260, + "": 279, + "": 280, + "": 281, + "": 282, + "": 283, + "": 284, + "": 285, + "": 286, + "": 287, + "": 288, + "": 261, + "": 289, + "": 290, + "": 291, + "": 292, + "": 293, + "": 294, + "": 295, + "": 296, + "": 297, + "": 298, + "": 262, + "": 299, + "": 300, + "": 301, + "": 302, + "": 303, + "": 304, + "": 305, + "": 306, + "": 307, + "": 308, + "": 263, + "": 309, + "": 310, + "": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 264, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 265, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 266, + "": 339, + "": 340, + "": 341, + "": 342, + "": 343, + "": 344, + "": 345, + "": 346, + "": 347, + "": 348, + "": 267, + "": 349, + "": 350, + "": 351, + "": 352, + "": 353, + "": 354, + "": 355, + "": 356, + "": 357, + "": 358, + "": 268 +} diff --git a/byt5_v2_6gb/checkpoint-516/optimizer.pt b/byt5_v2_6gb/checkpoint-516/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..88fb03be4ba403fb46fc0d39a4becc8cfb0c1a53 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0b04cfc9d510f9f7277c28a4193412ab9ee1e71560d830019992d45571a66e +size 4820363 diff --git a/byt5_v2_6gb/checkpoint-516/rng_state.pth b/byt5_v2_6gb/checkpoint-516/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a61bd8e2acf76b2afb41aa4c023079805f0b47a9 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e56e00981ac8b8ee72e3811515d9f1891c7f4ec8e78a6ff9d4c7db723d5cd8 +size 14645 diff --git a/byt5_v2_6gb/checkpoint-516/scaler.pt b/byt5_v2_6gb/checkpoint-516/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac4d0604c3a7ed5ca82722253e93067dca857bd --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67 +size 1383 diff --git a/byt5_v2_6gb/checkpoint-516/scheduler.pt b/byt5_v2_6gb/checkpoint-516/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb1fb38a4b7d8ed940c27f41483f2252e70732a6 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d67b673f1aa652957b29618f77e6e57eea79804790bee80bb4c63f090e93d14d +size 1465 diff --git a/byt5_v2_6gb/checkpoint-516/tokenizer_config.json b/byt5_v2_6gb/checkpoint-516/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96da6801779a232712edf0b6760664675cffbc24 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/tokenizer_config.json @@ -0,0 +1,1290 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "backend": "custom", + "eos_token": "", + "extra_ids": 0, + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "ByT5Tokenizer", + "unk_token": "" +} diff --git a/byt5_v2_6gb/checkpoint-516/trainer_state.json b/byt5_v2_6gb/checkpoint-516/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8cf9d712f0516c0e8678137fce9481ed27b3ae7a --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/trainer_state.json @@ -0,0 +1,415 @@ +{ + "best_global_step": null, + "best_metric": Infinity, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 516, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.058351568198395334, + "grad_norm": NaN, + "learning_rate": 0.00019790697674418605, + "loss": 891140376730009.6, + "step": 10 + }, + { + "epoch": 0.11670313639679067, + "grad_norm": NaN, + "learning_rate": 0.00019558139534883723, + "loss": 4850718867456.0, + "step": 20 + }, + { + "epoch": 0.175054704595186, + "grad_norm": NaN, + "learning_rate": 0.00019325581395348838, + "loss": 3252219491123.2, + "step": 30 + }, + { + "epoch": 0.23340627279358134, + "grad_norm": NaN, + "learning_rate": 0.00019093023255813956, + "loss": 21253524684.8, + "step": 40 + }, + { + "epoch": 0.29175784099197666, + "grad_norm": NaN, + "learning_rate": 0.00018860465116279072, + "loss": 1242686510071808.0, + "step": 50 + }, + { + "epoch": 0.350109409190372, + "grad_norm": NaN, + "learning_rate": 0.00018627906976744187, + "loss": 87104688226304.0, + "step": 60 + }, + { + "epoch": 0.4084609773887673, + "grad_norm": NaN, + "learning_rate": 0.00018395348837209303, + "loss": 1.3086946598531892e+16, + "step": 70 + }, + { + "epoch": 0.4668125455871627, + "grad_norm": NaN, + "learning_rate": 0.00018162790697674418, + "loss": 14105561792512.0, + "step": 80 + }, + { + "epoch": 0.5251641137855579, + "grad_norm": NaN, + "learning_rate": 0.00017930232558139534, + "loss": 204642777.6, + "step": 90 + }, + { + "epoch": 0.5835156819839533, + "grad_norm": NaN, + "learning_rate": 0.00017697674418604652, + "loss": 268920008841625.6, + "step": 100 + }, + { + "epoch": 0.6418672501823487, + "grad_norm": NaN, + "learning_rate": 0.00017465116279069768, + "loss": 165014200437964.8, + "step": 110 + }, + { + "epoch": 0.700218818380744, + "grad_norm": NaN, + "learning_rate": 0.00017232558139534886, + "loss": 341466650404454.4, + "step": 120 + }, + { + "epoch": 0.7585703865791393, + "grad_norm": NaN, + "learning_rate": 0.00017, + "loss": 1138850092233523.2, + "step": 130 + }, + { + "epoch": 0.8169219547775346, + "grad_norm": NaN, + "learning_rate": 0.00016767441860465117, + "loss": 92024340480.0, + "step": 140 + }, + { + "epoch": 0.87527352297593, + "grad_norm": NaN, + "learning_rate": 0.00016534883720930235, + "loss": 78592881773772.8, + "step": 150 + }, + { + "epoch": 0.9336250911743253, + "grad_norm": NaN, + "learning_rate": 0.0001630232558139535, + "loss": 12534172509798.4, + "step": 160 + }, + { + "epoch": 0.9919766593727206, + "grad_norm": NaN, + "learning_rate": 0.00016069767441860466, + "loss": 186510766768128.0, + "step": 170 + }, + { + "epoch": 1.0, + "eval_loss": NaN, + "eval_runtime": 4.1714, + "eval_samples_per_second": 146.234, + "eval_steps_per_second": 18.459, + "step": 172 + }, + { + "epoch": 1.0466812545587163, + "grad_norm": NaN, + "learning_rate": 0.0001583720930232558, + "loss": 1850764532121.6, + "step": 180 + }, + { + "epoch": 1.1050328227571116, + "grad_norm": NaN, + "learning_rate": 0.00015604651162790697, + "loss": 235584067993.6, + "step": 190 + }, + { + "epoch": 1.1633843909555068, + "grad_norm": NaN, + "learning_rate": 0.00015372093023255815, + "loss": 81904845140787.2, + "step": 200 + }, + { + "epoch": 1.2217359591539023, + "grad_norm": NaN, + "learning_rate": 0.0001513953488372093, + "loss": 192153186100838.4, + "step": 210 + }, + { + "epoch": 1.2800875273522976, + "grad_norm": NaN, + "learning_rate": 0.0001490697674418605, + "loss": 1055250916048896.0, + "step": 220 + }, + { + "epoch": 1.3384390955506928, + "grad_norm": NaN, + "learning_rate": 0.00014674418604651164, + "loss": 110138201892454.4, + "step": 230 + }, + { + "epoch": 1.3967906637490883, + "grad_norm": NaN, + "learning_rate": 0.0001444186046511628, + "loss": 32757802598.4, + "step": 240 + }, + { + "epoch": 1.4551422319474836, + "grad_norm": NaN, + "learning_rate": 0.00014209302325581395, + "loss": 4169762986367385.5, + "step": 250 + }, + { + "epoch": 1.5134938001458789, + "grad_norm": NaN, + "learning_rate": 0.00013976744186046513, + "loss": 15430475776.0, + "step": 260 + }, + { + "epoch": 1.5718453683442744, + "grad_norm": NaN, + "learning_rate": 0.0001374418604651163, + "loss": 3311464452875878.5, + "step": 270 + }, + { + "epoch": 1.6301969365426696, + "grad_norm": NaN, + "learning_rate": 0.00013511627906976744, + "loss": 1962224958151065.5, + "step": 280 + }, + { + "epoch": 1.6885485047410649, + "grad_norm": NaN, + "learning_rate": 0.0001327906976744186, + "loss": 73588399302246.4, + "step": 290 + }, + { + "epoch": 1.7469000729394604, + "grad_norm": NaN, + "learning_rate": 0.00013046511627906975, + "loss": 156729173606.4, + "step": 300 + }, + { + "epoch": 1.8052516411378556, + "grad_norm": NaN, + "learning_rate": 0.00012813953488372093, + "loss": 63342667366.4, + "step": 310 + }, + { + "epoch": 1.863603209336251, + "grad_norm": NaN, + "learning_rate": 0.00012581395348837212, + "loss": 40657972363264.0, + "step": 320 + }, + { + "epoch": 1.9219547775346464, + "grad_norm": NaN, + "learning_rate": 0.00012348837209302327, + "loss": 3779488592691.2, + "step": 330 + }, + { + "epoch": 1.9803063457330414, + "grad_norm": NaN, + "learning_rate": 0.00012116279069767443, + "loss": 4767075637657.6, + "step": 340 + }, + { + "epoch": 2.0, + "eval_loss": NaN, + "eval_runtime": 4.0671, + "eval_samples_per_second": 149.985, + "eval_steps_per_second": 18.933, + "step": 344 + }, + { + "epoch": 2.035010940919037, + "grad_norm": NaN, + "learning_rate": 0.00011883720930232558, + "loss": 155492311629824.0, + "step": 350 + }, + { + "epoch": 2.0933625091174326, + "grad_norm": NaN, + "learning_rate": 0.00011651162790697674, + "loss": 111986648442470.4, + "step": 360 + }, + { + "epoch": 2.1517140773158276, + "grad_norm": NaN, + "learning_rate": 0.00011418604651162792, + "loss": 156752150528.0, + "step": 370 + }, + { + "epoch": 2.210065645514223, + "grad_norm": NaN, + "learning_rate": 0.00011186046511627907, + "loss": 5171589120.0, + "step": 380 + }, + { + "epoch": 2.2684172137126186, + "grad_norm": NaN, + "learning_rate": 0.00010953488372093024, + "loss": 3302979744982630.5, + "step": 390 + }, + { + "epoch": 2.3267687819110137, + "grad_norm": NaN, + "learning_rate": 0.0001072093023255814, + "loss": 3799967334.4, + "step": 400 + }, + { + "epoch": 2.385120350109409, + "grad_norm": NaN, + "learning_rate": 0.00010488372093023255, + "loss": 674909818716160.0, + "step": 410 + }, + { + "epoch": 2.4434719183078046, + "grad_norm": NaN, + "learning_rate": 0.00010255813953488373, + "loss": 1310430384986521.5, + "step": 420 + }, + { + "epoch": 2.5018234865061997, + "grad_norm": NaN, + "learning_rate": 0.0001002325581395349, + "loss": 104982459187.2, + "step": 430 + }, + { + "epoch": 2.560175054704595, + "grad_norm": NaN, + "learning_rate": 9.790697674418605e-05, + "loss": 318152225744486.4, + "step": 440 + }, + { + "epoch": 2.6185266229029907, + "grad_norm": NaN, + "learning_rate": 9.558139534883721e-05, + "loss": 422663597693337.6, + "step": 450 + }, + { + "epoch": 2.6768781911013857, + "grad_norm": NaN, + "learning_rate": 9.325581395348838e-05, + "loss": 592010400078233.6, + "step": 460 + }, + { + "epoch": 2.735229759299781, + "grad_norm": NaN, + "learning_rate": 9.093023255813953e-05, + "loss": 225784900668620.8, + "step": 470 + }, + { + "epoch": 2.7935813274981767, + "grad_norm": NaN, + "learning_rate": 8.86046511627907e-05, + "loss": 22978112716.8, + "step": 480 + }, + { + "epoch": 2.8519328956965717, + "grad_norm": NaN, + "learning_rate": 8.627906976744187e-05, + "loss": 177317523488768.0, + "step": 490 + }, + { + "epoch": 2.910284463894967, + "grad_norm": NaN, + "learning_rate": 8.395348837209302e-05, + "loss": 1558973723443.2, + "step": 500 + }, + { + "epoch": 2.9686360320933627, + "grad_norm": NaN, + "learning_rate": 8.162790697674419e-05, + "loss": 2396209561.6, + "step": 510 + }, + { + "epoch": 3.0, + "eval_loss": NaN, + "eval_runtime": 4.0081, + "eval_samples_per_second": 152.193, + "eval_steps_per_second": 19.211, + "step": 516 + } + ], + "logging_steps": 10, + "max_steps": 860, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3786321271062528.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/byt5_v2_6gb/checkpoint-516/training_args.bin b/byt5_v2_6gb/checkpoint-516/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c157e8fe71b3035765545ee7a917ab96243a14 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-516/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128ab4c2243d9abce6d1e9c5e2a60d990f973437b7aff444644a9237e7479025 +size 5329 diff --git a/byt5_v2_6gb/checkpoint-688/README.md b/byt5_v2_6gb/checkpoint-688/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a085edb5dadbe42257056337b27aa2e1fb5dd5 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/README.md @@ -0,0 +1,206 @@ +--- +base_model: google/byt5-small +library_name: peft +tags: +- base_model:adapter:google/byt5-small +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-688/adapter_config.json b/byt5_v2_6gb/checkpoint-688/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6bfa1bc7d6e17490e6535bad18b0eb9abf2414 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/byt5-small", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v", + "q" + ], + "target_parameters": null, + "task_type": "SEQ_2_SEQ_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-688/adapter_model.safetensors b/byt5_v2_6gb/checkpoint-688/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6994cb0da316798fc781930d5505c1139f9d4c4 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0864f9e7b1f1f4f31a0fe3d19c160d5d275db4255a42506fcd4aaf9a9dd0ac95 +size 2386768 diff --git a/byt5_v2_6gb/checkpoint-688/added_tokens.json b/byt5_v2_6gb/checkpoint-688/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..93c190b5690dd55aac16723222a9909e2be0faec --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/added_tokens.json @@ -0,0 +1,127 @@ +{ + "": 259, + "": 359, + "": 360, + "": 361, + "": 362, + "": 363, + "": 364, + "": 365, + "": 366, + "": 367, + "": 368, + "": 269, + "": 369, + "": 370, + "": 371, + "": 372, + "": 373, + "": 374, + "": 375, + "": 376, + "": 377, + "": 378, + "": 270, + "": 379, + "": 380, + "": 381, + "": 382, + "": 383, + "": 271, + "": 272, + "": 273, + "": 274, + "": 275, + "": 276, + "": 277, + "": 278, + "": 260, + "": 279, + "": 280, + "": 281, + "": 282, + "": 283, + "": 284, + "": 285, + "": 286, + "": 287, + "": 288, + "": 261, + "": 289, + "": 290, + "": 291, + "": 292, + "": 293, + "": 294, + "": 295, + "": 296, + "": 297, + "": 298, + "": 262, + "": 299, + "": 300, + "": 301, + "": 302, + "": 303, + "": 304, + "": 305, + "": 306, + "": 307, + "": 308, + "": 263, + "": 309, + "": 310, + "": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 264, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 265, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 266, + "": 339, + "": 340, + "": 341, + "": 342, + "": 343, + "": 344, + "": 345, + "": 346, + "": 347, + "": 348, + "": 267, + "": 349, + "": 350, + "": 351, + "": 352, + "": 353, + "": 354, + "": 355, + "": 356, + "": 357, + "": 358, + "": 268 +} diff --git a/byt5_v2_6gb/checkpoint-688/optimizer.pt b/byt5_v2_6gb/checkpoint-688/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3462319995e92503be4bdd6be6116d2f7fd9f5d9 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d89c5562126a677b12fb22aaacdb089463e29db2cbe597fa9a5cb3612556ae +size 4820363 diff --git a/byt5_v2_6gb/checkpoint-688/rng_state.pth b/byt5_v2_6gb/checkpoint-688/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..88e7ec26d468a0c578bcceced22980427e6cf8e8 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95040b09f9a949e9e1af476ad6672c556a9d8b16350a048bb97b45c416139f0d +size 14645 diff --git a/byt5_v2_6gb/checkpoint-688/scaler.pt b/byt5_v2_6gb/checkpoint-688/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac4d0604c3a7ed5ca82722253e93067dca857bd --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67 +size 1383 diff --git a/byt5_v2_6gb/checkpoint-688/scheduler.pt b/byt5_v2_6gb/checkpoint-688/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..91d6fd38b4b6499bd33397bd095ac468cb142f11 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b96c0cac92a1e44cc0062e4863fc4f965b2838a0bf4451f6608b50d8d1a4301e +size 1465 diff --git a/byt5_v2_6gb/checkpoint-688/tokenizer_config.json b/byt5_v2_6gb/checkpoint-688/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96da6801779a232712edf0b6760664675cffbc24 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/tokenizer_config.json @@ -0,0 +1,1290 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "backend": "custom", + "eos_token": "", + "extra_ids": 0, + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "ByT5Tokenizer", + "unk_token": "" +} diff --git a/byt5_v2_6gb/checkpoint-688/trainer_state.json b/byt5_v2_6gb/checkpoint-688/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a6848913e0aecef959ca50f6082cffdbe0f0187 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/trainer_state.json @@ -0,0 +1,542 @@ +{ + "best_global_step": null, + "best_metric": Infinity, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 688, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.058351568198395334, + "grad_norm": NaN, + "learning_rate": 0.00019790697674418605, + "loss": 891140376730009.6, + "step": 10 + }, + { + "epoch": 0.11670313639679067, + "grad_norm": NaN, + "learning_rate": 0.00019558139534883723, + "loss": 4850718867456.0, + "step": 20 + }, + { + "epoch": 0.175054704595186, + "grad_norm": NaN, + "learning_rate": 0.00019325581395348838, + "loss": 3252219491123.2, + "step": 30 + }, + { + "epoch": 0.23340627279358134, + "grad_norm": NaN, + "learning_rate": 0.00019093023255813956, + "loss": 21253524684.8, + "step": 40 + }, + { + "epoch": 0.29175784099197666, + "grad_norm": NaN, + "learning_rate": 0.00018860465116279072, + "loss": 1242686510071808.0, + "step": 50 + }, + { + "epoch": 0.350109409190372, + "grad_norm": NaN, + "learning_rate": 0.00018627906976744187, + "loss": 87104688226304.0, + "step": 60 + }, + { + "epoch": 0.4084609773887673, + "grad_norm": NaN, + "learning_rate": 0.00018395348837209303, + "loss": 1.3086946598531892e+16, + "step": 70 + }, + { + "epoch": 0.4668125455871627, + "grad_norm": NaN, + "learning_rate": 0.00018162790697674418, + "loss": 14105561792512.0, + "step": 80 + }, + { + "epoch": 0.5251641137855579, + "grad_norm": NaN, + "learning_rate": 0.00017930232558139534, + "loss": 204642777.6, + "step": 90 + }, + { + "epoch": 0.5835156819839533, + "grad_norm": NaN, + "learning_rate": 0.00017697674418604652, + "loss": 268920008841625.6, + "step": 100 + }, + { + "epoch": 0.6418672501823487, + "grad_norm": NaN, + "learning_rate": 0.00017465116279069768, + "loss": 165014200437964.8, + "step": 110 + }, + { + "epoch": 0.700218818380744, + "grad_norm": NaN, + "learning_rate": 0.00017232558139534886, + "loss": 341466650404454.4, + "step": 120 + }, + { + "epoch": 0.7585703865791393, + "grad_norm": NaN, + "learning_rate": 0.00017, + "loss": 1138850092233523.2, + "step": 130 + }, + { + "epoch": 0.8169219547775346, + "grad_norm": NaN, + "learning_rate": 0.00016767441860465117, + "loss": 92024340480.0, + "step": 140 + }, + { + "epoch": 0.87527352297593, + "grad_norm": NaN, + "learning_rate": 0.00016534883720930235, + "loss": 78592881773772.8, + "step": 150 + }, + { + "epoch": 0.9336250911743253, + "grad_norm": NaN, + "learning_rate": 0.0001630232558139535, + "loss": 12534172509798.4, + "step": 160 + }, + { + "epoch": 0.9919766593727206, + "grad_norm": NaN, + "learning_rate": 0.00016069767441860466, + "loss": 186510766768128.0, + "step": 170 + }, + { + "epoch": 1.0, + "eval_loss": NaN, + "eval_runtime": 4.1714, + "eval_samples_per_second": 146.234, + "eval_steps_per_second": 18.459, + "step": 172 + }, + { + "epoch": 1.0466812545587163, + "grad_norm": NaN, + "learning_rate": 0.0001583720930232558, + "loss": 1850764532121.6, + "step": 180 + }, + { + "epoch": 1.1050328227571116, + "grad_norm": NaN, + "learning_rate": 0.00015604651162790697, + "loss": 235584067993.6, + "step": 190 + }, + { + "epoch": 1.1633843909555068, + "grad_norm": NaN, + "learning_rate": 0.00015372093023255815, + "loss": 81904845140787.2, + "step": 200 + }, + { + "epoch": 1.2217359591539023, + "grad_norm": NaN, + "learning_rate": 0.0001513953488372093, + "loss": 192153186100838.4, + "step": 210 + }, + { + "epoch": 1.2800875273522976, + "grad_norm": NaN, + "learning_rate": 0.0001490697674418605, + "loss": 1055250916048896.0, + "step": 220 + }, + { + "epoch": 1.3384390955506928, + "grad_norm": NaN, + "learning_rate": 0.00014674418604651164, + "loss": 110138201892454.4, + "step": 230 + }, + { + "epoch": 1.3967906637490883, + "grad_norm": NaN, + "learning_rate": 0.0001444186046511628, + "loss": 32757802598.4, + "step": 240 + }, + { + "epoch": 1.4551422319474836, + "grad_norm": NaN, + "learning_rate": 0.00014209302325581395, + "loss": 4169762986367385.5, + "step": 250 + }, + { + "epoch": 1.5134938001458789, + "grad_norm": NaN, + "learning_rate": 0.00013976744186046513, + "loss": 15430475776.0, + "step": 260 + }, + { + "epoch": 1.5718453683442744, + "grad_norm": NaN, + "learning_rate": 0.0001374418604651163, + "loss": 3311464452875878.5, + "step": 270 + }, + { + "epoch": 1.6301969365426696, + "grad_norm": NaN, + "learning_rate": 0.00013511627906976744, + "loss": 1962224958151065.5, + "step": 280 + }, + { + "epoch": 1.6885485047410649, + "grad_norm": NaN, + "learning_rate": 0.0001327906976744186, + "loss": 73588399302246.4, + "step": 290 + }, + { + "epoch": 1.7469000729394604, + "grad_norm": NaN, + "learning_rate": 0.00013046511627906975, + "loss": 156729173606.4, + "step": 300 + }, + { + "epoch": 1.8052516411378556, + "grad_norm": NaN, + "learning_rate": 0.00012813953488372093, + "loss": 63342667366.4, + "step": 310 + }, + { + "epoch": 1.863603209336251, + "grad_norm": NaN, + "learning_rate": 0.00012581395348837212, + "loss": 40657972363264.0, + "step": 320 + }, + { + "epoch": 1.9219547775346464, + "grad_norm": NaN, + "learning_rate": 0.00012348837209302327, + "loss": 3779488592691.2, + "step": 330 + }, + { + "epoch": 1.9803063457330414, + "grad_norm": NaN, + "learning_rate": 0.00012116279069767443, + "loss": 4767075637657.6, + "step": 340 + }, + { + "epoch": 2.0, + "eval_loss": NaN, + "eval_runtime": 4.0671, + "eval_samples_per_second": 149.985, + "eval_steps_per_second": 18.933, + "step": 344 + }, + { + "epoch": 2.035010940919037, + "grad_norm": NaN, + "learning_rate": 0.00011883720930232558, + "loss": 155492311629824.0, + "step": 350 + }, + { + "epoch": 2.0933625091174326, + "grad_norm": NaN, + "learning_rate": 0.00011651162790697674, + "loss": 111986648442470.4, + "step": 360 + }, + { + "epoch": 2.1517140773158276, + "grad_norm": NaN, + "learning_rate": 0.00011418604651162792, + "loss": 156752150528.0, + "step": 370 + }, + { + "epoch": 2.210065645514223, + "grad_norm": NaN, + "learning_rate": 0.00011186046511627907, + "loss": 5171589120.0, + "step": 380 + }, + { + "epoch": 2.2684172137126186, + "grad_norm": NaN, + "learning_rate": 0.00010953488372093024, + "loss": 3302979744982630.5, + "step": 390 + }, + { + "epoch": 2.3267687819110137, + "grad_norm": NaN, + "learning_rate": 0.0001072093023255814, + "loss": 3799967334.4, + "step": 400 + }, + { + "epoch": 2.385120350109409, + "grad_norm": NaN, + "learning_rate": 0.00010488372093023255, + "loss": 674909818716160.0, + "step": 410 + }, + { + "epoch": 2.4434719183078046, + "grad_norm": NaN, + "learning_rate": 0.00010255813953488373, + "loss": 1310430384986521.5, + "step": 420 + }, + { + "epoch": 2.5018234865061997, + "grad_norm": NaN, + "learning_rate": 0.0001002325581395349, + "loss": 104982459187.2, + "step": 430 + }, + { + "epoch": 2.560175054704595, + "grad_norm": NaN, + "learning_rate": 9.790697674418605e-05, + "loss": 318152225744486.4, + "step": 440 + }, + { + "epoch": 2.6185266229029907, + "grad_norm": NaN, + "learning_rate": 9.558139534883721e-05, + "loss": 422663597693337.6, + "step": 450 + }, + { + "epoch": 2.6768781911013857, + "grad_norm": NaN, + "learning_rate": 9.325581395348838e-05, + "loss": 592010400078233.6, + "step": 460 + }, + { + "epoch": 2.735229759299781, + "grad_norm": NaN, + "learning_rate": 9.093023255813953e-05, + "loss": 225784900668620.8, + "step": 470 + }, + { + "epoch": 2.7935813274981767, + "grad_norm": NaN, + "learning_rate": 8.86046511627907e-05, + "loss": 22978112716.8, + "step": 480 + }, + { + "epoch": 2.8519328956965717, + "grad_norm": NaN, + "learning_rate": 8.627906976744187e-05, + "loss": 177317523488768.0, + "step": 490 + }, + { + "epoch": 2.910284463894967, + "grad_norm": NaN, + "learning_rate": 8.395348837209302e-05, + "loss": 1558973723443.2, + "step": 500 + }, + { + "epoch": 2.9686360320933627, + "grad_norm": NaN, + "learning_rate": 8.162790697674419e-05, + "loss": 2396209561.6, + "step": 510 + }, + { + "epoch": 3.0, + "eval_loss": NaN, + "eval_runtime": 4.0081, + "eval_samples_per_second": 152.193, + "eval_steps_per_second": 19.211, + "step": 516 + }, + { + "epoch": 3.023340627279358, + "grad_norm": NaN, + "learning_rate": 7.930232558139535e-05, + "loss": 82400054870016.0, + "step": 520 + }, + { + "epoch": 3.0816921954777534, + "grad_norm": NaN, + "learning_rate": 7.697674418604652e-05, + "loss": 1756928000.0, + "step": 530 + }, + { + "epoch": 3.140043763676149, + "grad_norm": NaN, + "learning_rate": 7.465116279069768e-05, + "loss": 540170833100.8, + "step": 540 + }, + { + "epoch": 3.198395331874544, + "grad_norm": NaN, + "learning_rate": 7.232558139534884e-05, + "loss": 772080769433600.0, + "step": 550 + }, + { + "epoch": 3.2567469000729394, + "grad_norm": NaN, + "learning_rate": 7e-05, + "loss": 15627939794124.8, + "step": 560 + }, + { + "epoch": 3.315098468271335, + "grad_norm": NaN, + "learning_rate": 6.767441860465116e-05, + "loss": 1.2879289224737588e+16, + "step": 570 + }, + { + "epoch": 3.37345003646973, + "grad_norm": NaN, + "learning_rate": 6.534883720930233e-05, + "loss": 187756119379148.8, + "step": 580 + }, + { + "epoch": 3.4318016046681254, + "grad_norm": NaN, + "learning_rate": 6.30232558139535e-05, + "loss": 4036548992224461.0, + "step": 590 + }, + { + "epoch": 3.490153172866521, + "grad_norm": NaN, + "learning_rate": 6.0697674418604654e-05, + "loss": 6974699612196045.0, + "step": 600 + }, + { + "epoch": 3.548504741064916, + "grad_norm": NaN, + "learning_rate": 5.8372093023255815e-05, + "loss": 1335860972748.8, + "step": 610 + }, + { + "epoch": 3.6068563092633115, + "grad_norm": NaN, + "learning_rate": 5.6046511627906984e-05, + "loss": 30351064196710.4, + "step": 620 + }, + { + "epoch": 3.6652078774617065, + "grad_norm": NaN, + "learning_rate": 5.3720930232558145e-05, + "loss": 580581492103577.6, + "step": 630 + }, + { + "epoch": 3.723559445660102, + "grad_norm": NaN, + "learning_rate": 5.13953488372093e-05, + "loss": 3143535311821209.5, + "step": 640 + }, + { + "epoch": 3.7819110138584975, + "grad_norm": NaN, + "learning_rate": 4.906976744186046e-05, + "loss": 10500325978931.2, + "step": 650 + }, + { + "epoch": 3.8402625820568925, + "grad_norm": NaN, + "learning_rate": 4.674418604651163e-05, + "loss": 49259429547212.8, + "step": 660 + }, + { + "epoch": 3.898614150255288, + "grad_norm": NaN, + "learning_rate": 4.441860465116279e-05, + "loss": 9668237721.6, + "step": 670 + }, + { + "epoch": 3.9569657184536835, + "grad_norm": NaN, + "learning_rate": 4.209302325581396e-05, + "loss": 622216314512998.4, + "step": 680 + }, + { + "epoch": 4.0, + "eval_loss": NaN, + "eval_runtime": 4.3583, + "eval_samples_per_second": 139.964, + "eval_steps_per_second": 17.668, + "step": 688 + } + ], + "logging_steps": 10, + "max_steps": 860, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5048428361416704.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/byt5_v2_6gb/checkpoint-688/training_args.bin b/byt5_v2_6gb/checkpoint-688/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c157e8fe71b3035765545ee7a917ab96243a14 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-688/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128ab4c2243d9abce6d1e9c5e2a60d990f973437b7aff444644a9237e7479025 +size 5329 diff --git a/byt5_v2_6gb/checkpoint-860/README.md b/byt5_v2_6gb/checkpoint-860/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a085edb5dadbe42257056337b27aa2e1fb5dd5 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/README.md @@ -0,0 +1,206 @@ +--- +base_model: google/byt5-small +library_name: peft +tags: +- base_model:adapter:google/byt5-small +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-860/adapter_config.json b/byt5_v2_6gb/checkpoint-860/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6bfa1bc7d6e17490e6535bad18b0eb9abf2414 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/byt5-small", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v", + "q" + ], + "target_parameters": null, + "task_type": "SEQ_2_SEQ_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/byt5_v2_6gb/checkpoint-860/adapter_model.safetensors b/byt5_v2_6gb/checkpoint-860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6994cb0da316798fc781930d5505c1139f9d4c4 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0864f9e7b1f1f4f31a0fe3d19c160d5d275db4255a42506fcd4aaf9a9dd0ac95 +size 2386768 diff --git a/byt5_v2_6gb/checkpoint-860/added_tokens.json b/byt5_v2_6gb/checkpoint-860/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..93c190b5690dd55aac16723222a9909e2be0faec --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/added_tokens.json @@ -0,0 +1,127 @@ +{ + "": 259, + "": 359, + "": 360, + "": 361, + "": 362, + "": 363, + "": 364, + "": 365, + "": 366, + "": 367, + "": 368, + "": 269, + "": 369, + "": 370, + "": 371, + "": 372, + "": 373, + "": 374, + "": 375, + "": 376, + "": 377, + "": 378, + "": 270, + "": 379, + "": 380, + "": 381, + "": 382, + "": 383, + "": 271, + "": 272, + "": 273, + "": 274, + "": 275, + "": 276, + "": 277, + "": 278, + "": 260, + "": 279, + "": 280, + "": 281, + "": 282, + "": 283, + "": 284, + "": 285, + "": 286, + "": 287, + "": 288, + "": 261, + "": 289, + "": 290, + "": 291, + "": 292, + "": 293, + "": 294, + "": 295, + "": 296, + "": 297, + "": 298, + "": 262, + "": 299, + "": 300, + "": 301, + "": 302, + "": 303, + "": 304, + "": 305, + "": 306, + "": 307, + "": 308, + "": 263, + "": 309, + "": 310, + "": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 264, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 265, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 266, + "": 339, + "": 340, + "": 341, + "": 342, + "": 343, + "": 344, + "": 345, + "": 346, + "": 347, + "": 348, + "": 267, + "": 349, + "": 350, + "": 351, + "": 352, + "": 353, + "": 354, + "": 355, + "": 356, + "": 357, + "": 358, + "": 268 +} diff --git a/byt5_v2_6gb/checkpoint-860/optimizer.pt b/byt5_v2_6gb/checkpoint-860/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e43d40034da1dc101bb7800caf5e8ded003afedf --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5542c3057cc44f9642800930a71e8ab887b407288f76bf3cb2efeb114cfe5578 +size 4820363 diff --git a/byt5_v2_6gb/checkpoint-860/rng_state.pth b/byt5_v2_6gb/checkpoint-860/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a9f08d253e4879b1fd66448f91483953ecfa8c7 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed1df3f8998f5cefe609394626b7b1098e07e284d83d5becb1cfd36c8c24758 +size 14645 diff --git a/byt5_v2_6gb/checkpoint-860/scaler.pt b/byt5_v2_6gb/checkpoint-860/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac4d0604c3a7ed5ca82722253e93067dca857bd --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67 +size 1383 diff --git a/byt5_v2_6gb/checkpoint-860/scheduler.pt b/byt5_v2_6gb/checkpoint-860/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddab8ff5efbcfc6de1465c28055fc15ee3aec604 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad153154ea99e60fb93078827a8601dc207507b974568e0b7adb935b48930e52 +size 1465 diff --git a/byt5_v2_6gb/checkpoint-860/tokenizer_config.json b/byt5_v2_6gb/checkpoint-860/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96da6801779a232712edf0b6760664675cffbc24 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/tokenizer_config.json @@ -0,0 +1,1290 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "backend": "custom", + "eos_token": "", + "extra_ids": 0, + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "ByT5Tokenizer", + "unk_token": "" +} diff --git a/byt5_v2_6gb/checkpoint-860/trainer_state.json b/byt5_v2_6gb/checkpoint-860/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..516ffa94e64dff2a2469bda2242248e93a21ebda --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_global_step": null, + "best_metric": Infinity, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 860, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.058351568198395334, + "grad_norm": NaN, + "learning_rate": 0.00019790697674418605, + "loss": 891140376730009.6, + "step": 10 + }, + { + "epoch": 0.11670313639679067, + "grad_norm": NaN, + "learning_rate": 0.00019558139534883723, + "loss": 4850718867456.0, + "step": 20 + }, + { + "epoch": 0.175054704595186, + "grad_norm": NaN, + "learning_rate": 0.00019325581395348838, + "loss": 3252219491123.2, + "step": 30 + }, + { + "epoch": 0.23340627279358134, + "grad_norm": NaN, + "learning_rate": 0.00019093023255813956, + "loss": 21253524684.8, + "step": 40 + }, + { + "epoch": 0.29175784099197666, + "grad_norm": NaN, + "learning_rate": 0.00018860465116279072, + "loss": 1242686510071808.0, + "step": 50 + }, + { + "epoch": 0.350109409190372, + "grad_norm": NaN, + "learning_rate": 0.00018627906976744187, + "loss": 87104688226304.0, + "step": 60 + }, + { + "epoch": 0.4084609773887673, + "grad_norm": NaN, + "learning_rate": 0.00018395348837209303, + "loss": 1.3086946598531892e+16, + "step": 70 + }, + { + "epoch": 0.4668125455871627, + "grad_norm": NaN, + "learning_rate": 0.00018162790697674418, + "loss": 14105561792512.0, + "step": 80 + }, + { + "epoch": 0.5251641137855579, + "grad_norm": NaN, + "learning_rate": 0.00017930232558139534, + "loss": 204642777.6, + "step": 90 + }, + { + "epoch": 0.5835156819839533, + "grad_norm": NaN, + "learning_rate": 0.00017697674418604652, + "loss": 268920008841625.6, + "step": 100 + }, + { + "epoch": 0.6418672501823487, + "grad_norm": NaN, + "learning_rate": 0.00017465116279069768, + "loss": 165014200437964.8, + "step": 110 + }, + { + "epoch": 0.700218818380744, + "grad_norm": NaN, + "learning_rate": 0.00017232558139534886, + "loss": 341466650404454.4, + "step": 120 + }, + { + "epoch": 0.7585703865791393, + "grad_norm": NaN, + "learning_rate": 0.00017, + "loss": 1138850092233523.2, + "step": 130 + }, + { + "epoch": 0.8169219547775346, + "grad_norm": NaN, + "learning_rate": 0.00016767441860465117, + "loss": 92024340480.0, + "step": 140 + }, + { + "epoch": 0.87527352297593, + "grad_norm": NaN, + "learning_rate": 0.00016534883720930235, + "loss": 78592881773772.8, + "step": 150 + }, + { + "epoch": 0.9336250911743253, + "grad_norm": NaN, + "learning_rate": 0.0001630232558139535, + "loss": 12534172509798.4, + "step": 160 + }, + { + "epoch": 0.9919766593727206, + "grad_norm": NaN, + "learning_rate": 0.00016069767441860466, + "loss": 186510766768128.0, + "step": 170 + }, + { + "epoch": 1.0, + "eval_loss": NaN, + "eval_runtime": 4.1714, + "eval_samples_per_second": 146.234, + "eval_steps_per_second": 18.459, + "step": 172 + }, + { + "epoch": 1.0466812545587163, + "grad_norm": NaN, + "learning_rate": 0.0001583720930232558, + "loss": 1850764532121.6, + "step": 180 + }, + { + "epoch": 1.1050328227571116, + "grad_norm": NaN, + "learning_rate": 0.00015604651162790697, + "loss": 235584067993.6, + "step": 190 + }, + { + "epoch": 1.1633843909555068, + "grad_norm": NaN, + "learning_rate": 0.00015372093023255815, + "loss": 81904845140787.2, + "step": 200 + }, + { + "epoch": 1.2217359591539023, + "grad_norm": NaN, + "learning_rate": 0.0001513953488372093, + "loss": 192153186100838.4, + "step": 210 + }, + { + "epoch": 1.2800875273522976, + "grad_norm": NaN, + "learning_rate": 0.0001490697674418605, + "loss": 1055250916048896.0, + "step": 220 + }, + { + "epoch": 1.3384390955506928, + "grad_norm": NaN, + "learning_rate": 0.00014674418604651164, + "loss": 110138201892454.4, + "step": 230 + }, + { + "epoch": 1.3967906637490883, + "grad_norm": NaN, + "learning_rate": 0.0001444186046511628, + "loss": 32757802598.4, + "step": 240 + }, + { + "epoch": 1.4551422319474836, + "grad_norm": NaN, + "learning_rate": 0.00014209302325581395, + "loss": 4169762986367385.5, + "step": 250 + }, + { + "epoch": 1.5134938001458789, + "grad_norm": NaN, + "learning_rate": 0.00013976744186046513, + "loss": 15430475776.0, + "step": 260 + }, + { + "epoch": 1.5718453683442744, + "grad_norm": NaN, + "learning_rate": 0.0001374418604651163, + "loss": 3311464452875878.5, + "step": 270 + }, + { + "epoch": 1.6301969365426696, + "grad_norm": NaN, + "learning_rate": 0.00013511627906976744, + "loss": 1962224958151065.5, + "step": 280 + }, + { + "epoch": 1.6885485047410649, + "grad_norm": NaN, + "learning_rate": 0.0001327906976744186, + "loss": 73588399302246.4, + "step": 290 + }, + { + "epoch": 1.7469000729394604, + "grad_norm": NaN, + "learning_rate": 0.00013046511627906975, + "loss": 156729173606.4, + "step": 300 + }, + { + "epoch": 1.8052516411378556, + "grad_norm": NaN, + "learning_rate": 0.00012813953488372093, + "loss": 63342667366.4, + "step": 310 + }, + { + "epoch": 1.863603209336251, + "grad_norm": NaN, + "learning_rate": 0.00012581395348837212, + "loss": 40657972363264.0, + "step": 320 + }, + { + "epoch": 1.9219547775346464, + "grad_norm": NaN, + "learning_rate": 0.00012348837209302327, + "loss": 3779488592691.2, + "step": 330 + }, + { + "epoch": 1.9803063457330414, + "grad_norm": NaN, + "learning_rate": 0.00012116279069767443, + "loss": 4767075637657.6, + "step": 340 + }, + { + "epoch": 2.0, + "eval_loss": NaN, + "eval_runtime": 4.0671, + "eval_samples_per_second": 149.985, + "eval_steps_per_second": 18.933, + "step": 344 + }, + { + "epoch": 2.035010940919037, + "grad_norm": NaN, + "learning_rate": 0.00011883720930232558, + "loss": 155492311629824.0, + "step": 350 + }, + { + "epoch": 2.0933625091174326, + "grad_norm": NaN, + "learning_rate": 0.00011651162790697674, + "loss": 111986648442470.4, + "step": 360 + }, + { + "epoch": 2.1517140773158276, + "grad_norm": NaN, + "learning_rate": 0.00011418604651162792, + "loss": 156752150528.0, + "step": 370 + }, + { + "epoch": 2.210065645514223, + "grad_norm": NaN, + "learning_rate": 0.00011186046511627907, + "loss": 5171589120.0, + "step": 380 + }, + { + "epoch": 2.2684172137126186, + "grad_norm": NaN, + "learning_rate": 0.00010953488372093024, + "loss": 3302979744982630.5, + "step": 390 + }, + { + "epoch": 2.3267687819110137, + "grad_norm": NaN, + "learning_rate": 0.0001072093023255814, + "loss": 3799967334.4, + "step": 400 + }, + { + "epoch": 2.385120350109409, + "grad_norm": NaN, + "learning_rate": 0.00010488372093023255, + "loss": 674909818716160.0, + "step": 410 + }, + { + "epoch": 2.4434719183078046, + "grad_norm": NaN, + "learning_rate": 0.00010255813953488373, + "loss": 1310430384986521.5, + "step": 420 + }, + { + "epoch": 2.5018234865061997, + "grad_norm": NaN, + "learning_rate": 0.0001002325581395349, + "loss": 104982459187.2, + "step": 430 + }, + { + "epoch": 2.560175054704595, + "grad_norm": NaN, + "learning_rate": 9.790697674418605e-05, + "loss": 318152225744486.4, + "step": 440 + }, + { + "epoch": 2.6185266229029907, + "grad_norm": NaN, + "learning_rate": 9.558139534883721e-05, + "loss": 422663597693337.6, + "step": 450 + }, + { + "epoch": 2.6768781911013857, + "grad_norm": NaN, + "learning_rate": 9.325581395348838e-05, + "loss": 592010400078233.6, + "step": 460 + }, + { + "epoch": 2.735229759299781, + "grad_norm": NaN, + "learning_rate": 9.093023255813953e-05, + "loss": 225784900668620.8, + "step": 470 + }, + { + "epoch": 2.7935813274981767, + "grad_norm": NaN, + "learning_rate": 8.86046511627907e-05, + "loss": 22978112716.8, + "step": 480 + }, + { + "epoch": 2.8519328956965717, + "grad_norm": NaN, + "learning_rate": 8.627906976744187e-05, + "loss": 177317523488768.0, + "step": 490 + }, + { + "epoch": 2.910284463894967, + "grad_norm": NaN, + "learning_rate": 8.395348837209302e-05, + "loss": 1558973723443.2, + "step": 500 + }, + { + "epoch": 2.9686360320933627, + "grad_norm": NaN, + "learning_rate": 8.162790697674419e-05, + "loss": 2396209561.6, + "step": 510 + }, + { + "epoch": 3.0, + "eval_loss": NaN, + "eval_runtime": 4.0081, + "eval_samples_per_second": 152.193, + "eval_steps_per_second": 19.211, + "step": 516 + }, + { + "epoch": 3.023340627279358, + "grad_norm": NaN, + "learning_rate": 7.930232558139535e-05, + "loss": 82400054870016.0, + "step": 520 + }, + { + "epoch": 3.0816921954777534, + "grad_norm": NaN, + "learning_rate": 7.697674418604652e-05, + "loss": 1756928000.0, + "step": 530 + }, + { + "epoch": 3.140043763676149, + "grad_norm": NaN, + "learning_rate": 7.465116279069768e-05, + "loss": 540170833100.8, + "step": 540 + }, + { + "epoch": 3.198395331874544, + "grad_norm": NaN, + "learning_rate": 7.232558139534884e-05, + "loss": 772080769433600.0, + "step": 550 + }, + { + "epoch": 3.2567469000729394, + "grad_norm": NaN, + "learning_rate": 7e-05, + "loss": 15627939794124.8, + "step": 560 + }, + { + "epoch": 3.315098468271335, + "grad_norm": NaN, + "learning_rate": 6.767441860465116e-05, + "loss": 1.2879289224737588e+16, + "step": 570 + }, + { + "epoch": 3.37345003646973, + "grad_norm": NaN, + "learning_rate": 6.534883720930233e-05, + "loss": 187756119379148.8, + "step": 580 + }, + { + "epoch": 3.4318016046681254, + "grad_norm": NaN, + "learning_rate": 6.30232558139535e-05, + "loss": 4036548992224461.0, + "step": 590 + }, + { + "epoch": 3.490153172866521, + "grad_norm": NaN, + "learning_rate": 6.0697674418604654e-05, + "loss": 6974699612196045.0, + "step": 600 + }, + { + "epoch": 3.548504741064916, + "grad_norm": NaN, + "learning_rate": 5.8372093023255815e-05, + "loss": 1335860972748.8, + "step": 610 + }, + { + "epoch": 3.6068563092633115, + "grad_norm": NaN, + "learning_rate": 5.6046511627906984e-05, + "loss": 30351064196710.4, + "step": 620 + }, + { + "epoch": 3.6652078774617065, + "grad_norm": NaN, + "learning_rate": 5.3720930232558145e-05, + "loss": 580581492103577.6, + "step": 630 + }, + { + "epoch": 3.723559445660102, + "grad_norm": NaN, + "learning_rate": 5.13953488372093e-05, + "loss": 3143535311821209.5, + "step": 640 + }, + { + "epoch": 3.7819110138584975, + "grad_norm": NaN, + "learning_rate": 4.906976744186046e-05, + "loss": 10500325978931.2, + "step": 650 + }, + { + "epoch": 3.8402625820568925, + "grad_norm": NaN, + "learning_rate": 4.674418604651163e-05, + "loss": 49259429547212.8, + "step": 660 + }, + { + "epoch": 3.898614150255288, + "grad_norm": NaN, + "learning_rate": 4.441860465116279e-05, + "loss": 9668237721.6, + "step": 670 + }, + { + "epoch": 3.9569657184536835, + "grad_norm": NaN, + "learning_rate": 4.209302325581396e-05, + "loss": 622216314512998.4, + "step": 680 + }, + { + "epoch": 4.0, + "eval_loss": NaN, + "eval_runtime": 4.3583, + "eval_samples_per_second": 139.964, + "eval_steps_per_second": 17.668, + "step": 688 + }, + { + "epoch": 4.011670313639679, + "grad_norm": NaN, + "learning_rate": 3.9767441860465115e-05, + "loss": 475751671398.4, + "step": 690 + }, + { + "epoch": 4.070021881838074, + "grad_norm": NaN, + "learning_rate": 3.7441860465116276e-05, + "loss": 676074399098470.4, + "step": 700 + }, + { + "epoch": 4.12837345003647, + "grad_norm": NaN, + "learning_rate": 3.5116279069767445e-05, + "loss": 667001762611.2, + "step": 710 + }, + { + "epoch": 4.186725018234865, + "grad_norm": NaN, + "learning_rate": 3.2790697674418606e-05, + "loss": 191485388.8, + "step": 720 + }, + { + "epoch": 4.24507658643326, + "grad_norm": NaN, + "learning_rate": 3.0465116279069768e-05, + "loss": 236621293158.4, + "step": 730 + }, + { + "epoch": 4.303428154631655, + "grad_norm": NaN, + "learning_rate": 2.813953488372093e-05, + "loss": 29009564716236.8, + "step": 740 + }, + { + "epoch": 4.361779722830051, + "grad_norm": NaN, + "learning_rate": 2.5813953488372094e-05, + "loss": 298283062788096.0, + "step": 750 + }, + { + "epoch": 4.420131291028446, + "grad_norm": NaN, + "learning_rate": 2.3488372093023256e-05, + "loss": 2359767775313920.0, + "step": 760 + }, + { + "epoch": 4.478482859226841, + "grad_norm": NaN, + "learning_rate": 2.116279069767442e-05, + "loss": 247663480012.8, + "step": 770 + }, + { + "epoch": 4.536834427425237, + "grad_norm": NaN, + "learning_rate": 1.8837209302325582e-05, + "loss": 43075475236454.4, + "step": 780 + }, + { + "epoch": 4.595185995623632, + "grad_norm": NaN, + "learning_rate": 1.6511627906976744e-05, + "loss": 1454101658127564.8, + "step": 790 + }, + { + "epoch": 4.653537563822027, + "grad_norm": NaN, + "learning_rate": 1.4186046511627907e-05, + "loss": 189785296076.8, + "step": 800 + }, + { + "epoch": 4.711889132020423, + "grad_norm": NaN, + "learning_rate": 1.186046511627907e-05, + "loss": 487613703756185.6, + "step": 810 + }, + { + "epoch": 4.770240700218818, + "grad_norm": NaN, + "learning_rate": 9.534883720930234e-06, + "loss": 903332554080256.0, + "step": 820 + }, + { + "epoch": 4.828592268417213, + "grad_norm": NaN, + "learning_rate": 7.209302325581396e-06, + "loss": 358981017010176.0, + "step": 830 + }, + { + "epoch": 4.886943836615609, + "grad_norm": NaN, + "learning_rate": 4.883720930232559e-06, + "loss": 2114847265259520.0, + "step": 840 + }, + { + "epoch": 4.945295404814004, + "grad_norm": NaN, + "learning_rate": 2.558139534883721e-06, + "loss": 134010254236057.6, + "step": 850 + }, + { + "epoch": 5.0, + "grad_norm": NaN, + "learning_rate": 2.3255813953488374e-07, + "loss": 1453672054023782.5, + "step": 860 + }, + { + "epoch": 5.0, + "eval_loss": NaN, + "eval_runtime": 4.1566, + "eval_samples_per_second": 146.756, + "eval_steps_per_second": 18.525, + "step": 860 + } + ], + "logging_steps": 10, + "max_steps": 860, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6310535451770880.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/byt5_v2_6gb/checkpoint-860/training_args.bin b/byt5_v2_6gb/checkpoint-860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c157e8fe71b3035765545ee7a917ab96243a14 --- /dev/null +++ b/byt5_v2_6gb/checkpoint-860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128ab4c2243d9abce6d1e9c5e2a60d990f973437b7aff444644a9237e7479025 +size 5329 diff --git a/byt5_v2_6gb/tokenizer_config.json b/byt5_v2_6gb/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96da6801779a232712edf0b6760664675cffbc24 --- /dev/null +++ b/byt5_v2_6gb/tokenizer_config.json @@ -0,0 +1,1290 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "backend": "custom", + "eos_token": "", + "extra_ids": 0, + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "ByT5Tokenizer", + "unk_token": "" +} diff --git a/byt5_v2_6gb/training_args.bin b/byt5_v2_6gb/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c157e8fe71b3035765545ee7a917ab96243a14 --- /dev/null +++ b/byt5_v2_6gb/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128ab4c2243d9abce6d1e9c5e2a60d990f973437b7aff444644a9237e7479025 +size 5329 diff --git a/cnn.json b/cnn.json index d08fb345c6578122ffa0d97c653f2728275cdea0..b748bfff6961d03a80880d9a3449ea88f79ef077 100644 --- a/cnn.json +++ b/cnn.json @@ -14,123 +14,156 @@ "char_to_idx": { "[PAD]": 0, "[UNK]": 1, - " ": 2, - "&": 3, - "'": 4, - "(": 5, - ")": 6, - "+": 7, - "-": 8, - ".": 9, - "/": 10, - "0": 11, - "1": 12, - "2": 13, - "3": 14, - "7": 15, - "8": 16, - "9": 17, - ":": 18, - ";": 19, - "<": 20, - ">": 21, - "?": 22, - "A": 23, - "C": 24, - "E": 25, - "F": 26, - "H": 27, - "I": 28, - "K": 29, - "L": 30, - "M": 31, - "N": 32, - "P": 33, - "Q": 34, - "R": 35, - "S": 36, - "T": 37, - "U": 38, - "V": 39, - "Z": 40, - "[": 41, - "]": 42, - "a": 43, - "b": 44, - "c": 45, - "d": 46, - "e": 47, - "f": 48, - "g": 49, - "h": 50, - "i": 51, - "k": 52, - "l": 53, - "m": 54, - "n": 55, - "o": 56, - "p": 57, - "q": 58, - "r": 59, - "s": 60, - "t": 61, - "u": 62, - "v": 63, - "x": 64, - "y": 65, - "z": 66, - "{": 67, - "|": 68, - "}": 69, - "·": 70, - "Æ": 71, - "ê": 72, - "ð": 73, - "þ": 74, - "ń": 75, - "ś": 76, - "š": 77, - "Ɔ": 78, - "Ǝ": 79, - "Ɯ": 80, - "Ƨ": 81, - "Θ": 82, - "Λ": 83, - "Ξ": 84, - "Σ": 85, - "θ": 86, - "ς": 87, - "σ": 88, - "φ": 89, - "χ": 90, - "И": 91, - "Ч": 92, - "Э": 93, - "Я": 94, - "ḥ": 95, - "ṇ": 96, - "ṛ": 97, - "ṣ": 98, - "ṭ": 99, - "ṿ": 100, - "ẹ": 101, - "•": 102, - "𐌀": 103, - "𐌂": 104, - "𐌄": 105, - "𐌅": 106, - "𐌆": 107, - "𐌉": 108, - "𐌋": 109, - "𐌌": 110, - "𐌍": 111, - "𐌐": 112, - "𐌓": 113, - "𐌔": 114, - "𐌕": 115, - "𐌖": 116, - "𐌸": 117 + "[MASK]": 2, + "\t": 3, + "\n": 4, + "\r": 5, + " ": 6, + "\"": 7, + "%": 8, + "&": 9, + "'": 10, + "(": 11, + ")": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "[": 34, + "\\": 35, + "]": 36, + "a": 37, + "b": 38, + "c": 39, + "d": 40, + "e": 41, + "f": 42, + "g": 43, + "h": 44, + "i": 45, + "j": 46, + "k": 47, + "l": 48, + "m": 49, + "n": 50, + "o": 51, + "p": 52, + "q": 53, + "r": 54, + "s": 55, + "t": 56, + "u": 57, + "v": 58, + "x": 59, + "y": 60, + "z": 61, + "{": 62, + "|": 63, + "}": 64, + "²": 65, + "·": 66, + "¹": 67, + "ç": 68, + "ê": 69, + "ð": 70, + "ñ": 71, + "þ": 72, + "ą": 73, + "ę": 74, + "į": 75, + "ļ": 76, + "ł": 77, + "ś": 78, + "ş": 79, + "š": 80, + "ţ": 81, + "ǝ": 82, + "ș": 83, + "ț": 84, + "ɛ": 85, + "ί": 86, + "α": 87, + "ε": 88, + "ζ": 89, + "η": 90, + "θ": 91, + "ι": 92, + "κ": 93, + "μ": 94, + "ν": 95, + "ξ": 96, + "ο": 97, + "ρ": 98, + "ς": 99, + "σ": 100, + "υ": 101, + "φ": 102, + "χ": 103, + "а": 104, + "з": 105, + "и": 106, + "м": 107, + "н": 108, + "о": 109, + "п": 110, + "т": 111, + "ч": 112, + "ш": 113, + "я": 114, + "і": 115, + "ִ": 116, + "۰": 117, + "۱": 118, + "۲": 119, + "۷": 120, + "۹": 121, + "ง": 122, + "จ": 123, + "ด": 124, + "ท": 125, + "า": 126, + "เ": 127, + "ḟ": 128, + "ḥ": 129, + "ṇ": 130, + "ṭ": 131, + "ṿ": 132, + "ạ": 133, + "ẹ": 134, + "„": 135, + "‡": 136, + "•": 137, + "…": 138, + "ⅎ": 139, + "∃": 140, + "コ": 141, + "ネ": 142, + "マ": 143, + "ヨ": 144, + "・": 145, + "ㅋ": 146, + "ꞇ": 147, + "・": 148, + "𐌏": 149, + "𐌙": 150 } }, - "vocab_size": 118 + "vocab_size": 151 } \ No newline at end of file diff --git a/cnn.onnx b/cnn.onnx index 037b1a6e745d03eb0c289059357365299f4d4214..f1c0c2503089b2e72c0f8445e658d38a288a80f3 100644 --- a/cnn.onnx +++ b/cnn.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:483f8e2dbe4e11c49d6a5b5c8f541c52ad926cc894c552f45c5777ff6b54215b -size 121332 +oid sha256:8ab7e95ee4bed8f8ccb5ba7e79e884ee000d980622f6ca7aa5b3691c24c80276 +size 125559 diff --git a/cnn_meta.json b/cnn_meta.json index d08fb345c6578122ffa0d97c653f2728275cdea0..b748bfff6961d03a80880d9a3449ea88f79ef077 100644 --- a/cnn_meta.json +++ b/cnn_meta.json @@ -14,123 +14,156 @@ "char_to_idx": { "[PAD]": 0, "[UNK]": 1, - " ": 2, - "&": 3, - "'": 4, - "(": 5, - ")": 6, - "+": 7, - "-": 8, - ".": 9, - "/": 10, - "0": 11, - "1": 12, - "2": 13, - "3": 14, - "7": 15, - "8": 16, - "9": 17, - ":": 18, - ";": 19, - "<": 20, - ">": 21, - "?": 22, - "A": 23, - "C": 24, - "E": 25, - "F": 26, - "H": 27, - "I": 28, - "K": 29, - "L": 30, - "M": 31, - "N": 32, - "P": 33, - "Q": 34, - "R": 35, - "S": 36, - "T": 37, - "U": 38, - "V": 39, - "Z": 40, - "[": 41, - "]": 42, - "a": 43, - "b": 44, - "c": 45, - "d": 46, - "e": 47, - "f": 48, - "g": 49, - "h": 50, - "i": 51, - "k": 52, - "l": 53, - "m": 54, - "n": 55, - "o": 56, - "p": 57, - "q": 58, - "r": 59, - "s": 60, - "t": 61, - "u": 62, - "v": 63, - "x": 64, - "y": 65, - "z": 66, - "{": 67, - "|": 68, - "}": 69, - "·": 70, - "Æ": 71, - "ê": 72, - "ð": 73, - "þ": 74, - "ń": 75, - "ś": 76, - "š": 77, - "Ɔ": 78, - "Ǝ": 79, - "Ɯ": 80, - "Ƨ": 81, - "Θ": 82, - "Λ": 83, - "Ξ": 84, - "Σ": 85, - "θ": 86, - "ς": 87, - "σ": 88, - "φ": 89, - "χ": 90, - "И": 91, - "Ч": 92, - "Э": 93, - "Я": 94, - "ḥ": 95, - "ṇ": 96, - "ṛ": 97, - "ṣ": 98, - "ṭ": 99, - "ṿ": 100, - "ẹ": 101, - "•": 102, - "𐌀": 103, - "𐌂": 104, - "𐌄": 105, - "𐌅": 106, - "𐌆": 107, - "𐌉": 108, - "𐌋": 109, - "𐌌": 110, - "𐌍": 111, - "𐌐": 112, - "𐌓": 113, - "𐌔": 114, - "𐌕": 115, - "𐌖": 116, - "𐌸": 117 + "[MASK]": 2, + "\t": 3, + "\n": 4, + "\r": 5, + " ": 6, + "\"": 7, + "%": 8, + "&": 9, + "'": 10, + "(": 11, + ")": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "[": 34, + "\\": 35, + "]": 36, + "a": 37, + "b": 38, + "c": 39, + "d": 40, + "e": 41, + "f": 42, + "g": 43, + "h": 44, + "i": 45, + "j": 46, + "k": 47, + "l": 48, + "m": 49, + "n": 50, + "o": 51, + "p": 52, + "q": 53, + "r": 54, + "s": 55, + "t": 56, + "u": 57, + "v": 58, + "x": 59, + "y": 60, + "z": 61, + "{": 62, + "|": 63, + "}": 64, + "²": 65, + "·": 66, + "¹": 67, + "ç": 68, + "ê": 69, + "ð": 70, + "ñ": 71, + "þ": 72, + "ą": 73, + "ę": 74, + "į": 75, + "ļ": 76, + "ł": 77, + "ś": 78, + "ş": 79, + "š": 80, + "ţ": 81, + "ǝ": 82, + "ș": 83, + "ț": 84, + "ɛ": 85, + "ί": 86, + "α": 87, + "ε": 88, + "ζ": 89, + "η": 90, + "θ": 91, + "ι": 92, + "κ": 93, + "μ": 94, + "ν": 95, + "ξ": 96, + "ο": 97, + "ρ": 98, + "ς": 99, + "σ": 100, + "υ": 101, + "φ": 102, + "χ": 103, + "а": 104, + "з": 105, + "и": 106, + "м": 107, + "н": 108, + "о": 109, + "п": 110, + "т": 111, + "ч": 112, + "ш": 113, + "я": 114, + "і": 115, + "ִ": 116, + "۰": 117, + "۱": 118, + "۲": 119, + "۷": 120, + "۹": 121, + "ง": 122, + "จ": 123, + "ด": 124, + "ท": 125, + "า": 126, + "เ": 127, + "ḟ": 128, + "ḥ": 129, + "ṇ": 130, + "ṭ": 131, + "ṿ": 132, + "ạ": 133, + "ẹ": 134, + "„": 135, + "‡": 136, + "•": 137, + "…": 138, + "ⅎ": 139, + "∃": 140, + "コ": 141, + "ネ": 142, + "マ": 143, + "ヨ": 144, + "・": 145, + "ㅋ": 146, + "ꞇ": 147, + "・": 148, + "𐌏": 149, + "𐌙": 150 } }, - "vocab_size": 118 + "vocab_size": 151 } \ No newline at end of file diff --git a/cnn_weights.pt b/cnn_weights.pt index fe05047b1459b67c39c8c37fee5209709937b358..7e133dfeded641f2773ac2e0d672350e7565216f 100644 --- a/cnn_weights.pt +++ b/cnn_weights.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c5e83b00f49a6e548876cb9018b37a564dd2a54324757ee794c922d402e6001 -size 123525 +oid sha256:a162376ceb7c17d57737a05eb7d5816404bad6dfc8a2ab01b8eaba14e49bd5ca +size 127749 diff --git a/metrics.json b/metrics.json index ac16e040cd6922d0617fe9c41637c8a08959a691..0ed19f50f2a0c2a45789cc6b3ea20c154fa93d4b 100644 --- a/metrics.json +++ b/metrics.json @@ -1,105 +1,105 @@ { "cnn": { "arch": "cnn", - "params": 29895, - "train_time_s": 135.16, - "train_samples": 3958, - "val_samples": 990, - "val_f1_macro": 0.765, + "params": 30951, + "train_time_s": 4.03, + "train_samples": 1497, + "val_samples": 375, + "val_f1_macro": 0.7427, "per_class": { "boundary": { - "precision": 0.5714, - "recall": 0.8421, - "f1": 0.6809, - "support": 19 + "precision": 0.6667, + "recall": 0.6667, + "f1": 0.6667, + "support": 3 }, "commercial": { - "precision": 0.85, - "recall": 0.8193, - "f1": 0.8344, - "support": 249 + "precision": 0.8762, + "recall": 0.902, + "f1": 0.8889, + "support": 102 }, "dedicatory": { - "precision": 0.8701, - "recall": 0.8221, - "f1": 0.8454, - "support": 163 + "precision": 0.9608, + "recall": 0.8167, + "f1": 0.8829, + "support": 60 }, "funerary": { - "precision": 0.8434, - "recall": 0.8172, - "f1": 0.8301, - "support": 290 + "precision": 0.8627, + "recall": 0.8889, + "f1": 0.8756, + "support": 99 }, "legal": { - "precision": 0.5814, - "recall": 0.7353, - "f1": 0.6494, - "support": 34 + "precision": 0.2, + "recall": 0.2, + "f1": 0.2, + "support": 5 }, "ownership": { - "precision": 0.8759, - "recall": 0.8355, - "f1": 0.8552, - "support": 152 + "precision": 0.9565, + "recall": 0.9462, + "f1": 0.9514, + "support": 93 }, "votive": { - "precision": 0.6061, - "recall": 0.7229, - "f1": 0.6593, - "support": 83 + "precision": 0.6471, + "recall": 0.8462, + "f1": 0.7333, + "support": 13 } } }, "transformer": { "arch": "transformer", - "params": 280967, - "train_time_s": 80.46, - "train_samples": 3958, - "val_samples": 990, - "val_f1_macro": 0.5324, + "params": 285191, + "train_time_s": 80.71, + "train_samples": 1497, + "val_samples": 375, + "val_f1_macro": 0.4719, "per_class": { "boundary": { - "precision": 0.3659, - "recall": 0.7895, - "f1": 0.5, - "support": 19 + "precision": 0.2857, + "recall": 0.6667, + "f1": 0.4, + "support": 3 }, "commercial": { - "precision": 0.6335, - "recall": 0.7149, - "f1": 0.6717, - "support": 249 + "precision": 0.8302, + "recall": 0.4314, + "f1": 0.5677, + "support": 102 }, "dedicatory": { - "precision": 0.5472, - "recall": 0.3558, - "f1": 0.4312, - "support": 163 + "precision": 0.3814, + "recall": 0.6167, + "f1": 0.4713, + "support": 60 }, "funerary": { - "precision": 0.6367, - "recall": 0.5862, - "f1": 0.6104, - "support": 290 + "precision": 0.5256, + "recall": 0.4141, + "f1": 0.4633, + "support": 99 }, "legal": { - "precision": 0.3014, - "recall": 0.6471, - "f1": 0.4112, - "support": 34 + "precision": 0.16, + "recall": 0.8, + "f1": 0.2667, + "support": 5 }, "ownership": { - "precision": 0.6936, - "recall": 0.7895, - "f1": 0.7385, - "support": 152 + "precision": 0.8068, + "recall": 0.7634, + "f1": 0.7845, + "support": 93 }, "votive": { - "precision": 0.4898, - "recall": 0.2892, - "f1": 0.3636, - "support": 83 + "precision": 0.2593, + "recall": 0.5385, + "f1": 0.35, + "support": 13 } } } diff --git a/transformer.json b/transformer.json index 676db40ee0774401b89569f8d080967aab59e53d..43d2488dd290afb5d9bf585e546537ac42146096 100644 --- a/transformer.json +++ b/transformer.json @@ -14,123 +14,156 @@ "char_to_idx": { "[PAD]": 0, "[UNK]": 1, - " ": 2, - "&": 3, - "'": 4, - "(": 5, - ")": 6, - "+": 7, - "-": 8, - ".": 9, - "/": 10, - "0": 11, - "1": 12, - "2": 13, - "3": 14, - "7": 15, - "8": 16, - "9": 17, - ":": 18, - ";": 19, - "<": 20, - ">": 21, - "?": 22, - "A": 23, - "C": 24, - "E": 25, - "F": 26, - "H": 27, - "I": 28, - "K": 29, - "L": 30, - "M": 31, - "N": 32, - "P": 33, - "Q": 34, - "R": 35, - "S": 36, - "T": 37, - "U": 38, - "V": 39, - "Z": 40, - "[": 41, - "]": 42, - "a": 43, - "b": 44, - "c": 45, - "d": 46, - "e": 47, - "f": 48, - "g": 49, - "h": 50, - "i": 51, - "k": 52, - "l": 53, - "m": 54, - "n": 55, - "o": 56, - "p": 57, - "q": 58, - "r": 59, - "s": 60, - "t": 61, - "u": 62, - "v": 63, - "x": 64, - "y": 65, - "z": 66, - "{": 67, - "|": 68, - "}": 69, - "·": 70, - "Æ": 71, - "ê": 72, - "ð": 73, - "þ": 74, - "ń": 75, - "ś": 76, - "š": 77, - "Ɔ": 78, - "Ǝ": 79, - "Ɯ": 80, - "Ƨ": 81, - "Θ": 82, - "Λ": 83, - "Ξ": 84, - "Σ": 85, - "θ": 86, - "ς": 87, - "σ": 88, - "φ": 89, - "χ": 90, - "И": 91, - "Ч": 92, - "Э": 93, - "Я": 94, - "ḥ": 95, - "ṇ": 96, - "ṛ": 97, - "ṣ": 98, - "ṭ": 99, - "ṿ": 100, - "ẹ": 101, - "•": 102, - "𐌀": 103, - "𐌂": 104, - "𐌄": 105, - "𐌅": 106, - "𐌆": 107, - "𐌉": 108, - "𐌋": 109, - "𐌌": 110, - "𐌍": 111, - "𐌐": 112, - "𐌓": 113, - "𐌔": 114, - "𐌕": 115, - "𐌖": 116, - "𐌸": 117 + "[MASK]": 2, + "\t": 3, + "\n": 4, + "\r": 5, + " ": 6, + "\"": 7, + "%": 8, + "&": 9, + "'": 10, + "(": 11, + ")": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "[": 34, + "\\": 35, + "]": 36, + "a": 37, + "b": 38, + "c": 39, + "d": 40, + "e": 41, + "f": 42, + "g": 43, + "h": 44, + "i": 45, + "j": 46, + "k": 47, + "l": 48, + "m": 49, + "n": 50, + "o": 51, + "p": 52, + "q": 53, + "r": 54, + "s": 55, + "t": 56, + "u": 57, + "v": 58, + "x": 59, + "y": 60, + "z": 61, + "{": 62, + "|": 63, + "}": 64, + "²": 65, + "·": 66, + "¹": 67, + "ç": 68, + "ê": 69, + "ð": 70, + "ñ": 71, + "þ": 72, + "ą": 73, + "ę": 74, + "į": 75, + "ļ": 76, + "ł": 77, + "ś": 78, + "ş": 79, + "š": 80, + "ţ": 81, + "ǝ": 82, + "ș": 83, + "ț": 84, + "ɛ": 85, + "ί": 86, + "α": 87, + "ε": 88, + "ζ": 89, + "η": 90, + "θ": 91, + "ι": 92, + "κ": 93, + "μ": 94, + "ν": 95, + "ξ": 96, + "ο": 97, + "ρ": 98, + "ς": 99, + "σ": 100, + "υ": 101, + "φ": 102, + "χ": 103, + "а": 104, + "з": 105, + "и": 106, + "м": 107, + "н": 108, + "о": 109, + "п": 110, + "т": 111, + "ч": 112, + "ш": 113, + "я": 114, + "і": 115, + "ִ": 116, + "۰": 117, + "۱": 118, + "۲": 119, + "۷": 120, + "۹": 121, + "ง": 122, + "จ": 123, + "ด": 124, + "ท": 125, + "า": 126, + "เ": 127, + "ḟ": 128, + "ḥ": 129, + "ṇ": 130, + "ṭ": 131, + "ṿ": 132, + "ạ": 133, + "ẹ": 134, + "„": 135, + "‡": 136, + "•": 137, + "…": 138, + "ⅎ": 139, + "∃": 140, + "コ": 141, + "ネ": 142, + "マ": 143, + "ヨ": 144, + "・": 145, + "ㅋ": 146, + "ꞇ": 147, + "・": 148, + "𐌏": 149, + "𐌙": 150 } }, - "vocab_size": 118 + "vocab_size": 151 } \ No newline at end of file diff --git a/transformer.onnx b/transformer.onnx index 4728122b2946feeed17a3b97832088e66c945cc9..a3e693e095f60014d9373443d6ba4ac32b193034 100644 --- a/transformer.onnx +++ b/transformer.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8a4d17a84c004080a83768d1276edd887ea5bb097a0efa321c91508af1bfb19 -size 1264237 +oid sha256:90fa8e5a6802546b24d332c5b89291227b8bc1b97a09007f53853d5dc88d4073 +size 1281134 diff --git a/transformer_meta.json b/transformer_meta.json index 676db40ee0774401b89569f8d080967aab59e53d..43d2488dd290afb5d9bf585e546537ac42146096 100644 --- a/transformer_meta.json +++ b/transformer_meta.json @@ -14,123 +14,156 @@ "char_to_idx": { "[PAD]": 0, "[UNK]": 1, - " ": 2, - "&": 3, - "'": 4, - "(": 5, - ")": 6, - "+": 7, - "-": 8, - ".": 9, - "/": 10, - "0": 11, - "1": 12, - "2": 13, - "3": 14, - "7": 15, - "8": 16, - "9": 17, - ":": 18, - ";": 19, - "<": 20, - ">": 21, - "?": 22, - "A": 23, - "C": 24, - "E": 25, - "F": 26, - "H": 27, - "I": 28, - "K": 29, - "L": 30, - "M": 31, - "N": 32, - "P": 33, - "Q": 34, - "R": 35, - "S": 36, - "T": 37, - "U": 38, - "V": 39, - "Z": 40, - "[": 41, - "]": 42, - "a": 43, - "b": 44, - "c": 45, - "d": 46, - "e": 47, - "f": 48, - "g": 49, - "h": 50, - "i": 51, - "k": 52, - "l": 53, - "m": 54, - "n": 55, - "o": 56, - "p": 57, - "q": 58, - "r": 59, - "s": 60, - "t": 61, - "u": 62, - "v": 63, - "x": 64, - "y": 65, - "z": 66, - "{": 67, - "|": 68, - "}": 69, - "·": 70, - "Æ": 71, - "ê": 72, - "ð": 73, - "þ": 74, - "ń": 75, - "ś": 76, - "š": 77, - "Ɔ": 78, - "Ǝ": 79, - "Ɯ": 80, - "Ƨ": 81, - "Θ": 82, - "Λ": 83, - "Ξ": 84, - "Σ": 85, - "θ": 86, - "ς": 87, - "σ": 88, - "φ": 89, - "χ": 90, - "И": 91, - "Ч": 92, - "Э": 93, - "Я": 94, - "ḥ": 95, - "ṇ": 96, - "ṛ": 97, - "ṣ": 98, - "ṭ": 99, - "ṿ": 100, - "ẹ": 101, - "•": 102, - "𐌀": 103, - "𐌂": 104, - "𐌄": 105, - "𐌅": 106, - "𐌆": 107, - "𐌉": 108, - "𐌋": 109, - "𐌌": 110, - "𐌍": 111, - "𐌐": 112, - "𐌓": 113, - "𐌔": 114, - "𐌕": 115, - "𐌖": 116, - "𐌸": 117 + "[MASK]": 2, + "\t": 3, + "\n": 4, + "\r": 5, + " ": 6, + "\"": 7, + "%": 8, + "&": 9, + "'": 10, + "(": 11, + ")": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "[": 34, + "\\": 35, + "]": 36, + "a": 37, + "b": 38, + "c": 39, + "d": 40, + "e": 41, + "f": 42, + "g": 43, + "h": 44, + "i": 45, + "j": 46, + "k": 47, + "l": 48, + "m": 49, + "n": 50, + "o": 51, + "p": 52, + "q": 53, + "r": 54, + "s": 55, + "t": 56, + "u": 57, + "v": 58, + "x": 59, + "y": 60, + "z": 61, + "{": 62, + "|": 63, + "}": 64, + "²": 65, + "·": 66, + "¹": 67, + "ç": 68, + "ê": 69, + "ð": 70, + "ñ": 71, + "þ": 72, + "ą": 73, + "ę": 74, + "į": 75, + "ļ": 76, + "ł": 77, + "ś": 78, + "ş": 79, + "š": 80, + "ţ": 81, + "ǝ": 82, + "ș": 83, + "ț": 84, + "ɛ": 85, + "ί": 86, + "α": 87, + "ε": 88, + "ζ": 89, + "η": 90, + "θ": 91, + "ι": 92, + "κ": 93, + "μ": 94, + "ν": 95, + "ξ": 96, + "ο": 97, + "ρ": 98, + "ς": 99, + "σ": 100, + "υ": 101, + "φ": 102, + "χ": 103, + "а": 104, + "з": 105, + "и": 106, + "м": 107, + "н": 108, + "о": 109, + "п": 110, + "т": 111, + "ч": 112, + "ш": 113, + "я": 114, + "і": 115, + "ִ": 116, + "۰": 117, + "۱": 118, + "۲": 119, + "۷": 120, + "۹": 121, + "ง": 122, + "จ": 123, + "ด": 124, + "ท": 125, + "า": 126, + "เ": 127, + "ḟ": 128, + "ḥ": 129, + "ṇ": 130, + "ṭ": 131, + "ṿ": 132, + "ạ": 133, + "ẹ": 134, + "„": 135, + "‡": 136, + "•": 137, + "…": 138, + "ⅎ": 139, + "∃": 140, + "コ": 141, + "ネ": 142, + "マ": 143, + "ヨ": 144, + "・": 145, + "ㅋ": 146, + "ꞇ": 147, + "・": 148, + "𐌏": 149, + "𐌙": 150 } }, - "vocab_size": 118 + "vocab_size": 151 } \ No newline at end of file diff --git a/transformer_weights.pt b/transformer_weights.pt index fb403a5a55ed87dc0d1bc74b8576fea944f7a7cc..2e96be2729a9f9fba06f183880c3aa544eded57b 100644 --- a/transformer_weights.pt +++ b/transformer_weights.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fbddf66ca2998c7456d9da68f4bdeb7c790a6eaa1e640fbf555638f3aaf80e2 -size 1200167 +oid sha256:15fe4bb1fe801977ef5fa1edbb4f668729944da7f84eed9f7615a5f61d5067f5 +size 1217063 diff --git a/v2/classifier_legacy_cnn.onnx b/v2/classifier_legacy_cnn.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f1c0c2503089b2e72c0f8445e658d38a288a80f3 --- /dev/null +++ b/v2/classifier_legacy_cnn.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab7e95ee4bed8f8ccb5ba7e79e884ee000d980622f6ca7aa5b3691c24c80276 +size 125559 diff --git a/v2/classifier_legacy_transformer.onnx b/v2/classifier_legacy_transformer.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a3e693e095f60014d9373443d6ba4ac32b193034 --- /dev/null +++ b/v2/classifier_legacy_transformer.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90fa8e5a6802546b24d332c5b89291227b8bc1b97a09007f53853d5dc88d4073 +size 1281134 diff --git a/v2/metadata.json b/v2/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e9e9645f6875544cc62a161998b1f0c4153900ad --- /dev/null +++ b/v2/metadata.json @@ -0,0 +1,32 @@ +{ + "version": "2.0.0", + "baseline_milestone": "8,091_verified", + "models": { + "classifier_legacy_cnn": { + "file": "classifier_legacy_cnn.onnx", + "type": "character_cnn", + "input_shape": [1, 128], + "labels": ["boundary", "commercial", "dedicatory", "funerary", "legal", "ownership", "votive"], + "portable": true, + "requires_embeddings": false + }, + "classifier_legacy_transformer": { + "file": "classifier_legacy_transformer.onnx", + "type": "micro_transformer", + "input_shape": [1, 128], + "labels": ["boundary", "commercial", "dedicatory", "funerary", "legal", "ownership", "votive"], + "portable": true, + "requires_embeddings": false + }, + "classifier_sota_mlp": { + "file": "classifier_sota_mlp.onnx", + "type": "embedding_mlp", + "input_shape": [1, 3072], + "labels": ["boundary", "commercial", "dedicatory", "funerary", "legal", "ownership", "votive"], + "portable": false, + "requires_embeddings": true, + "embedding_model": "text-embedding-004" + } + }, + "deployment_notes": "For 99% accuracy (SOTA), the frontend must fetch 3072-dim embeddings from the /api/embed endpoint before calling the MLP. For zero-latency offline use, use the CNN." +}