Text Classification
Transformers
Safetensors
distilbert
Generated from Trainer
text-embeddings-inference
Instructions to use gerbejon/digilog-eform-classifier with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use gerbejon/digilog-eform-classifier with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="gerbejon/digilog-eform-classifier")

# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("gerbejon/digilog-eform-classifier")
model = AutoModelForSequenceClassification.from_pretrained("gerbejon/digilog-eform-classifier")
- Notebooks
- Google Colab
- Kaggle
Upload folder using huggingface_hub
Browse files
- checkpoint-1000/config.json +50 -0
- checkpoint-1000/model.safetensors +3 -0
- checkpoint-1000/optimizer.pt +3 -0
- checkpoint-1000/rng_state.pth +3 -0
- checkpoint-1000/scheduler.pt +3 -0
- checkpoint-1000/trainer_state.json +47 -0
- checkpoint-1000/training_args.bin +3 -0
- checkpoint-1500/config.json +50 -0
- checkpoint-1500/model.safetensors +3 -0
- checkpoint-1500/optimizer.pt +3 -0
- checkpoint-1500/rng_state.pth +3 -0
- checkpoint-1500/scheduler.pt +3 -0
- checkpoint-1500/trainer_state.json +54 -0
- checkpoint-1500/training_args.bin +3 -0
- checkpoint-2000/config.json +50 -0
- checkpoint-2000/model.safetensors +3 -0
- checkpoint-2000/optimizer.pt +3 -0
- checkpoint-2000/rng_state.pth +3 -0
- checkpoint-2000/scheduler.pt +3 -0
- checkpoint-2000/trainer_state.json +61 -0
- checkpoint-2000/training_args.bin +3 -0
- checkpoint-2500/config.json +50 -0
- checkpoint-2500/model.safetensors +3 -0
- checkpoint-2500/optimizer.pt +3 -0
- checkpoint-2500/rng_state.pth +3 -0
- checkpoint-2500/scheduler.pt +3 -0
- checkpoint-2500/trainer_state.json +68 -0
- checkpoint-2500/training_args.bin +3 -0
- checkpoint-2901/config.json +50 -0
- checkpoint-2901/model.safetensors +3 -0
- checkpoint-2901/optimizer.pt +3 -0
- checkpoint-2901/rng_state.pth +3 -0
- checkpoint-2901/scheduler.pt +3 -0
- checkpoint-2901/trainer_state.json +68 -0
- checkpoint-2901/training_args.bin +3 -0
- checkpoint-500/model.safetensors +1 -1
- checkpoint-500/optimizer.pt +1 -1
- checkpoint-500/rng_state.pth +1 -1
- checkpoint-500/scheduler.pt +1 -1
- checkpoint-500/trainer_state.json +7 -7
- checkpoint-500/training_args.bin +1 -1
checkpoint-1000/config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "distilbert-base-multilingual-cased",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "Diverses",
|
| 13 |
+
"1": "Bau",
|
| 14 |
+
"2": "Steuern Finanzen",
|
| 15 |
+
"3": "SBB Verkehr",
|
| 16 |
+
"4": "Living Moving",
|
| 17 |
+
"5": "Ausweise",
|
| 18 |
+
"6": "Kontakt",
|
| 19 |
+
"7": "Newsletter",
|
| 20 |
+
"8": "Tiere",
|
| 21 |
+
"9": "Vereine Busines"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"label2id": {
|
| 25 |
+
"Ausweise": "5",
|
| 26 |
+
"Bau": "1",
|
| 27 |
+
"Diverses": "0",
|
| 28 |
+
"Kontakt": "6",
|
| 29 |
+
"Living Moving": "4",
|
| 30 |
+
"Newsletter": "7",
|
| 31 |
+
"SBB Verkehr": "3",
|
| 32 |
+
"Steuern Finanzen": "2",
|
| 33 |
+
"Tiere": "8",
|
| 34 |
+
"Vereine Busines": "9"
|
| 35 |
+
},
|
| 36 |
+
"max_position_embeddings": 512,
|
| 37 |
+
"model_type": "distilbert",
|
| 38 |
+
"n_heads": 12,
|
| 39 |
+
"n_layers": 6,
|
| 40 |
+
"output_past": true,
|
| 41 |
+
"pad_token_id": 0,
|
| 42 |
+
"problem_type": "single_label_classification",
|
| 43 |
+
"qa_dropout": 0.1,
|
| 44 |
+
"seq_classif_dropout": 0.2,
|
| 45 |
+
"sinusoidal_pos_embds": false,
|
| 46 |
+
"tie_weights_": true,
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.49.0",
|
| 49 |
+
"vocab_size": 119547
|
| 50 |
+
}
|
checkpoint-1000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c310f6b53cd09e9a7cf60f7e2a73a9507d23a87a2ad3689e413d9f845f926091
|
| 3 |
+
size 541341984
|
checkpoint-1000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6902649a4d25559dc28f73c87b36fe477420775afa1baff5bded7173e499dba7
|
| 3 |
+
size 1082746042
|
checkpoint-1000/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91e4a3878be61db94decb268c93b6db245495aa6dd7cd695f4e8977e34757871
|
| 3 |
+
size 14244
|
checkpoint-1000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfb7096f51f7fe636183113ecd99621958f06feb8078fab0f07288854f035563
|
| 3 |
+
size 1064
|
checkpoint-1000/trainer_state.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0341261633919339,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 1000,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.5170630816959669,
|
| 13 |
+
"grad_norm": 0.07793747633695602,
|
| 14 |
+
"learning_rate": 4.138228197173389e-05,
|
| 15 |
+
"loss": 0.5111,
|
| 16 |
+
"step": 500
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 1.0341261633919339,
|
| 20 |
+
"grad_norm": 0.1947256475687027,
|
| 21 |
+
"learning_rate": 3.276456394346777e-05,
|
| 22 |
+
"loss": 0.0911,
|
| 23 |
+
"step": 1000
|
| 24 |
+
}
|
| 25 |
+
],
|
| 26 |
+
"logging_steps": 500,
|
| 27 |
+
"max_steps": 2901,
|
| 28 |
+
"num_input_tokens_seen": 0,
|
| 29 |
+
"num_train_epochs": 3,
|
| 30 |
+
"save_steps": 500,
|
| 31 |
+
"stateful_callbacks": {
|
| 32 |
+
"TrainerControl": {
|
| 33 |
+
"args": {
|
| 34 |
+
"should_epoch_stop": false,
|
| 35 |
+
"should_evaluate": false,
|
| 36 |
+
"should_log": false,
|
| 37 |
+
"should_save": true,
|
| 38 |
+
"should_training_stop": false
|
| 39 |
+
},
|
| 40 |
+
"attributes": {}
|
| 41 |
+
}
|
| 42 |
+
},
|
| 43 |
+
"total_flos": 1059095463014400.0,
|
| 44 |
+
"train_batch_size": 8,
|
| 45 |
+
"trial_name": null,
|
| 46 |
+
"trial_params": null
|
| 47 |
+
}
|
checkpoint-1000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
|
| 3 |
+
size 5304
|
checkpoint-1500/config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "distilbert-base-multilingual-cased",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "Diverses",
|
| 13 |
+
"1": "Bau",
|
| 14 |
+
"2": "Steuern Finanzen",
|
| 15 |
+
"3": "SBB Verkehr",
|
| 16 |
+
"4": "Living Moving",
|
| 17 |
+
"5": "Ausweise",
|
| 18 |
+
"6": "Kontakt",
|
| 19 |
+
"7": "Newsletter",
|
| 20 |
+
"8": "Tiere",
|
| 21 |
+
"9": "Vereine Busines"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"label2id": {
|
| 25 |
+
"Ausweise": "5",
|
| 26 |
+
"Bau": "1",
|
| 27 |
+
"Diverses": "0",
|
| 28 |
+
"Kontakt": "6",
|
| 29 |
+
"Living Moving": "4",
|
| 30 |
+
"Newsletter": "7",
|
| 31 |
+
"SBB Verkehr": "3",
|
| 32 |
+
"Steuern Finanzen": "2",
|
| 33 |
+
"Tiere": "8",
|
| 34 |
+
"Vereine Busines": "9"
|
| 35 |
+
},
|
| 36 |
+
"max_position_embeddings": 512,
|
| 37 |
+
"model_type": "distilbert",
|
| 38 |
+
"n_heads": 12,
|
| 39 |
+
"n_layers": 6,
|
| 40 |
+
"output_past": true,
|
| 41 |
+
"pad_token_id": 0,
|
| 42 |
+
"problem_type": "single_label_classification",
|
| 43 |
+
"qa_dropout": 0.1,
|
| 44 |
+
"seq_classif_dropout": 0.2,
|
| 45 |
+
"sinusoidal_pos_embds": false,
|
| 46 |
+
"tie_weights_": true,
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.49.0",
|
| 49 |
+
"vocab_size": 119547
|
| 50 |
+
}
|
checkpoint-1500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c2177f9065876f3427b822be0441d8cee99e4d044e0bcba5857a27ff70fe3b0
|
| 3 |
+
size 541341984
|
checkpoint-1500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba174cbeb794b3831391b9c0ed3103d7e929b88f5f961793354401071dfdcb80
|
| 3 |
+
size 1082746042
|
checkpoint-1500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4074593b796aacaad21e5712f624c0a7ff11d8d686144821e03175c218a2e71e
|
| 3 |
+
size 14244
|
checkpoint-1500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:088125f0fea80fe70ff2e523109f7063ed52a22b63d9fd81290043f690dfdd23
|
| 3 |
+
size 1064
|
checkpoint-1500/trainer_state.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.5511892450879006,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 1500,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.5170630816959669,
|
| 13 |
+
"grad_norm": 0.07793747633695602,
|
| 14 |
+
"learning_rate": 4.138228197173389e-05,
|
| 15 |
+
"loss": 0.5111,
|
| 16 |
+
"step": 500
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 1.0341261633919339,
|
| 20 |
+
"grad_norm": 0.1947256475687027,
|
| 21 |
+
"learning_rate": 3.276456394346777e-05,
|
| 22 |
+
"loss": 0.0911,
|
| 23 |
+
"step": 1000
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1.5511892450879006,
|
| 27 |
+
"grad_norm": 0.8741845488548279,
|
| 28 |
+
"learning_rate": 2.4146845915201654e-05,
|
| 29 |
+
"loss": 0.0476,
|
| 30 |
+
"step": 1500
|
| 31 |
+
}
|
| 32 |
+
],
|
| 33 |
+
"logging_steps": 500,
|
| 34 |
+
"max_steps": 2901,
|
| 35 |
+
"num_input_tokens_seen": 0,
|
| 36 |
+
"num_train_epochs": 3,
|
| 37 |
+
"save_steps": 500,
|
| 38 |
+
"stateful_callbacks": {
|
| 39 |
+
"TrainerControl": {
|
| 40 |
+
"args": {
|
| 41 |
+
"should_epoch_stop": false,
|
| 42 |
+
"should_evaluate": false,
|
| 43 |
+
"should_log": false,
|
| 44 |
+
"should_save": true,
|
| 45 |
+
"should_training_stop": false
|
| 46 |
+
},
|
| 47 |
+
"attributes": {}
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"total_flos": 1589040653414400.0,
|
| 51 |
+
"train_batch_size": 8,
|
| 52 |
+
"trial_name": null,
|
| 53 |
+
"trial_params": null
|
| 54 |
+
}
|
checkpoint-1500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
|
| 3 |
+
size 5304
|
checkpoint-2000/config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "distilbert-base-multilingual-cased",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "Diverses",
|
| 13 |
+
"1": "Bau",
|
| 14 |
+
"2": "Steuern Finanzen",
|
| 15 |
+
"3": "SBB Verkehr",
|
| 16 |
+
"4": "Living Moving",
|
| 17 |
+
"5": "Ausweise",
|
| 18 |
+
"6": "Kontakt",
|
| 19 |
+
"7": "Newsletter",
|
| 20 |
+
"8": "Tiere",
|
| 21 |
+
"9": "Vereine Busines"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"label2id": {
|
| 25 |
+
"Ausweise": "5",
|
| 26 |
+
"Bau": "1",
|
| 27 |
+
"Diverses": "0",
|
| 28 |
+
"Kontakt": "6",
|
| 29 |
+
"Living Moving": "4",
|
| 30 |
+
"Newsletter": "7",
|
| 31 |
+
"SBB Verkehr": "3",
|
| 32 |
+
"Steuern Finanzen": "2",
|
| 33 |
+
"Tiere": "8",
|
| 34 |
+
"Vereine Busines": "9"
|
| 35 |
+
},
|
| 36 |
+
"max_position_embeddings": 512,
|
| 37 |
+
"model_type": "distilbert",
|
| 38 |
+
"n_heads": 12,
|
| 39 |
+
"n_layers": 6,
|
| 40 |
+
"output_past": true,
|
| 41 |
+
"pad_token_id": 0,
|
| 42 |
+
"problem_type": "single_label_classification",
|
| 43 |
+
"qa_dropout": 0.1,
|
| 44 |
+
"seq_classif_dropout": 0.2,
|
| 45 |
+
"sinusoidal_pos_embds": false,
|
| 46 |
+
"tie_weights_": true,
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.49.0",
|
| 49 |
+
"vocab_size": 119547
|
| 50 |
+
}
|
checkpoint-2000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06649425f55b8e481bbe57a42c8314cbfade664bab7557f6262cc21bd6ece677
|
| 3 |
+
size 541341984
|
checkpoint-2000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2418a9b17fa7bb48880d94d3f402bf9c085e15a878f46ff2e6a2fbee0c583251
|
| 3 |
+
size 1082746042
|
checkpoint-2000/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8ddc5cce0545d1faeb5ea787dd65e4c185c739f84517931ea5488654960ada1
|
| 3 |
+
size 14244
|
checkpoint-2000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c85bada4ac12da9bc05925afb9a1f6da25e734c2de7a5b86454e2f3562456b43
|
| 3 |
+
size 1064
|
checkpoint-2000/trainer_state.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0682523267838677,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 2000,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.5170630816959669,
|
| 13 |
+
"grad_norm": 0.07793747633695602,
|
| 14 |
+
"learning_rate": 4.138228197173389e-05,
|
| 15 |
+
"loss": 0.5111,
|
| 16 |
+
"step": 500
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 1.0341261633919339,
|
| 20 |
+
"grad_norm": 0.1947256475687027,
|
| 21 |
+
"learning_rate": 3.276456394346777e-05,
|
| 22 |
+
"loss": 0.0911,
|
| 23 |
+
"step": 1000
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1.5511892450879006,
|
| 27 |
+
"grad_norm": 0.8741845488548279,
|
| 28 |
+
"learning_rate": 2.4146845915201654e-05,
|
| 29 |
+
"loss": 0.0476,
|
| 30 |
+
"step": 1500
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 2.0682523267838677,
|
| 34 |
+
"grad_norm": 0.00605323351919651,
|
| 35 |
+
"learning_rate": 1.5529127886935542e-05,
|
| 36 |
+
"loss": 0.0166,
|
| 37 |
+
"step": 2000
|
| 38 |
+
}
|
| 39 |
+
],
|
| 40 |
+
"logging_steps": 500,
|
| 41 |
+
"max_steps": 2901,
|
| 42 |
+
"num_input_tokens_seen": 0,
|
| 43 |
+
"num_train_epochs": 3,
|
| 44 |
+
"save_steps": 500,
|
| 45 |
+
"stateful_callbacks": {
|
| 46 |
+
"TrainerControl": {
|
| 47 |
+
"args": {
|
| 48 |
+
"should_epoch_stop": false,
|
| 49 |
+
"should_evaluate": false,
|
| 50 |
+
"should_log": false,
|
| 51 |
+
"should_save": true,
|
| 52 |
+
"should_training_stop": false
|
| 53 |
+
},
|
| 54 |
+
"attributes": {}
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"total_flos": 2118190926028800.0,
|
| 58 |
+
"train_batch_size": 8,
|
| 59 |
+
"trial_name": null,
|
| 60 |
+
"trial_params": null
|
| 61 |
+
}
|
checkpoint-2000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
|
| 3 |
+
size 5304
|
checkpoint-2500/config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "distilbert-base-multilingual-cased",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "Diverses",
|
| 13 |
+
"1": "Bau",
|
| 14 |
+
"2": "Steuern Finanzen",
|
| 15 |
+
"3": "SBB Verkehr",
|
| 16 |
+
"4": "Living Moving",
|
| 17 |
+
"5": "Ausweise",
|
| 18 |
+
"6": "Kontakt",
|
| 19 |
+
"7": "Newsletter",
|
| 20 |
+
"8": "Tiere",
|
| 21 |
+
"9": "Vereine Busines"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"label2id": {
|
| 25 |
+
"Ausweise": "5",
|
| 26 |
+
"Bau": "1",
|
| 27 |
+
"Diverses": "0",
|
| 28 |
+
"Kontakt": "6",
|
| 29 |
+
"Living Moving": "4",
|
| 30 |
+
"Newsletter": "7",
|
| 31 |
+
"SBB Verkehr": "3",
|
| 32 |
+
"Steuern Finanzen": "2",
|
| 33 |
+
"Tiere": "8",
|
| 34 |
+
"Vereine Busines": "9"
|
| 35 |
+
},
|
| 36 |
+
"max_position_embeddings": 512,
|
| 37 |
+
"model_type": "distilbert",
|
| 38 |
+
"n_heads": 12,
|
| 39 |
+
"n_layers": 6,
|
| 40 |
+
"output_past": true,
|
| 41 |
+
"pad_token_id": 0,
|
| 42 |
+
"problem_type": "single_label_classification",
|
| 43 |
+
"qa_dropout": 0.1,
|
| 44 |
+
"seq_classif_dropout": 0.2,
|
| 45 |
+
"sinusoidal_pos_embds": false,
|
| 46 |
+
"tie_weights_": true,
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.49.0",
|
| 49 |
+
"vocab_size": 119547
|
| 50 |
+
}
|
checkpoint-2500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e0ed38a6688c1cb0c62519347add3f16be5287f6172759ab8afc37195c7d6c2
|
| 3 |
+
size 541341984
|
checkpoint-2500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:144218efe1fef1c2c390d64e4f58fbe37d0a01f1de182c503d6e376125010be9
|
| 3 |
+
size 1082746042
|
checkpoint-2500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f221ce4a3c96aefefb7dba819906ba1ef4cb56f2b0cef7daa931324da203554
|
| 3 |
+
size 14244
|
checkpoint-2500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c3d8d64ac4946b22f89ebeefc20828ff0c8bcf4e81ea931c72d71d6308825f0
|
| 3 |
+
size 1064
|
checkpoint-2500/trainer_state.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.5853154084798344,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 2500,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.5170630816959669,
|
| 13 |
+
"grad_norm": 0.07793747633695602,
|
| 14 |
+
"learning_rate": 4.138228197173389e-05,
|
| 15 |
+
"loss": 0.5111,
|
| 16 |
+
"step": 500
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 1.0341261633919339,
|
| 20 |
+
"grad_norm": 0.1947256475687027,
|
| 21 |
+
"learning_rate": 3.276456394346777e-05,
|
| 22 |
+
"loss": 0.0911,
|
| 23 |
+
"step": 1000
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1.5511892450879006,
|
| 27 |
+
"grad_norm": 0.8741845488548279,
|
| 28 |
+
"learning_rate": 2.4146845915201654e-05,
|
| 29 |
+
"loss": 0.0476,
|
| 30 |
+
"step": 1500
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 2.0682523267838677,
|
| 34 |
+
"grad_norm": 0.00605323351919651,
|
| 35 |
+
"learning_rate": 1.5529127886935542e-05,
|
| 36 |
+
"loss": 0.0166,
|
| 37 |
+
"step": 2000
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 2.5853154084798344,
|
| 41 |
+
"grad_norm": 0.00529734930023551,
|
| 42 |
+
"learning_rate": 6.911409858669425e-06,
|
| 43 |
+
"loss": 0.0221,
|
| 44 |
+
"step": 2500
|
| 45 |
+
}
|
| 46 |
+
],
|
| 47 |
+
"logging_steps": 500,
|
| 48 |
+
"max_steps": 2901,
|
| 49 |
+
"num_input_tokens_seen": 0,
|
| 50 |
+
"num_train_epochs": 3,
|
| 51 |
+
"save_steps": 500,
|
| 52 |
+
"stateful_callbacks": {
|
| 53 |
+
"TrainerControl": {
|
| 54 |
+
"args": {
|
| 55 |
+
"should_epoch_stop": false,
|
| 56 |
+
"should_evaluate": false,
|
| 57 |
+
"should_log": false,
|
| 58 |
+
"should_save": true,
|
| 59 |
+
"should_training_stop": false
|
| 60 |
+
},
|
| 61 |
+
"attributes": {}
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
"total_flos": 2648136116428800.0,
|
| 65 |
+
"train_batch_size": 8,
|
| 66 |
+
"trial_name": null,
|
| 67 |
+
"trial_params": null
|
| 68 |
+
}
|
checkpoint-2500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
|
| 3 |
+
size 5304
|
checkpoint-2901/config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "distilbert-base-multilingual-cased",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "Diverses",
|
| 13 |
+
"1": "Bau",
|
| 14 |
+
"2": "Steuern Finanzen",
|
| 15 |
+
"3": "SBB Verkehr",
|
| 16 |
+
"4": "Living Moving",
|
| 17 |
+
"5": "Ausweise",
|
| 18 |
+
"6": "Kontakt",
|
| 19 |
+
"7": "Newsletter",
|
| 20 |
+
"8": "Tiere",
|
| 21 |
+
"9": "Vereine Busines"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"label2id": {
|
| 25 |
+
"Ausweise": "5",
|
| 26 |
+
"Bau": "1",
|
| 27 |
+
"Diverses": "0",
|
| 28 |
+
"Kontakt": "6",
|
| 29 |
+
"Living Moving": "4",
|
| 30 |
+
"Newsletter": "7",
|
| 31 |
+
"SBB Verkehr": "3",
|
| 32 |
+
"Steuern Finanzen": "2",
|
| 33 |
+
"Tiere": "8",
|
| 34 |
+
"Vereine Busines": "9"
|
| 35 |
+
},
|
| 36 |
+
"max_position_embeddings": 512,
|
| 37 |
+
"model_type": "distilbert",
|
| 38 |
+
"n_heads": 12,
|
| 39 |
+
"n_layers": 6,
|
| 40 |
+
"output_past": true,
|
| 41 |
+
"pad_token_id": 0,
|
| 42 |
+
"problem_type": "single_label_classification",
|
| 43 |
+
"qa_dropout": 0.1,
|
| 44 |
+
"seq_classif_dropout": 0.2,
|
| 45 |
+
"sinusoidal_pos_embds": false,
|
| 46 |
+
"tie_weights_": true,
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.49.0",
|
| 49 |
+
"vocab_size": 119547
|
| 50 |
+
}
|
checkpoint-2901/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7718add1e1a61e3f08dbcf84f797c1f1f99848febf8996075f407b05f53743c
|
| 3 |
+
size 541341984
|
checkpoint-2901/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:340af5f79f889117dc258658bea11da0a16a37b40cf1aa3a50db69c57e1c9248
|
| 3 |
+
size 1082746042
|
checkpoint-2901/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e96d384380fdf344c6b16fbc86e41a54b92c4b38f643a9ff32e4f9caaa7b246
|
| 3 |
+
size 14244
|
checkpoint-2901/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36473bdf429db4e999b95bfe8092f2ac7e377b7df3806c8c1371b5694ad1cfba
|
| 3 |
+
size 1064
|
checkpoint-2901/trainer_state.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 2901,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.5170630816959669,
|
| 13 |
+
"grad_norm": 0.07793747633695602,
|
| 14 |
+
"learning_rate": 4.138228197173389e-05,
|
| 15 |
+
"loss": 0.5111,
|
| 16 |
+
"step": 500
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 1.0341261633919339,
|
| 20 |
+
"grad_norm": 0.1947256475687027,
|
| 21 |
+
"learning_rate": 3.276456394346777e-05,
|
| 22 |
+
"loss": 0.0911,
|
| 23 |
+
"step": 1000
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1.5511892450879006,
|
| 27 |
+
"grad_norm": 0.8741845488548279,
|
| 28 |
+
"learning_rate": 2.4146845915201654e-05,
|
| 29 |
+
"loss": 0.0476,
|
| 30 |
+
"step": 1500
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 2.0682523267838677,
|
| 34 |
+
"grad_norm": 0.00605323351919651,
|
| 35 |
+
"learning_rate": 1.5529127886935542e-05,
|
| 36 |
+
"loss": 0.0166,
|
| 37 |
+
"step": 2000
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 2.5853154084798344,
|
| 41 |
+
"grad_norm": 0.00529734930023551,
|
| 42 |
+
"learning_rate": 6.911409858669425e-06,
|
| 43 |
+
"loss": 0.0221,
|
| 44 |
+
"step": 2500
|
| 45 |
+
}
|
| 46 |
+
],
|
| 47 |
+
"logging_steps": 500,
|
| 48 |
+
"max_steps": 2901,
|
| 49 |
+
"num_input_tokens_seen": 0,
|
| 50 |
+
"num_train_epochs": 3,
|
| 51 |
+
"save_steps": 500,
|
| 52 |
+
"stateful_callbacks": {
|
| 53 |
+
"TrainerControl": {
|
| 54 |
+
"args": {
|
| 55 |
+
"should_epoch_stop": false,
|
| 56 |
+
"should_evaluate": false,
|
| 57 |
+
"should_log": false,
|
| 58 |
+
"should_save": true,
|
| 59 |
+
"should_training_stop": true
|
| 60 |
+
},
|
| 61 |
+
"attributes": {}
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
"total_flos": 3072357241344000.0,
|
| 65 |
+
"train_batch_size": 8,
|
| 66 |
+
"trial_name": null,
|
| 67 |
+
"trial_params": null
|
| 68 |
+
}
|
checkpoint-2901/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
|
| 3 |
+
size 5304
|
checkpoint-500/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 541341984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00ae06b0c3310779437d30e44571ffe52dfebff275b81a49e2f1348cc9a32d5e
|
| 3 |
size 541341984
|
checkpoint-500/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1082746042
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9acce701aee38dba44fe43ebfd5cd674312e969dbd8592c1bcbfa2717aec7869
|
| 3 |
size 1082746042
|
checkpoint-500/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daa92eb9dbf87d661c2c5d87d885f655d8f4beec107882aa3575b7a43c5f2379
|
| 3 |
size 14244
|
checkpoint-500/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f68d10cb06ea356a076afc72bcb69890b9a3ee0fc15d6134feb701d2cfc90677
|
| 3 |
size 1064
|
checkpoint-500/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 500,
|
| 7 |
"is_hyper_param_search": false,
|
|
@@ -9,15 +9,15 @@
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch":
|
| 13 |
-
"grad_norm":
|
| 14 |
-
"learning_rate":
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 500
|
| 17 |
}
|
| 18 |
],
|
| 19 |
"logging_steps": 500,
|
| 20 |
-
"max_steps":
|
| 21 |
"num_input_tokens_seen": 0,
|
| 22 |
"num_train_epochs": 3,
|
| 23 |
"save_steps": 500,
|
|
@@ -33,7 +33,7 @@
|
|
| 33 |
"attributes": {}
|
| 34 |
}
|
| 35 |
},
|
| 36 |
-
"total_flos":
|
| 37 |
"train_batch_size": 8,
|
| 38 |
"trial_name": null,
|
| 39 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.5170630816959669,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 500,
|
| 7 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 0.5170630816959669,
|
| 13 |
+
"grad_norm": 0.07793747633695602,
|
| 14 |
+
"learning_rate": 4.138228197173389e-05,
|
| 15 |
+
"loss": 0.5111,
|
| 16 |
"step": 500
|
| 17 |
}
|
| 18 |
],
|
| 19 |
"logging_steps": 500,
|
| 20 |
+
"max_steps": 2901,
|
| 21 |
"num_input_tokens_seen": 0,
|
| 22 |
"num_train_epochs": 3,
|
| 23 |
"save_steps": 500,
|
|
|
|
| 33 |
"attributes": {}
|
| 34 |
}
|
| 35 |
},
|
| 36 |
+
"total_flos": 529945190400000.0,
|
| 37 |
"train_batch_size": 8,
|
| 38 |
"trial_name": null,
|
| 39 |
"trial_params": null
|
checkpoint-500/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
|
| 3 |
size 5304
|