gerbejon commited on
Commit
0504d47
·
verified ·
1 Parent(s): cc0d6ef

Upload folder using huggingface_hub

Browse files
Files changed (41) hide show
  1. checkpoint-1000/config.json +50 -0
  2. checkpoint-1000/model.safetensors +3 -0
  3. checkpoint-1000/optimizer.pt +3 -0
  4. checkpoint-1000/rng_state.pth +3 -0
  5. checkpoint-1000/scheduler.pt +3 -0
  6. checkpoint-1000/trainer_state.json +47 -0
  7. checkpoint-1000/training_args.bin +3 -0
  8. checkpoint-1500/config.json +50 -0
  9. checkpoint-1500/model.safetensors +3 -0
  10. checkpoint-1500/optimizer.pt +3 -0
  11. checkpoint-1500/rng_state.pth +3 -0
  12. checkpoint-1500/scheduler.pt +3 -0
  13. checkpoint-1500/trainer_state.json +54 -0
  14. checkpoint-1500/training_args.bin +3 -0
  15. checkpoint-2000/config.json +50 -0
  16. checkpoint-2000/model.safetensors +3 -0
  17. checkpoint-2000/optimizer.pt +3 -0
  18. checkpoint-2000/rng_state.pth +3 -0
  19. checkpoint-2000/scheduler.pt +3 -0
  20. checkpoint-2000/trainer_state.json +61 -0
  21. checkpoint-2000/training_args.bin +3 -0
  22. checkpoint-2500/config.json +50 -0
  23. checkpoint-2500/model.safetensors +3 -0
  24. checkpoint-2500/optimizer.pt +3 -0
  25. checkpoint-2500/rng_state.pth +3 -0
  26. checkpoint-2500/scheduler.pt +3 -0
  27. checkpoint-2500/trainer_state.json +68 -0
  28. checkpoint-2500/training_args.bin +3 -0
  29. checkpoint-2901/config.json +50 -0
  30. checkpoint-2901/model.safetensors +3 -0
  31. checkpoint-2901/optimizer.pt +3 -0
  32. checkpoint-2901/rng_state.pth +3 -0
  33. checkpoint-2901/scheduler.pt +3 -0
  34. checkpoint-2901/trainer_state.json +68 -0
  35. checkpoint-2901/training_args.bin +3 -0
  36. checkpoint-500/model.safetensors +1 -1
  37. checkpoint-500/optimizer.pt +1 -1
  38. checkpoint-500/rng_state.pth +1 -1
  39. checkpoint-500/scheduler.pt +1 -1
  40. checkpoint-500/trainer_state.json +7 -7
  41. checkpoint-500/training_args.bin +1 -1
checkpoint-1000/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Diverses",
13
+ "1": "Bau",
14
+ "2": "Steuern Finanzen",
15
+ "3": "SBB Verkehr",
16
+ "4": "Living Moving",
17
+ "5": "Ausweise",
18
+ "6": "Kontakt",
19
+ "7": "Newsletter",
20
+ "8": "Tiere",
21
+ "9": "Vereine Busines"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "label2id": {
25
+ "Ausweise": "5",
26
+ "Bau": "1",
27
+ "Diverses": "0",
28
+ "Kontakt": "6",
29
+ "Living Moving": "4",
30
+ "Newsletter": "7",
31
+ "SBB Verkehr": "3",
32
+ "Steuern Finanzen": "2",
33
+ "Tiere": "8",
34
+ "Vereine Busines": "9"
35
+ },
36
+ "max_position_embeddings": 512,
37
+ "model_type": "distilbert",
38
+ "n_heads": 12,
39
+ "n_layers": 6,
40
+ "output_past": true,
41
+ "pad_token_id": 0,
42
+ "problem_type": "single_label_classification",
43
+ "qa_dropout": 0.1,
44
+ "seq_classif_dropout": 0.2,
45
+ "sinusoidal_pos_embds": false,
46
+ "tie_weights_": true,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.49.0",
49
+ "vocab_size": 119547
50
+ }
checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c310f6b53cd09e9a7cf60f7e2a73a9507d23a87a2ad3689e413d9f845f926091
3
+ size 541341984
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6902649a4d25559dc28f73c87b36fe477420775afa1baff5bded7173e499dba7
3
+ size 1082746042
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e4a3878be61db94decb268c93b6db245495aa6dd7cd695f4e8977e34757871
3
+ size 14244
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb7096f51f7fe636183113ecd99621958f06feb8078fab0f07288854f035563
3
+ size 1064
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0341261633919339,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5170630816959669,
13
+ "grad_norm": 0.07793747633695602,
14
+ "learning_rate": 4.138228197173389e-05,
15
+ "loss": 0.5111,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0341261633919339,
20
+ "grad_norm": 0.1947256475687027,
21
+ "learning_rate": 3.276456394346777e-05,
22
+ "loss": 0.0911,
23
+ "step": 1000
24
+ }
25
+ ],
26
+ "logging_steps": 500,
27
+ "max_steps": 2901,
28
+ "num_input_tokens_seen": 0,
29
+ "num_train_epochs": 3,
30
+ "save_steps": 500,
31
+ "stateful_callbacks": {
32
+ "TrainerControl": {
33
+ "args": {
34
+ "should_epoch_stop": false,
35
+ "should_evaluate": false,
36
+ "should_log": false,
37
+ "should_save": true,
38
+ "should_training_stop": false
39
+ },
40
+ "attributes": {}
41
+ }
42
+ },
43
+ "total_flos": 1059095463014400.0,
44
+ "train_batch_size": 8,
45
+ "trial_name": null,
46
+ "trial_params": null
47
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
3
+ size 5304
checkpoint-1500/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Diverses",
13
+ "1": "Bau",
14
+ "2": "Steuern Finanzen",
15
+ "3": "SBB Verkehr",
16
+ "4": "Living Moving",
17
+ "5": "Ausweise",
18
+ "6": "Kontakt",
19
+ "7": "Newsletter",
20
+ "8": "Tiere",
21
+ "9": "Vereine Busines"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "label2id": {
25
+ "Ausweise": "5",
26
+ "Bau": "1",
27
+ "Diverses": "0",
28
+ "Kontakt": "6",
29
+ "Living Moving": "4",
30
+ "Newsletter": "7",
31
+ "SBB Verkehr": "3",
32
+ "Steuern Finanzen": "2",
33
+ "Tiere": "8",
34
+ "Vereine Busines": "9"
35
+ },
36
+ "max_position_embeddings": 512,
37
+ "model_type": "distilbert",
38
+ "n_heads": 12,
39
+ "n_layers": 6,
40
+ "output_past": true,
41
+ "pad_token_id": 0,
42
+ "problem_type": "single_label_classification",
43
+ "qa_dropout": 0.1,
44
+ "seq_classif_dropout": 0.2,
45
+ "sinusoidal_pos_embds": false,
46
+ "tie_weights_": true,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.49.0",
49
+ "vocab_size": 119547
50
+ }
checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c2177f9065876f3427b822be0441d8cee99e4d044e0bcba5857a27ff70fe3b0
3
+ size 541341984
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba174cbeb794b3831391b9c0ed3103d7e929b88f5f961793354401071dfdcb80
3
+ size 1082746042
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4074593b796aacaad21e5712f624c0a7ff11d8d686144821e03175c218a2e71e
3
+ size 14244
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:088125f0fea80fe70ff2e523109f7063ed52a22b63d9fd81290043f690dfdd23
3
+ size 1064
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.5511892450879006,
5
+ "eval_steps": 500,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5170630816959669,
13
+ "grad_norm": 0.07793747633695602,
14
+ "learning_rate": 4.138228197173389e-05,
15
+ "loss": 0.5111,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0341261633919339,
20
+ "grad_norm": 0.1947256475687027,
21
+ "learning_rate": 3.276456394346777e-05,
22
+ "loss": 0.0911,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.5511892450879006,
27
+ "grad_norm": 0.8741845488548279,
28
+ "learning_rate": 2.4146845915201654e-05,
29
+ "loss": 0.0476,
30
+ "step": 1500
31
+ }
32
+ ],
33
+ "logging_steps": 500,
34
+ "max_steps": 2901,
35
+ "num_input_tokens_seen": 0,
36
+ "num_train_epochs": 3,
37
+ "save_steps": 500,
38
+ "stateful_callbacks": {
39
+ "TrainerControl": {
40
+ "args": {
41
+ "should_epoch_stop": false,
42
+ "should_evaluate": false,
43
+ "should_log": false,
44
+ "should_save": true,
45
+ "should_training_stop": false
46
+ },
47
+ "attributes": {}
48
+ }
49
+ },
50
+ "total_flos": 1589040653414400.0,
51
+ "train_batch_size": 8,
52
+ "trial_name": null,
53
+ "trial_params": null
54
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
3
+ size 5304
checkpoint-2000/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Diverses",
13
+ "1": "Bau",
14
+ "2": "Steuern Finanzen",
15
+ "3": "SBB Verkehr",
16
+ "4": "Living Moving",
17
+ "5": "Ausweise",
18
+ "6": "Kontakt",
19
+ "7": "Newsletter",
20
+ "8": "Tiere",
21
+ "9": "Vereine Busines"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "label2id": {
25
+ "Ausweise": "5",
26
+ "Bau": "1",
27
+ "Diverses": "0",
28
+ "Kontakt": "6",
29
+ "Living Moving": "4",
30
+ "Newsletter": "7",
31
+ "SBB Verkehr": "3",
32
+ "Steuern Finanzen": "2",
33
+ "Tiere": "8",
34
+ "Vereine Busines": "9"
35
+ },
36
+ "max_position_embeddings": 512,
37
+ "model_type": "distilbert",
38
+ "n_heads": 12,
39
+ "n_layers": 6,
40
+ "output_past": true,
41
+ "pad_token_id": 0,
42
+ "problem_type": "single_label_classification",
43
+ "qa_dropout": 0.1,
44
+ "seq_classif_dropout": 0.2,
45
+ "sinusoidal_pos_embds": false,
46
+ "tie_weights_": true,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.49.0",
49
+ "vocab_size": 119547
50
+ }
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06649425f55b8e481bbe57a42c8314cbfade664bab7557f6262cc21bd6ece677
3
+ size 541341984
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2418a9b17fa7bb48880d94d3f402bf9c085e15a878f46ff2e6a2fbee0c583251
3
+ size 1082746042
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ddc5cce0545d1faeb5ea787dd65e4c185c739f84517931ea5488654960ada1
3
+ size 14244
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c85bada4ac12da9bc05925afb9a1f6da25e734c2de7a5b86454e2f3562456b43
3
+ size 1064
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0682523267838677,
5
+ "eval_steps": 500,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5170630816959669,
13
+ "grad_norm": 0.07793747633695602,
14
+ "learning_rate": 4.138228197173389e-05,
15
+ "loss": 0.5111,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0341261633919339,
20
+ "grad_norm": 0.1947256475687027,
21
+ "learning_rate": 3.276456394346777e-05,
22
+ "loss": 0.0911,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.5511892450879006,
27
+ "grad_norm": 0.8741845488548279,
28
+ "learning_rate": 2.4146845915201654e-05,
29
+ "loss": 0.0476,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 2.0682523267838677,
34
+ "grad_norm": 0.00605323351919651,
35
+ "learning_rate": 1.5529127886935542e-05,
36
+ "loss": 0.0166,
37
+ "step": 2000
38
+ }
39
+ ],
40
+ "logging_steps": 500,
41
+ "max_steps": 2901,
42
+ "num_input_tokens_seen": 0,
43
+ "num_train_epochs": 3,
44
+ "save_steps": 500,
45
+ "stateful_callbacks": {
46
+ "TrainerControl": {
47
+ "args": {
48
+ "should_epoch_stop": false,
49
+ "should_evaluate": false,
50
+ "should_log": false,
51
+ "should_save": true,
52
+ "should_training_stop": false
53
+ },
54
+ "attributes": {}
55
+ }
56
+ },
57
+ "total_flos": 2118190926028800.0,
58
+ "train_batch_size": 8,
59
+ "trial_name": null,
60
+ "trial_params": null
61
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
3
+ size 5304
checkpoint-2500/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Diverses",
13
+ "1": "Bau",
14
+ "2": "Steuern Finanzen",
15
+ "3": "SBB Verkehr",
16
+ "4": "Living Moving",
17
+ "5": "Ausweise",
18
+ "6": "Kontakt",
19
+ "7": "Newsletter",
20
+ "8": "Tiere",
21
+ "9": "Vereine Busines"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "label2id": {
25
+ "Ausweise": "5",
26
+ "Bau": "1",
27
+ "Diverses": "0",
28
+ "Kontakt": "6",
29
+ "Living Moving": "4",
30
+ "Newsletter": "7",
31
+ "SBB Verkehr": "3",
32
+ "Steuern Finanzen": "2",
33
+ "Tiere": "8",
34
+ "Vereine Busines": "9"
35
+ },
36
+ "max_position_embeddings": 512,
37
+ "model_type": "distilbert",
38
+ "n_heads": 12,
39
+ "n_layers": 6,
40
+ "output_past": true,
41
+ "pad_token_id": 0,
42
+ "problem_type": "single_label_classification",
43
+ "qa_dropout": 0.1,
44
+ "seq_classif_dropout": 0.2,
45
+ "sinusoidal_pos_embds": false,
46
+ "tie_weights_": true,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.49.0",
49
+ "vocab_size": 119547
50
+ }
checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0ed38a6688c1cb0c62519347add3f16be5287f6172759ab8afc37195c7d6c2
3
+ size 541341984
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:144218efe1fef1c2c390d64e4f58fbe37d0a01f1de182c503d6e376125010be9
3
+ size 1082746042
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f221ce4a3c96aefefb7dba819906ba1ef4cb56f2b0cef7daa931324da203554
3
+ size 14244
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3d8d64ac4946b22f89ebeefc20828ff0c8bcf4e81ea931c72d71d6308825f0
3
+ size 1064
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.5853154084798344,
5
+ "eval_steps": 500,
6
+ "global_step": 2500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5170630816959669,
13
+ "grad_norm": 0.07793747633695602,
14
+ "learning_rate": 4.138228197173389e-05,
15
+ "loss": 0.5111,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0341261633919339,
20
+ "grad_norm": 0.1947256475687027,
21
+ "learning_rate": 3.276456394346777e-05,
22
+ "loss": 0.0911,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.5511892450879006,
27
+ "grad_norm": 0.8741845488548279,
28
+ "learning_rate": 2.4146845915201654e-05,
29
+ "loss": 0.0476,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 2.0682523267838677,
34
+ "grad_norm": 0.00605323351919651,
35
+ "learning_rate": 1.5529127886935542e-05,
36
+ "loss": 0.0166,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 2.5853154084798344,
41
+ "grad_norm": 0.00529734930023551,
42
+ "learning_rate": 6.911409858669425e-06,
43
+ "loss": 0.0221,
44
+ "step": 2500
45
+ }
46
+ ],
47
+ "logging_steps": 500,
48
+ "max_steps": 2901,
49
+ "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 3,
51
+ "save_steps": 500,
52
+ "stateful_callbacks": {
53
+ "TrainerControl": {
54
+ "args": {
55
+ "should_epoch_stop": false,
56
+ "should_evaluate": false,
57
+ "should_log": false,
58
+ "should_save": true,
59
+ "should_training_stop": false
60
+ },
61
+ "attributes": {}
62
+ }
63
+ },
64
+ "total_flos": 2648136116428800.0,
65
+ "train_batch_size": 8,
66
+ "trial_name": null,
67
+ "trial_params": null
68
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
3
+ size 5304
checkpoint-2901/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Diverses",
13
+ "1": "Bau",
14
+ "2": "Steuern Finanzen",
15
+ "3": "SBB Verkehr",
16
+ "4": "Living Moving",
17
+ "5": "Ausweise",
18
+ "6": "Kontakt",
19
+ "7": "Newsletter",
20
+ "8": "Tiere",
21
+ "9": "Vereine Busines"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "label2id": {
25
+ "Ausweise": "5",
26
+ "Bau": "1",
27
+ "Diverses": "0",
28
+ "Kontakt": "6",
29
+ "Living Moving": "4",
30
+ "Newsletter": "7",
31
+ "SBB Verkehr": "3",
32
+ "Steuern Finanzen": "2",
33
+ "Tiere": "8",
34
+ "Vereine Busines": "9"
35
+ },
36
+ "max_position_embeddings": 512,
37
+ "model_type": "distilbert",
38
+ "n_heads": 12,
39
+ "n_layers": 6,
40
+ "output_past": true,
41
+ "pad_token_id": 0,
42
+ "problem_type": "single_label_classification",
43
+ "qa_dropout": 0.1,
44
+ "seq_classif_dropout": 0.2,
45
+ "sinusoidal_pos_embds": false,
46
+ "tie_weights_": true,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.49.0",
49
+ "vocab_size": 119547
50
+ }
checkpoint-2901/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7718add1e1a61e3f08dbcf84f797c1f1f99848febf8996075f407b05f53743c
3
+ size 541341984
checkpoint-2901/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:340af5f79f889117dc258658bea11da0a16a37b40cf1aa3a50db69c57e1c9248
3
+ size 1082746042
checkpoint-2901/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e96d384380fdf344c6b16fbc86e41a54b92c4b38f643a9ff32e4f9caaa7b246
3
+ size 14244
checkpoint-2901/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36473bdf429db4e999b95bfe8092f2ac7e377b7df3806c8c1371b5694ad1cfba
3
+ size 1064
checkpoint-2901/trainer_state.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2901,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5170630816959669,
13
+ "grad_norm": 0.07793747633695602,
14
+ "learning_rate": 4.138228197173389e-05,
15
+ "loss": 0.5111,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0341261633919339,
20
+ "grad_norm": 0.1947256475687027,
21
+ "learning_rate": 3.276456394346777e-05,
22
+ "loss": 0.0911,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.5511892450879006,
27
+ "grad_norm": 0.8741845488548279,
28
+ "learning_rate": 2.4146845915201654e-05,
29
+ "loss": 0.0476,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 2.0682523267838677,
34
+ "grad_norm": 0.00605323351919651,
35
+ "learning_rate": 1.5529127886935542e-05,
36
+ "loss": 0.0166,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 2.5853154084798344,
41
+ "grad_norm": 0.00529734930023551,
42
+ "learning_rate": 6.911409858669425e-06,
43
+ "loss": 0.0221,
44
+ "step": 2500
45
+ }
46
+ ],
47
+ "logging_steps": 500,
48
+ "max_steps": 2901,
49
+ "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 3,
51
+ "save_steps": 500,
52
+ "stateful_callbacks": {
53
+ "TrainerControl": {
54
+ "args": {
55
+ "should_epoch_stop": false,
56
+ "should_evaluate": false,
57
+ "should_log": false,
58
+ "should_save": true,
59
+ "should_training_stop": true
60
+ },
61
+ "attributes": {}
62
+ }
63
+ },
64
+ "total_flos": 3072357241344000.0,
65
+ "train_batch_size": 8,
66
+ "trial_name": null,
67
+ "trial_params": null
68
+ }
checkpoint-2901/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
3
+ size 5304
checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9acf41bce88d09605f3fb5a1c9124389bdd12f4f6513ed970e601e5c5f9eb19a
3
  size 541341984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00ae06b0c3310779437d30e44571ffe52dfebff275b81a49e2f1348cc9a32d5e
3
  size 541341984
checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc511cf8b59576e64735f4451ba289ed8728b89f3304191df4d73e775fa8543
3
  size 1082746042
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9acce701aee38dba44fe43ebfd5cd674312e969dbd8592c1bcbfa2717aec7869
3
  size 1082746042
checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d29a9974cf45a3c76c534b0043894d7aaa847d5a5fce754c3a414698716b42
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daa92eb9dbf87d661c2c5d87d885f655d8f4beec107882aa3575b7a43c5f2379
3
  size 14244
checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93754e2ca02042c8abce040086fb00995437b814de831f023a199f3f5495d180
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68d10cb06ea356a076afc72bcb69890b9a3ee0fc15d6134feb701d2cfc90677
3
  size 1064
checkpoint-500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.202643171806167,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
@@ -9,15 +9,15 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 2.202643171806167,
13
- "grad_norm": 3.462831735610962,
14
- "learning_rate": 1.3289280469897211e-05,
15
- "loss": 0.5364,
16
  "step": 500
17
  }
18
  ],
19
  "logging_steps": 500,
20
- "max_steps": 681,
21
  "num_input_tokens_seen": 0,
22
  "num_train_epochs": 3,
23
  "save_steps": 500,
@@ -33,7 +33,7 @@
33
  "attributes": {}
34
  }
35
  },
36
- "total_flos": 529680217804800.0,
37
  "train_batch_size": 8,
38
  "trial_name": null,
39
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5170630816959669,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.5170630816959669,
13
+ "grad_norm": 0.07793747633695602,
14
+ "learning_rate": 4.138228197173389e-05,
15
+ "loss": 0.5111,
16
  "step": 500
17
  }
18
  ],
19
  "logging_steps": 500,
20
+ "max_steps": 2901,
21
  "num_input_tokens_seen": 0,
22
  "num_train_epochs": 3,
23
  "save_steps": 500,
 
33
  "attributes": {}
34
  }
35
  },
36
+ "total_flos": 529945190400000.0,
37
  "train_batch_size": 8,
38
  "trial_name": null,
39
  "trial_params": null
checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c82899e96a4a3da07fe891ccfac25e58a294f4f9078446dc703903a1a90f3773
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579d5f17fe16650af48a3029e1540e24f8069a317303f62b822b18ad54759be
3
  size 5304