lilyray committed on
Commit
6f33b4d
·
verified ·
1 Parent(s): 4522022

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:485040adadc4df794c682acb342a777cc1487a106aa332a6f9bbc3da47b2da3c
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1312452b074c42529308bf096701b8d540d3c2d5bf954ff9e867b3a6d6657c85
3
  size 267832560
run-0/checkpoint-1432/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "vocab_size": 30522
25
+ }
run-0/checkpoint-1432/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb856f110c71a538885c02f2cd792aaa30e4cc15d299efaa4dc146abd9cb2832
3
+ size 267832560
run-0/checkpoint-1432/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55779307880b55bb2f0623f1c34649ef6c88e081993f79d1aff6dcf1754cfcc3
3
+ size 535727290
run-0/checkpoint-1432/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4af46db25777f8205e06802b2fbf6310028acd288a751c232e877a9be58c46c
3
+ size 14308
run-0/checkpoint-1432/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c23b0c45f8e2b75b0f4308b33b6868f7d32d540a591b3b9ec91e85f3b0e057
3
+ size 1064
run-0/checkpoint-1432/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-1432/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-1432/trainer_state.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6083901524543762,
3
+ "best_model_checkpoint": "./results/run-0/checkpoint-358",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1432,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6722222222222222,
14
+ "eval_loss": 0.6083901524543762,
15
+ "eval_runtime": 5.7737,
16
+ "eval_samples_per_second": 165.406,
17
+ "eval_steps_per_second": 20.784,
18
+ "step": 358
19
+ },
20
+ {
21
+ "epoch": 1.4,
22
+ "grad_norm": 8.0650634765625,
23
+ "learning_rate": 1.0536381136099229e-05,
24
+ "loss": 0.6149,
25
+ "step": 500
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_accuracy": 0.6920138888888888,
30
+ "eval_loss": 0.6155831813812256,
31
+ "eval_runtime": 6.4389,
32
+ "eval_samples_per_second": 148.317,
33
+ "eval_steps_per_second": 18.637,
34
+ "step": 716
35
+ },
36
+ {
37
+ "epoch": 2.79,
38
+ "grad_norm": 5.074481010437012,
39
+ "learning_rate": 4.8838161489215315e-06,
40
+ "loss": 0.4408,
41
+ "step": 1000
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "eval_accuracy": 0.6836805555555555,
46
+ "eval_loss": 0.6700696349143982,
47
+ "eval_runtime": 5.7921,
48
+ "eval_samples_per_second": 164.88,
49
+ "eval_steps_per_second": 20.718,
50
+ "step": 1074
51
+ },
52
+ {
53
+ "epoch": 4.0,
54
+ "eval_accuracy": 0.6878472222222222,
55
+ "eval_loss": 0.7763063907623291,
56
+ "eval_runtime": 6.009,
57
+ "eval_samples_per_second": 158.928,
58
+ "eval_steps_per_second": 19.97,
59
+ "step": 1432
60
+ }
61
+ ],
62
+ "logging_steps": 500,
63
+ "max_steps": 1432,
64
+ "num_input_tokens_seen": 0,
65
+ "num_train_epochs": 4,
66
+ "save_steps": 500,
67
+ "total_flos": 1059209319653376.0,
68
+ "train_batch_size": 8,
69
+ "trial_name": null,
70
+ "trial_params": {
71
+ "learning_rate": 1.6188946123276927e-05,
72
+ "num_train_epochs": 4,
73
+ "per_device_train_batch_size": 8,
74
+ "seed": 20
75
+ }
76
+ }
run-0/checkpoint-1432/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756e0c6b01f2dc28da5639bc1c7bcb62246c7526b8bef0298780aea65e098381
3
+ size 4920
run-0/checkpoint-1432/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-358/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acb32e8f6fbc51c89b529efbb995526137d0ccde0b805800ac61d50b8b226398
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1312452b074c42529308bf096701b8d540d3c2d5bf954ff9e867b3a6d6657c85
3
  size 267832560
run-1/checkpoint-358/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cec03e21180ed7b160804a511526c6ac798f2dedee191d716a3e9b72844c2f0
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c9f3c571828e7bd43a825254c3ec3a5b29fa0e05eae8b686eba566a6105df9
3
  size 535727290
run-1/checkpoint-358/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c60a2df6de14c702310760f72492ff50324690863882e62ff3ac70aaf5e5529
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03fbffb5807ef37922e14f44a85e6eeab69e0ade6bec6152f385a1d56f1f8e7c
3
+ size 14244
run-1/checkpoint-358/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ba9c73e709f80e22d05b4cb1e362b57eb04ffe20023f81bd7c5c5dbd5a44e09
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d99530cc4ae1cbb1d5cc5857d9a9572c831a15fbc92252c30c3a92b9af3025e0
3
  size 1064
run-1/checkpoint-358/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.623793363571167,
3
- "best_model_checkpoint": "./results/run-1/checkpoint-179",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 358,
7
  "is_hyper_param_search": true,
@@ -10,35 +10,26 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6246212121212121,
14
- "eval_loss": 0.623793363571167,
15
- "eval_runtime": 5.6017,
16
- "eval_samples_per_second": 170.484,
17
- "eval_steps_per_second": 10.711,
18
- "step": 179
19
- },
20
- {
21
- "epoch": 2.0,
22
- "eval_accuracy": 0.6923295454545454,
23
- "eval_loss": 0.6612856984138489,
24
- "eval_runtime": 5.3912,
25
- "eval_samples_per_second": 177.139,
26
- "eval_steps_per_second": 11.129,
27
  "step": 358
28
  }
29
  ],
30
  "logging_steps": 500,
31
  "max_steps": 716,
32
  "num_input_tokens_seen": 0,
33
- "num_train_epochs": 4,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
- "train_batch_size": 16,
37
  "trial_name": null,
38
  "trial_params": {
39
- "learning_rate": 9.621056977763735e-05,
40
- "num_train_epochs": 4,
41
- "per_device_train_batch_size": 16,
42
- "seed": 17
43
  }
44
  }
 
1
  {
2
+ "best_metric": 0.6688534617424011,
3
+ "best_model_checkpoint": "./results/run-1/checkpoint-358",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
  "global_step": 358,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6211805555555555,
14
+ "eval_loss": 0.6688534617424011,
15
+ "eval_runtime": 5.5824,
16
+ "eval_samples_per_second": 171.075,
17
+ "eval_steps_per_second": 21.496,
 
 
 
 
 
 
 
 
 
18
  "step": 358
19
  }
20
  ],
21
  "logging_steps": 500,
22
  "max_steps": 716,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
+ "train_batch_size": 8,
28
  "trial_name": null,
29
  "trial_params": {
30
+ "learning_rate": 8.514687247224755e-05,
31
+ "num_train_epochs": 2,
32
+ "per_device_train_batch_size": 8,
33
+ "seed": 40
34
  }
35
  }
run-1/checkpoint-358/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dd0a6f82f44ad0806dfecfa060c04182bf3daf51bc9b7474a1711e44c162602
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb08b78a879b2a9b7abbbaf6e8ab5cc5cddbc616d40fcf027dd248408bd13c4e
3
  size 4920
run-1/checkpoint-716/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.623793363571167,
3
- "best_model_checkpoint": "./results/run-1/checkpoint-179",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 716,
7
  "is_hyper_param_search": true,
@@ -10,60 +10,42 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6246212121212121,
14
- "eval_loss": 0.623793363571167,
15
- "eval_runtime": 5.6017,
16
- "eval_samples_per_second": 170.484,
17
- "eval_steps_per_second": 10.711,
18
- "step": 179
19
- },
20
- {
21
- "epoch": 2.0,
22
- "eval_accuracy": 0.6923295454545454,
23
- "eval_loss": 0.6612856984138489,
24
- "eval_runtime": 5.3912,
25
- "eval_samples_per_second": 177.139,
26
- "eval_steps_per_second": 11.129,
27
  "step": 358
28
  },
29
  {
30
- "epoch": 2.79,
31
- "grad_norm": 3.6779439449310303,
32
- "learning_rate": 2.902441769828166e-05,
33
- "loss": 0.431,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 3.0,
38
- "eval_accuracy": 0.6544507575757575,
39
- "eval_loss": 1.3626810312271118,
40
- "eval_runtime": 5.6448,
41
- "eval_samples_per_second": 169.184,
42
- "eval_steps_per_second": 10.629,
43
- "step": 537
44
- },
45
- {
46
- "epoch": 4.0,
47
- "eval_accuracy": 0.6836174242424242,
48
- "eval_loss": 1.5171432495117188,
49
- "eval_runtime": 5.46,
50
- "eval_samples_per_second": 174.907,
51
- "eval_steps_per_second": 10.989,
52
  "step": 716
53
  }
54
  ],
55
  "logging_steps": 500,
56
  "max_steps": 716,
57
  "num_input_tokens_seen": 0,
58
- "num_train_epochs": 4,
59
  "save_steps": 500,
60
- "total_flos": 1059209319653376.0,
61
- "train_batch_size": 16,
62
  "trial_name": null,
63
  "trial_params": {
64
- "learning_rate": 9.621056977763735e-05,
65
- "num_train_epochs": 4,
66
- "per_device_train_batch_size": 16,
67
- "seed": 17
68
  }
69
  }
 
1
  {
2
+ "best_metric": 0.6688534617424011,
3
+ "best_model_checkpoint": "./results/run-1/checkpoint-358",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 716,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6211805555555555,
14
+ "eval_loss": 0.6688534617424011,
15
+ "eval_runtime": 5.5824,
16
+ "eval_samples_per_second": 171.075,
17
+ "eval_steps_per_second": 21.496,
 
 
 
 
 
 
 
 
 
18
  "step": 358
19
  },
20
  {
21
+ "epoch": 1.4,
22
+ "grad_norm": 5.350462436676025,
23
+ "learning_rate": 2.5686765997214346e-05,
24
+ "loss": 0.6185,
25
  "step": 500
26
  },
27
  {
28
+ "epoch": 2.0,
29
+ "eval_accuracy": 0.6732638888888888,
30
+ "eval_loss": 0.684463620185852,
31
+ "eval_runtime": 6.2186,
32
+ "eval_samples_per_second": 153.57,
33
+ "eval_steps_per_second": 19.297,
 
 
 
 
 
 
 
 
 
34
  "step": 716
35
  }
36
  ],
37
  "logging_steps": 500,
38
  "max_steps": 716,
39
  "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 2,
41
  "save_steps": 500,
42
+ "total_flos": 529604659826688.0,
43
+ "train_batch_size": 8,
44
  "trial_name": null,
45
  "trial_params": {
46
+ "learning_rate": 8.514687247224755e-05,
47
+ "num_train_epochs": 2,
48
+ "per_device_train_batch_size": 8,
49
+ "seed": 40
50
  }
51
  }
runs/Mar09_20-46-55_12ef847df42e/events.out.tfevents.1710017918.12ef847df42e.11821.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:848c1d7a9c280a95f9ee01d919f6843e26fcd8a537e50d6c0500c313fe6f7baa
3
+ size 5649
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:756e0c6b01f2dc28da5639bc1c7bcb62246c7526b8bef0298780aea65e098381
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb08b78a879b2a9b7abbbaf6e8ab5cc5cddbc616d40fcf027dd248408bd13c4e
3
  size 4920