ShengdingHu committed on
Commit
e0dc0d9
·
1 Parent(s): 865446e

Training in progress, step 100

Browse files
all_results.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 85.71428571428571,
4
- "eval_average_metrics": 73.0,
5
- "eval_f1_multiclass": 60.285714285714285,
6
- "eval_loss": 0.18336069583892822,
7
- "eval_runtime": 0.35,
8
- "eval_samples_per_second": 80.007,
9
- "test_accuracy": 78.57142857142857,
10
- "test_average_metrics": 66.48351648351648,
11
- "test_f1_multiclass": 54.395604395604394,
12
- "test_loss": 0.2799363434314728,
13
- "test_runtime": 0.3233,
14
- "test_samples_per_second": 86.617,
15
- "train_loss": 0.5726573467254639,
16
- "train_runtime": 63.2805,
17
  "train_samples": 250,
18
- "train_samples_per_second": 79.013,
19
- "train_steps_per_second": 2.528
20
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 100.0,
4
+ "eval_average_metrics": 100.0,
5
+ "eval_f1_multiclass": 100.0,
6
+ "eval_loss": 0.007105956319719553,
7
+ "eval_runtime": 0.2511,
8
+ "eval_samples_per_second": 111.49,
9
+ "test_accuracy": 82.14285714285714,
10
+ "test_average_metrics": 76.00732600732601,
11
+ "test_f1_multiclass": 69.87179487179488,
12
+ "test_loss": 0.1506887525320053,
13
+ "test_runtime": 0.2093,
14
+ "test_samples_per_second": 133.792,
15
+ "train_loss": 0.19091113805770873,
16
+ "train_runtime": 58.4291,
17
  "train_samples": 250,
18
+ "train_samples_per_second": 85.574,
19
+ "train_steps_per_second": 2.738
20
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 85.71428571428571,
4
- "eval_average_metrics": 73.0,
5
- "eval_f1_multiclass": 60.285714285714285,
6
- "eval_loss": 0.18336069583892822,
7
- "eval_runtime": 0.35,
8
- "eval_samples_per_second": 80.007
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 100.0,
4
+ "eval_average_metrics": 100.0,
5
+ "eval_f1_multiclass": 100.0,
6
+ "eval_loss": 0.007105956319719553,
7
+ "eval_runtime": 0.2511,
8
+ "eval_samples_per_second": 111.49
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0573c78cbc10c56dd04e51ad3381b15ae1bb52835f7a90be1fd2daa19a59947e
3
- size 7551621
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea14cf46ad1619ee1ef08e0ad3fd81cec57e290129f14d4585b6a468f1f0bd36
3
+ size 2631685
runs/Feb01_02-59-20_node2/events.out.tfevents.1643655675.node2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96fde66482d741bcfe90036e27c50a26b7a2302a2d15fbe9abfb70f670a40b1a
3
- size 4324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b63714958fb6afa0b43c78c085d6bc30bc7f8d6ce3c2cef0174e13548e430044
3
+ size 5050
runs/Feb01_02-59-20_node2/events.out.tfevents.1643655734.node2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d412c355b5d78bcd746aa140c413f9f9c3408d1f12f4ec20b3c48319366a6036
3
+ size 798
runs/Feb02_16-19-40_node1/1643790011.7594028/events.out.tfevents.1643790011.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2eec56d84bc60fca26bf5ca6fc935bf39a6628c8eb25fb7889c3db7cc9bb069
3
+ size 5035
runs/Feb02_16-19-40_node1/events.out.tfevents.1643790011.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9109a7c681983ea1d1823d23c916baf9d2d0778ae35e13ff774d9516b83166
3
+ size 4324
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "test_accuracy": 78.57142857142857,
4
- "test_average_metrics": 66.48351648351648,
5
- "test_f1_multiclass": 54.395604395604394,
6
- "test_loss": 0.2799363434314728,
7
- "test_runtime": 0.3233,
8
- "test_samples_per_second": 86.617
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "test_accuracy": 82.14285714285714,
4
+ "test_average_metrics": 76.00732600732601,
5
+ "test_f1_multiclass": 69.87179487179488,
6
+ "test_loss": 0.1506887525320053,
7
+ "test_runtime": 0.2093,
8
+ "test_samples_per_second": 133.792
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.5726573467254639,
4
- "train_runtime": 63.2805,
5
  "train_samples": 250,
6
- "train_samples_per_second": 79.013,
7
- "train_steps_per_second": 2.528
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.19091113805770873,
4
+ "train_runtime": 58.4291,
5
  "train_samples": 250,
6
+ "train_samples_per_second": 85.574,
7
+ "train_steps_per_second": 2.738
8
  }
trainer_state.json CHANGED
@@ -1,25 +1,35 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
  "epoch": 20.0,
5
  "global_step": 160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 20.0,
12
  "step": 160,
13
- "total_flos": 1311789053515248.0,
14
- "train_loss": 0.5726573467254639,
15
- "train_runtime": 63.2805,
16
- "train_samples_per_second": 79.013,
17
- "train_steps_per_second": 2.528
18
  }
19
  ],
20
  "max_steps": 160,
21
  "num_train_epochs": 20,
22
- "total_flos": 1311789053515248.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
 
1
  {
2
+ "best_metric": 100.0,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/superglue-cb/checkpoint-100",
4
  "epoch": 20.0,
5
  "global_step": 160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 12.5,
12
+ "eval_accuracy": 100.0,
13
+ "eval_average_metrics": 100.0,
14
+ "eval_f1_multiclass": 100.0,
15
+ "eval_loss": 0.007105956319719553,
16
+ "eval_runtime": 0.2475,
17
+ "eval_samples_per_second": 113.147,
18
+ "step": 100
19
+ },
20
  {
21
  "epoch": 20.0,
22
  "step": 160,
23
+ "total_flos": 1322112125892096.0,
24
+ "train_loss": 0.19091113805770873,
25
+ "train_runtime": 58.4291,
26
+ "train_samples_per_second": 85.574,
27
+ "train_steps_per_second": 2.738
28
  }
29
  ],
30
  "max_steps": 160,
31
  "num_train_epochs": 20,
32
+ "total_flos": 1322112125892096.0,
33
  "trial_name": null,
34
  "trial_params": null
35
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77ed515ec8421abcec58cda3fc06acdc764638bd9430acc9cf100024288e02aa
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4011332cc5f8c28977c05caffbccdc5053455cedcf074b3a5f9c7bae07772739
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "superglue-cb", "eval_steps": 100, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 256, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/superglue-cb", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 100, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "superglue-cb", "test_dataset_config_name": ["en"], "test_dataset_name": "superglue-cb", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}
 
1
+ {"dataset_config_name": ["en"], "delta_type": "lora", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "superglue-cb", "eval_steps": 100, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "lora_r": 8, "max_source_length": 256, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/superglue-cb", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 100, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "superglue-cb", "test_dataset_config_name": ["en"], "test_dataset_name": "superglue-cb", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}