peterbeamish committed on
Commit
8182378
·
1 Parent(s): 94febd1

Model save

Browse files
Files changed (3) hide show
  1. README.md +10 -10
  2. trainer_state.json +12 -76
  3. training_args.bin +1 -1
README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
18
  This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: nan
21
- - Rouge1: 0.0259
22
- - Rouge2: 0.0047
23
- - Rougel: 0.0256
24
- - Rougelsum: 0.0255
25
- - Gen Len: 17.1436
26
 
27
  ## Model description
28
 
@@ -41,7 +41,7 @@ More information needed
41
  ### Training hyperparameters
42
 
43
  The following hyperparameters were used during training:
44
- - learning_rate: 2e-05
45
  - train_batch_size: 16
46
  - eval_batch_size: 16
47
  - seed: 42
@@ -53,10 +53,10 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
56
- | No log | 1.0 | 252 | nan | 0.0259 | 0.0047 | 0.0256 | 0.0255 | 17.1436 |
57
- | 0.0 | 2.0 | 504 | nan | 0.0259 | 0.0047 | 0.0256 | 0.0255 | 17.1436 |
58
- | 0.0 | 3.0 | 756 | nan | 0.0259 | 0.0047 | 0.0256 | 0.0255 | 17.1436 |
59
- | 0.0 | 4.0 | 1008 | nan | 0.0259 | 0.0047 | 0.0256 | 0.0255 | 17.1436 |
60
 
61
 
62
  ### Framework versions
 
18
  This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: nan
21
+ - Rouge1: 0.04
22
+ - Rouge2: 0.0
23
+ - Rougel: 0.04
24
+ - Rougelsum: 0.04
25
+ - Gen Len: 18.4
26
 
27
  ## Model description
28
 
 
41
  ### Training hyperparameters
42
 
43
  The following hyperparameters were used during training:
44
+ - learning_rate: 5e-05
45
  - train_batch_size: 16
46
  - eval_batch_size: 16
47
  - seed: 42
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
56
+ | No log | 1.0 | 1 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
57
+ | No log | 2.0 | 2 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
58
+ | No log | 3.0 | 3 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
59
+ | No log | 4.0 | 4 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
60
 
61
 
62
  ### Framework versions
trainer_state.json CHANGED
@@ -1,92 +1,28 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
- "eval_steps": 500,
6
- "global_step": 1008,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "eval_gen_len": 17.1436,
14
- "eval_loss": NaN,
15
- "eval_rouge1": 0.0259,
16
- "eval_rouge2": 0.0047,
17
- "eval_rougeL": 0.0256,
18
- "eval_rougeLsum": 0.0255,
19
- "eval_runtime": 105.9672,
20
- "eval_samples_per_second": 38.05,
21
- "eval_steps_per_second": 2.378,
22
- "step": 252
23
- },
24
- {
25
- "epoch": 1.98,
26
- "learning_rate": 2e-05,
27
- "loss": 0.0,
28
- "step": 500
29
- },
30
- {
31
- "epoch": 2.0,
32
- "eval_gen_len": 17.1436,
33
- "eval_loss": NaN,
34
- "eval_rouge1": 0.0259,
35
- "eval_rouge2": 0.0047,
36
- "eval_rougeL": 0.0256,
37
- "eval_rougeLsum": 0.0255,
38
- "eval_runtime": 106.6812,
39
- "eval_samples_per_second": 37.795,
40
- "eval_steps_per_second": 2.362,
41
- "step": 504
42
- },
43
  {
44
  "epoch": 3.0,
45
- "eval_gen_len": 17.1436,
46
- "eval_loss": NaN,
47
- "eval_rouge1": 0.0259,
48
- "eval_rouge2": 0.0047,
49
- "eval_rougeL": 0.0256,
50
- "eval_rougeLsum": 0.0255,
51
- "eval_runtime": 108.1549,
52
- "eval_samples_per_second": 37.28,
53
- "eval_steps_per_second": 2.33,
54
- "step": 756
55
- },
56
- {
57
- "epoch": 3.97,
58
- "learning_rate": 2e-05,
59
- "loss": 0.0,
60
- "step": 1000
61
- },
62
- {
63
- "epoch": 4.0,
64
- "eval_gen_len": 17.1436,
65
- "eval_loss": NaN,
66
- "eval_rouge1": 0.0259,
67
- "eval_rouge2": 0.0047,
68
- "eval_rougeL": 0.0256,
69
- "eval_rougeLsum": 0.0255,
70
- "eval_runtime": 109.2088,
71
- "eval_samples_per_second": 36.92,
72
- "eval_steps_per_second": 2.308,
73
- "step": 1008
74
- },
75
- {
76
- "epoch": 4.0,
77
- "step": 1008,
78
- "total_flos": 2997298497847296.0,
79
  "train_loss": 0.0,
80
- "train_runtime": 577.5801,
81
- "train_samples_per_second": 27.916,
82
- "train_steps_per_second": 1.745
83
  }
84
  ],
85
- "logging_steps": 500,
86
- "max_steps": 1008,
87
- "num_train_epochs": 4,
88
  "save_steps": 500,
89
- "total_flos": 2997298497847296.0,
90
  "trial_name": null,
91
  "trial_params": null
92
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 8000,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 3.0,
13
+ "step": 15,
14
+ "total_flos": 5576715141120.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "train_loss": 0.0,
16
+ "train_runtime": 1.4301,
17
+ "train_samples_per_second": 20.977,
18
+ "train_steps_per_second": 10.489
19
  }
20
  ],
21
+ "logging_steps": 1000,
22
+ "max_steps": 15,
23
+ "num_train_epochs": 3,
24
  "save_steps": 500,
25
+ "total_flos": 5576715141120.0,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d310d374aa2849a37e78202fd9915ecc4961d20fb6d34fdf96221cedd54b02e
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9f74c9e705ad2deec5fc5ba4128d1061376466dc7dc6641b85cda934250fba4
3
  size 4155