danield12 committed on
Commit
32027ee
·
verified ·
1 Parent(s): 57f1ae4

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e01fc1d25dedc753f4825dabab388ed7180db6815fc7516b723efdbd0fc2c1
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d661b5d8e28c6286c4f688153e59f25326218295967aead1da1fcca840dd8cde
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 0.99,
3
- "eval_accuracy": 0.78,
4
- "eval_f1_score": 0.7760000000000001,
5
- "eval_gmean": 0.7929678856251685,
6
- "eval_loss": 0.5592187643051147,
7
- "eval_precision": 0.82625,
8
- "eval_recall": 0.78,
9
- "eval_runtime": 111.3804,
10
- "eval_samples_per_second": 0.449,
11
- "eval_steps_per_second": 0.063,
12
- "total_flos": 8.453461911404544e+16,
13
- "train_loss": 0.6594164299242424,
14
- "train_runtime": 12749.8583,
15
- "train_samples_per_second": 0.336,
16
- "train_steps_per_second": 0.003
17
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.72,
4
+ "eval_f1_score": 0.7154911433172302,
5
+ "eval_gmean": 0.7399824497743348,
6
+ "eval_loss": 0.6498633027076721,
7
+ "eval_precision": 0.7727777777777777,
8
+ "eval_recall": 0.72,
9
+ "eval_runtime": 137.6618,
10
+ "eval_samples_per_second": 0.363,
11
+ "eval_steps_per_second": 0.051,
12
+ "total_flos": 8.468389371174912e+16,
13
+ "train_loss": 0.9084581972947762,
14
+ "train_runtime": 14096.0939,
15
+ "train_samples_per_second": 0.304,
16
+ "train_steps_per_second": 0.005
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 0.99,
3
- "eval_accuracy": 0.78,
4
- "eval_f1_score": 0.7760000000000001,
5
- "eval_gmean": 0.7929678856251685,
6
- "eval_loss": 0.5592187643051147,
7
- "eval_precision": 0.82625,
8
- "eval_recall": 0.78,
9
- "eval_runtime": 111.3804,
10
- "eval_samples_per_second": 0.449,
11
- "eval_steps_per_second": 0.063
12
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.72,
4
+ "eval_f1_score": 0.7154911433172302,
5
+ "eval_gmean": 0.7399824497743348,
6
+ "eval_loss": 0.6498633027076721,
7
+ "eval_precision": 0.7727777777777777,
8
+ "eval_recall": 0.72,
9
+ "eval_runtime": 137.6618,
10
+ "eval_samples_per_second": 0.363,
11
+ "eval_steps_per_second": 0.051
12
  }
runs/Apr01_20-59-31_966bae07cabd/events.out.tfevents.1712005173.966bae07cabd.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2af5e154e7787b72185f5f03a708d8ef21c994033ded01a0f686e55b9b7fad7
3
+ size 7948
runs/Apr01_20-59-31_966bae07cabd/events.out.tfevents.1712019406.966bae07cabd.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45e57e67436a7e93efa978b3043c3c93592f8f2660f789f90daec17a59b5e40
3
+ size 605
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.99,
3
- "total_flos": 8.453461911404544e+16,
4
- "train_loss": 0.6594164299242424,
5
- "train_runtime": 12749.8583,
6
- "train_samples_per_second": 0.336,
7
- "train_steps_per_second": 0.003
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "total_flos": 8.468389371174912e+16,
4
+ "train_loss": 0.9084581972947762,
5
+ "train_runtime": 14096.0939,
6
+ "train_samples_per_second": 0.304,
7
+ "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -1,71 +1,112 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9850746268656716,
5
  "eval_steps": 28,
6
- "global_step": 33,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.24,
13
- "grad_norm": 19.75,
14
- "learning_rate": 1.6129032258064517e-05,
15
- "loss": 0.7026,
16
  "step": 8
17
  },
18
  {
19
- "epoch": 0.48,
20
- "grad_norm": 14.875,
21
- "learning_rate": 1.096774193548387e-05,
22
- "loss": 0.6661,
23
  "step": 16
24
  },
25
  {
26
- "epoch": 0.72,
27
- "grad_norm": 18.375,
28
- "learning_rate": 5.806451612903226e-06,
29
- "loss": 0.6407,
30
  "step": 24
31
  },
32
  {
33
- "epoch": 0.84,
34
- "eval_accuracy": 0.78,
35
- "eval_f1_score": 0.7760000000000001,
36
- "eval_gmean": 0.7929678856251685,
37
- "eval_loss": 0.5600000023841858,
38
- "eval_precision": 0.82625,
39
- "eval_recall": 0.78,
40
- "eval_runtime": 108.8057,
41
- "eval_samples_per_second": 0.46,
42
- "eval_steps_per_second": 0.064,
43
  "step": 28
44
  },
45
  {
46
- "epoch": 0.96,
47
- "grad_norm": 16.25,
48
- "learning_rate": 6.451612903225807e-07,
49
- "loss": 0.6313,
50
  "step": 32
51
  },
52
  {
53
- "epoch": 0.99,
54
- "step": 33,
55
- "total_flos": 8.453461911404544e+16,
56
- "train_loss": 0.6594164299242424,
57
- "train_runtime": 12749.8583,
58
- "train_samples_per_second": 0.336,
59
- "train_steps_per_second": 0.003
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  }
61
  ],
62
  "logging_steps": 8,
63
- "max_steps": 33,
64
  "num_input_tokens_seen": 0,
65
  "num_train_epochs": 1,
66
  "save_steps": 500,
67
- "total_flos": 8.453461911404544e+16,
68
- "train_batch_size": 32,
69
  "trial_name": null,
70
  "trial_params": null
71
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 28,
6
+ "global_step": 67,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.12,
13
+ "grad_norm": 47.25,
14
+ "learning_rate": 1.8153846153846155e-05,
15
+ "loss": 1.1395,
16
  "step": 8
17
  },
18
  {
19
+ "epoch": 0.24,
20
+ "grad_norm": 39.75,
21
+ "learning_rate": 1.5692307692307693e-05,
22
+ "loss": 1.0434,
23
  "step": 16
24
  },
25
  {
26
+ "epoch": 0.36,
27
+ "grad_norm": 42.0,
28
+ "learning_rate": 1.3230769230769231e-05,
29
+ "loss": 0.8923,
30
  "step": 24
31
  },
32
  {
33
+ "epoch": 0.42,
34
+ "eval_accuracy": 0.66,
35
+ "eval_f1_score": 0.6473765809873521,
36
+ "eval_gmean": 0.6861694917646597,
37
+ "eval_loss": 0.692578136920929,
38
+ "eval_precision": 0.7367619047619047,
39
+ "eval_recall": 0.66,
40
+ "eval_runtime": 137.4001,
41
+ "eval_samples_per_second": 0.364,
42
+ "eval_steps_per_second": 0.051,
43
  "step": 28
44
  },
45
  {
46
+ "epoch": 0.48,
47
+ "grad_norm": 41.5,
48
+ "learning_rate": 1.076923076923077e-05,
49
+ "loss": 0.8968,
50
  "step": 32
51
  },
52
  {
53
+ "epoch": 0.6,
54
+ "grad_norm": 40.75,
55
+ "learning_rate": 8.307692307692309e-06,
56
+ "loss": 0.8597,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 0.72,
61
+ "grad_norm": 29.0,
62
+ "learning_rate": 5.846153846153847e-06,
63
+ "loss": 0.8077,
64
+ "step": 48
65
+ },
66
+ {
67
+ "epoch": 0.84,
68
+ "grad_norm": 37.75,
69
+ "learning_rate": 3.384615384615385e-06,
70
+ "loss": 0.8601,
71
+ "step": 56
72
+ },
73
+ {
74
+ "epoch": 0.84,
75
+ "eval_accuracy": 0.72,
76
+ "eval_f1_score": 0.7154911433172302,
77
+ "eval_gmean": 0.7399824497743348,
78
+ "eval_loss": 0.6498633027076721,
79
+ "eval_precision": 0.7727777777777777,
80
+ "eval_recall": 0.72,
81
+ "eval_runtime": 137.4306,
82
+ "eval_samples_per_second": 0.364,
83
+ "eval_steps_per_second": 0.051,
84
+ "step": 56
85
+ },
86
+ {
87
+ "epoch": 0.96,
88
+ "grad_norm": 39.25,
89
+ "learning_rate": 9.230769230769232e-07,
90
+ "loss": 0.8283,
91
+ "step": 64
92
+ },
93
+ {
94
+ "epoch": 1.0,
95
+ "step": 67,
96
+ "total_flos": 8.468389371174912e+16,
97
+ "train_loss": 0.9084581972947762,
98
+ "train_runtime": 14096.0939,
99
+ "train_samples_per_second": 0.304,
100
+ "train_steps_per_second": 0.005
101
  }
102
  ],
103
  "logging_steps": 8,
104
+ "max_steps": 67,
105
  "num_input_tokens_seen": 0,
106
  "num_train_epochs": 1,
107
  "save_steps": 500,
108
+ "total_flos": 8.468389371174912e+16,
109
+ "train_batch_size": 16,
110
  "trial_name": null,
111
  "trial_params": null
112
  }