File size: 2,695 Bytes
0f92b28
 
 
f00b7cc
57f1ae4
f00b7cc
0f92b28
 
 
 
694a3e4
f00b7cc
 
 
 
57f1ae4
694a3e4
 
f00b7cc
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
 
 
 
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
 
 
 
 
 
 
181a7d9
 
 
f00b7cc
 
 
 
181a7d9
 
 
 
f00b7cc
 
 
 
 
 
0f92b28
 
 
f00b7cc
0f92b28
181a7d9
0f92b28
f00b7cc
 
0f92b28
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 28,
  "global_step": 67,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "grad_norm": 9.4375,
      "learning_rate": 1.8153846153846155e-05,
      "loss": 0.4282,
      "step": 8
    },
    {
      "epoch": 0.24,
      "grad_norm": 10.5,
      "learning_rate": 1.5692307692307693e-05,
      "loss": 0.4335,
      "step": 16
    },
    {
      "epoch": 0.36,
      "grad_norm": 5.03125,
      "learning_rate": 1.3230769230769231e-05,
      "loss": 0.3984,
      "step": 24
    },
    {
      "epoch": 0.42,
      "eval_accuracy": 0.8166666666666667,
      "eval_f1_score": 0.8080459770114942,
      "eval_gmean": 0.7501820727030913,
      "eval_loss": 0.5816406011581421,
      "eval_precision": 0.8133022774327122,
      "eval_recall": 0.8166666666666667,
      "eval_runtime": 175.4354,
      "eval_samples_per_second": 0.342,
      "eval_steps_per_second": 0.046,
      "step": 28
    },
    {
      "epoch": 0.48,
      "grad_norm": 6.625,
      "learning_rate": 1.076923076923077e-05,
      "loss": 0.3979,
      "step": 32
    },
    {
      "epoch": 0.6,
      "grad_norm": 7.875,
      "learning_rate": 8.307692307692309e-06,
      "loss": 0.3657,
      "step": 40
    },
    {
      "epoch": 0.72,
      "grad_norm": 10.0,
      "learning_rate": 5.846153846153847e-06,
      "loss": 0.38,
      "step": 48
    },
    {
      "epoch": 0.84,
      "grad_norm": 9.6875,
      "learning_rate": 3.384615384615385e-06,
      "loss": 0.4054,
      "step": 56
    },
    {
      "epoch": 0.84,
      "eval_accuracy": 0.8166666666666667,
      "eval_f1_score": 0.8080459770114942,
      "eval_gmean": 0.7501820727030913,
      "eval_loss": 0.5850911736488342,
      "eval_precision": 0.8133022774327122,
      "eval_recall": 0.8166666666666667,
      "eval_runtime": 173.49,
      "eval_samples_per_second": 0.346,
      "eval_steps_per_second": 0.046,
      "step": 56
    },
    {
      "epoch": 0.96,
      "grad_norm": 8.8125,
      "learning_rate": 9.230769230769232e-07,
      "loss": 0.3864,
      "step": 64
    },
    {
      "epoch": 1.0,
      "step": 67,
      "total_flos": 8.435821078904832e+16,
      "train_loss": 0.405215135261194,
      "train_runtime": 12957.5785,
      "train_samples_per_second": 0.33,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 8,
  "max_steps": 67,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 8.435821078904832e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}