File size: 2,516 Bytes
b4b5923
 
 
a34d7ae
 
 
b4b5923
 
 
 
 
a34d7ae
 
3ee2afb
a34d7ae
b4b5923
 
 
a34d7ae
 
 
 
b4b5923
 
 
a34d7ae
 
 
 
 
 
553c12f
 
a34d7ae
 
 
 
553c12f
 
 
a34d7ae
 
 
 
 
553c12f
 
 
a34d7ae
 
 
 
 
3ee2afb
 
a34d7ae
 
 
 
 
 
3ee2afb
 
a34d7ae
 
3ee2afb
a34d7ae
 
3ee2afb
 
a34d7ae
 
 
 
 
 
3ee2afb
 
a34d7ae
 
 
 
 
 
 
b4b5923
 
 
a34d7ae
b4b5923
a34d7ae
b4b5923
 
 
 
 
 
 
 
 
 
 
 
 
a34d7ae
b4b5923
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.6,
  "eval_steps": 5,
  "global_step": 20,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 0.7405178384640212,
      "learning_rate": 0.0001,
      "loss": 1.0732,
      "step": 1
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.8403334341079279,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.865,
      "step": 5
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.7255128622055054,
      "eval_runtime": 3.1356,
      "eval_samples_per_second": 6.378,
      "eval_steps_per_second": 1.595,
      "step": 5
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.46504703007968234,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.6956,
      "step": 10
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.630705714225769,
      "eval_runtime": 1.9072,
      "eval_samples_per_second": 10.486,
      "eval_steps_per_second": 2.622,
      "step": 10
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.41977051641880375,
      "learning_rate": 3.5721239031346066e-05,
      "loss": 0.5421,
      "step": 15
    },
    {
      "epoch": 1.2,
      "eval_loss": 0.6072101593017578,
      "eval_runtime": 1.9054,
      "eval_samples_per_second": 10.497,
      "eval_steps_per_second": 2.624,
      "step": 15
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.42132802092200616,
      "learning_rate": 0.0,
      "loss": 0.5015,
      "step": 20
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.6025974750518799,
      "eval_runtime": 1.8831,
      "eval_samples_per_second": 10.621,
      "eval_steps_per_second": 2.655,
      "step": 20
    },
    {
      "epoch": 1.6,
      "step": 20,
      "total_flos": 1368128028672.0,
      "train_loss": 0.66145840883255,
      "train_runtime": 82.9739,
      "train_samples_per_second": 1.928,
      "train_steps_per_second": 0.241
    }
  ],
  "logging_steps": 5,
  "max_steps": 20,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1368128028672.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}