File size: 2,213 Bytes
8eaf820
 
 
 
 
 
 
 
 
 
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
fd2a6cb
8eaf820
fd2a6cb
8eaf820
 
 
 
 
fd2a6cb
 
 
 
 
8eaf820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd2a6cb
8eaf820
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 78,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 5.721946298566637,
      "learning_rate": 9.994965332706574e-06,
      "loss": 5.2582,
      "step": 10
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 2.8109843358784836,
      "learning_rate": 9.40297765928369e-06,
      "loss": 0.0252,
      "step": 20
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.23206407267268572,
      "learning_rate": 7.938926261462366e-06,
      "loss": 0.0201,
      "step": 30
    },
    {
      "epoch": 1.0256410256410255,
      "grad_norm": 0.45846925644305964,
      "learning_rate": 5.892784473993184e-06,
      "loss": 0.0238,
      "step": 40
    },
    {
      "epoch": 1.282051282051282,
      "grad_norm": 0.04034621588048863,
      "learning_rate": 3.669815772166625e-06,
      "loss": 0.0046,
      "step": 50
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.005579535835401499,
      "learning_rate": 1.7103063703014372e-06,
      "loss": 0.002,
      "step": 60
    },
    {
      "epoch": 1.7948717948717947,
      "grad_norm": 0.027412739220486586,
      "learning_rate": 4.0236113724274716e-07,
      "loss": 0.0027,
      "step": 70
    },
    {
      "epoch": 2.0,
      "step": 78,
      "total_flos": 8891638874112.0,
      "train_loss": 0.6841755338828079,
      "train_runtime": 566.7608,
      "train_samples_per_second": 4.34,
      "train_steps_per_second": 0.138
    }
  ],
  "logging_steps": 10,
  "max_steps": 78,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8891638874112.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}