File size: 1,994 Bytes
d9fc591
 
 
90d59b0
d9fc591
90d59b0
d9fc591
 
 
 
 
90d59b0
 
d9fc591
90d59b0
 
d9fc591
 
90d59b0
 
d9fc591
90d59b0
 
d9fc591
 
90d59b0
 
c0e6a1a
90d59b0
 
d9fc591
 
90d59b0
 
c0e6a1a
90d59b0
 
d9fc591
 
90d59b0
 
c0e6a1a
90d59b0
 
d9fc591
 
90d59b0
 
c0e6a1a
90d59b0
 
d9fc591
 
90d59b0
 
 
 
 
 
 
d9fc591
 
 
90d59b0
d9fc591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90d59b0
d9fc591
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.5555555555555554,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 1.2005751132965088,
      "learning_rate": 0.0001666666666666667,
      "loss": 9.4962,
      "step": 2
    },
    {
      "epoch": 1.1851851851851851,
      "grad_norm": 1.47324538230896,
      "learning_rate": 0.00013333333333333334,
      "loss": 9.1456,
      "step": 4
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 2.0616180896759033,
      "learning_rate": 0.0001,
      "loss": 8.731,
      "step": 6
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 2.349423885345459,
      "learning_rate": 6.666666666666667e-05,
      "loss": 8.2574,
      "step": 8
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 1.723225474357605,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 7.9055,
      "step": 10
    },
    {
      "epoch": 3.5555555555555554,
      "grad_norm": 1.465832233428955,
      "learning_rate": 0.0,
      "loss": 7.7976,
      "step": 12
    },
    {
      "epoch": 3.5555555555555554,
      "step": 12,
      "total_flos": 69844190099040.0,
      "train_loss": 8.555556774139404,
      "train_runtime": 79.8982,
      "train_samples_per_second": 2.703,
      "train_steps_per_second": 0.15
    }
  ],
  "logging_steps": 2,
  "max_steps": 12,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 69844190099040.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}