File size: 2,327 Bytes
5975792
 
 
 
 
 
 
 
 
 
 
 
 
e334eae
 
 
 
5975792
 
 
 
e334eae
5975792
e334eae
5975792
 
 
 
e334eae
5975792
e334eae
5975792
 
 
 
e334eae
5975792
e334eae
5975792
 
 
 
e334eae
 
 
 
5975792
 
 
 
e334eae
5975792
e334eae
5975792
 
 
 
e334eae
5975792
e334eae
5975792
 
 
 
e334eae
5975792
e334eae
5975792
 
 
 
e334eae
 
 
 
5975792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.07430340557275542,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "eval_loss": 0.9052417278289795,
      "eval_runtime": 8.4448,
      "eval_samples_per_second": 4.026,
      "eval_steps_per_second": 2.013,
      "step": 0
    },
    {
      "epoch": 0.01238390092879257,
      "grad_norm": 0.8082026243209839,
      "learning_rate": 0.0,
      "loss": 1.1985,
      "step": 1
    },
    {
      "epoch": 0.02476780185758514,
      "grad_norm": 0.50789475440979,
      "learning_rate": 2e-05,
      "loss": 0.6888,
      "step": 2
    },
    {
      "epoch": 0.03715170278637771,
      "grad_norm": 0.8321412205696106,
      "learning_rate": 4e-05,
      "loss": 1.0164,
      "step": 3
    },
    {
      "epoch": 0.03715170278637771,
      "eval_loss": 0.9056991338729858,
      "eval_runtime": 7.8859,
      "eval_samples_per_second": 4.312,
      "eval_steps_per_second": 2.156,
      "step": 3
    },
    {
      "epoch": 0.04953560371517028,
      "grad_norm": 0.4088127613067627,
      "learning_rate": 6e-05,
      "loss": 0.7558,
      "step": 4
    },
    {
      "epoch": 0.06191950464396285,
      "grad_norm": 0.7107352614402771,
      "learning_rate": 8e-05,
      "loss": 1.2576,
      "step": 5
    },
    {
      "epoch": 0.07430340557275542,
      "grad_norm": 0.5187698006629944,
      "learning_rate": 0.0001,
      "loss": 1.0848,
      "step": 6
    },
    {
      "epoch": 0.07430340557275542,
      "eval_loss": 0.9046434164047241,
      "eval_runtime": 7.844,
      "eval_samples_per_second": 4.335,
      "eval_steps_per_second": 2.167,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 144327309262848.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}