youssefedweqd commited on
Commit
59b7e1d
·
verified ·
1 Parent(s): 131cdf8

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d25ebe37203fab0f1f9f0b4e446da035b43b9686ba03902efb952df23824c7d
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:374da347622d668269859b72eea6f2d0b4a91b341a780d5b61734db381dfaa05
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6ebb6c65daaf13a13d5ad4ba2eef62702dc6afe14c5108f3872ae914e6a5136
3
  size 323292202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87be9e7a373870535fef2fa6152f10c18656b3e076faafdcd2eb3a7235ee483
3
  size 323292202
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a5666ca01619bd62acfd9039576fd98e2c6aac3986385a325913fa57b46962f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f8f252de09a4793e5f13e772318938c884d18658b3687d3f2f6915920086464
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.016556291390728478,
6
  "eval_steps": 100,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -30,6 +30,28 @@
30
  "eval_samples_per_second": 3.185,
31
  "eval_steps_per_second": 3.185,
32
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
  ],
35
  "logging_steps": 50,
@@ -49,7 +71,7 @@
49
  "attributes": {}
50
  }
51
  },
52
- "total_flos": 799936871399424.0,
53
  "train_batch_size": 1,
54
  "trial_name": null,
55
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.033112582781456956,
6
  "eval_steps": 100,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
30
  "eval_samples_per_second": 3.185,
31
  "eval_steps_per_second": 3.185,
32
  "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.024834437086092714,
36
+ "grad_norm": 2.393519878387451,
37
+ "learning_rate": 8.222958057395145e-06,
38
+ "loss": 1.1353,
39
+ "step": 150
40
+ },
41
+ {
42
+ "epoch": 0.033112582781456956,
43
+ "grad_norm": 2.5604305267333984,
44
+ "learning_rate": 1.0982339955849891e-05,
45
+ "loss": 1.0539,
46
+ "step": 200
47
+ },
48
+ {
49
+ "epoch": 0.033112582781456956,
50
+ "eval_loss": 1.0201358795166016,
51
+ "eval_runtime": 1896.3026,
52
+ "eval_samples_per_second": 3.185,
53
+ "eval_steps_per_second": 3.185,
54
+ "step": 200
55
  }
56
  ],
57
  "logging_steps": 50,
 
71
  "attributes": {}
72
  }
73
  },
74
+ "total_flos": 1563083893899264.0,
75
  "train_batch_size": 1,
76
  "trial_name": null,
77
  "trial_params": null