guyhadad01 commited on
Commit
94dfde9
·
verified ·
1 Parent(s): b55a1cf

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cbeeecf5f33a8fe98f465a7ebe436fa13049f32a089a744575f64c982ba6f45
3
  size 471641972
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86c0b967524dfce05ac02dfb34086d01c2212c208daec09e8e093ba97faff356
3
  size 471641972
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6c6070216d6484067400562a6849b4c72f15f3f6816280335b442d080a2a835
3
  size 943408715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8139a2ef975cd69d424b05a8163888a752474327f41ca0821c02596cc4d9b0c
3
  size 943408715
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54f743afad260d48a721280fb087e2db9d559da17786d4f25babaf33caec1409
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e41409469bcb86e30ac3d1d2dddf57cacc620d120f60f2c25eb795e483356deb
3
  size 1383
last-checkpoint/trainer_state.json CHANGED
@@ -11,79 +11,79 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.00010152026598309688,
14
- "grad_norm": 22.092256546020508,
15
  "learning_rate": 0.0,
16
- "loss": 13.7932,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.010152026598309688,
21
- "grad_norm": 32.477996826171875,
22
  "learning_rate": 2.008113590263692e-05,
23
- "loss": 10.8313,
24
  "step": 100
25
  },
26
  {
27
  "epoch": 0.020304053196619376,
28
- "grad_norm": 23.895782470703125,
29
  "learning_rate": 4.036511156186613e-05,
30
- "loss": 7.3668,
31
  "step": 200
32
  },
33
  {
34
  "epoch": 0.030456079794929064,
35
- "grad_norm": 20.44896697998047,
36
  "learning_rate": 6.064908722109534e-05,
37
- "loss": 5.9214,
38
  "step": 300
39
  },
40
  {
41
  "epoch": 0.04060810639323875,
42
- "grad_norm": 12.489410400390625,
43
  "learning_rate": 8.093306288032456e-05,
44
- "loss": 5.0598,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.05076013299154844,
49
- "grad_norm": 11.84785270690918,
50
  "learning_rate": 9.9935883735841e-05,
51
- "loss": 4.5018,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.06091215958985813,
56
- "grad_norm": 10.175586700439453,
57
  "learning_rate": 9.886727933319086e-05,
58
- "loss": 4.1055,
59
  "step": 600
60
  },
61
  {
62
  "epoch": 0.07106418618816782,
63
- "grad_norm": 8.461668014526367,
64
  "learning_rate": 9.779867493054072e-05,
65
- "loss": 3.813,
66
  "step": 700
67
  },
68
  {
69
  "epoch": 0.0812162127864775,
70
- "grad_norm": 8.445116996765137,
71
  "learning_rate": 9.673007052789058e-05,
72
- "loss": 3.6419,
73
  "step": 800
74
  },
75
  {
76
  "epoch": 0.09136823938478719,
77
- "grad_norm": 8.415114402770996,
78
  "learning_rate": 9.566146612524043e-05,
79
- "loss": 3.4315,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 0.10152026598309688,
84
- "grad_norm": 8.78720760345459,
85
  "learning_rate": 9.45928617225903e-05,
86
- "loss": 3.3252,
87
  "step": 1000
88
  }
89
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.00010152026598309688,
14
+ "grad_norm": 20.906557083129883,
15
  "learning_rate": 0.0,
16
+ "loss": 13.5217,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.010152026598309688,
21
+ "grad_norm": 33.71811294555664,
22
  "learning_rate": 2.008113590263692e-05,
23
+ "loss": 10.8681,
24
  "step": 100
25
  },
26
  {
27
  "epoch": 0.020304053196619376,
28
+ "grad_norm": 27.14719581604004,
29
  "learning_rate": 4.036511156186613e-05,
30
+ "loss": 7.3949,
31
  "step": 200
32
  },
33
  {
34
  "epoch": 0.030456079794929064,
35
+ "grad_norm": 22.181039810180664,
36
  "learning_rate": 6.064908722109534e-05,
37
+ "loss": 5.9034,
38
  "step": 300
39
  },
40
  {
41
  "epoch": 0.04060810639323875,
42
+ "grad_norm": 14.685601234436035,
43
  "learning_rate": 8.093306288032456e-05,
44
+ "loss": 5.0405,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.05076013299154844,
49
+ "grad_norm": 12.026344299316406,
50
  "learning_rate": 9.9935883735841e-05,
51
+ "loss": 4.4838,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.06091215958985813,
56
+ "grad_norm": 14.332210540771484,
57
  "learning_rate": 9.886727933319086e-05,
58
+ "loss": 4.0977,
59
  "step": 600
60
  },
61
  {
62
  "epoch": 0.07106418618816782,
63
+ "grad_norm": 10.582870483398438,
64
  "learning_rate": 9.779867493054072e-05,
65
+ "loss": 3.8085,
66
  "step": 700
67
  },
68
  {
69
  "epoch": 0.0812162127864775,
70
+ "grad_norm": 8.66326904296875,
71
  "learning_rate": 9.673007052789058e-05,
72
+ "loss": 3.6189,
73
  "step": 800
74
  },
75
  {
76
  "epoch": 0.09136823938478719,
77
+ "grad_norm": 8.398235321044922,
78
  "learning_rate": 9.566146612524043e-05,
79
+ "loss": 3.4115,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 0.10152026598309688,
84
+ "grad_norm": 8.620134353637695,
85
  "learning_rate": 9.45928617225903e-05,
86
+ "loss": 3.3107,
87
  "step": 1000
88
  }
89
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f6d2140fda77ba220cd4e909f52c6a71604647d304e3a03f975d883e07d792e
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f5d55b83ef38d668a8df30659376d55e51ae6c9a754af891a613a6e26ed40b
3
  size 5905