malekradwan130 committed on
Commit
b9b181d
·
verified ·
1 Parent(s): 9bcc325

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e10d1a1f03706f449e3b2bdc8ebbf99c23a139de61252d02b890bf93f15030e
3
  size 59001752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7353692ff981605c93cf22b096dbd246c5cf5bf5fc543b490cf3abeac82a7a31
3
  size 59001752
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1da90b91619dd51767af992201daf5098869400c2005e5f7e7e8a21dc66b546f
3
  size 118086731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:149be6cff081d78c7d5b1ec35545a1530746b35e6a028f2e35055b6d9fbeb68c
3
  size 118086731
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859b15219f18ea947c283626d03dcbca0c6586c2c7533cd392aab98f051007c1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3365689e6d26d46f2bfa2d42b0b54103889b4f6a6bad2f60268c5b6325623d
3
  size 14645
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f8849200ab201085ca6ed3af3cd6f832415916a84b1e68395125ecc5ecd39e8
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3656c0abce9576d0b5083188f2c7d6efcbee0134d27c94bb8b76920474ad16de
3
  size 1383
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02dff168b6030b33e9d563d15cebe08c9687f4919e4a5b24732fd416209a558e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bebaf2e9a58bbd47b2799ce9fd04cb408b13ca7015b9a1b4e2b2bcad498171ac
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.011851676271462644,
6
  "eval_steps": 500,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -36,6 +36,62 @@
36
  "learning_rate": 0.0004941336809670538,
37
  "loss": 1.0972,
38
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  ],
41
  "logging_steps": 50,
@@ -55,7 +111,7 @@
55
  "attributes": {}
56
  }
57
  },
58
- "total_flos": 1.3711045558272e+16,
59
  "train_batch_size": 2,
60
  "trial_name": null,
61
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.035555028814387934,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
36
  "learning_rate": 0.0004941336809670538,
37
  "loss": 1.0972,
38
  "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.014814595339328307,
42
+ "grad_norm": 0.07067917287349701,
43
+ "learning_rate": 0.0004926522872718654,
44
+ "loss": 1.1012,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.017777514407193967,
49
+ "grad_norm": 0.12936587631702423,
50
+ "learning_rate": 0.0004911708935766769,
51
+ "loss": 1.0908,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.02074043347505963,
56
+ "grad_norm": 0.09118826687335968,
57
+ "learning_rate": 0.0004896894998814885,
58
+ "loss": 1.0773,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.02370335254292529,
63
+ "grad_norm": 0.08850109577178955,
64
+ "learning_rate": 0.00048820810618630005,
65
+ "loss": 1.1012,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.02666627161079095,
70
+ "grad_norm": 0.07888604700565338,
71
+ "learning_rate": 0.00048672671249111167,
72
+ "loss": 1.1343,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.029629190678656613,
77
+ "grad_norm": 0.0906878113746643,
78
+ "learning_rate": 0.00048524531879592323,
79
+ "loss": 1.1184,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.032592109746522276,
84
+ "grad_norm": 0.07720430195331573,
85
+ "learning_rate": 0.0004837639251007348,
86
+ "loss": 1.0968,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.035555028814387934,
91
+ "grad_norm": 0.0831717997789383,
92
+ "learning_rate": 0.00048228253140554636,
93
+ "loss": 1.0983,
94
+ "step": 600
95
  }
96
  ],
97
  "logging_steps": 50,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 4.1133136674816e+16,
115
  "train_batch_size": 2,
116
  "trial_name": null,
117
  "trial_params": null