Add files using upload-large-folder tool
Browse files- model.safetensors +1 -1
- quant_log.csv +144 -144
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 845033800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f9ceaebdb06d5deee40a8d4db27fe595dff5875a126846cde3a7317ffeb05e
|
| 3 |
size 845033800
|
quant_log.csv
CHANGED
|
@@ -1,145 +1,145 @@
|
|
| 1 |
layer,module,loss,samples,damp,time
|
| 2 |
-
0,self_attn.k_proj,0.00399020,0.01000,1.
|
| 3 |
-
0,self_attn.v_proj,0.00070882,0.01000,0.
|
| 4 |
-
0,self_attn.q_proj,0.00407133,0.01000,0.
|
| 5 |
-
0,self_attn.out_proj,0.
|
| 6 |
-
0,fc1,0.
|
| 7 |
-
0,fc2,0.
|
| 8 |
-
1,self_attn.k_proj,0.
|
| 9 |
-
1,self_attn.v_proj,0.
|
| 10 |
-
1,self_attn.q_proj,0.
|
| 11 |
-
1,self_attn.out_proj,0.
|
| 12 |
-
1,fc1,0.
|
| 13 |
-
1,fc2,0.
|
| 14 |
-
2,self_attn.k_proj,0.
|
| 15 |
-
2,self_attn.v_proj,0.
|
| 16 |
-
2,self_attn.q_proj,0.
|
| 17 |
-
2,self_attn.out_proj,0.
|
| 18 |
-
2,fc1,0.
|
| 19 |
-
2,fc2,0.
|
| 20 |
-
3,self_attn.k_proj,0.
|
| 21 |
-
3,self_attn.v_proj,0.
|
| 22 |
-
3,self_attn.q_proj,0.
|
| 23 |
-
3,self_attn.out_proj,0.
|
| 24 |
-
3,fc1,0.
|
| 25 |
-
3,fc2,0.
|
| 26 |
-
4,self_attn.k_proj,0.
|
| 27 |
-
4,self_attn.v_proj,0.
|
| 28 |
-
4,self_attn.q_proj,0.
|
| 29 |
-
4,self_attn.out_proj,0.
|
| 30 |
-
4,fc1,0.
|
| 31 |
-
4,fc2,0.
|
| 32 |
-
5,self_attn.k_proj,0.
|
| 33 |
-
5,self_attn.v_proj,0.
|
| 34 |
-
5,self_attn.q_proj,0.
|
| 35 |
-
5,self_attn.out_proj,0.
|
| 36 |
-
5,fc1,0.
|
| 37 |
-
5,fc2,0.
|
| 38 |
-
6,self_attn.k_proj,0.
|
| 39 |
-
6,self_attn.v_proj,0.
|
| 40 |
-
6,self_attn.q_proj,0.
|
| 41 |
-
6,self_attn.out_proj,0.
|
| 42 |
-
6,fc1,0.
|
| 43 |
-
6,fc2,0.
|
| 44 |
-
7,self_attn.k_proj,0.
|
| 45 |
-
7,self_attn.v_proj,0.
|
| 46 |
-
7,self_attn.q_proj,0.
|
| 47 |
-
7,self_attn.out_proj,0.
|
| 48 |
-
7,fc1,0.
|
| 49 |
-
7,fc2,0.
|
| 50 |
-
8,self_attn.k_proj,0.
|
| 51 |
-
8,self_attn.v_proj,0.
|
| 52 |
-
8,self_attn.q_proj,0.
|
| 53 |
-
8,self_attn.out_proj,0.
|
| 54 |
-
8,fc1,0.
|
| 55 |
-
8,fc2,0.
|
| 56 |
-
9,self_attn.k_proj,0.
|
| 57 |
-
9,self_attn.v_proj,0.
|
| 58 |
-
9,self_attn.q_proj,0.
|
| 59 |
-
9,self_attn.out_proj,0.
|
| 60 |
-
9,fc1,0.
|
| 61 |
-
9,fc2,0.
|
| 62 |
-
10,self_attn.k_proj,0.
|
| 63 |
-
10,self_attn.v_proj,0.
|
| 64 |
-
10,self_attn.q_proj,0.
|
| 65 |
-
10,self_attn.out_proj,0.
|
| 66 |
-
10,fc1,0.
|
| 67 |
-
10,fc2,0.
|
| 68 |
-
11,self_attn.k_proj,0.
|
| 69 |
-
11,self_attn.v_proj,0.
|
| 70 |
-
11,self_attn.q_proj,0.
|
| 71 |
-
11,self_attn.out_proj,0.
|
| 72 |
-
11,fc1,0.
|
| 73 |
-
11,fc2,0.
|
| 74 |
-
12,self_attn.k_proj,0.
|
| 75 |
-
12,self_attn.v_proj,0.
|
| 76 |
-
12,self_attn.q_proj,0.
|
| 77 |
-
12,self_attn.out_proj,0.
|
| 78 |
-
12,fc1,0.
|
| 79 |
-
12,fc2,0.
|
| 80 |
-
13,self_attn.k_proj,0.
|
| 81 |
-
13,self_attn.v_proj,0.
|
| 82 |
-
13,self_attn.q_proj,0.
|
| 83 |
-
13,self_attn.out_proj,0.
|
| 84 |
-
13,fc1,0.
|
| 85 |
-
13,fc2,0.
|
| 86 |
-
14,self_attn.k_proj,0.
|
| 87 |
-
14,self_attn.v_proj,0.
|
| 88 |
-
14,self_attn.q_proj,0.
|
| 89 |
-
14,self_attn.out_proj,0.
|
| 90 |
-
14,fc1,0.
|
| 91 |
-
14,fc2,0.
|
| 92 |
-
15,self_attn.k_proj,0.
|
| 93 |
-
15,self_attn.v_proj,0.
|
| 94 |
-
15,self_attn.q_proj,0.
|
| 95 |
-
15,self_attn.out_proj,0.
|
| 96 |
-
15,fc1,0.
|
| 97 |
-
15,fc2,0.
|
| 98 |
-
16,self_attn.k_proj,0.
|
| 99 |
-
16,self_attn.v_proj,0.
|
| 100 |
-
16,self_attn.q_proj,0.
|
| 101 |
-
16,self_attn.out_proj,0.
|
| 102 |
-
16,fc1,0.
|
| 103 |
-
16,fc2,0.
|
| 104 |
-
17,self_attn.k_proj,0.
|
| 105 |
-
17,self_attn.v_proj,0.
|
| 106 |
-
17,self_attn.q_proj,0.
|
| 107 |
-
17,self_attn.out_proj,0.
|
| 108 |
-
17,fc1,0.
|
| 109 |
-
17,fc2,0.
|
| 110 |
-
18,self_attn.k_proj,0.
|
| 111 |
-
18,self_attn.v_proj,0.
|
| 112 |
-
18,self_attn.q_proj,0.
|
| 113 |
-
18,self_attn.out_proj,0.
|
| 114 |
-
18,fc1,0.
|
| 115 |
-
18,fc2,0.
|
| 116 |
-
19,self_attn.k_proj,0.
|
| 117 |
-
19,self_attn.v_proj,0.
|
| 118 |
-
19,self_attn.q_proj,0.
|
| 119 |
-
19,self_attn.out_proj,0.
|
| 120 |
-
19,fc1,0.
|
| 121 |
-
19,fc2,0.
|
| 122 |
-
20,self_attn.k_proj,0.
|
| 123 |
-
20,self_attn.v_proj,0.
|
| 124 |
-
20,self_attn.q_proj,0.
|
| 125 |
-
20,self_attn.out_proj,0.
|
| 126 |
-
20,fc1,0.
|
| 127 |
-
20,fc2,0.
|
| 128 |
-
21,self_attn.k_proj,0.
|
| 129 |
-
21,self_attn.v_proj,0.
|
| 130 |
-
21,self_attn.q_proj,0.
|
| 131 |
-
21,self_attn.out_proj,0.
|
| 132 |
-
21,fc1,0.
|
| 133 |
-
21,fc2,0.
|
| 134 |
-
22,self_attn.k_proj,0.
|
| 135 |
-
22,self_attn.v_proj,0.
|
| 136 |
-
22,self_attn.q_proj,0.
|
| 137 |
-
22,self_attn.out_proj,0.
|
| 138 |
-
22,fc1,0.
|
| 139 |
-
22,fc2,0.
|
| 140 |
-
23,self_attn.k_proj,0.
|
| 141 |
-
23,self_attn.v_proj,0.
|
| 142 |
-
23,self_attn.q_proj,0.
|
| 143 |
-
23,self_attn.out_proj,0.
|
| 144 |
-
23,fc1,0.
|
| 145 |
-
23,fc2,0.
|
|
|
|
| 1 |
layer,module,loss,samples,damp,time
|
| 2 |
+
0,self_attn.k_proj,0.00399020,0.01000,1.769
|
| 3 |
+
0,self_attn.v_proj,0.00070882,0.01000,0.943
|
| 4 |
+
0,self_attn.q_proj,0.00407133,0.01000,0.973
|
| 5 |
+
0,self_attn.out_proj,0.00002881,0.01000,0.954
|
| 6 |
+
0,fc1,0.00841130,0.01000,0.949
|
| 7 |
+
0,fc2,0.00023788,0.01000,4.238
|
| 8 |
+
1,self_attn.k_proj,0.00265268,0.01000,0.977
|
| 9 |
+
1,self_attn.v_proj,0.00033390,0.01000,0.980
|
| 10 |
+
1,self_attn.q_proj,0.00286817,0.01000,0.962
|
| 11 |
+
1,self_attn.out_proj,0.00000700,0.01000,0.976
|
| 12 |
+
1,fc1,0.00870118,0.01000,0.987
|
| 13 |
+
1,fc2,0.00015716,0.01000,4.314
|
| 14 |
+
2,self_attn.k_proj,0.00358274,0.01000,0.969
|
| 15 |
+
2,self_attn.v_proj,0.00055633,0.01000,0.966
|
| 16 |
+
2,self_attn.q_proj,0.00357723,0.01000,0.967
|
| 17 |
+
2,self_attn.out_proj,0.00000220,0.01000,0.976
|
| 18 |
+
2,fc1,0.00947768,0.01000,0.976
|
| 19 |
+
2,fc2,0.00006839,0.01000,4.106
|
| 20 |
+
3,self_attn.k_proj,0.00388418,0.01000,0.970
|
| 21 |
+
3,self_attn.v_proj,0.00075879,0.01000,0.992
|
| 22 |
+
3,self_attn.q_proj,0.00437635,0.01000,0.970
|
| 23 |
+
3,self_attn.out_proj,0.00000383,0.01000,1.023
|
| 24 |
+
3,fc1,0.00933810,0.01000,0.972
|
| 25 |
+
3,fc2,0.00007643,0.01000,4.081
|
| 26 |
+
4,self_attn.k_proj,0.00632844,0.01000,0.963
|
| 27 |
+
4,self_attn.v_proj,0.00101882,0.01000,0.968
|
| 28 |
+
4,self_attn.q_proj,0.00671913,0.01000,0.978
|
| 29 |
+
4,self_attn.out_proj,0.00000953,0.01000,0.974
|
| 30 |
+
4,fc1,0.01132140,0.01000,1.014
|
| 31 |
+
4,fc2,0.00009407,0.01000,4.120
|
| 32 |
+
5,self_attn.k_proj,0.00836137,0.01000,0.970
|
| 33 |
+
5,self_attn.v_proj,0.00141843,0.01000,0.966
|
| 34 |
+
5,self_attn.q_proj,0.00862948,0.01000,0.977
|
| 35 |
+
5,self_attn.out_proj,0.00001295,0.01000,0.976
|
| 36 |
+
5,fc1,0.01352680,0.01000,1.009
|
| 37 |
+
5,fc2,0.00010500,0.01000,4.146
|
| 38 |
+
6,self_attn.k_proj,0.01344048,0.01000,0.964
|
| 39 |
+
6,self_attn.v_proj,0.00186100,0.01000,0.983
|
| 40 |
+
6,self_attn.q_proj,0.01191495,0.01000,0.974
|
| 41 |
+
6,self_attn.out_proj,0.00002467,0.01000,0.959
|
| 42 |
+
6,fc1,0.01742047,0.01000,0.953
|
| 43 |
+
6,fc2,0.00013119,0.01000,4.097
|
| 44 |
+
7,self_attn.k_proj,0.01503923,0.01000,0.980
|
| 45 |
+
7,self_attn.v_proj,0.00229473,0.01000,0.966
|
| 46 |
+
7,self_attn.q_proj,0.01326014,0.01000,0.976
|
| 47 |
+
7,self_attn.out_proj,0.00003410,0.01000,0.946
|
| 48 |
+
7,fc1,0.02252554,0.01000,0.981
|
| 49 |
+
7,fc2,0.00018367,0.01000,4.157
|
| 50 |
+
8,self_attn.k_proj,0.01784026,0.01000,0.954
|
| 51 |
+
8,self_attn.v_proj,0.00280950,0.01000,0.951
|
| 52 |
+
8,self_attn.q_proj,0.01452082,0.01000,0.971
|
| 53 |
+
8,self_attn.out_proj,0.00004744,0.01000,1.036
|
| 54 |
+
8,fc1,0.02815095,0.01000,0.972
|
| 55 |
+
8,fc2,0.00025076,0.01000,4.100
|
| 56 |
+
9,self_attn.k_proj,0.01993856,0.01000,0.971
|
| 57 |
+
9,self_attn.v_proj,0.00336911,0.01000,0.985
|
| 58 |
+
9,self_attn.q_proj,0.01663456,0.01000,0.974
|
| 59 |
+
9,self_attn.out_proj,0.00007329,0.01000,0.988
|
| 60 |
+
9,fc1,0.03346713,0.01000,0.991
|
| 61 |
+
9,fc2,0.00033390,0.01000,4.126
|
| 62 |
+
10,self_attn.k_proj,0.02098462,0.01000,0.952
|
| 63 |
+
10,self_attn.v_proj,0.00442903,0.01000,0.972
|
| 64 |
+
10,self_attn.q_proj,0.01737896,0.01000,0.970
|
| 65 |
+
10,self_attn.out_proj,0.00009355,0.01000,0.956
|
| 66 |
+
10,fc1,0.03937871,0.01000,0.969
|
| 67 |
+
10,fc2,0.00054157,0.01000,4.074
|
| 68 |
+
11,self_attn.k_proj,0.02233104,0.01000,0.990
|
| 69 |
+
11,self_attn.v_proj,0.00575745,0.01000,0.969
|
| 70 |
+
11,self_attn.q_proj,0.01841100,0.01000,0.988
|
| 71 |
+
11,self_attn.out_proj,0.00012954,0.01000,0.975
|
| 72 |
+
11,fc1,0.04563497,0.01000,0.976
|
| 73 |
+
11,fc2,0.00075835,0.01000,4.148
|
| 74 |
+
12,self_attn.k_proj,0.02574713,0.01000,0.971
|
| 75 |
+
12,self_attn.v_proj,0.00661912,0.01000,0.965
|
| 76 |
+
12,self_attn.q_proj,0.02014845,0.01000,0.957
|
| 77 |
+
12,self_attn.out_proj,0.00022067,0.01000,0.963
|
| 78 |
+
12,fc1,0.04933250,0.01000,0.991
|
| 79 |
+
12,fc2,0.00101735,0.01000,4.085
|
| 80 |
+
13,self_attn.k_proj,0.02849308,0.01000,0.977
|
| 81 |
+
13,self_attn.v_proj,0.00755646,0.01000,0.960
|
| 82 |
+
13,self_attn.q_proj,0.02111352,0.01000,0.955
|
| 83 |
+
13,self_attn.out_proj,0.00024542,0.01000,0.983
|
| 84 |
+
13,fc1,0.05511705,0.01000,0.962
|
| 85 |
+
13,fc2,0.00135350,0.01000,4.206
|
| 86 |
+
14,self_attn.k_proj,0.02709149,0.01000,0.963
|
| 87 |
+
14,self_attn.v_proj,0.00964833,0.01000,0.956
|
| 88 |
+
14,self_attn.q_proj,0.02070987,0.01000,0.972
|
| 89 |
+
14,self_attn.out_proj,0.00027742,0.01000,0.963
|
| 90 |
+
14,fc1,0.06202892,0.01000,0.974
|
| 91 |
+
14,fc2,0.00191707,0.01000,4.080
|
| 92 |
+
15,self_attn.k_proj,0.02484710,0.01000,0.951
|
| 93 |
+
15,self_attn.v_proj,0.01247784,0.01000,0.952
|
| 94 |
+
15,self_attn.q_proj,0.02116217,0.01000,0.983
|
| 95 |
+
15,self_attn.out_proj,0.00025006,0.01000,0.970
|
| 96 |
+
15,fc1,0.06626023,0.01000,1.001
|
| 97 |
+
15,fc2,0.00237964,0.01000,4.261
|
| 98 |
+
16,self_attn.k_proj,0.02497149,0.01000,0.968
|
| 99 |
+
16,self_attn.v_proj,0.01356158,0.01000,0.954
|
| 100 |
+
16,self_attn.q_proj,0.01884268,0.01000,0.968
|
| 101 |
+
16,self_attn.out_proj,0.00038158,0.01000,0.967
|
| 102 |
+
16,fc1,0.07270662,0.01000,0.957
|
| 103 |
+
16,fc2,0.00306718,0.01000,4.124
|
| 104 |
+
17,self_attn.k_proj,0.02425222,0.01000,0.985
|
| 105 |
+
17,self_attn.v_proj,0.01491047,0.01000,0.990
|
| 106 |
+
17,self_attn.q_proj,0.01910833,0.01000,0.981
|
| 107 |
+
17,self_attn.out_proj,0.00044458,0.01000,0.949
|
| 108 |
+
17,fc1,0.08014514,0.01000,0.976
|
| 109 |
+
17,fc2,0.00390363,0.01000,4.102
|
| 110 |
+
18,self_attn.k_proj,0.02210768,0.01000,0.986
|
| 111 |
+
18,self_attn.v_proj,0.01711883,0.01000,1.004
|
| 112 |
+
18,self_attn.q_proj,0.01853545,0.01000,0.982
|
| 113 |
+
18,self_attn.out_proj,0.00051715,0.01000,0.989
|
| 114 |
+
18,fc1,0.08440722,0.01000,0.989
|
| 115 |
+
18,fc2,0.00473098,0.01000,4.083
|
| 116 |
+
19,self_attn.k_proj,0.02249363,0.01000,0.967
|
| 117 |
+
19,self_attn.v_proj,0.02193001,0.01000,0.960
|
| 118 |
+
19,self_attn.q_proj,0.01848070,0.01000,0.968
|
| 119 |
+
19,self_attn.out_proj,0.00083592,0.01000,1.005
|
| 120 |
+
19,fc1,0.09134186,0.01000,1.013
|
| 121 |
+
19,fc2,0.00585078,0.01000,4.103
|
| 122 |
+
20,self_attn.k_proj,0.02311379,0.01000,0.952
|
| 123 |
+
20,self_attn.v_proj,0.02502821,0.01000,0.955
|
| 124 |
+
20,self_attn.q_proj,0.01801062,0.01000,0.961
|
| 125 |
+
20,self_attn.out_proj,0.00098910,0.01000,0.968
|
| 126 |
+
20,fc1,0.09524175,0.01000,0.993
|
| 127 |
+
20,fc2,0.00739040,0.01000,4.123
|
| 128 |
+
21,self_attn.k_proj,0.02165480,0.01000,0.994
|
| 129 |
+
21,self_attn.v_proj,0.02562368,0.01000,0.961
|
| 130 |
+
21,self_attn.q_proj,0.01968897,0.01000,0.964
|
| 131 |
+
21,self_attn.out_proj,0.00083486,0.01000,0.955
|
| 132 |
+
21,fc1,0.09495520,0.01000,0.957
|
| 133 |
+
21,fc2,0.00845354,0.01000,4.166
|
| 134 |
+
22,self_attn.k_proj,0.02163469,0.01000,0.990
|
| 135 |
+
22,self_attn.v_proj,0.02802912,0.01000,1.023
|
| 136 |
+
22,self_attn.q_proj,0.02326590,0.01000,0.975
|
| 137 |
+
22,self_attn.out_proj,0.00088141,0.01000,0.967
|
| 138 |
+
22,fc1,0.09237307,0.01000,0.976
|
| 139 |
+
22,fc2,0.00874107,0.01000,4.192
|
| 140 |
+
23,self_attn.k_proj,0.02720753,0.01000,0.974
|
| 141 |
+
23,self_attn.v_proj,0.01896040,0.01000,0.957
|
| 142 |
+
23,self_attn.q_proj,0.04915815,0.01000,0.973
|
| 143 |
+
23,self_attn.out_proj,0.00157263,0.01000,0.961
|
| 144 |
+
23,fc1,0.08544412,0.01000,0.981
|
| 145 |
+
23,fc2,0.00670062,0.01000,4.107
|