Upload SIGMA_v13 checkpoints and results
Browse files- checkpoints_a3_best/best_meta.json +7 -0
- checkpoints_a3_best/best_model.pt +3 -0
- results_a3/a3_curve.png +0 -0
- results_a3/a3_final.json +153 -0
checkpoints_a3_best/best_meta.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 10,
|
| 3 |
+
"step": 3020,
|
| 4 |
+
"mrr": 0.9637941398196868,
|
| 5 |
+
"r1": 0.946,
|
| 6 |
+
"r10": 0.996
|
| 7 |
+
}
|
checkpoints_a3_best/best_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00f8ac69bda9ba7d11bb8d30563a93415933a85f958391df872244fb45f86da3
|
| 3 |
+
size 58984949
|
results_a3/a3_curve.png
ADDED
|
results_a3/a3_final.json
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": {
|
| 3 |
+
"dim": 256,
|
| 4 |
+
"n_steps": 2,
|
| 5 |
+
"top_k": 16,
|
| 6 |
+
"rank": 4,
|
| 7 |
+
"n_heads": 2,
|
| 8 |
+
"warmup_ratio": 0.1,
|
| 9 |
+
"ema_decay": 0.999,
|
| 10 |
+
"batch_size": 128
|
| 11 |
+
},
|
| 12 |
+
"final_metrics": {
|
| 13 |
+
"mrr": 0.971924885660479,
|
| 14 |
+
"r1": 0.958,
|
| 15 |
+
"r5": 0.99,
|
| 16 |
+
"r10": 0.998
|
| 17 |
+
},
|
| 18 |
+
"best_mrr": 0.9637941398196868,
|
| 19 |
+
"history": {
|
| 20 |
+
"step": [
|
| 21 |
+
151,
|
| 22 |
+
302,
|
| 23 |
+
453,
|
| 24 |
+
604,
|
| 25 |
+
755,
|
| 26 |
+
906,
|
| 27 |
+
1057,
|
| 28 |
+
1208,
|
| 29 |
+
1359,
|
| 30 |
+
1510,
|
| 31 |
+
1661,
|
| 32 |
+
1812,
|
| 33 |
+
1963,
|
| 34 |
+
2114,
|
| 35 |
+
2265,
|
| 36 |
+
2416,
|
| 37 |
+
2567,
|
| 38 |
+
2718,
|
| 39 |
+
2869,
|
| 40 |
+
3020
|
| 41 |
+
],
|
| 42 |
+
"loss": [
|
| 43 |
+
4.18417600764344,
|
| 44 |
+
3.4565066201797383,
|
| 45 |
+
1.407371061921909,
|
| 46 |
+
1.094078879087966,
|
| 47 |
+
0.3985431365027333,
|
| 48 |
+
0.3693263522836546,
|
| 49 |
+
0.20021145346741012,
|
| 50 |
+
0.1884872525288964,
|
| 51 |
+
0.1103784946219021,
|
| 52 |
+
0.10422923401096798,
|
| 53 |
+
0.06833229100467353,
|
| 54 |
+
0.06716461970612703,
|
| 55 |
+
0.05489896299527181,
|
| 56 |
+
0.054119892233353575,
|
| 57 |
+
0.04856092991023664,
|
| 58 |
+
0.048116377746032564,
|
| 59 |
+
0.04458457325270634,
|
| 60 |
+
0.042907547224189664,
|
| 61 |
+
0.04156933941983229,
|
| 62 |
+
0.04068860187756502
|
| 63 |
+
],
|
| 64 |
+
"mrr": [
|
| 65 |
+
0.011174324410224933,
|
| 66 |
+
0.015715928576686647,
|
| 67 |
+
0.01904165526061832,
|
| 68 |
+
0.026162325605719954,
|
| 69 |
+
0.10163972497412811,
|
| 70 |
+
0.28441519777968016,
|
| 71 |
+
0.5058603058619553,
|
| 72 |
+
0.7110317882665718,
|
| 73 |
+
0.8148222379150314,
|
| 74 |
+
0.865726731550368,
|
| 75 |
+
0.8932233508158076,
|
| 76 |
+
0.9137322042842024,
|
| 77 |
+
0.9355255315145559,
|
| 78 |
+
0.9403312601613473,
|
| 79 |
+
0.9484149910255737,
|
| 80 |
+
0.9540925315425316,
|
| 81 |
+
0.9555926131339925,
|
| 82 |
+
0.9586867965367964,
|
| 83 |
+
0.9618191602099245,
|
| 84 |
+
0.9637941398196868
|
| 85 |
+
],
|
| 86 |
+
"r1": [
|
| 87 |
+
0.002,
|
| 88 |
+
0.004,
|
| 89 |
+
0.006,
|
| 90 |
+
0.006,
|
| 91 |
+
0.048,
|
| 92 |
+
0.178,
|
| 93 |
+
0.374,
|
| 94 |
+
0.616,
|
| 95 |
+
0.742,
|
| 96 |
+
0.812,
|
| 97 |
+
0.848,
|
| 98 |
+
0.874,
|
| 99 |
+
0.91,
|
| 100 |
+
0.914,
|
| 101 |
+
0.928,
|
| 102 |
+
0.934,
|
| 103 |
+
0.934,
|
| 104 |
+
0.938,
|
| 105 |
+
0.944,
|
| 106 |
+
0.946
|
| 107 |
+
],
|
| 108 |
+
"grad_norm": [
|
| 109 |
+
4.954794731140137,
|
| 110 |
+
6.383665900230408,
|
| 111 |
+
4.369035260677338,
|
| 112 |
+
2.579656716585159,
|
| 113 |
+
1.6275103175640107,
|
| 114 |
+
1.3361677485704422,
|
| 115 |
+
0.859700264930725,
|
| 116 |
+
0.7522292715311051,
|
| 117 |
+
0.4702634619176388,
|
| 118 |
+
0.45671847328543663,
|
| 119 |
+
0.28335013419389726,
|
| 120 |
+
0.2868455497920513,
|
| 121 |
+
0.20585744976997375,
|
| 122 |
+
0.20741430923342705,
|
| 123 |
+
0.17761142425239085,
|
| 124 |
+
0.17609935820102693,
|
| 125 |
+
0.16816730216145515,
|
| 126 |
+
0.15923061683773995,
|
| 127 |
+
0.15379879228770732,
|
| 128 |
+
0.1542398615926504
|
| 129 |
+
],
|
| 130 |
+
"lr": [
|
| 131 |
+
0.0005,
|
| 132 |
+
0.001,
|
| 133 |
+
0.000992403876506104,
|
| 134 |
+
0.0009698463103929542,
|
| 135 |
+
0.0009330127018922195,
|
| 136 |
+
0.000883022221559489,
|
| 137 |
+
0.0008213938048432696,
|
| 138 |
+
0.00075,
|
| 139 |
+
0.0006710100716628344,
|
| 140 |
+
0.0005868240888334653,
|
| 141 |
+
0.0005,
|
| 142 |
+
0.00041317591116653486,
|
| 143 |
+
0.0003289899283371657,
|
| 144 |
+
0.0002500000000000001,
|
| 145 |
+
0.0001786061951567303,
|
| 146 |
+
0.00011697777844051105,
|
| 147 |
+
6.698729810778065e-05,
|
| 148 |
+
3.0153689607045842e-05,
|
| 149 |
+
7.59612349389599e-06,
|
| 150 |
+
0.0
|
| 151 |
+
]
|
| 152 |
+
}
|
| 153 |
+
}
|