Delete sigma_v5_ablation
Browse files- sigma_v5_ablation/V0_Baseline_best.pt +0 -3
- sigma_v5_ablation/V0_Baseline_result.json +0 -60
- sigma_v5_ablation/V1_Pos2D_best.pt +0 -3
- sigma_v5_ablation/V1_Pos2D_result.json +0 -46
- sigma_v5_ablation/V2_Pos4D_best.pt +0 -3
- sigma_v5_ablation/V2_Pos4D_result.json +0 -46
- sigma_v5_ablation/V3_AlphaAux_best.pt +0 -3
- sigma_v5_ablation/V3_AlphaAux_result.json +0 -46
- sigma_v5_ablation/V4_DynMu_best.pt +0 -3
- sigma_v5_ablation/V4_DynMu_result.json +0 -46
- sigma_v5_ablation/V5_Norm_best.pt +0 -3
- sigma_v5_ablation/V5_Norm_result.json +0 -60
- sigma_v5_ablation/V6_GeoOut_best.pt +0 -3
- sigma_v5_ablation/V6_GeoOut_result.json +0 -53
- sigma_v5_ablation/V7_GeoLoss_best.pt +0 -3
- sigma_v5_ablation/V7_GeoLoss_result.json +0 -46
sigma_v5_ablation/V0_Baseline_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:64480a6d82d63d22e2a82972bc2824a115c665f5b7fac521e6ad61d2ea849fed
|
| 3 |
-
size 229075493
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V0_Baseline_result.json
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V0_Baseline",
|
| 3 |
-
"best_fr_acc": 0.22655750091411855,
|
| 4 |
-
"final_tf_acc": 0.4801695074743581,
|
| 5 |
-
"final_ppl": 3753.319173060765,
|
| 6 |
-
"mean_step_var": 0.13670697510242463,
|
| 7 |
-
"mean_alpha_grad": 0.0,
|
| 8 |
-
"n_epochs": 5,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3,
|
| 14 |
-
4,
|
| 15 |
-
5
|
| 16 |
-
],
|
| 17 |
-
"train_loss": [
|
| 18 |
-
15.60134063898071,
|
| 19 |
-
14.074771997742255,
|
| 20 |
-
24.51269090143497,
|
| 21 |
-
36.598425387693545,
|
| 22 |
-
40.809407898679254
|
| 23 |
-
],
|
| 24 |
-
"ppl": [
|
| 25 |
-
785.429961776258,
|
| 26 |
-
72.68692165847291,
|
| 27 |
-
113.43548809059638,
|
| 28 |
-
763.954343276401,
|
| 29 |
-
3753.319173060765
|
| 30 |
-
],
|
| 31 |
-
"tf_acc": [
|
| 32 |
-
0.40090594346938707,
|
| 33 |
-
0.4735112123483469,
|
| 34 |
-
0.48212935652704364,
|
| 35 |
-
0.4815842329494755,
|
| 36 |
-
0.4801695074743581
|
| 37 |
-
],
|
| 38 |
-
"fr_acc": [
|
| 39 |
-
0.12128853581059464,
|
| 40 |
-
0.10486046837149778,
|
| 41 |
-
0.21377412416015357,
|
| 42 |
-
0.2106175585035879,
|
| 43 |
-
0.22655750091411855
|
| 44 |
-
],
|
| 45 |
-
"step_var": [
|
| 46 |
-
0.13966141641139984,
|
| 47 |
-
0.13741886615753174,
|
| 48 |
-
0.13594350218772888,
|
| 49 |
-
0.13533863425254822,
|
| 50 |
-
0.13517245650291443
|
| 51 |
-
],
|
| 52 |
-
"alpha_grad": [
|
| 53 |
-
0.0,
|
| 54 |
-
0.0,
|
| 55 |
-
0.0,
|
| 56 |
-
0.0,
|
| 57 |
-
0.0
|
| 58 |
-
]
|
| 59 |
-
}
|
| 60 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V1_Pos2D_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:b5e8aa080fc95b4a0594304d3a595f3de2ed2a94725a412a67c83ab54d3896e8
|
| 3 |
-
size 229097468
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V1_Pos2D_result.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V1_Pos2D",
|
| 3 |
-
"best_fr_acc": 0.1875417780748663,
|
| 4 |
-
"final_tf_acc": 0.3510563391706983,
|
| 5 |
-
"final_ppl": 153.6817406044875,
|
| 6 |
-
"mean_step_var": 0.14404772718747458,
|
| 7 |
-
"mean_alpha_grad": 0.003913327430685361,
|
| 8 |
-
"n_epochs": 3,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3
|
| 14 |
-
],
|
| 15 |
-
"train_loss": [
|
| 16 |
-
17.392262780441428,
|
| 17 |
-
15.179841164630057,
|
| 18 |
-
16.81793132593047
|
| 19 |
-
],
|
| 20 |
-
"ppl": [
|
| 21 |
-
1235.3568111623497,
|
| 22 |
-
311.3961552773866,
|
| 23 |
-
153.6817406044875
|
| 24 |
-
],
|
| 25 |
-
"tf_acc": [
|
| 26 |
-
0.3510563391706983,
|
| 27 |
-
0.3510563391706983,
|
| 28 |
-
0.3510563391706983
|
| 29 |
-
],
|
| 30 |
-
"fr_acc": [
|
| 31 |
-
0.1875417780748663,
|
| 32 |
-
0.1875417780748663,
|
| 33 |
-
0.1875417780748663
|
| 34 |
-
],
|
| 35 |
-
"step_var": [
|
| 36 |
-
0.141316756606102,
|
| 37 |
-
0.14358700811862946,
|
| 38 |
-
0.14723941683769226
|
| 39 |
-
],
|
| 40 |
-
"alpha_grad": [
|
| 41 |
-
0.0,
|
| 42 |
-
0.0,
|
| 43 |
-
0.011739982292056084
|
| 44 |
-
]
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V2_Pos4D_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:dd5831ee15a76d4005090231e5115a61db5d68e1bcfa49389a583cc5fb2f48fb
|
| 3 |
-
size 229118012
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V2_Pos4D_result.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V2_Pos4D",
|
| 3 |
-
"best_fr_acc": 0.1875417780748663,
|
| 4 |
-
"final_tf_acc": 0.3510563391706983,
|
| 5 |
-
"final_ppl": 253.87578636764255,
|
| 6 |
-
"mean_step_var": 0.14349867403507233,
|
| 7 |
-
"mean_alpha_grad": 0.0,
|
| 8 |
-
"n_epochs": 3,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3
|
| 14 |
-
],
|
| 15 |
-
"train_loss": [
|
| 16 |
-
17.31664431448574,
|
| 17 |
-
17.39737941087738,
|
| 18 |
-
24.57313331053906
|
| 19 |
-
],
|
| 20 |
-
"ppl": [
|
| 21 |
-
1198.821591346908,
|
| 22 |
-
214.63826338557135,
|
| 23 |
-
253.87578636764255
|
| 24 |
-
],
|
| 25 |
-
"tf_acc": [
|
| 26 |
-
0.3510563391706983,
|
| 27 |
-
0.3510563391706983,
|
| 28 |
-
0.3510563391706983
|
| 29 |
-
],
|
| 30 |
-
"fr_acc": [
|
| 31 |
-
0.1875417780748663,
|
| 32 |
-
0.1875417780748663,
|
| 33 |
-
0.1875417780748663
|
| 34 |
-
],
|
| 35 |
-
"step_var": [
|
| 36 |
-
0.14082492887973785,
|
| 37 |
-
0.14323490858078003,
|
| 38 |
-
0.1464361846446991
|
| 39 |
-
],
|
| 40 |
-
"alpha_grad": [
|
| 41 |
-
0.0,
|
| 42 |
-
0.0,
|
| 43 |
-
0.0
|
| 44 |
-
]
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V3_AlphaAux_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:b938d7814f3501e3079aa176f2f9833abfda0c406ae45d89e3204cbe2b6833f3
|
| 3 |
-
size 229075493
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V3_AlphaAux_result.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V3_AlphaAux",
|
| 3 |
-
"best_fr_acc": 0.1875417780748663,
|
| 4 |
-
"final_tf_acc": 0.3510563391706983,
|
| 5 |
-
"final_ppl": 183.78234464366375,
|
| 6 |
-
"mean_step_var": 0.1451136122147242,
|
| 7 |
-
"mean_alpha_grad": 0.18352858699735022,
|
| 8 |
-
"n_epochs": 3,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3
|
| 14 |
-
],
|
| 15 |
-
"train_loss": [
|
| 16 |
-
17.26420250607308,
|
| 17 |
-
15.259289013086304,
|
| 18 |
-
19.729494179676806
|
| 19 |
-
],
|
| 20 |
-
"ppl": [
|
| 21 |
-
1293.0097668989301,
|
| 22 |
-
285.79184800120186,
|
| 23 |
-
183.78234464366375
|
| 24 |
-
],
|
| 25 |
-
"tf_acc": [
|
| 26 |
-
0.3510563391706983,
|
| 27 |
-
0.3510563391706983,
|
| 28 |
-
0.3510563391706983
|
| 29 |
-
],
|
| 30 |
-
"fr_acc": [
|
| 31 |
-
0.1875417780748663,
|
| 32 |
-
0.1875417780748663,
|
| 33 |
-
0.1875417780748663
|
| 34 |
-
],
|
| 35 |
-
"step_var": [
|
| 36 |
-
0.14159607887268066,
|
| 37 |
-
0.14445264637470245,
|
| 38 |
-
0.14929211139678955
|
| 39 |
-
],
|
| 40 |
-
"alpha_grad": [
|
| 41 |
-
0.5482562377946445,
|
| 42 |
-
0.0023274669932523734,
|
| 43 |
-
2.056204153863074e-06
|
| 44 |
-
]
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V4_DynMu_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f10e6eb5fb7dbb4d752fdb04dfd3412459f96ba7d686939e824ba0d7ec40eb2c
|
| 3 |
-
size 230126966
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V4_DynMu_result.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V4_DynMu",
|
| 3 |
-
"best_fr_acc": 0.18943321792586498,
|
| 4 |
-
"final_tf_acc": 0.4841638355933248,
|
| 5 |
-
"final_ppl": 74.94092686946688,
|
| 6 |
-
"mean_step_var": 0.13881311813990274,
|
| 7 |
-
"mean_alpha_grad": 1.332800347123945e-07,
|
| 8 |
-
"n_epochs": 3,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3
|
| 14 |
-
],
|
| 15 |
-
"train_loss": [
|
| 16 |
-
15.708697366907268,
|
| 17 |
-
13.720215440439086,
|
| 18 |
-
19.28240414610449
|
| 19 |
-
],
|
| 20 |
-
"ppl": [
|
| 21 |
-
903.8067424539117,
|
| 22 |
-
78.64624914333437,
|
| 23 |
-
74.94092686946688
|
| 24 |
-
],
|
| 25 |
-
"tf_acc": [
|
| 26 |
-
0.3843802626327522,
|
| 27 |
-
0.4694811916141823,
|
| 28 |
-
0.4841638355933248
|
| 29 |
-
],
|
| 30 |
-
"fr_acc": [
|
| 31 |
-
0.18943321792586498,
|
| 32 |
-
0.05183016534119475,
|
| 33 |
-
0.14025399641208464
|
| 34 |
-
],
|
| 35 |
-
"step_var": [
|
| 36 |
-
0.14010637998580933,
|
| 37 |
-
0.13857358694076538,
|
| 38 |
-
0.13775938749313354
|
| 39 |
-
],
|
| 40 |
-
"alpha_grad": [
|
| 41 |
-
3.998401041371835e-07,
|
| 42 |
-
0.0,
|
| 43 |
-
0.0
|
| 44 |
-
]
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V5_Norm_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:2c37051b729286e36326fe161272a206ee283cf00dd3215a5ce162b8f858728a
|
| 3 |
-
size 229075333
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V5_Norm_result.json
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V5_Norm",
|
| 3 |
-
"best_fr_acc": 0.26766998320307145,
|
| 4 |
-
"final_tf_acc": 0.49702940098057347,
|
| 5 |
-
"final_ppl": 61.42505665318243,
|
| 6 |
-
"mean_step_var": 0.14897080659866332,
|
| 7 |
-
"mean_alpha_grad": 0.0,
|
| 8 |
-
"n_epochs": 5,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3,
|
| 14 |
-
4,
|
| 15 |
-
5
|
| 16 |
-
],
|
| 17 |
-
"train_loss": [
|
| 18 |
-
15.23357816140928,
|
| 19 |
-
11.30786015402596,
|
| 20 |
-
12.723735770125273,
|
| 21 |
-
15.796974123327558,
|
| 22 |
-
17.051196255773867
|
| 23 |
-
],
|
| 24 |
-
"ppl": [
|
| 25 |
-
2328.6654053399775,
|
| 26 |
-
73.83668606269272,
|
| 27 |
-
39.19834289015506,
|
| 28 |
-
46.67963716860922,
|
| 29 |
-
61.42505665318243
|
| 30 |
-
],
|
| 31 |
-
"tf_acc": [
|
| 32 |
-
0.45193989363600673,
|
| 33 |
-
0.4750784426338554,
|
| 34 |
-
0.49069233939134355,
|
| 35 |
-
0.49618900213182254,
|
| 36 |
-
0.49702940098057347
|
| 37 |
-
],
|
| 38 |
-
"fr_acc": [
|
| 39 |
-
0.09082589183692125,
|
| 40 |
-
0.24302091903194845,
|
| 41 |
-
0.20578737030942912,
|
| 42 |
-
0.26766998320307145,
|
| 43 |
-
0.22474069027377852
|
| 44 |
-
],
|
| 45 |
-
"step_var": [
|
| 46 |
-
0.14152660965919495,
|
| 47 |
-
0.1455157846212387,
|
| 48 |
-
0.14943934977054596,
|
| 49 |
-
0.15299569070339203,
|
| 50 |
-
0.155376598238945
|
| 51 |
-
],
|
| 52 |
-
"alpha_grad": [
|
| 53 |
-
0.0,
|
| 54 |
-
0.0,
|
| 55 |
-
0.0,
|
| 56 |
-
0.0,
|
| 57 |
-
0.0
|
| 58 |
-
]
|
| 59 |
-
}
|
| 60 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V6_GeoOut_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:911cea01274d41a29fc3edc7f900842e086874f48b88b22e1303ba70bd895faa
|
| 3 |
-
size 434960853
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V6_GeoOut_result.json
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V6_GeoOut",
|
| 3 |
-
"best_fr_acc": 0.25967108871520633,
|
| 4 |
-
"final_tf_acc": 0.2948177567515826,
|
| 5 |
-
"final_ppl": 154.63605340145725,
|
| 6 |
-
"mean_step_var": 0.14678223431110382,
|
| 7 |
-
"mean_alpha_grad": 0.5744691959887251,
|
| 8 |
-
"n_epochs": 4,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3,
|
| 14 |
-
4
|
| 15 |
-
],
|
| 16 |
-
"train_loss": [
|
| 17 |
-
14.759893344419021,
|
| 18 |
-
11.246362841675545,
|
| 19 |
-
12.54583325090434,
|
| 20 |
-
22.460847154460506
|
| 21 |
-
],
|
| 22 |
-
"ppl": [
|
| 23 |
-
1420.1345154460182,
|
| 24 |
-
1181.355302200477,
|
| 25 |
-
145.64523762015884,
|
| 26 |
-
154.63605340145725
|
| 27 |
-
],
|
| 28 |
-
"tf_acc": [
|
| 29 |
-
0.4136222488294445,
|
| 30 |
-
0.45071985515287794,
|
| 31 |
-
0.465668571354405,
|
| 32 |
-
0.2948177567515826
|
| 33 |
-
],
|
| 34 |
-
"fr_acc": [
|
| 35 |
-
0.1953542780748663,
|
| 36 |
-
0.25967108871520633,
|
| 37 |
-
0.06445642796745737,
|
| 38 |
-
0.1599443244663833
|
| 39 |
-
],
|
| 40 |
-
"step_var": [
|
| 41 |
-
0.14110127091407776,
|
| 42 |
-
0.14495781064033508,
|
| 43 |
-
0.14907562732696533,
|
| 44 |
-
0.1519942283630371
|
| 45 |
-
],
|
| 46 |
-
"alpha_grad": [
|
| 47 |
-
2.232577130465826,
|
| 48 |
-
0.05733954775998447,
|
| 49 |
-
0.004667596272261929,
|
| 50 |
-
0.003292509456828169
|
| 51 |
-
]
|
| 52 |
-
}
|
| 53 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V7_GeoLoss_best.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:847902def8d25da4612c899402e8cd2e143259fd6a51c6db68e55076eae281f1
|
| 3 |
-
size 229075453
|
|
|
|
|
|
|
|
|
|
|
|
sigma_v5_ablation/V7_GeoLoss_result.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"variant": "V7_GeoLoss",
|
| 3 |
-
"best_fr_acc": 0.1875417780748663,
|
| 4 |
-
"final_tf_acc": 0.3510563391706983,
|
| 5 |
-
"final_ppl": 184.28581252195386,
|
| 6 |
-
"mean_step_var": 0.1445392370223999,
|
| 7 |
-
"mean_alpha_grad": 0.1394764928556775,
|
| 8 |
-
"n_epochs": 3,
|
| 9 |
-
"history": {
|
| 10 |
-
"epoch": [
|
| 11 |
-
1,
|
| 12 |
-
2,
|
| 13 |
-
3
|
| 14 |
-
],
|
| 15 |
-
"train_loss": [
|
| 16 |
-
17.477056487872595,
|
| 17 |
-
15.440700682025714,
|
| 18 |
-
15.867711470133532
|
| 19 |
-
],
|
| 20 |
-
"ppl": [
|
| 21 |
-
1207.411879920045,
|
| 22 |
-
276.30482110337414,
|
| 23 |
-
184.28581252195386
|
| 24 |
-
],
|
| 25 |
-
"tf_acc": [
|
| 26 |
-
0.3510563391706983,
|
| 27 |
-
0.3510563391706983,
|
| 28 |
-
0.3510563391706983
|
| 29 |
-
],
|
| 30 |
-
"fr_acc": [
|
| 31 |
-
0.1875417780748663,
|
| 32 |
-
0.1875417780748663,
|
| 33 |
-
0.1875417780748663
|
| 34 |
-
],
|
| 35 |
-
"step_var": [
|
| 36 |
-
0.1416991949081421,
|
| 37 |
-
0.14468589425086975,
|
| 38 |
-
0.14723262190818787
|
| 39 |
-
],
|
| 40 |
-
"alpha_grad": [
|
| 41 |
-
0.41642071172059075,
|
| 42 |
-
0.001486396164589168,
|
| 43 |
-
0.0005223706818525892
|
| 44 |
-
]
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|