1-parameter-classifier / stage_4b /training_log.json
phanerozoic's picture
Stage 4B: 15.67M student + cosine loss on 768-D, F1 0.723 (+0.013 over Stage 4)
c75b31a verified
{
"student_params": 15672192,
"loss": "cosine_1_minus_sim",
"target_dim": 768,
"epochs": [
{
"epoch": 1,
"loss": 0.07253991591294745,
"F1": 0.7222222089767456,
"precision": 0.5759493708610535,
"recall": 0.9680851101875305,
"threshold": 77.3359375
},
{
"epoch": 2,
"loss": 0.06178997803267662,
"F1": 0.7249357104301453,
"precision": 0.5685483813285828,
"recall": 1.0,
"threshold": 107.1875
},
{
"epoch": 3,
"loss": 0.0617156123302943,
"F1": 0.7195902466773987,
"precision": 0.563126266002655,
"recall": 0.9964538812637329,
"threshold": 148.1875
},
{
"epoch": 4,
"loss": 0.061347012896584736,
"F1": 0.7253885865211487,
"precision": 0.5714285969734192,
"recall": 0.9929078221321106,
"threshold": 153.75
},
{
"epoch": 5,
"loss": 0.06122595644468568,
"F1": 0.7237353920936584,
"precision": 0.5705521702766418,
"recall": 0.9893617033958435,
"threshold": 159.734375
},
{
"epoch": 6,
"loss": 0.06132089458562277,
"F1": 0.7195902466773987,
"precision": 0.563126266002655,
"recall": 0.9964538812637329,
"threshold": 156.5625
},
{
"epoch": 7,
"loss": 0.061309009904699646,
"F1": 0.7220779061317444,
"precision": 0.5696721076965332,
"recall": 0.9858155846595764,
"threshold": 174.6875
},
{
"epoch": 8,
"loss": 0.06113341519135151,
"F1": 0.720720648765564,
"precision": 0.5656565427780151,
"recall": 0.9929078221321106,
"threshold": 168.375
},
{
"epoch": 9,
"loss": 0.060983790468551714,
"F1": 0.7205128073692322,
"precision": 0.564257025718689,
"recall": 0.9964538812637329,
"threshold": 168.28125
},
{
"epoch": 10,
"loss": 0.061084045586111,
"F1": 0.7260638475418091,
"precision": 0.5808510780334473,
"recall": 0.9680851101875305,
"threshold": 165.9375
},
{
"epoch": 11,
"loss": 0.061100886026898504,
"F1": 0.7221510410308838,
"precision": 0.5651302337646484,
"recall": 1.0,
"threshold": 171.53125
},
{
"epoch": 12,
"loss": 0.06096925960034696,
"F1": 0.7195902466773987,
"precision": 0.563126266002655,
"recall": 0.9964538812637329,
"threshold": 168.1875
},
{
"epoch": 13,
"loss": 0.060978461868108035,
"F1": 0.7253613471984863,
"precision": 0.5762004256248474,
"recall": 0.978723406791687,
"threshold": 167.40625
},
{
"epoch": 14,
"loss": 0.060976944405298966,
"F1": 0.7216494679450989,
"precision": 0.5668016076087952,
"recall": 0.9929078221321106,
"threshold": 168.125
},
{
"epoch": 15,
"loss": 0.06102545032333958,
"F1": 0.7230768799781799,
"precision": 0.5662650465965271,
"recall": 1.0,
"threshold": 168.09375
}
]
}