| {"llama3_1_8b_validation/lm_loss": 0.6857601530834182, "llama3_1_8b_validation/probe_loss": 2.7089261656792454, "llama3_1_8b_validation/sparsity": 0.10497343787100426, "llama3_1_8b_validation/probe_threshold": 0.5, "llama3_1_8b_validation/all_tokens_accuracy": 0.8950558444793525, "llama3_1_8b_validation/all_tokens_precision": 0.23229881154499152, "llama3_1_8b_validation/all_tokens_recall": 0.39632479028595263, "llama3_1_8b_validation/all_tokens_f1": 0.29291225305919727, "llama3_1_8b_validation/all_tokens_auc": 0.7902774734594318, "llama3_1_8b_validation/entity_tokens_accuracy": 0.6700355467681888, "llama3_1_8b_validation/entity_tokens_precision": 0.7626075712311053, "llama3_1_8b_validation/entity_tokens_recall": 0.39632479028595263, "llama3_1_8b_validation/entity_tokens_f1": 0.521583995364511, "llama3_1_8b_validation/entity_tokens_auc": 0.7608780819528738, "llama3_1_8b_validation/entity_span_max_accuracy": 0.7439642324888227, "llama3_1_8b_validation/entity_span_max_precision": 0.6275850282003076, "llama3_1_8b_validation/entity_span_max_recall": 0.7449786975045648, "llama3_1_8b_validation/entity_span_max_f1": 0.6812615955473098, "llama3_1_8b_validation/entity_span_max_auc": 0.8152420357774306, "epoch": 1.0, "global_step": 798, "training_progress": 1.0, "dataset_id": "llama3_1_8b_validation"} |
| {"gemma2_9b_longfact_test/lm_loss": 0.5755876209932504, "gemma2_9b_longfact_test/probe_loss": 1.824633669157719, "gemma2_9b_longfact_test/sparsity": 0.04043448702000158, "gemma2_9b_longfact_test/probe_threshold": 0.5, "gemma2_9b_longfact_test/all_tokens_accuracy": 0.9602138753279532, "gemma2_9b_longfact_test/all_tokens_precision": 0.24976268915070635, "gemma2_9b_longfact_test/all_tokens_recall": 0.2406002904631273, "gemma2_9b_longfact_test/all_tokens_f1": 0.24509589041095892, "gemma2_9b_longfact_test/all_tokens_auc": 0.8147431010044623, "gemma2_9b_longfact_test/entity_tokens_accuracy": 0.766951903216288, "gemma2_9b_longfact_test/entity_tokens_precision": 0.7271988294586246, "gemma2_9b_longfact_test/entity_tokens_recall": 0.2406002904631273, "gemma2_9b_longfact_test/entity_tokens_f1": 0.36157141702368445, "gemma2_9b_longfact_test/entity_tokens_auc": 0.7466155394760451, "gemma2_9b_longfact_test/entity_span_max_accuracy": 0.8152651131374535, "gemma2_9b_longfact_test/entity_span_max_precision": 0.6145365455287872, "gemma2_9b_longfact_test/entity_span_max_recall": 0.5474718079301564, "gemma2_9b_longfact_test/entity_span_max_f1": 0.5790688726433244, "gemma2_9b_longfact_test/entity_span_max_auc": 0.8148793973959445, "epoch": 1.0, "global_step": 798, "training_progress": 1.0, "dataset_id": "gemma2_9b_longfact_test"} |
| {"gemma2_9b_longfact_augmented_test/lm_loss": 0.2728604237480861, "gemma2_9b_longfact_augmented_test/probe_loss": 2.3754780715753974, "gemma2_9b_longfact_augmented_test/sparsity": 0.07041370776459212, "gemma2_9b_longfact_augmented_test/probe_threshold": 0.5, "gemma2_9b_longfact_augmented_test/all_tokens_accuracy": 0.9442802937465392, "gemma2_9b_longfact_augmented_test/all_tokens_precision": 0.309374823834489, "gemma2_9b_longfact_augmented_test/all_tokens_recall": 0.30848791455874086, "gemma2_9b_longfact_augmented_test/all_tokens_f1": 0.30893073264094123, "gemma2_9b_longfact_augmented_test/all_tokens_auc": 0.8440259713255813, "gemma2_9b_longfact_augmented_test/entity_tokens_accuracy": 0.7373854715107455, "gemma2_9b_longfact_augmented_test/entity_tokens_precision": 0.7620105526242711, "gemma2_9b_longfact_augmented_test/entity_tokens_recall": 0.30848791455874086, "gemma2_9b_longfact_augmented_test/entity_tokens_f1": 0.43918053777208704, "gemma2_9b_longfact_augmented_test/entity_tokens_auc": 0.7776467019435929, "gemma2_9b_longfact_augmented_test/entity_span_max_accuracy": 0.795701581027668, "gemma2_9b_longfact_augmented_test/entity_span_max_precision": 0.6020174232003668, "gemma2_9b_longfact_augmented_test/entity_span_max_recall": 0.6255359695092901, "gemma2_9b_longfact_augmented_test/entity_span_max_f1": 0.6135514018691589, "gemma2_9b_longfact_augmented_test/entity_span_max_auc": 0.8404392366105239, "epoch": 1.0, "global_step": 798, "training_progress": 1.0, "dataset_id": "gemma2_9b_longfact_augmented_test"} |
| {"llama3_1_8b_validation/lm_loss": 0.6699889287227342, "llama3_1_8b_validation/probe_loss": 2.5472369915297053, "llama3_1_8b_validation/sparsity": 0.11641409348289505, "llama3_1_8b_validation/probe_threshold": 0.5, "llama3_1_8b_validation/all_tokens_accuracy": 0.8842834774563183, "llama3_1_8b_validation/all_tokens_precision": 0.19383254279760154, "llama3_1_8b_validation/all_tokens_recall": 0.3513231681883487, "llama3_1_8b_validation/all_tokens_f1": 0.24982903659028927, "llama3_1_8b_validation/all_tokens_auc": 0.7595831330463472, "llama3_1_8b_validation/entity_tokens_accuracy": 0.6445954188839577, "llama3_1_8b_validation/entity_tokens_precision": 0.7232611392042744, "llama3_1_8b_validation/entity_tokens_recall": 0.3513231681883487, "llama3_1_8b_validation/entity_tokens_f1": 0.47292407511385615, "llama3_1_8b_validation/entity_tokens_auc": 0.7189595849351357, "llama3_1_8b_validation/entity_span_max_accuracy": 0.716467958271237, "llama3_1_8b_validation/entity_span_max_precision": 0.5898912348048625, "llama3_1_8b_validation/entity_span_max_recall": 0.7482247920470684, "llama3_1_8b_validation/entity_span_max_f1": 0.6596905464627493, "llama3_1_8b_validation/entity_span_max_auc": 0.7902627509671609, "epoch": 1.0, "global_step": 100, "training_progress": 1.0, "dataset_id": "llama3_1_8b_validation"} |
| {"gemma2_9b_longfact_test/lm_loss": 0.5747948997333402, "gemma2_9b_longfact_test/probe_loss": 1.5762638045383282, "gemma2_9b_longfact_test/sparsity": 0.05290687029234699, "gemma2_9b_longfact_test/probe_threshold": 0.5, "gemma2_9b_longfact_test/all_tokens_accuracy": 0.9493771595820103, "gemma2_9b_longfact_test/all_tokens_precision": 0.1775027416575278, "gemma2_9b_longfact_test/all_tokens_recall": 0.24377386907643483, "gemma2_9b_longfact_test/all_tokens_f1": 0.20542574167667657, "gemma2_9b_longfact_test/all_tokens_auc": 0.7932095715283792, "gemma2_9b_longfact_test/entity_tokens_accuracy": 0.7566096193567424, "gemma2_9b_longfact_test/entity_tokens_precision": 0.6502152080344333, "gemma2_9b_longfact_test/entity_tokens_recall": 0.24377386907643483, "gemma2_9b_longfact_test/entity_tokens_f1": 0.35460271507374513, "gemma2_9b_longfact_test/entity_tokens_auc": 0.7209027554577657, "gemma2_9b_longfact_test/entity_span_max_accuracy": 0.7771867612293144, "gemma2_9b_longfact_test/entity_span_max_precision": 0.5179621162638798, "gemma2_9b_longfact_test/entity_span_max_recall": 0.5769370680247363, "gemma2_9b_longfact_test/entity_span_max_f1": 0.5458612975391499, "gemma2_9b_longfact_test/entity_span_max_auc": 0.7920690036518852, "epoch": 1.0, "global_step": 100, "training_progress": 1.0, "dataset_id": "gemma2_9b_longfact_test"} |
| {"gemma2_9b_longfact_augmented_test/lm_loss": 0.27257265425422816, "gemma2_9b_longfact_augmented_test/probe_loss": 2.1039190561447327, "gemma2_9b_longfact_augmented_test/sparsity": 0.07143737458922331, "gemma2_9b_longfact_augmented_test/probe_threshold": 0.5, "gemma2_9b_longfact_augmented_test/all_tokens_accuracy": 0.93839583163131, "gemma2_9b_longfact_augmented_test/all_tokens_precision": 0.25240817190642534, "gemma2_9b_longfact_augmented_test/all_tokens_recall": 0.2680719505340079, "gemma2_9b_longfact_augmented_test/all_tokens_f1": 0.2600043615745284, "gemma2_9b_longfact_augmented_test/all_tokens_auc": 0.8142933951087963, "gemma2_9b_longfact_augmented_test/entity_tokens_accuracy": 0.7195855427104607, "gemma2_9b_longfact_augmented_test/entity_tokens_precision": 0.7103068215668752, "gemma2_9b_longfact_augmented_test/entity_tokens_recall": 0.2680719505340079, "gemma2_9b_longfact_augmented_test/entity_tokens_f1": 0.38924257264120143, "gemma2_9b_longfact_augmented_test/entity_tokens_auc": 0.740394699731032, "gemma2_9b_longfact_augmented_test/entity_span_max_accuracy": 0.7705039525691699, "gemma2_9b_longfact_augmented_test/entity_span_max_precision": 0.5497317375154767, "gemma2_9b_longfact_augmented_test/entity_span_max_recall": 0.6345878989995236, "gemma2_9b_longfact_augmented_test/entity_span_max_f1": 0.5891198584697037, "gemma2_9b_longfact_augmented_test/entity_span_max_auc": 0.8126757518826111, "epoch": 1.0, "global_step": 100, "training_progress": 1.0, "dataset_id": "gemma2_9b_longfact_augmented_test"} |
| {"llama3_1_8b_validation/lm_loss": 0.6699889287227342, "llama3_1_8b_validation/probe_loss": 2.0718594188449764, "llama3_1_8b_validation/sparsity": 0.12139198741241664, "llama3_1_8b_validation/probe_threshold": 0.5, "llama3_1_8b_validation/all_tokens_accuracy": 0.8897624385507379, "llama3_1_8b_validation/all_tokens_precision": 0.22390026353131967, "llama3_1_8b_validation/all_tokens_recall": 0.4095101265236131, "llama3_1_8b_validation/all_tokens_f1": 0.2895103291230484, "llama3_1_8b_validation/all_tokens_auc": 0.7843380065373811, "llama3_1_8b_validation/entity_tokens_accuracy": 0.6729171486864522, "llama3_1_8b_validation/entity_tokens_precision": 0.7587480142544331, "llama3_1_8b_validation/entity_tokens_recall": 0.4095101265236131, "llama3_1_8b_validation/entity_tokens_f1": 0.5319286628038227, "llama3_1_8b_validation/entity_tokens_auc": 0.7566700332734465, "llama3_1_8b_validation/entity_span_max_accuracy": 0.7383755588673622, "llama3_1_8b_validation/entity_span_max_precision": 0.615811826200588, "llama3_1_8b_validation/entity_span_max_recall": 0.7648610265773991, "llama3_1_8b_validation/entity_span_max_f1": 0.6822911953669352, "llama3_1_8b_validation/entity_span_max_auc": 0.8174462241941803, "epoch": 1.0, "global_step": 200, "training_progress": 1.0, "dataset_id": "llama3_1_8b_validation"} |
| {"gemma2_9b_longfact_test/lm_loss": 0.5747948997333402, "gemma2_9b_longfact_test/probe_loss": 1.3399851017585787, "gemma2_9b_longfact_test/sparsity": 0.04920747144850063, "gemma2_9b_longfact_test/probe_threshold": 0.5, "gemma2_9b_longfact_test/all_tokens_accuracy": 0.9562690508885241, "gemma2_9b_longfact_test/all_tokens_precision": 0.2237967030371735, "gemma2_9b_longfact_test/all_tokens_recall": 0.2548544994890001, "gemma2_9b_longfact_test/all_tokens_f1": 0.23831799205271365, "gemma2_9b_longfact_test/all_tokens_auc": 0.8080841685884488, "gemma2_9b_longfact_test/entity_tokens_accuracy": 0.7664502803186781, "gemma2_9b_longfact_test/entity_tokens_precision": 0.705584512285927, "gemma2_9b_longfact_test/entity_tokens_recall": 0.2548544994890001, "gemma2_9b_longfact_test/entity_tokens_f1": 0.3744566505966964, "gemma2_9b_longfact_test/entity_tokens_auc": 0.7484999964990753, "gemma2_9b_longfact_test/entity_span_max_accuracy": 0.8055555555555556, "gemma2_9b_longfact_test/entity_span_max_precision": 0.581031976744186, "gemma2_9b_longfact_test/entity_span_max_recall": 0.5816660603855948, "gemma2_9b_longfact_test/entity_span_max_f1": 0.5813488456644247, "gemma2_9b_longfact_test/entity_span_max_auc": 0.8200260537541664, "epoch": 1.0, "global_step": 200, "training_progress": 1.0, "dataset_id": "gemma2_9b_longfact_test"} |
| {"gemma2_9b_longfact_augmented_test/lm_loss": 0.27257265425422816, "gemma2_9b_longfact_augmented_test/probe_loss": 1.8090517246028752, "gemma2_9b_longfact_augmented_test/sparsity": 0.0697531582227031, "gemma2_9b_longfact_augmented_test/probe_threshold": 0.5, "gemma2_9b_longfact_augmented_test/all_tokens_accuracy": 0.9426350044933417, "gemma2_9b_longfact_augmented_test/all_tokens_precision": 0.28952102541038904, "gemma2_9b_longfact_augmented_test/all_tokens_recall": 0.2894884766722878, "gemma2_9b_longfact_augmented_test/all_tokens_f1": 0.28950475012648264, "gemma2_9b_longfact_augmented_test/all_tokens_auc": 0.825353270994191, "gemma2_9b_longfact_augmented_test/entity_tokens_accuracy": 0.7318206516647617, "gemma2_9b_longfact_augmented_test/entity_tokens_precision": 0.7547999413747618, "gemma2_9b_longfact_augmented_test/entity_tokens_recall": 0.2894884766722878, "gemma2_9b_longfact_augmented_test/entity_tokens_f1": 0.41847804006013084, "gemma2_9b_longfact_augmented_test/entity_tokens_auc": 0.7630741321778702, "gemma2_9b_longfact_augmented_test/entity_span_max_accuracy": 0.7961956521739131, "gemma2_9b_longfact_augmented_test/entity_span_max_precision": 0.6000891662951404, "gemma2_9b_longfact_augmented_test/entity_span_max_recall": 0.6412577417818008, "gemma2_9b_longfact_augmented_test/entity_span_max_f1": 0.6199907876554583, "gemma2_9b_longfact_augmented_test/entity_span_max_auc": 0.8385596641420598, "epoch": 1.0, "global_step": 200, "training_progress": 1.0, "dataset_id": "gemma2_9b_longfact_augmented_test"} |
|
|