sft_lora / workmem /validation /validation_step_300.jsonl
SUJQ's picture
Upload folder using huggingface_hub
389c0ee verified
{"global_idx": 0, "total_positions": 437, "gt_mem_token_count": 11, "pred_mem_token_count": 216, "correct_mem_token_count": 3, "mem_token_precision": 0.013888888888888888, "mem_token_recall": 0.2727272727272727, "mem_token_accuracy": 0.2727272727272727, "slot_norm_mean": 198.0, "slot_sim_mean": 1.0, "rank": 0}
{"global_idx": 1, "total_positions": 594, "gt_mem_token_count": 5, "pred_mem_token_count": 253, "correct_mem_token_count": 2, "mem_token_precision": 0.007905138339920948, "mem_token_recall": 0.4, "mem_token_accuracy": 0.4, "slot_norm_mean": 199.0, "slot_sim_mean": 0.99609375, "rank": 0}
{"global_idx": 2, "total_positions": 638, "gt_mem_token_count": 5, "pred_mem_token_count": 447, "correct_mem_token_count": 3, "mem_token_precision": 0.006711409395973154, "mem_token_recall": 0.6, "mem_token_accuracy": 0.6, "slot_norm_mean": 196.0, "slot_sim_mean": 0.99609375, "rank": 0}
{"global_idx": 3, "total_positions": 408, "gt_mem_token_count": 6, "pred_mem_token_count": 172, "correct_mem_token_count": 3, "mem_token_precision": 0.01744186046511628, "mem_token_recall": 0.5, "mem_token_accuracy": 0.5, "slot_norm_mean": 193.0, "slot_sim_mean": 1.0, "rank": 0}
{"global_idx": 4, "total_positions": 447, "gt_mem_token_count": 8, "pred_mem_token_count": 197, "correct_mem_token_count": 2, "mem_token_precision": 0.01015228426395939, "mem_token_recall": 0.25, "mem_token_accuracy": 0.25, "slot_norm_mean": 198.0, "slot_sim_mean": 1.0, "rank": 0}
{"global_idx": 5, "total_positions": 414, "gt_mem_token_count": 6, "pred_mem_token_count": 178, "correct_mem_token_count": 1, "mem_token_precision": 0.0056179775280898875, "mem_token_recall": 0.16666666666666666, "mem_token_accuracy": 0.16666666666666666, "slot_norm_mean": 194.0, "slot_sim_mean": 1.0, "rank": 0}
{"global_idx": 6, "total_positions": 577, "gt_mem_token_count": 11, "pred_mem_token_count": 363, "correct_mem_token_count": 1, "mem_token_precision": 0.0027548209366391185, "mem_token_recall": 0.09090909090909091, "mem_token_accuracy": 0.09090909090909091, "slot_norm_mean": 197.0, "slot_sim_mean": 0.9921875, "rank": 0}
{"global_idx": 7, "total_positions": 631, "gt_mem_token_count": 17, "pred_mem_token_count": 462, "correct_mem_token_count": 2, "mem_token_precision": 0.004329004329004329, "mem_token_recall": 0.11764705882352941, "mem_token_accuracy": 0.11764705882352941, "slot_norm_mean": 200.0, "slot_sim_mean": 0.9921875, "rank": 1}
{"global_idx": 8, "total_positions": 591, "gt_mem_token_count": 7, "pred_mem_token_count": 316, "correct_mem_token_count": 1, "mem_token_precision": 0.0031645569620253164, "mem_token_recall": 0.14285714285714285, "mem_token_accuracy": 0.14285714285714285, "slot_norm_mean": 200.0, "slot_sim_mean": 0.99609375, "rank": 1}
{"global_idx": 9, "total_positions": 579, "gt_mem_token_count": 13, "pred_mem_token_count": 406, "correct_mem_token_count": 4, "mem_token_precision": 0.009852216748768473, "mem_token_recall": 0.3076923076923077, "mem_token_accuracy": 0.3076923076923077, "slot_norm_mean": 199.0, "slot_sim_mean": 1.0, "rank": 1}
{"global_idx": 10, "total_positions": 596, "gt_mem_token_count": 13, "pred_mem_token_count": 260, "correct_mem_token_count": 1, "mem_token_precision": 0.0038461538461538464, "mem_token_recall": 0.07692307692307693, "mem_token_accuracy": 0.07692307692307693, "slot_norm_mean": 200.0, "slot_sim_mean": 0.99609375, "rank": 1}
{"global_idx": 11, "total_positions": 353, "gt_mem_token_count": 7, "pred_mem_token_count": 148, "correct_mem_token_count": 1, "mem_token_precision": 0.006756756756756757, "mem_token_recall": 0.14285714285714285, "mem_token_accuracy": 0.14285714285714285, "slot_norm_mean": 197.0, "slot_sim_mean": 1.0, "rank": 1}
{"global_idx": 12, "total_positions": 378, "gt_mem_token_count": 15, "pred_mem_token_count": 181, "correct_mem_token_count": 2, "mem_token_precision": 0.011049723756906077, "mem_token_recall": 0.13333333333333333, "mem_token_accuracy": 0.13333333333333333, "slot_norm_mean": 196.0, "slot_sim_mean": 0.99609375, "rank": 1}
{"global_idx": 13, "total_positions": 412, "gt_mem_token_count": 11, "pred_mem_token_count": 222, "correct_mem_token_count": 2, "mem_token_precision": 0.009009009009009009, "mem_token_recall": 0.18181818181818182, "mem_token_accuracy": 0.18181818181818182, "slot_norm_mean": 197.0, "slot_sim_mean": 1.0, "rank": 1}
{"global_idx": 14, "total_positions": 665, "gt_mem_token_count": 10, "pred_mem_token_count": 518, "correct_mem_token_count": 4, "mem_token_precision": 0.007722007722007722, "mem_token_recall": 0.4, "mem_token_accuracy": 0.4, "slot_norm_mean": 198.0, "slot_sim_mean": 1.0, "rank": 2}
{"global_idx": 15, "total_positions": 529, "gt_mem_token_count": 12, "pred_mem_token_count": 362, "correct_mem_token_count": 2, "mem_token_precision": 0.0055248618784530384, "mem_token_recall": 0.16666666666666666, "mem_token_accuracy": 0.16666666666666666, "slot_norm_mean": 199.0, "slot_sim_mean": 0.9921875, "rank": 2}
{"global_idx": 16, "total_positions": 383, "gt_mem_token_count": 6, "pred_mem_token_count": 144, "correct_mem_token_count": 2, "mem_token_precision": 0.013888888888888888, "mem_token_recall": 0.3333333333333333, "mem_token_accuracy": 0.3333333333333333, "slot_norm_mean": 200.0, "slot_sim_mean": 0.99609375, "rank": 2}
{"global_idx": 17, "total_positions": 370, "gt_mem_token_count": 10, "pred_mem_token_count": 178, "correct_mem_token_count": 2, "mem_token_precision": 0.011235955056179775, "mem_token_recall": 0.2, "mem_token_accuracy": 0.2, "slot_norm_mean": 197.0, "slot_sim_mean": 0.99609375, "rank": 2}
{"global_idx": 18, "total_positions": 647, "gt_mem_token_count": 9, "pred_mem_token_count": 428, "correct_mem_token_count": 2, "mem_token_precision": 0.004672897196261682, "mem_token_recall": 0.2222222222222222, "mem_token_accuracy": 0.2222222222222222, "slot_norm_mean": 195.0, "slot_sim_mean": 0.9921875, "rank": 2}
{"global_idx": 19, "total_positions": 261, "gt_mem_token_count": 6, "pred_mem_token_count": 110, "correct_mem_token_count": 3, "mem_token_precision": 0.02727272727272727, "mem_token_recall": 0.5, "mem_token_accuracy": 0.5, "slot_norm_mean": 194.0, "slot_sim_mean": 0.99609375, "rank": 2}
{"global_idx": 20, "total_positions": 437, "gt_mem_token_count": 11, "pred_mem_token_count": 216, "correct_mem_token_count": 3, "mem_token_precision": 0.013888888888888888, "mem_token_recall": 0.2727272727272727, "mem_token_accuracy": 0.2727272727272727, "slot_norm_mean": 198.0, "slot_sim_mean": 1.0, "rank": 2}
{"global_idx": 21, "total_positions": 594, "gt_mem_token_count": 5, "pred_mem_token_count": 253, "correct_mem_token_count": 2, "mem_token_precision": 0.007905138339920948, "mem_token_recall": 0.4, "mem_token_accuracy": 0.4, "slot_norm_mean": 199.0, "slot_sim_mean": 0.99609375, "rank": 2}
{"global_idx": 22, "total_positions": 302, "gt_mem_token_count": 6, "pred_mem_token_count": 119, "correct_mem_token_count": 2, "mem_token_precision": 0.01680672268907563, "mem_token_recall": 0.3333333333333333, "mem_token_accuracy": 0.3333333333333333, "slot_norm_mean": 194.0, "slot_sim_mean": 0.99609375, "rank": 3}
{"global_idx": 23, "total_positions": 366, "gt_mem_token_count": 6, "pred_mem_token_count": 146, "correct_mem_token_count": 1, "mem_token_precision": 0.00684931506849315, "mem_token_recall": 0.16666666666666666, "mem_token_accuracy": 0.16666666666666666, "slot_norm_mean": 198.0, "slot_sim_mean": 1.0, "rank": 3}
{"global_idx": 24, "total_positions": 662, "gt_mem_token_count": 14, "pred_mem_token_count": 377, "correct_mem_token_count": 3, "mem_token_precision": 0.007957559681697613, "mem_token_recall": 0.21428571428571427, "mem_token_accuracy": 0.21428571428571427, "slot_norm_mean": 198.0, "slot_sim_mean": 1.0, "rank": 3}
{"global_idx": 25, "total_positions": 362, "gt_mem_token_count": 8, "pred_mem_token_count": 209, "correct_mem_token_count": 2, "mem_token_precision": 0.009569377990430622, "mem_token_recall": 0.25, "mem_token_accuracy": 0.25, "slot_norm_mean": 199.0, "slot_sim_mean": 1.0, "rank": 3}
{"global_idx": 26, "total_positions": 466, "gt_mem_token_count": 13, "pred_mem_token_count": 187, "correct_mem_token_count": 1, "mem_token_precision": 0.0053475935828877, "mem_token_recall": 0.07692307692307693, "mem_token_accuracy": 0.07692307692307693, "slot_norm_mean": 199.0, "slot_sim_mean": 0.99609375, "rank": 3}
{"global_idx": 27, "total_positions": 712, "gt_mem_token_count": 7, "pred_mem_token_count": 453, "correct_mem_token_count": 3, "mem_token_precision": 0.006622516556291391, "mem_token_recall": 0.42857142857142855, "mem_token_accuracy": 0.42857142857142855, "slot_norm_mean": 197.0, "slot_sim_mean": 0.9921875, "rank": 3}
{"global_idx": 28, "total_positions": 631, "gt_mem_token_count": 17, "pred_mem_token_count": 462, "correct_mem_token_count": 2, "mem_token_precision": 0.004329004329004329, "mem_token_recall": 0.11764705882352941, "mem_token_accuracy": 0.11764705882352941, "slot_norm_mean": 200.0, "slot_sim_mean": 0.9921875, "rank": 3}
{"global_idx": 29, "total_positions": 591, "gt_mem_token_count": 7, "pred_mem_token_count": 316, "correct_mem_token_count": 1, "mem_token_precision": 0.0031645569620253164, "mem_token_recall": 0.14285714285714285, "mem_token_accuracy": 0.14285714285714285, "slot_norm_mean": 200.0, "slot_sim_mean": 0.99609375, "rank": 3}