{ "model": "Qwen/Qwen3.5-2B-Base", "n_trials": 3, "epochs": 15, "regularization_ratio": 0.33, "aggregate": { "recall": { "pooled_correct": 61, "pooled_total": 105, "pooled_rate": 0.580952380952381, "per_trial_rates": [ 0.6571428571428571, 0.5428571428571428, 0.5428571428571428 ], "mean": 0.5809523809523809, "stdev": 0.06598288790738582, "ci_95_lower": 0.4853552056582404, "ci_95_upper": 0.670835074528747 }, "general_knowledge": { "pooled_correct": 60, "pooled_total": 60, "pooled_rate": 1.0, "per_trial_rates": [ 1.0, 1.0, 1.0 ], "mean": 1.0, "stdev": 0.0, "ci_95_lower": 0.9398260695220669, "ci_95_upper": 0.9999999999999999 }, "training": { "mean_time_s": 69.6302502155304, "stdev_time_s": 1.185997256195759, "mean_steps": 180, "per_trial_times": [ 68.26203393936157, 70.36512899398804, 70.26358771324158 ] } }, "trials": [ { "trial_id": 1, "n_confirmed_unknown": 35, "n_training_pairs": 52, "training_steps": 180, "training_time_s": 68.26203393936157, "initial_loss": 1.290154, "final_loss": 0.451566, "recall_correct": 23, "recall_total": 35, "recall_rate": 0.6571428571428571, "general_correct": 20, "general_total": 20, "general_rate": 1.0, "category_scores": { "Awards": { "correct": 7, "total": 7 }, "Entertainment": { "correct": 1, "total": 4 }, "Weather/Natural Events": { "correct": 4, "total": 5 }, "Sports": { "correct": 5, "total": 6 }, "Deaths/Obituaries": { "correct": 4, "total": 11 }, "Science": { "correct": 1, "total": 1 }, "Technology/Business": { "correct": 1, "total": 1 } } }, { "trial_id": 2, "n_confirmed_unknown": 35, "n_training_pairs": 52, "training_steps": 180, "training_time_s": 70.36512899398804, "initial_loss": 2.056952, "final_loss": 0.260391, "recall_correct": 19, "recall_total": 35, "recall_rate": 0.5428571428571428, "general_correct": 20, "general_total": 20, "general_rate": 1.0, "category_scores": { "Deaths/Obituaries": { "correct": 0, "total": 11 }, "Awards": { "correct": 6, "total": 7 }, "Weather/Natural Events": { "correct": 4, "total": 5 }, "Technology/Business": { "correct": 1, "total": 1 }, "Entertainment": { "correct": 1, "total": 4 }, "Sports": { "correct": 6, "total": 6 }, "Science": { "correct": 1, "total": 1 } } }, { "trial_id": 3, "n_confirmed_unknown": 35, "n_training_pairs": 52, "training_steps": 180, "training_time_s": 70.26358771324158, "initial_loss": 1.984214, "final_loss": 0.381513, "recall_correct": 19, "recall_total": 35, "recall_rate": 0.5428571428571428, "general_correct": 20, "general_total": 20, "general_rate": 1.0, "category_scores": { "Deaths/Obituaries": { "correct": 2, "total": 11 }, "Awards": { "correct": 5, "total": 7 }, "Technology/Business": { "correct": 0, "total": 1 }, "Weather/Natural Events": { "correct": 4, "total": 5 }, "Entertainment": { "correct": 2, "total": 4 }, "Sports": { "correct": 5, "total": 6 }, "Science": { "correct": 1, "total": 1 } } } ] }