| { | |
| "timestamp": "2025-12-02T17:19:38.011582", | |
| "total_time_sec": 3143.6702983379364, | |
| "n_helpers": 13, | |
| "results": { | |
| "emotional_intelligence": { | |
| "domain": "emotional_intelligence", | |
| "dataset": "dair-ai/emotion", | |
| "description": "Twitter emotions (anger, fear, joy, love, sadness, surprise)", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 6, | |
| "train_accuracy": 0.435, | |
| "test_accuracy": 0.35833333333333334, | |
| "train_test_gap": 0.07666666666666666, | |
| "cv_mean": 0.36875, | |
| "cv_std": 0.009592387027684456, | |
| "train_time_sec": 0.816157341003418 | |
| }, | |
| "decision_making": { | |
| "domain": "decision_making", | |
| "dataset": "openai/gsm8k", | |
| "description": "Grade school math - multi-step reasoning", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 5, | |
| "train_accuracy": 0.8420833333333333, | |
| "test_accuracy": 0.84, | |
| "train_test_gap": 0.002083333333333326, | |
| "cv_mean": 0.8404166666666667, | |
| "cv_std": 0.001020620726159654, | |
| "train_time_sec": 0.8619897365570068 | |
| }, | |
| "adaptive_learning": { | |
| "domain": "adaptive_learning", | |
| "dataset": "rajpurkar/squad", | |
| "description": "SQuAD reading comprehension", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 4, | |
| "train_accuracy": 0.6195833333333334, | |
| "test_accuracy": 0.5983333333333334, | |
| "train_test_gap": 0.02124999999999999, | |
| "cv_mean": 0.6083333333333334, | |
| "cv_std": 0.008436856971381926, | |
| "train_time_sec": 0.33698034286499023 | |
| }, | |
| "logical_reasoning": { | |
| "domain": "logical_reasoning", | |
| "dataset": "lucasmccabe/logiqa", | |
| "description": "Logical reasoning questions", | |
| "n_samples": 1000, | |
| "n_features": 110, | |
| "n_classes": 5, | |
| "train_accuracy": 1.0, | |
| "test_accuracy": 0.19, | |
| "train_test_gap": 0.81, | |
| "cv_mean": 0.1775, | |
| "cv_std": 0.020000000000000007, | |
| "train_time_sec": 0.4216756820678711 | |
| }, | |
| "memory_formation": { | |
| "domain": "memory_formation", | |
| "dataset": "mandarjoshi/trivia_qa", | |
| "description": "TriviaQA fact recall", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 4, | |
| "train_accuracy": 0.7108333333333333, | |
| "test_accuracy": 0.7133333333333334, | |
| "train_test_gap": -0.0025000000000000577, | |
| "cv_mean": 0.69875, | |
| "cv_std": 0.015172617880027592, | |
| "train_time_sec": 0.3318939208984375 | |
| }, | |
| "pattern_recognition": { | |
| "domain": "pattern_recognition", | |
| "dataset": "allenai/ai2_arc", | |
| "description": "ARC science reasoning", | |
| "n_samples": 1119, | |
| "n_features": 110, | |
| "n_classes": 8, | |
| "train_accuracy": 0.5217877094972067, | |
| "test_accuracy": 0.26785714285714285, | |
| "train_test_gap": 0.2539305666400638, | |
| "cv_mean": 0.23016759776536314, | |
| "cv_std": 0.0227340669825428, | |
| "train_time_sec": 0.19740653038024902 | |
| }, | |
| "metacognition": { | |
| "domain": "metacognition", | |
| "dataset": "tau/commonsense_qa", | |
| "description": "CommonsenseQA reasoning", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 5, | |
| "train_accuracy": 0.3595833333333333, | |
| "test_accuracy": 0.22333333333333333, | |
| "train_test_gap": 0.13624999999999998, | |
| "cv_mean": 0.21416666666666667, | |
| "cv_std": 0.013906932723565526, | |
| "train_time_sec": 0.23867106437683105 | |
| }, | |
| "self_awareness": { | |
| "domain": "self_awareness", | |
| "dataset": "dair-ai/emotion", | |
| "description": "Self-awareness through emotion recognition", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 6, | |
| "train_accuracy": 0.435, | |
| "test_accuracy": 0.35833333333333334, | |
| "train_test_gap": 0.07666666666666666, | |
| "cv_mean": 0.36875, | |
| "cv_std": 0.009592387027684456, | |
| "train_time_sec": 0.24677586555480957 | |
| }, | |
| "social_learning": { | |
| "domain": "social_learning", | |
| "dataset": "allenai/social_i_qa", | |
| "description": "Social intelligence QA", | |
| "n_samples": 1000, | |
| "n_features": 110, | |
| "n_classes": 5, | |
| "train_accuracy": 1.0, | |
| "test_accuracy": 0.19, | |
| "train_test_gap": 0.81, | |
| "cv_mean": 0.1775, | |
| "cv_std": 0.020000000000000007, | |
| "train_time_sec": 0.24502849578857422 | |
| }, | |
| "spatial_reasoning": { | |
| "domain": "spatial_reasoning", | |
| "dataset": "ybisk/piqa", | |
| "description": "Physical intuition QA", | |
| "n_samples": 1000, | |
| "n_features": 110, | |
| "n_classes": 5, | |
| "train_accuracy": 1.0, | |
| "test_accuracy": 0.19, | |
| "train_test_gap": 0.81, | |
| "cv_mean": 0.1775, | |
| "cv_std": 0.020000000000000007, | |
| "train_time_sec": 0.24609684944152832 | |
| }, | |
| "language_processing": { | |
| "domain": "language_processing", | |
| "dataset": "nyu-mll/glue", | |
| "description": "CoLA linguistic acceptability", | |
| "n_samples": 3000, | |
| "n_features": 110, | |
| "n_classes": 2, | |
| "train_accuracy": 0.7208333333333333, | |
| "test_accuracy": 0.71, | |
| "train_test_gap": 0.010833333333333361, | |
| "cv_mean": 0.7108333333333332, | |
| "cv_std": 0.004639803635691711, | |
| "train_time_sec": 0.23175382614135742 | |
| }, | |
| "creative_thinking": { | |
| "domain": "creative_thinking", | |
| "dataset": "Abirate/english_quotes", | |
| "description": "Creative quotes and writing", | |
| "n_samples": 2508, | |
| "n_features": 110, | |
| "n_classes": 3, | |
| "train_accuracy": 0.8035892323030908, | |
| "test_accuracy": 0.7051792828685259, | |
| "train_test_gap": 0.09840994943456483, | |
| "cv_mean": 0.7437662063746108, | |
| "cv_std": 0.00565980154039184, | |
| "train_time_sec": 0.2100505828857422 | |
| }, | |
| "abstract_thinking": { | |
| "domain": "abstract_thinking", | |
| "dataset": "lighteval/MATH", | |
| "description": "Abstract mathematical reasoning", | |
| "n_samples": 1000, | |
| "n_features": 110, | |
| "n_classes": 5, | |
| "train_accuracy": 1.0, | |
| "test_accuracy": 0.19, | |
| "train_test_gap": 0.81, | |
| "cv_mean": 0.1775, | |
| "cv_std": 0.020000000000000007, | |
| "train_time_sec": 0.19356322288513184 | |
| } | |
| } | |
| } |