{ "timestamp": "2025-12-02T17:19:38.011582", "total_time_sec": 3143.6702983379364, "n_helpers": 13, "results": { "emotional_intelligence": { "domain": "emotional_intelligence", "dataset": "dair-ai/emotion", "description": "Twitter emotions (anger, fear, joy, love, sadness, surprise)", "n_samples": 3000, "n_features": 110, "n_classes": 6, "train_accuracy": 0.435, "test_accuracy": 0.35833333333333334, "train_test_gap": 0.07666666666666666, "cv_mean": 0.36875, "cv_std": 0.009592387027684456, "train_time_sec": 0.816157341003418 }, "decision_making": { "domain": "decision_making", "dataset": "openai/gsm8k", "description": "Grade school math - multi-step reasoning", "n_samples": 3000, "n_features": 110, "n_classes": 5, "train_accuracy": 0.8420833333333333, "test_accuracy": 0.84, "train_test_gap": 0.002083333333333326, "cv_mean": 0.8404166666666667, "cv_std": 0.001020620726159654, "train_time_sec": 0.8619897365570068 }, "adaptive_learning": { "domain": "adaptive_learning", "dataset": "rajpurkar/squad", "description": "SQuAD reading comprehension", "n_samples": 3000, "n_features": 110, "n_classes": 4, "train_accuracy": 0.6195833333333334, "test_accuracy": 0.5983333333333334, "train_test_gap": 0.02124999999999999, "cv_mean": 0.6083333333333334, "cv_std": 0.008436856971381926, "train_time_sec": 0.33698034286499023 }, "logical_reasoning": { "domain": "logical_reasoning", "dataset": "lucasmccabe/logiqa", "description": "Logical reasoning questions", "n_samples": 1000, "n_features": 110, "n_classes": 5, "train_accuracy": 1.0, "test_accuracy": 0.19, "train_test_gap": 0.81, "cv_mean": 0.1775, "cv_std": 0.020000000000000007, "train_time_sec": 0.4216756820678711 }, "memory_formation": { "domain": "memory_formation", "dataset": "mandarjoshi/trivia_qa", "description": "TriviaQA fact recall", "n_samples": 3000, "n_features": 110, "n_classes": 4, "train_accuracy": 0.7108333333333333, "test_accuracy": 0.7133333333333334, "train_test_gap": -0.0025000000000000577, "cv_mean": 0.69875, "cv_std": 0.015172617880027592, "train_time_sec": 0.3318939208984375 }, "pattern_recognition": { "domain": "pattern_recognition", "dataset": "allenai/ai2_arc", "description": "ARC science reasoning", "n_samples": 1119, "n_features": 110, "n_classes": 8, "train_accuracy": 0.5217877094972067, "test_accuracy": 0.26785714285714285, "train_test_gap": 0.2539305666400638, "cv_mean": 0.23016759776536314, "cv_std": 0.0227340669825428, "train_time_sec": 0.19740653038024902 }, "metacognition": { "domain": "metacognition", "dataset": "tau/commonsense_qa", "description": "CommonsenseQA reasoning", "n_samples": 3000, "n_features": 110, "n_classes": 5, "train_accuracy": 0.3595833333333333, "test_accuracy": 0.22333333333333333, "train_test_gap": 0.13624999999999998, "cv_mean": 0.21416666666666667, "cv_std": 0.013906932723565526, "train_time_sec": 0.23867106437683105 }, "self_awareness": { "domain": "self_awareness", "dataset": "dair-ai/emotion", "description": "Self-awareness through emotion recognition", "n_samples": 3000, "n_features": 110, "n_classes": 6, "train_accuracy": 0.435, "test_accuracy": 0.35833333333333334, "train_test_gap": 0.07666666666666666, "cv_mean": 0.36875, "cv_std": 0.009592387027684456, "train_time_sec": 0.24677586555480957 }, "social_learning": { "domain": "social_learning", "dataset": "allenai/social_i_qa", "description": "Social intelligence QA", "n_samples": 1000, "n_features": 110, "n_classes": 5, "train_accuracy": 1.0, "test_accuracy": 0.19, "train_test_gap": 0.81, "cv_mean": 0.1775, "cv_std": 0.020000000000000007, "train_time_sec": 0.24502849578857422 }, "spatial_reasoning": { "domain": "spatial_reasoning", "dataset": "ybisk/piqa", "description": "Physical intuition QA", "n_samples": 1000, "n_features": 110, "n_classes": 5, "train_accuracy": 1.0, "test_accuracy": 0.19, "train_test_gap": 0.81, "cv_mean": 0.1775, "cv_std": 0.020000000000000007, "train_time_sec": 0.24609684944152832 }, "language_processing": { "domain": "language_processing", "dataset": "nyu-mll/glue", "description": "CoLA linguistic acceptability", "n_samples": 3000, "n_features": 110, "n_classes": 2, "train_accuracy": 0.7208333333333333, "test_accuracy": 0.71, "train_test_gap": 0.010833333333333361, "cv_mean": 0.7108333333333332, "cv_std": 0.004639803635691711, "train_time_sec": 0.23175382614135742 }, "creative_thinking": { "domain": "creative_thinking", "dataset": "Abirate/english_quotes", "description": "Creative quotes and writing", "n_samples": 2508, "n_features": 110, "n_classes": 3, "train_accuracy": 0.8035892323030908, "test_accuracy": 0.7051792828685259, "train_test_gap": 0.09840994943456483, "cv_mean": 0.7437662063746108, "cv_std": 0.00565980154039184, "train_time_sec": 0.2100505828857422 }, "abstract_thinking": { "domain": "abstract_thinking", "dataset": "lighteval/MATH", "description": "Abstract mathematical reasoning", "n_samples": 1000, "n_features": 110, "n_classes": 5, "train_accuracy": 1.0, "test_accuracy": 0.19, "train_test_gap": 0.81, "cv_mean": 0.1775, "cv_std": 0.020000000000000007, "train_time_sec": 0.19356322288513184 } } }