IAMVC committed on
Commit
7d6c131
·
verified ·
1 Parent(s): 4a8114f

Upload models/real_data_training_summary.json with huggingface_hub

Browse files
models/real_data_training_summary.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2025-12-02T17:19:38.011582",
3
+ "total_time_sec": 3143.6702983379364,
4
+ "n_helpers": 13,
5
+ "results": {
6
+ "emotional_intelligence": {
7
+ "domain": "emotional_intelligence",
8
+ "dataset": "dair-ai/emotion",
9
+ "description": "Twitter emotions (anger, fear, joy, love, sadness, surprise)",
10
+ "n_samples": 3000,
11
+ "n_features": 110,
12
+ "n_classes": 6,
13
+ "train_accuracy": 0.435,
14
+ "test_accuracy": 0.35833333333333334,
15
+ "train_test_gap": 0.07666666666666666,
16
+ "cv_mean": 0.36875,
17
+ "cv_std": 0.009592387027684456,
18
+ "train_time_sec": 0.816157341003418
19
+ },
20
+ "decision_making": {
21
+ "domain": "decision_making",
22
+ "dataset": "openai/gsm8k",
23
+ "description": "Grade school math - multi-step reasoning",
24
+ "n_samples": 3000,
25
+ "n_features": 110,
26
+ "n_classes": 5,
27
+ "train_accuracy": 0.8420833333333333,
28
+ "test_accuracy": 0.84,
29
+ "train_test_gap": 0.002083333333333326,
30
+ "cv_mean": 0.8404166666666667,
31
+ "cv_std": 0.001020620726159654,
32
+ "train_time_sec": 0.8619897365570068
33
+ },
34
+ "adaptive_learning": {
35
+ "domain": "adaptive_learning",
36
+ "dataset": "rajpurkar/squad",
37
+ "description": "SQuAD reading comprehension",
38
+ "n_samples": 3000,
39
+ "n_features": 110,
40
+ "n_classes": 4,
41
+ "train_accuracy": 0.6195833333333334,
42
+ "test_accuracy": 0.5983333333333334,
43
+ "train_test_gap": 0.02124999999999999,
44
+ "cv_mean": 0.6083333333333334,
45
+ "cv_std": 0.008436856971381926,
46
+ "train_time_sec": 0.33698034286499023
47
+ },
48
+ "logical_reasoning": {
49
+ "domain": "logical_reasoning",
50
+ "dataset": "lucasmccabe/logiqa",
51
+ "description": "Logical reasoning questions",
52
+ "n_samples": 1000,
53
+ "n_features": 110,
54
+ "n_classes": 5,
55
+ "train_accuracy": 1.0,
56
+ "test_accuracy": 0.19,
57
+ "train_test_gap": 0.81,
58
+ "cv_mean": 0.1775,
59
+ "cv_std": 0.020000000000000007,
60
+ "train_time_sec": 0.4216756820678711
61
+ },
62
+ "memory_formation": {
63
+ "domain": "memory_formation",
64
+ "dataset": "mandarjoshi/trivia_qa",
65
+ "description": "TriviaQA fact recall",
66
+ "n_samples": 3000,
67
+ "n_features": 110,
68
+ "n_classes": 4,
69
+ "train_accuracy": 0.7108333333333333,
70
+ "test_accuracy": 0.7133333333333334,
71
+ "train_test_gap": -0.0025000000000000577,
72
+ "cv_mean": 0.69875,
73
+ "cv_std": 0.015172617880027592,
74
+ "train_time_sec": 0.3318939208984375
75
+ },
76
+ "pattern_recognition": {
77
+ "domain": "pattern_recognition",
78
+ "dataset": "allenai/ai2_arc",
79
+ "description": "ARC science reasoning",
80
+ "n_samples": 1119,
81
+ "n_features": 110,
82
+ "n_classes": 8,
83
+ "train_accuracy": 0.5217877094972067,
84
+ "test_accuracy": 0.26785714285714285,
85
+ "train_test_gap": 0.2539305666400638,
86
+ "cv_mean": 0.23016759776536314,
87
+ "cv_std": 0.0227340669825428,
88
+ "train_time_sec": 0.19740653038024902
89
+ },
90
+ "metacognition": {
91
+ "domain": "metacognition",
92
+ "dataset": "tau/commonsense_qa",
93
+ "description": "CommonsenseQA reasoning",
94
+ "n_samples": 3000,
95
+ "n_features": 110,
96
+ "n_classes": 5,
97
+ "train_accuracy": 0.3595833333333333,
98
+ "test_accuracy": 0.22333333333333333,
99
+ "train_test_gap": 0.13624999999999998,
100
+ "cv_mean": 0.21416666666666667,
101
+ "cv_std": 0.013906932723565526,
102
+ "train_time_sec": 0.23867106437683105
103
+ },
104
+ "self_awareness": {
105
+ "domain": "self_awareness",
106
+ "dataset": "dair-ai/emotion",
107
+ "description": "Self-awareness through emotion recognition",
108
+ "n_samples": 3000,
109
+ "n_features": 110,
110
+ "n_classes": 6,
111
+ "train_accuracy": 0.435,
112
+ "test_accuracy": 0.35833333333333334,
113
+ "train_test_gap": 0.07666666666666666,
114
+ "cv_mean": 0.36875,
115
+ "cv_std": 0.009592387027684456,
116
+ "train_time_sec": 0.24677586555480957
117
+ },
118
+ "social_learning": {
119
+ "domain": "social_learning",
120
+ "dataset": "allenai/social_i_qa",
121
+ "description": "Social intelligence QA",
122
+ "n_samples": 1000,
123
+ "n_features": 110,
124
+ "n_classes": 5,
125
+ "train_accuracy": 1.0,
126
+ "test_accuracy": 0.19,
127
+ "train_test_gap": 0.81,
128
+ "cv_mean": 0.1775,
129
+ "cv_std": 0.020000000000000007,
130
+ "train_time_sec": 0.24502849578857422
131
+ },
132
+ "spatial_reasoning": {
133
+ "domain": "spatial_reasoning",
134
+ "dataset": "ybisk/piqa",
135
+ "description": "Physical intuition QA",
136
+ "n_samples": 1000,
137
+ "n_features": 110,
138
+ "n_classes": 5,
139
+ "train_accuracy": 1.0,
140
+ "test_accuracy": 0.19,
141
+ "train_test_gap": 0.81,
142
+ "cv_mean": 0.1775,
143
+ "cv_std": 0.020000000000000007,
144
+ "train_time_sec": 0.24609684944152832
145
+ },
146
+ "language_processing": {
147
+ "domain": "language_processing",
148
+ "dataset": "nyu-mll/glue",
149
+ "description": "CoLA linguistic acceptability",
150
+ "n_samples": 3000,
151
+ "n_features": 110,
152
+ "n_classes": 2,
153
+ "train_accuracy": 0.7208333333333333,
154
+ "test_accuracy": 0.71,
155
+ "train_test_gap": 0.010833333333333361,
156
+ "cv_mean": 0.7108333333333332,
157
+ "cv_std": 0.004639803635691711,
158
+ "train_time_sec": 0.23175382614135742
159
+ },
160
+ "creative_thinking": {
161
+ "domain": "creative_thinking",
162
+ "dataset": "Abirate/english_quotes",
163
+ "description": "Creative quotes and writing",
164
+ "n_samples": 2508,
165
+ "n_features": 110,
166
+ "n_classes": 3,
167
+ "train_accuracy": 0.8035892323030908,
168
+ "test_accuracy": 0.7051792828685259,
169
+ "train_test_gap": 0.09840994943456483,
170
+ "cv_mean": 0.7437662063746108,
171
+ "cv_std": 0.00565980154039184,
172
+ "train_time_sec": 0.2100505828857422
173
+ },
174
+ "abstract_thinking": {
175
+ "domain": "abstract_thinking",
176
+ "dataset": "lighteval/MATH",
177
+ "description": "Abstract mathematical reasoning",
178
+ "n_samples": 1000,
179
+ "n_features": 110,
180
+ "n_classes": 5,
181
+ "train_accuracy": 1.0,
182
+ "test_accuracy": 0.19,
183
+ "train_test_gap": 0.81,
184
+ "cv_mean": 0.1775,
185
+ "cv_std": 0.020000000000000007,
186
+ "train_time_sec": 0.19356322288513184
187
+ }
188
+ }
189
+ }