File size: 4,156 Bytes
208eb59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
{
  "model": "Qwen/Qwen3.5-2B-Base",
  "n_trials": 3,
  "epochs": 15,
  "regularization_ratio": 0.33,
  "aggregate": {
    "recall": {
      "pooled_correct": 61,
      "pooled_total": 105,
      "pooled_rate": 0.580952380952381,
      "per_trial_rates": [
        0.6571428571428571,
        0.5428571428571428,
        0.5428571428571428
      ],
      "mean": 0.5809523809523809,
      "stdev": 0.06598288790738582,
      "ci_95_lower": 0.4853552056582404,
      "ci_95_upper": 0.670835074528747
    },
    "general_knowledge": {
      "pooled_correct": 60,
      "pooled_total": 60,
      "pooled_rate": 1.0,
      "per_trial_rates": [
        1.0,
        1.0,
        1.0
      ],
      "mean": 1.0,
      "stdev": 0.0,
      "ci_95_lower": 0.9398260695220669,
      "ci_95_upper": 0.9999999999999999
    },
    "training": {
      "mean_time_s": 69.6302502155304,
      "stdev_time_s": 1.185997256195759,
      "mean_steps": 180,
      "per_trial_times": [
        68.26203393936157,
        70.36512899398804,
        70.26358771324158
      ]
    }
  },
  "trials": [
    {
      "trial_id": 1,
      "n_confirmed_unknown": 35,
      "n_training_pairs": 52,
      "training_steps": 180,
      "training_time_s": 68.26203393936157,
      "initial_loss": 1.290154,
      "final_loss": 0.451566,
      "recall_correct": 23,
      "recall_total": 35,
      "recall_rate": 0.6571428571428571,
      "general_correct": 20,
      "general_total": 20,
      "general_rate": 1.0,
      "category_scores": {
        "Awards": {
          "correct": 7,
          "total": 7
        },
        "Entertainment": {
          "correct": 1,
          "total": 4
        },
        "Weather/Natural Events": {
          "correct": 4,
          "total": 5
        },
        "Sports": {
          "correct": 5,
          "total": 6
        },
        "Deaths/Obituaries": {
          "correct": 4,
          "total": 11
        },
        "Science": {
          "correct": 1,
          "total": 1
        },
        "Technology/Business": {
          "correct": 1,
          "total": 1
        }
      }
    },
    {
      "trial_id": 2,
      "n_confirmed_unknown": 35,
      "n_training_pairs": 52,
      "training_steps": 180,
      "training_time_s": 70.36512899398804,
      "initial_loss": 2.056952,
      "final_loss": 0.260391,
      "recall_correct": 19,
      "recall_total": 35,
      "recall_rate": 0.5428571428571428,
      "general_correct": 20,
      "general_total": 20,
      "general_rate": 1.0,
      "category_scores": {
        "Deaths/Obituaries": {
          "correct": 0,
          "total": 11
        },
        "Awards": {
          "correct": 6,
          "total": 7
        },
        "Weather/Natural Events": {
          "correct": 4,
          "total": 5
        },
        "Technology/Business": {
          "correct": 1,
          "total": 1
        },
        "Entertainment": {
          "correct": 1,
          "total": 4
        },
        "Sports": {
          "correct": 6,
          "total": 6
        },
        "Science": {
          "correct": 1,
          "total": 1
        }
      }
    },
    {
      "trial_id": 3,
      "n_confirmed_unknown": 35,
      "n_training_pairs": 52,
      "training_steps": 180,
      "training_time_s": 70.26358771324158,
      "initial_loss": 1.984214,
      "final_loss": 0.381513,
      "recall_correct": 19,
      "recall_total": 35,
      "recall_rate": 0.5428571428571428,
      "general_correct": 20,
      "general_total": 20,
      "general_rate": 1.0,
      "category_scores": {
        "Deaths/Obituaries": {
          "correct": 2,
          "total": 11
        },
        "Awards": {
          "correct": 5,
          "total": 7
        },
        "Technology/Business": {
          "correct": 0,
          "total": 1
        },
        "Weather/Natural Events": {
          "correct": 4,
          "total": 5
        },
        "Entertainment": {
          "correct": 2,
          "total": 4
        },
        "Sports": {
          "correct": 5,
          "total": 6
        },
        "Science": {
          "correct": 1,
          "total": 1
        }
      }
    }
  ]
}