scrubdata / eval /results /generalization_roadmap.json
OpenAI Codex
deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build
16dc556
Raw
History Blame Contribute Delete
1.26 kB
[
{
"system": "roadmap+hardening v2",
"sources": [
"flights",
"rayyan",
"ed2_restaurants"
],
"gen_f1": 0.05810120393130177,
"variant_recall": 0.10838756027134193,
"variant_precision": 0.3999487967229903,
"other_recall": 0.027985877895461985,
"damage": 0.03617515897817843,
"per_source": [
{
"source": "flights",
"f1": 0.16395283548568218,
"precision": 0.26497277676951,
"recall": 0.11869918699186992,
"damage": 0.08393101092896176,
"variant_errors": 1049,
"variant_fixed": 259,
"variant_changed": 465,
"variant_good": 259,
"other_errors": 3871,
"other_fixed": 325
},
{
"source": "rayyan",
"f1": 0.0,
"precision": 0.0,
"recall": 0.0,
"damage": 0.021687226422602467,
"variant_errors": 171,
"variant_fixed": 0,
"variant_changed": 4,
"variant_good": 0,
"other_errors": 777,
"other_fixed": 0
},
{
"source": "ed2_restaurants",
"f1": 0.010350776308223116,
"precision": 0.007081038552321007,
"recall": 0.019230769230769232,
"damage": 0.0029072395829710875,
"variant_errors": 115,
"variant_fixed": 9,
"variant_changed": 14,
"variant_good": 9,
"other_errors": 353,
"other_fixed": 0
}
]
}
]