scrubdata / eval /results /generalization_baseline.json
OpenAI Codex
deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build
16dc556
Raw
History Blame Contribute Delete
2.2 kB
[
{
"system": "grounded heuristic",
"sources": [
"flights",
"rayyan",
"ed2_restaurants"
],
"gen_f1": 0.014530914764969767,
"variant_recall": 0.04734667937718462,
"variant_precision": 0.45019607843137255,
"other_recall": 0.0,
"damage": 0.10044392423085945,
"per_source": [
{
"source": "flights",
"f1": 0.0435927442949093,
"precision": 0.07776617954070981,
"recall": 0.030284552845528456,
"damage": 0.08196721311475409,
"variant_errors": 1049,
"variant_fixed": 149,
"variant_changed": 425,
"variant_good": 149,
"other_errors": 3871,
"other_fixed": 0
},
{
"source": "rayyan",
"f1": 0.0,
"precision": 0.0,
"recall": 0.0,
"damage": 0.1445483485873458,
"variant_errors": 171,
"variant_fixed": 0,
"variant_changed": 67,
"variant_good": 0,
"other_errors": 777,
"other_fixed": 0
},
{
"source": "ed2_restaurants",
"f1": 0.0,
"precision": 0.0,
"recall": 0.0,
"damage": 0.07481621099047844,
"variant_errors": 115,
"variant_fixed": 0,
"variant_changed": 0,
"variant_good": 0,
"other_errors": 353,
"other_fixed": 0
}
]
},
{
"system": "no-op",
"sources": [
"flights",
"rayyan",
"ed2_restaurants"
],
"gen_f1": 0.0,
"variant_recall": 0.0,
"variant_precision": 1.0,
"other_recall": 0.0,
"damage": 0.0,
"per_source": [
{
"source": "flights",
"f1": 0.0,
"precision": 1.0,
"recall": 0.0,
"damage": 0.0,
"variant_errors": 1049,
"variant_fixed": 0,
"variant_changed": 0,
"variant_good": 0,
"other_errors": 3871,
"other_fixed": 0
},
{
"source": "rayyan",
"f1": 0.0,
"precision": 1.0,
"recall": 0.0,
"damage": 0.0,
"variant_errors": 171,
"variant_fixed": 0,
"variant_changed": 0,
"variant_good": 0,
"other_errors": 777,
"other_fixed": 0
},
{
"source": "ed2_restaurants",
"f1": 0.0,
"precision": 1.0,
"recall": 0.0,
"damage": 0.0,
"variant_errors": 115,
"variant_fixed": 0,
"variant_changed": 0,
"variant_good": 0,
"other_errors": 353,
"other_fixed": 0
}
]
}
]