demo-deploy / checkpoints /final_results.json
ravindranv's picture
Upload folder using huggingface_hub
5dabb85 verified
{
"Equal Fusion": {
"mean_anls": 0.5136354004166895,
"mean_f1": 0.5237649141912695,
"mean_em": 0.45
},
"Proposed Fixed": {
"mean_anls": 0.5451455242797546,
"mean_f1": 0.5509710563188824,
"mean_em": 0.48
},
"Text-Only": {
"mean_anls": 0.7642224473566777,
"mean_f1": 0.7935144736858293,
"mean_em": 0.7
},
"LLMLingua-style": {
"mean_anls": 0.1892322383498854,
"mean_f1": 0.210276221599751,
"mean_em": 0.17
},
"Selective Context-style": {
"mean_anls": 0.3046072383498854,
"mean_f1": 0.3247206660441955,
"mean_em": 0.27
},
"CAFP (paper checkpoint)": {
"mean_anls": 0.7531033997376301,
"mean_f1": 0.7755144736858292,
"mean_em": 0.68
},
"CAFP-Hard Oracle": {
"mean_anls": 0.7542224473566776,
"mean_f1": 0.7721811403524959,
"mean_em": 0.69
},
"CAFP-Soft Oracle": {
"mean_anls": 0.5610757568378942,
"mean_f1": 0.5647372900851162,
"mean_em": 0.5
},
"CAFP+REINFORCE": {
"mean_anls": 0.7084701316595168,
"rl_curve": [
0.5759529617062865,
0.5927631990609283,
0.6183613555884967,
0.655312951977503,
0.6726479882862236,
0.6637648504900422,
0.6849515365259237,
0.6891216978125647,
0.7084701316595168
]
}
}