davanstrien's picture
download
raw
21.8 kB
{
"summary": {
"model": "davanstrien/qwen35-4b-iconclass-grpo-v3",
"dataset": "davanstrien/iconclass-vlm-sft",
"split": "test",
"num_evaluated": 40,
"num_skipped": 0,
"elapsed_seconds": 275.0,
"exact_match_accuracy": 0.05,
"hierarchical_f1": 0.5763,
"code_precision": 0.2812,
"code_recall": 0.2858,
"code_f1": 0.2835,
"valid_json_pct": 1.0,
"valid_code_pct": 1.0
},
"results": [
{
"index": 0,
"gt_codes": [
"92B361",
"12B13",
"12EE13",
"25F33(EAGLE)",
"31E3"
],
"predicted_codes": [
"73",
"73D721",
"73D7211",
"73D7212"
],
"generated_text": "{\"iconclass-codes\": [\"73\", \"73D721\", \"73D7211\", \"73D7212\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.0,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 1,
"gt_codes": [
"48C27(+321)",
"92F2"
],
"predicted_codes": [
"48C24"
],
"generated_text": "{\"iconclass-codes\": [\"48C24\"]}",
"valid_json": true,
"valid_code_count": 1,
"total_predicted": 1,
"exact_match": false,
"hierarchical_f1": 0.5333333333333333,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 2,
"gt_codes": [
"86(MONSTRATUR IN UNDIS)",
"22C5:24A6",
"46C24",
"25H214",
"25I1"
],
"predicted_codes": [
"25H23",
"25I12",
"25I1",
"25H11",
"86(AMOR OMNIA VINCIT)"
],
"generated_text": "{\"iconclass-codes\": [\"25H23\", \"25I12\", \"25I1\", \"25H11\", \"86(AMOR OMNIA VINCIT)\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.6855397727272728,
"code_precision": 0.2,
"code_recall": 0.2
},
{
"index": 3,
"gt_codes": [
"11I613",
"11I1"
],
"predicted_codes": [
"44B113",
"61B2(ROMAN EMPIRE)"
],
"generated_text": "{\"iconclass-codes\": [\"44B113\", \"61B2(ROMAN EMPIRE)\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.0,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 4,
"gt_codes": [
"25H11",
"25H213",
"25I",
"46C112"
],
"predicted_codes": [
"25H11",
"25H213",
"46C112",
"46C131",
"46C13141"
],
"generated_text": "{\"iconclass-codes\": [\"25H11\", \"25H213\", \"46C112\", \"46C131\", \"46C13141\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.8772225384177574,
"code_precision": 0.6,
"code_recall": 0.75
},
{
"index": 5,
"gt_codes": [
"25G31",
"31D15",
"25H11",
"46C13142",
"25H"
],
"predicted_codes": [
"25H11",
"25H213",
"25I",
"25I2",
"46C131"
],
"generated_text": "{\"iconclass-codes\": [\"25H11\", \"25H213\", \"25I\", \"25I2\", \"46C131\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.6101424489795918,
"code_precision": 0.2,
"code_recall": 0.2
},
{
"index": 6,
"gt_codes": [
"25G41(CARNATION)",
"31A2421",
"31D15:61BB",
"41D221(SLUIERKAP)",
"61B(+53)"
],
"predicted_codes": [
"31A2421",
"31D15:61BB",
"61B(+53)",
"61B:31A2212(+1)"
],
"generated_text": "{\"iconclass-codes\": [\"31A2421\", \"31D15:61BB\", \"61B(+53)\", \"61B:31A2212(+1)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.7175879396984924,
"code_precision": 0.75,
"code_recall": 0.6
},
{
"index": 7,
"gt_codes": [
"61B(+51)",
"41D221(TURBAN)",
"73",
"32B321",
"61B:31D14"
],
"predicted_codes": [
"31A2211",
"31A2421",
"61B(+52)",
"61B:31A2212(+1)",
"61B:31D14"
],
"generated_text": "{\"iconclass-codes\": [\"31A2211\", \"31A2421\", \"61B(+52)\", \"61B:31A2212(+1)\", \"61B:31D14\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.5254399269072636,
"code_precision": 0.2,
"code_recall": 0.2
},
{
"index": 8,
"gt_codes": [
"49L171",
"49M413",
"49M3",
"49L12(V)",
"49L12(E)"
],
"predicted_codes": [
"49L171",
"49M413",
"49M3",
"49L12(V)"
],
"generated_text": "{\"iconclass-codes\": [\"49L171\", \"49M413\", \"49M3\", \"49L12(V)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 1.0,
"code_precision": 1.0,
"code_recall": 0.8
},
{
"index": 9,
"gt_codes": [
"31A231",
"31A2421",
"61B(+53)"
],
"predicted_codes": [
"31A231",
"31A2421",
"61B(+54)",
"61B:31A2212(+1)"
],
"generated_text": "{\"iconclass-codes\": [\"31A231\", \"31A2421\", \"61B(+54)\", \"61B:31A2212(+1)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.9431968295904887,
"code_precision": 0.5,
"code_recall": 0.6666666666666666
},
{
"index": 10,
"gt_codes": [
"41C5",
"44B113",
"44B1511"
],
"predicted_codes": [
"11P3113",
"44B113",
"44B192",
"44B193",
"44B194"
],
"generated_text": "{\"iconclass-codes\": [\"11P3113\", \"44B113\", \"44B192\", \"44B193\", \"44B194\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.6646616541353384,
"code_precision": 0.2,
"code_recall": 0.3333333333333333
},
{
"index": 11,
"gt_codes": [
"25F711"
],
"predicted_codes": [
"25F711"
],
"generated_text": "{\"iconclass-codes\": [\"25F711\"]}",
"valid_json": true,
"valid_code_count": 1,
"total_predicted": 1,
"exact_match": true,
"hierarchical_f1": 1.0,
"code_precision": 1.0,
"code_recall": 1.0
},
{
"index": 12,
"gt_codes": [
"11Q712",
"25I22",
"34B11"
],
"predicted_codes": [
"11Q712",
"25I241"
],
"generated_text": "{\"iconclass-codes\": [\"11Q712\", \"25I241\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.704,
"code_precision": 0.5,
"code_recall": 0.3333333333333333
},
{
"index": 13,
"gt_codes": [
"54A44(+4)",
"54D3(+4)",
"51MM11(+4)",
"46C24111",
"25F44(+5246)"
],
"predicted_codes": [
"25F44(+51)",
"25F44(TURTLE)(+512)",
"86(QUOD NEMO VITAT)",
"56F2(+4)",
"86(QUOD NEMO VITAT, NEMO VITAT)"
],
"generated_text": "{\"iconclass-codes\": [\"25F44(+51)\", \"25F44(TURTLE)(+512)\", \"86(QUOD NEMO VITAT)\", \"56F2(+4)\", \"86(QUOD NEMO VITAT, NEMO VITAT)\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.3678306878306879,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 14,
"gt_codes": [
"61B:31A2212(+1)",
"61B:31D14",
"31A2518",
"41D223(JERKIN)",
"31A231"
],
"predicted_codes": [
"31A231",
"31A2422",
"61B(+53)",
"61B:31A2212(+1)",
"61B:31D14"
],
"generated_text": "{\"iconclass-codes\": [\"31A231\", \"31A2422\", \"61B(+53)\", \"61B:31A2212(+1)\", \"61B:31D14\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.8130232558139536,
"code_precision": 0.6,
"code_recall": 0.6
},
{
"index": 15,
"gt_codes": [
"94C133"
],
"predicted_codes": [
"98B(HERACLIDES)68"
],
"generated_text": "{\"iconclass-codes\": [\"98B(HERACLIDES)68\"]}",
"valid_json": true,
"valid_code_count": 1,
"total_predicted": 1,
"exact_match": false,
"hierarchical_f1": 0.19,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 16,
"gt_codes": [
"31A2422",
"61B(+52)",
"61B:31D14"
],
"predicted_codes": [
"11H(JOHN)"
],
"generated_text": "{\"iconclass-codes\": [\"11H(JOHN)\"]}",
"valid_json": true,
"valid_code_count": 1,
"total_predicted": 1,
"exact_match": false,
"hierarchical_f1": 0.0,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 17,
"gt_codes": [
"31A2422",
"45B",
"61B(+52)",
"61B:31D14"
],
"predicted_codes": [
"31A2422",
"45B",
"61B(+52)",
"61B:31A2212(+1)",
"61B:31D14"
],
"generated_text": "{\"iconclass-codes\": [\"31A2422\", \"45B\", \"61B(+52)\", \"61B:31A2212(+1)\", \"61B:31D14\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.990154001514769,
"code_precision": 0.8,
"code_recall": 1.0
},
{
"index": 18,
"gt_codes": [
"46C13141(+763)"
],
"predicted_codes": [
"46C1314"
],
"generated_text": "{\"iconclass-codes\": [\"46C1314\"]}",
"valid_json": true,
"valid_code_count": 1,
"total_predicted": 1,
"exact_match": false,
"hierarchical_f1": 0.9500000000000001,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 19,
"gt_codes": [
"31D15",
"49M32"
],
"predicted_codes": [
"31A2421",
"31D15:61BB",
"61B(+53)",
"61B:31A2212(+1)"
],
"generated_text": "{\"iconclass-codes\": [\"31A2421\", \"31D15:61BB\", \"61B(+53)\", \"61B:31A2212(+1)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.3585483870967741,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 20,
"gt_codes": [
"48C1612",
"48C2",
"11Q712",
"25H155",
"25I13"
],
"predicted_codes": [
"25G3",
"25G41(ROSE)",
"25H",
"25I",
"41A646"
],
"generated_text": "{\"iconclass-codes\": [\"25G3\", \"25G41(ROSE)\", \"25H\", \"25I\", \"41A646\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.3534104683195592,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 21,
"gt_codes": [
"31D15",
"41A1",
"31D112",
"25H15",
"25G3"
],
"predicted_codes": [
"25G3",
"25H15",
"25I",
"46C11"
],
"generated_text": "{\"iconclass-codes\": [\"25G3\", \"25H15\", \"25I\", \"46C11\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.5208977325312355,
"code_precision": 0.5,
"code_recall": 0.4
},
{
"index": 22,
"gt_codes": [
"41D221"
],
"predicted_codes": [
"44B112",
"61B2(OSMAN II, Sultan of the Ottoman Empire)1(+52)"
],
"generated_text": "{\"iconclass-codes\": [\"44B112\", \"61B2(OSMAN II, Sultan of the Ottoman Empire)1(+52)\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.13333333333333333,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 23,
"gt_codes": [
"73A6"
],
"predicted_codes": [
"73B641",
"73B6411",
"25H11",
"41A3211",
"41A33"
],
"generated_text": "{\"iconclass-codes\": [\"73B641\", \"73B6411\", \"25H11\", \"41A3211\", \"41A33\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.21714285714285714,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 24,
"gt_codes": [
"25F24(STAG)",
"25B3",
"48A9876",
"25F43",
"12Q21"
],
"predicted_codes": [
"25F23(LION)",
"48A9876",
"48A9875",
"48A9878",
"48C161"
],
"generated_text": "{\"iconclass-codes\": [\"25F23(LION)\", \"48A9876\", \"48A9875\", \"48A9878\", \"48C161\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.6613644314868805,
"code_precision": 0.2,
"code_recall": 0.2
},
{
"index": 25,
"gt_codes": [
"73C8212",
"73C8214",
"73C8215"
],
"predicted_codes": [
"73G326",
"73G3261"
],
"generated_text": "{\"iconclass-codes\": [\"73G326\", \"73G3261\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.39493670886075954,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 26,
"gt_codes": [
"73A11"
],
"predicted_codes": [
"73C721",
"73C7211"
],
"generated_text": "{\"iconclass-codes\": [\"73C721\", \"73C7211\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.38,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 27,
"gt_codes": [
"49L171",
"49M413",
"49M3",
"49L12(S)"
],
"predicted_codes": [
"49L171",
"49M413",
"49M3",
"49L12(S)"
],
"generated_text": "{\"iconclass-codes\": [\"49L171\", \"49M413\", \"49M3\", \"49L12(S)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": true,
"hierarchical_f1": 1.0,
"code_precision": 1.0,
"code_recall": 1.0
},
{
"index": 28,
"gt_codes": [
"41C741",
"43B31"
],
"predicted_codes": [
"41A33",
"41C12",
"41C741",
"41C741(+1)",
"41C741(+1)"
],
"generated_text": "{\"iconclass-codes\": [\"41A33\", \"41C12\", \"41C741\", \"41C741(+1)\", \"41C741(+1)\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.6820143884892086,
"code_precision": 0.25,
"code_recall": 0.5
},
{
"index": 29,
"gt_codes": [
"46C21",
"46C223"
],
"predicted_codes": [
"25I",
"46C24"
],
"generated_text": "{\"iconclass-codes\": [\"25I\", \"46C24\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.5288135593220339,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 30,
"gt_codes": [
"49E391"
],
"predicted_codes": [
"41C326",
"41C741",
"41C741(+1)"
],
"generated_text": "{\"iconclass-codes\": [\"41C326\", \"41C741\", \"41C741(+1)\"]}",
"valid_json": true,
"valid_code_count": 3,
"total_predicted": 3,
"exact_match": false,
"hierarchical_f1": 0.20000000000000004,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 31,
"gt_codes": [
"31A231",
"31D15",
"41D2321",
"61B(+55)"
],
"predicted_codes": [
"31A231",
"31D15",
"31D15:61BB",
"61B(+55)"
],
"generated_text": "{\"iconclass-codes\": [\"31A231\", \"31D15\", \"31D15:61BB\", \"61B(+55)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.8435448577680524,
"code_precision": 0.75,
"code_recall": 0.75
},
{
"index": 32,
"gt_codes": [
"73D58",
"73D641"
],
"predicted_codes": [
"73D641",
"73D643"
],
"generated_text": "{\"iconclass-codes\": [\"73D641\", \"73D643\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.8596541786743516,
"code_precision": 0.5,
"code_recall": 0.5
},
{
"index": 33,
"gt_codes": [
"33B3",
"41A221",
"41A722",
"56DD1"
],
"predicted_codes": [
"31D112",
"31D15",
"41A2",
"41A721",
"41A761"
],
"generated_text": "{\"iconclass-codes\": [\"31D112\", \"31D15\", \"41A2\", \"41A721\", \"41A761\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.5098495875788452,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 34,
"gt_codes": [
"25A3",
"45C41",
"45K21"
],
"predicted_codes": [
"25A3",
"45K"
],
"generated_text": "{\"iconclass-codes\": [\"25A3\", \"45K\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.711149825783972,
"code_precision": 0.5,
"code_recall": 0.3333333333333333
},
{
"index": 35,
"gt_codes": [
"73C8172",
"73C8171"
],
"predicted_codes": [
"73C721",
"73C7212"
],
"generated_text": "{\"iconclass-codes\": [\"73C721\", \"73C7212\"]}",
"valid_json": true,
"valid_code_count": 2,
"total_predicted": 2,
"exact_match": false,
"hierarchical_f1": 0.5924050632911392,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 36,
"gt_codes": [
"25F23(PANTHER)",
"31A2323",
"31A2518",
"42A1931",
"46C161"
],
"predicted_codes": [
"31A221",
"46C14",
"46A17",
"31D15",
"46C1463"
],
"generated_text": "{\"iconclass-codes\": [\"31A221\", \"46C14\", \"46A17\", \"31D15\", \"46C1463\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.5463188405797101,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 37,
"gt_codes": [
"12B61",
"25I",
"41A12"
],
"predicted_codes": [
"25I",
"25I1",
"25I146",
"41A12"
],
"generated_text": "{\"iconclass-codes\": [\"25I\", \"25I1\", \"25I146\", \"41A12\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.7210103329506313,
"code_precision": 0.5,
"code_recall": 0.6666666666666666
},
{
"index": 38,
"gt_codes": [
"23I41",
"41C32",
"41C12",
"31D14",
"47I4221"
],
"predicted_codes": [
"23U22",
"23O22",
"41C7111",
"23U23",
"47I4223"
],
"generated_text": "{\"iconclass-codes\": [\"23U22\", \"23O22\", \"41C7111\", \"23U23\", \"47I4223\"]}",
"valid_json": true,
"valid_code_count": 5,
"total_predicted": 5,
"exact_match": false,
"hierarchical_f1": 0.5199846449136276,
"code_precision": 0.0,
"code_recall": 0.0
},
{
"index": 39,
"gt_codes": [
"31D13",
"61B:31A2212(+1)",
"31A2421",
"41D222(RUFF)",
"31AA533(EMPIREKRULLEN)"
],
"predicted_codes": [
"31A2421",
"31D15:61BB",
"61B(+52)",
"61B:31A2212(+1)"
],
"generated_text": "{\"iconclass-codes\": [\"31A2421\", \"31D15:61BB\", \"61B(+52)\", \"61B:31A2212(+1)\"]}",
"valid_json": true,
"valid_code_count": 4,
"total_predicted": 4,
"exact_match": false,
"hierarchical_f1": 0.7436889332003987,
"code_precision": 0.5,
"code_recall": 0.4
}
]
}

Xet Storage Details

Size:
21.8 kB
·
Xet hash:
0fd4b11bf703b922cc9fcf80610446ec66577f9bc06133e46184039a7d391ae5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.