IrishCore-GlobalPointer-135M-v1-rc2 / eval /benchmark_summary.json
temsa's picture
Publish IrishCore-GlobalPointer-135M-v1-rc2
6edc303 verified
{
"release": "IrishCore-GlobalPointer-135M-v1-rc2",
"repo_id": "temsa/IrishCore-GlobalPointer-135M-v1-rc2",
"architecture": {
"family": "DistilBERT-size GlobalPointer span extractor",
"raw_only": true,
"scanner_free": true,
"validator_free": true,
"head": "typed span matrix",
"uses_rope": true,
"runtime_diffusion": false
},
"base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1",
"notes": [
"This release uses a GlobalPointer-style typed span matrix over the OpenMed encoder.",
"ONNX q8 is the recommended CPU deployment artifact.",
"The release inference scripts emit [PII:LABEL] placeholders.",
"The multilingual PPSN benchmark's overall score is penalized by extra name detections; the PPSN label-only F1 is reported separately."
],
"full": {
"core_f1": 0.9933774834437086,
"uat_exact_f1": 1.0,
"fresh_holdout_f1": 0.9841269841269841,
"hardening_f1": 1.0,
"edge_f1": 1.0,
"multilingual_f1": 0.9325842696629214,
"multilingual_ppsn_label_f1": 0.9940119760479043,
"runtime_profile_f1": 0.49464194590916827,
"core_examples_per_second": 41.03159388686429,
"uat_examples_per_second": 26.245745060126804,
"fresh_holdout_examples_per_second": 31.151268103583075,
"hardening_examples_per_second": 24.794782789454274,
"edge_examples_per_second": 33.353197990887175,
"multilingual_examples_per_second": 58.64551489471138,
"runtime_profile_examples_per_second": 68.25930179596385,
"chatbot_redteam_f1": 1.0,
"chatbot_redteam_exact_f1": 1.0,
"chatbot_redteam_failures": 0,
"chatbot_gap_holdout_f1": 1.0,
"chatbot_gap_holdout_exact_f1": 1.0,
"chatbot_gap_holdout_failures": 0
},
"onnx_q8": {
"core_f1": 0.9933774834437086,
"uat_exact_f1": 1.0,
"fresh_holdout_f1": 0.9841269841269841,
"hardening_f1": 1.0,
"edge_f1": 1.0,
"multilingual_f1": 0.9325842696629214,
"multilingual_ppsn_label_f1": 0.9940119760479043,
"runtime_profile_f1": 0.49456152277362336,
"core_examples_per_second": 231.22220142890342,
"uat_examples_per_second": 118.60420792880863,
"fresh_holdout_examples_per_second": 217.11713280220687,
"hardening_examples_per_second": 230.30107818589826,
"edge_examples_per_second": 213.90854550873703,
"multilingual_examples_per_second": 170.37957136472622,
"runtime_profile_examples_per_second": 133.46095291725894,
"chatbot_redteam_f1": 1.0,
"chatbot_redteam_exact_f1": 1.0,
"chatbot_redteam_failures": 0,
"chatbot_gap_holdout_f1": 1.0,
"chatbot_gap_holdout_exact_f1": 1.0,
"chatbot_gap_holdout_failures": 0
},
"comparison": {
"public_diffmask_rc6_onnx_q8": {
"core_f1": 0.9733333333333333,
"hardening_f1": 0.975609756097561,
"multilingual_f1": 0.9273743016759777,
"edge_f1": 0.9500000000000001,
"core_examples_per_second": 130.34154354965295
},
"public_openmed_rc8_onnx_q8": {
"core_f1": 0.9736842105263158,
"hardening_f1": null,
"multilingual_f1": 0.9176470588235294,
"edge_f1": 1.0,
"core_examples_per_second": 46.14201741375802
}
}
}