IrishCore-DiffMask-135M-v1-rc3 / eval /benchmark_summary.json
temsa's picture
Add files using upload-large-folder tool
85b0a00 verified
{
"release": "IrishCore-DiffMask-135M-v1-rc3",
"repo_id": "temsa/IrishCore-DiffMask-135M-v1-rc3",
"architecture": {
"family": "DistilBERT-size token-span extractor",
"diffusion_style_training": true,
"runtime_diffusion": false,
"scanner_free": true,
"validator_free": true,
"heads": [
"token_presence_head",
"typed_start_boundary_head",
"typed_end_boundary_head"
]
},
"base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1",
"notes": [
"DiffMask uses a masked denoising training schedule, not a generative diffusion runtime.",
"ONNX q8 is the recommended CPU deployment artifact.",
"The release inference scripts emit [PII:LABEL] placeholders.",
"rc3 keeps the stronger focusv3 checkpoint and publishes a narrower decoder profile tuned for email continuation and q8 passport recovery.",
"A UAT replay exact suite is part of the rc3 selection gate."
],
"full": {
"core_f1": 0.9664429530201343,
"edge_f1": 1.0,
"multilingual_f1": 0.95906432748538,
"hardening_f1": 1.0
},
"onnx_q8": {
"core_f1": 0.9664429530201343,
"edge_f1": 1.0,
"finance_f1": 1.0,
"finance_boundary_f1": 1.0,
"user_ppsn_f1": 0.8571428571428571,
"gaelic_weak_ppsn_f1": 1.0,
"multilingual_f1": 0.95906432748538,
"hardening_f1": 1.0,
"uat_replay_exact_f1": 0.9032258064516129,
"core_examples_per_second": 29.967569646866203,
"multilingual_examples_per_second": 54.221898586885715,
"runtime_profile_examples_per_second": 46.15186090893423,
"uat_replay_examples_per_second": 31.715926977807822
},
"comparison": {
"public_rc5_onnx_q8": {
"core": 0.9668874172185431,
"edge": 0.9743589743589743,
"remaining_gaps": 0.888888888888889,
"finance": 0.9361702127659575,
"finance_boundary": 0.8750000000000001,
"multilingual_ppsn": 0.9333333333333333,
"user_ppsn": 1.0,
"gaelic_weak_ppsn": 1.0,
"overlap_ppsn": 1.0
},
"public_rc8_onnx_q8": {
"min_score": 0.5,
"core": 0.9736842105263158,
"edge": 1.0,
"finance": 1.0,
"finance_boundary": 1.0,
"user_ppsn": 1.0,
"gaelic_weak_ppsn": 1.0,
"multilingual_ppsn": 0.9176470588235294,
"hardening": 0.7058823529411765,
"core_examples_per_second": 46.14201741375802,
"multilingual_examples_per_second": 99.71655616732895
}
},
"uat_replay_exact_suite": {
"dataset": "diffmask_gap_uat_exact_v1",
"rc1_q8": {
"f1": 0.45454545454545453,
"precision": 1.0,
"recall": 0.29411764705882354,
"examples_per_second": 238.65238838006147
},
"rc2_q8": {
"f1": 0.8275862068965517,
"precision": 1.0,
"recall": 0.7058823529411765,
"examples_per_second": 183.66747754925285
},
"rc8_q8": {
"f1": 0.3636363636363636,
"precision": 0.375,
"recall": 0.35294117647058826,
"examples_per_second": 110.75945444067594
},
"rc3_q8": {
"f1": 0.9032258064516129,
"precision": 1.0,
"recall": 0.8235294117647058,
"examples_per_second": 31.715926977807822
}
},
"benchmark_caveats": [
"user_raw_regression_cases_v1 is a legacy PPSN-only regression set; rc3 now masks 0871234567 as PHONE_NUMBER, which counts as a false positive in that older suite.",
"CPU throughput numbers were measured on this host and should be compared only within the same harness."
],
"known_remaining_misses": [
"Second phone number inside the long Client Identity Services sentence: 071 967 2616",
"Postcode inside the longer allocation-centre block: R93 EC57",
"Email mailbox form: EPStamp4@enterprise.gov.ie"
]
}