temsa's picture
Publish v2-rc4 release with bundled ONNX q8 artifact and cleaned benchmarks
2a38b32 verified
{
"base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1",
"current_public_reference": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc3",
"stable_public_reference": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v1",
"release_purpose": "Fourth v2 release candidate for the IrishCorePII line. This round strengthens the public rc3 candidate by repairing PPSN false positives in mixed numeric contexts while preserving the overlap improvements and the broader Irish core PII coverage.",
"recommended_thresholds": {
"full_checkpoint": {
"ppsn_min_score": 0.71,
"other_min_score": 0.5
},
"onnx_q8": {
"ppsn_min_score": 0.71,
"other_min_score": 0.6
}
},
"training_mix_summary": [
{
"component": "previous Irish core release mix replay",
"weight": 4.0
},
{
"component": "synthetic PPSN false-positive repair for short invalid compact forms and routing-number confusion",
"weight": 2.5
},
{
"component": "exact PPSN overlap replay from mixed PII feedback cases",
"weight": 1.5
},
{
"component": "postcode replay to preserve Eircode span quality",
"weight": 1.0
},
{
"component": "multilingual PPSN replay",
"weight": 1.0
}
],
"upstream_attribution": [
{
"name": "joelniklaus/mapa",
"license": "CC-BY-4.0"
},
{
"name": "gretelai/synthetic_pii_finance_multilingual",
"license": "Apache-2.0"
}
],
"quantization_notes": {
"promoted_q8_recipe": "ONNX Runtime dynamic int8 with per-channel quantization over MatMul,Gemm,Attention",
"not_promoted": [
"QAT in this DistilBERT token-classification stack",
"Mezzanine Qwen-focused weight transforms"
]
},
"known_limitations": [
"Passport values written as prefix plus space plus digits remain the main strict-boundary failure mode.",
"Compact 10-digit Irish mobile numbers in very short English contexts should still be QA tested carefully.",
"The ONNX q8 artifact remains weaker than the full checkpoint on the small edge suite."
]
}