IrishCore-DiffMask-135M-v1-rc4 / training_sources.json
temsa's picture
Release IrishCore-DiffMask-135M-v1-rc4
9416dba verified
{
"release": "IrishCore-DiffMask-135M-v1-rc4",
"base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1",
"public_references": {
"rc5": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc5",
"rc8": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc8"
},
"task": "Irish core PII detection and masking in English and Irish Gaelic",
"coverage": [
"PPSN",
"ACCOUNT_NUMBER",
"BANK_ROUTING_NUMBER",
"CREDIT_DEBIT_CARD",
"PASSPORT_NUMBER",
"POSTCODE",
"PHONE_NUMBER",
"EMAIL",
"FIRST_NAME",
"LAST_NAME",
"SWIFT_BIC"
],
"architecture": {
"family": "DistilBERT-size token-span extractor",
"diffusion_style_training": true,
"runtime_diffusion": false,
"scanner_free": true,
"validator_free": true,
"heads": [
"token_presence_head",
"typed_start_boundary_head",
"typed_end_boundary_head"
]
},
"training_data": {
"published": [
"temsa/OpenMed-Irish-CorePII-TrainMix-v1",
"temsa/OpenMed-Irish-PPSN-Eircode-Spec-v1",
"joelniklaus/mapa",
"gretelai/synthetic_pii_finance_multilingual"
],
"local_synthetic_hardening_sets": [
"irish_dllm_hardening_v1",
"dllm_gap_patch_v1",
"dllm_gap_patch_v2",
"dllm_gap_patch_v3",
"dllm_gap_patch_v4",
"dllm_rc3_feedback_patch_v6",
"irish_core_diffmask_v4_mix",
"irish_core_diffmask_v5_mix",
"irish_core_diffmask_focus_v6"
],
"selection_note": "The published checkpoint was selected from multiple ROCm continuation runs as the one that best balanced results across the Irish core, multilingual PPSN, hardening, and post-rc3 QA feedback suites."
},
"training_recipe": {
"noise_schedule_family": "linear masked denoising schedule",
"runtime_diffusion": false,
"train_time_diffusion_steps": 4,
"start_noise_fraction": 0.65,
"end_noise_fraction": 0.05,
"loss": "average BCE losses over token presence and typed boundaries across noised passes"
},
"references": [
{
"title": "BERT",
"url": "https://arxiv.org/abs/1810.04805"
},
{
"title": "DistilBERT",
"url": "https://arxiv.org/abs/1910.01108"
},
{
"title": "Boundary Smoothing for Named Entity Recognition",
"url": "https://aclanthology.org/2022.acl-long.490/"
},
{
"title": "SpanNER: Named Entity Re-/Recognition as Span Prediction",
"url": "https://aclanthology.org/2021.acl-long.558/"
},
{
"title": "LLaDA 2.0: Scaling Up Diffusion Language Models to 100B",
"url": "https://arxiv.org/abs/2512.15745"
},
{
"title": "Scaling Diffusion Language Models via Adaptation from Autoregressive Models",
"url": "https://arxiv.org/abs/2410.17891"
}
]
}