| { |
| "release": "IrishCore-DiffMask-135M-v1-rc4", |
| "base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1", |
| "public_references": { |
| "rc5": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc5", |
| "rc8": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc8" |
| }, |
| "task": "Irish core PII detection and masking in English and Irish Gaelic", |
| "coverage": [ |
| "PPSN", |
| "ACCOUNT_NUMBER", |
| "BANK_ROUTING_NUMBER", |
| "CREDIT_DEBIT_CARD", |
| "PASSPORT_NUMBER", |
| "POSTCODE", |
| "PHONE_NUMBER", |
| "EMAIL", |
| "FIRST_NAME", |
| "LAST_NAME", |
| "SWIFT_BIC" |
| ], |
| "architecture": { |
| "family": "DistilBERT-size token-span extractor", |
| "diffusion_style_training": true, |
| "runtime_diffusion": false, |
| "scanner_free": true, |
| "validator_free": true, |
| "heads": [ |
| "token_presence_head", |
| "typed_start_boundary_head", |
| "typed_end_boundary_head" |
| ] |
| }, |
| "training_data": { |
| "published": [ |
| "temsa/OpenMed-Irish-CorePII-TrainMix-v1", |
| "temsa/OpenMed-Irish-PPSN-Eircode-Spec-v1", |
| "joelniklaus/mapa", |
| "gretelai/synthetic_pii_finance_multilingual" |
| ], |
| "local_synthetic_hardening_sets": [ |
| "irish_dllm_hardening_v1", |
| "dllm_gap_patch_v1", |
| "dllm_gap_patch_v2", |
| "dllm_gap_patch_v3", |
| "dllm_gap_patch_v4", |
| "dllm_rc3_feedback_patch_v6", |
| "irish_core_diffmask_v4_mix", |
| "irish_core_diffmask_v5_mix", |
| "irish_core_diffmask_focus_v6" |
| ], |
| "selection_note": "The published checkpoint was selected from multiple ROCm continuation runs to balance Irish core, multilingual PPSN, hardening, and the post-rc3 QA feedback suites." |
| }, |
| "training_recipe": { |
| "noise_schedule_family": "linear masked denoising schedule", |
| "runtime_diffusion": false, |
| "train_time_diffusion_steps": 4, |
| "start_noise_fraction": 0.65, |
| "end_noise_fraction": 0.05, |
| "loss": "binary cross-entropy (BCE) losses over token presence and typed boundaries, averaged across the noised passes" |
| }, |
| "references": [ |
| { |
| "title": "BERT", |
| "url": "https://arxiv.org/abs/1810.04805" |
| }, |
| { |
| "title": "DistilBERT", |
| "url": "https://arxiv.org/abs/1910.01108" |
| }, |
| { |
| "title": "Boundary Smoothing for Named Entity Recognition", |
| "url": "https://aclanthology.org/2022.acl-long.490/" |
| }, |
| { |
| "title": "SpanNER: Named Entity Re-/Recognition as Span Prediction", |
| "url": "https://aclanthology.org/2021.acl-long.558/" |
| }, |
| { |
| "title": "LLaDA 2.0: Scaling Up Diffusion Language Models to 100B", |
| "url": "https://arxiv.org/abs/2512.15745" |
| }, |
| { |
| "title": "Scaling Diffusion Language Models via Adaptation from Autoregressive Models", |
| "url": "https://arxiv.org/abs/2410.17891" |
| } |
| ] |
| } |
|
|