IrishCore-GlobalPointer-135M-v1-rc2 / training_sources.json
temsa's picture
Publish IrishCore-GlobalPointer-135M-v1-rc2
6edc303 verified
{
"release": "IrishCore-GlobalPointer-135M-v1-rc2",
"base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1",
"public_references": {
"closest_raw_only_release": "temsa/IrishCore-DiffMask-135M-v1-rc6",
"closest_openmed_release": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc8"
},
"task": "Irish core PII detection and masking in English and Irish Gaelic",
"coverage": [
"ACCOUNT_NUMBER",
"BANK_ROUTING_NUMBER",
"CREDIT_DEBIT_CARD",
"EMAIL",
"FIRST_NAME",
"LAST_NAME",
"PASSPORT_NUMBER",
"PHONE_NUMBER",
"POSTCODE",
"PPSN",
"SWIFT_BIC"
],
"architecture": {
"family": "DistilBERT-size GlobalPointer span extractor",
"raw_only": true,
"scanner_free": true,
"validator_free": true,
"head": "typed span matrix",
"uses_rope": true
},
"training_data": {
"published": [
"temsa/OpenMed-Irish-CorePII-TrainMix-v1",
"temsa/OpenMed-Irish-PPSN-Eircode-Spec-v1",
"joelniklaus/mapa",
"gretelai/synthetic_pii_finance_multilingual"
],
"local_selection_mix": "irish_core_globalpointer_focus_chatbot_v2",
"selection_note": "The published checkpoint was selected after comparing span-head variants against one another on the Irish core, UAT replay, hardening, and fresh holdout suites."
},
"training_recipe": {
"init_mode": "full_global_pointer",
"initialized_encoder_from": "local DiffMask rc6 family checkpoint",
"freeze_layers": 2,
"max_length": 128,
"span_positive_weight": 8.0,
"head_size": 64,
"use_rope": true,
"negative_ratio": 16,
"min_negatives": 256,
"loss": "positive-weighted BCE over upper-triangular spans with hard-negative mining"
},
"references": [
{
"title": "DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter",
"url": "https://arxiv.org/abs/1910.01108"
},
{
"title": "Global Pointer: Novel Efficient Span-based Approach for Named Entity Recognition",
"url": "https://arxiv.org/abs/2208.03054"
},
{
"title": "RoFormer: Enhanced Transformer with Rotary Position Embedding",
"url": "https://arxiv.org/abs/2104.09864"
}
]
}