[ { "name": "concat_bpe_8000", "source": "ours", "algorithm": "bpe", "architecture": "concatenated", "vocab_size": 8000, "fertility_ar": 1.677255929872786, "fertility_az": 1.5274486975717831, "fertility_overall": 1.625089409755206, "disparity": 0.0893168595399542, "cpt_ar": 3.3745329363977343, "cpt_az": 3.4826508889784487, "exact_match_ar": 0.9989120580235721, "exact_match_az": 0.9964364500254539 }, { "name": "concat_wordpiece_16000", "source": "ours", "algorithm": "wordpiece", "architecture": "concatenated", "vocab_size": 16000, "fertility_ar": 1.4835885687430344, "fertility_az": 1.3635334584201908, "fertility_overall": 1.441782460775622, "disparity": 0.08092210526032824, "cpt_ar": 3.81082194270718, "cpt_az": 3.9034221417934196, "exact_match_ar": 0.0, "exact_match_az": 0.0 }, { "name": "concat_bpe_32000", "source": "ours", "algorithm": "bpe", "architecture": "concatenated", "vocab_size": 32000, "fertility_ar": 1.3077466165859795, "fertility_az": 1.1981161479838143, "fertility_overall": 1.2695706222898995, "disparity": 0.08383158267185423, "cpt_ar": 4.388664556275568, "cpt_az": 4.487021365858093, "exact_match_ar": 0.0, "exact_match_az": 0.0 }, { "name": "CaMeLBERT-MSA", "source": "external_msa", "algorithm": "WordPiece", "architecture": "shared", "vocab_size": 30000, "fertility_ar": 1.816901627794181, "fertility_az": 3.173471216645639, "fertility_overall": 2.2892921369888652, "disparity": 0.42747184273672184, "cpt_ar": 3.1140517054288748, "cpt_az": 1.6388977943422216, "exact_match_ar": 0.2987307343608341, "exact_match_az": 0.3885117936534872 }, { "name": "Asafaya-BERT", "source": "external_msa", "algorithm": "WordPiece", "architecture": "shared", "vocab_size": 32000, "fertility_ar": 1.7951679730188856, "fertility_az": 2.794867411003224, "fertility_overall": 2.143287620128837, "disparity": 0.35769118565288, "cpt_ar": 3.1312619206100933, "cpt_az": 1.8618984630783484, "exact_match_ar": 0.19895738893925657, "exact_match_az": 0.15238418462582726 }, { "name": "Aranizer-SP-86k", "source": "external_msa", "algorithm": "SentencePiece", "architecture": "shared", "vocab_size": 86000, "fertility_ar": 1.5940707675919776, "fertility_az": 2.5232344233957935, "fertility_overall": 1.9176281406140119, "disparity": 0.3682430959202508, "cpt_ar": 3.532878084819776, "cpt_az": 2.06546468780593, "exact_match_ar": 0.9976881233000907, "exact_match_az": 0.9955031393178347 }, { "name": "B2BERT", "source": "external_msa", "algorithm": "WordPiece", "architecture": "shared", "vocab_size": 30000, "fertility_ar": 1.816901627794181, "fertility_az": 3.173471216645639, "fertility_overall": 2.2892921369888652, "disparity": 0.42747184273672184, "cpt_ar": 3.1140517054288748, "cpt_az": 1.6388977943422216, "exact_match_ar": 0.2987307343608341, "exact_match_az": 0.3885117936534872 }, { "name": "DarijaBERT-ar", "source": "external_darija", "algorithm": "WordPiece", "architecture": "shared", "vocab_size": 80000, "fertility_ar": 1.4174523931345464, "fertility_az": 2.4040608107988306, "fertility_overall": 1.7610134287249042, "disparity": 0.41039245481334147, "cpt_ar": 3.9815436288421893, "cpt_az": 2.170223436755666, "exact_match_ar": 0.13721668177697188, "exact_match_az": 0.07984048871542508 }, { "name": "DarijaBERT-az", "source": "external_darija", "algorithm": "WordPiece", "architecture": "shared", "vocab_size": 110000, "fertility_ar": 1.6053190877519825, "fertility_az": 1.5171613637119812, "fertility_overall": 1.5746204251172413, "disparity": 0.05491601309210961, "cpt_ar": 3.476952784741954, "cpt_az": 3.451281188374697, "exact_match_ar": 0.14777878513145964, "exact_match_az": 0.07984048871542508 }, { "name": "Moroccan-Darija-Tokenizer", "source": "external_darija", "algorithm": "BPE", "architecture": "shared", "vocab_size": 30000, "fertility_ar": 1.5701289164575563, "fertility_az": 2.9024430947615336, "fertility_overall": 2.034073102047897, "disparity": 0.4590319723093283, "cpt_ar": 3.626351312053874, "cpt_az": 1.7930892097539761, "exact_match_ar": 0.00018132366273798732, "exact_match_az": 0.0 }, { "name": "Translit-Darija", "source": "external_darija", "algorithm": "BPE", "architecture": "shared", "vocab_size": 30000, "fertility_ar": 1.7964830125579598, "fertility_az": 1.6576487239140443, "fertility_overall": 1.7481375381752506, "disparity": 0.0772811586157074, "cpt_ar": 3.1115965796575096, "cpt_az": 3.1602226731778864, "exact_match_ar": 0.0, "exact_match_az": 0.0 }, { "name": "Qwen2.5-Darija", "source": "external_darija", "algorithm": "SentencePiece", "architecture": "shared", "vocab_size": 151643, "fertility_ar": 2.340330300987948, "fertility_az": 2.245717380136819, "fertility_overall": 2.307383781897024, "disparity": 0.04042716569160743, "cpt_ar": 2.3801472507543755, "cpt_az": 2.325374802332435, "exact_match_ar": 1.0, "exact_match_az": 1.0 } ]