| [ |
| { |
| "name": "concat_bpe_8000", |
| "source": "ours", |
| "algorithm": "bpe", |
| "architecture": "concatenated", |
| "vocab_size": 8000, |
| "fertility_ar": 1.677255929872786, |
| "fertility_az": 1.5274486975717831, |
| "fertility_overall": 1.625089409755206, |
| "disparity": 0.0893168595399542, |
| "cpt_ar": 3.3745329363977343, |
| "cpt_az": 3.4826508889784487, |
| "exact_match_ar": 0.9989120580235721, |
| "exact_match_az": 0.9964364500254539 |
| }, |
| { |
| "name": "concat_wordpiece_16000", |
| "source": "ours", |
| "algorithm": "wordpiece", |
| "architecture": "concatenated", |
| "vocab_size": 16000, |
| "fertility_ar": 1.4835885687430344, |
| "fertility_az": 1.3635334584201908, |
| "fertility_overall": 1.441782460775622, |
| "disparity": 0.08092210526032824, |
| "cpt_ar": 3.81082194270718, |
| "cpt_az": 3.9034221417934196, |
| "exact_match_ar": 0.0, |
| "exact_match_az": 0.0 |
| }, |
| { |
| "name": "concat_bpe_32000", |
| "source": "ours", |
| "algorithm": "bpe", |
| "architecture": "concatenated", |
| "vocab_size": 32000, |
| "fertility_ar": 1.3077466165859795, |
| "fertility_az": 1.1981161479838143, |
| "fertility_overall": 1.2695706222898995, |
| "disparity": 0.08383158267185423, |
| "cpt_ar": 4.388664556275568, |
| "cpt_az": 4.487021365858093, |
| "exact_match_ar": 0.0, |
| "exact_match_az": 0.0 |
| }, |
| { |
| "name": "CaMeLBERT-MSA", |
| "source": "external_msa", |
| "algorithm": "WordPiece", |
| "architecture": "shared", |
| "vocab_size": 30000, |
| "fertility_ar": 1.816901627794181, |
| "fertility_az": 3.173471216645639, |
| "fertility_overall": 2.2892921369888652, |
| "disparity": 0.42747184273672184, |
| "cpt_ar": 3.1140517054288748, |
| "cpt_az": 1.6388977943422216, |
| "exact_match_ar": 0.2987307343608341, |
| "exact_match_az": 0.3885117936534872 |
| }, |
| { |
| "name": "Asafaya-BERT", |
| "source": "external_msa", |
| "algorithm": "WordPiece", |
| "architecture": "shared", |
| "vocab_size": 32000, |
| "fertility_ar": 1.7951679730188856, |
| "fertility_az": 2.794867411003224, |
| "fertility_overall": 2.143287620128837, |
| "disparity": 0.35769118565288, |
| "cpt_ar": 3.1312619206100933, |
| "cpt_az": 1.8618984630783484, |
| "exact_match_ar": 0.19895738893925657, |
| "exact_match_az": 0.15238418462582726 |
| }, |
| { |
| "name": "Aranizer-SP-86k", |
| "source": "external_msa", |
| "algorithm": "SentencePiece", |
| "architecture": "shared", |
| "vocab_size": 86000, |
| "fertility_ar": 1.5940707675919776, |
| "fertility_az": 2.5232344233957935, |
| "fertility_overall": 1.9176281406140119, |
| "disparity": 0.3682430959202508, |
| "cpt_ar": 3.532878084819776, |
| "cpt_az": 2.06546468780593, |
| "exact_match_ar": 0.9976881233000907, |
| "exact_match_az": 0.9955031393178347 |
| }, |
| { |
| "name": "B2BERT", |
| "source": "external_msa", |
| "algorithm": "WordPiece", |
| "architecture": "shared", |
| "vocab_size": 30000, |
| "fertility_ar": 1.816901627794181, |
| "fertility_az": 3.173471216645639, |
| "fertility_overall": 2.2892921369888652, |
| "disparity": 0.42747184273672184, |
| "cpt_ar": 3.1140517054288748, |
| "cpt_az": 1.6388977943422216, |
| "exact_match_ar": 0.2987307343608341, |
| "exact_match_az": 0.3885117936534872 |
| }, |
| { |
| "name": "DarijaBERT-ar", |
| "source": "external_darija", |
| "algorithm": "WordPiece", |
| "architecture": "shared", |
| "vocab_size": 80000, |
| "fertility_ar": 1.4174523931345464, |
| "fertility_az": 2.4040608107988306, |
| "fertility_overall": 1.7610134287249042, |
| "disparity": 0.41039245481334147, |
| "cpt_ar": 3.9815436288421893, |
| "cpt_az": 2.170223436755666, |
| "exact_match_ar": 0.13721668177697188, |
| "exact_match_az": 0.07984048871542508 |
| }, |
| { |
| "name": "DarijaBERT-az", |
| "source": "external_darija", |
| "algorithm": "WordPiece", |
| "architecture": "shared", |
| "vocab_size": 110000, |
| "fertility_ar": 1.6053190877519825, |
| "fertility_az": 1.5171613637119812, |
| "fertility_overall": 1.5746204251172413, |
| "disparity": 0.05491601309210961, |
| "cpt_ar": 3.476952784741954, |
| "cpt_az": 3.451281188374697, |
| "exact_match_ar": 0.14777878513145964, |
| "exact_match_az": 0.07984048871542508 |
| }, |
| { |
| "name": "Moroccan-Darija-Tokenizer", |
| "source": "external_darija", |
| "algorithm": "BPE", |
| "architecture": "shared", |
| "vocab_size": 30000, |
| "fertility_ar": 1.5701289164575563, |
| "fertility_az": 2.9024430947615336, |
| "fertility_overall": 2.034073102047897, |
| "disparity": 0.4590319723093283, |
| "cpt_ar": 3.626351312053874, |
| "cpt_az": 1.7930892097539761, |
| "exact_match_ar": 0.00018132366273798732, |
| "exact_match_az": 0.0 |
| }, |
| { |
| "name": "Translit-Darija", |
| "source": "external_darija", |
| "algorithm": "BPE", |
| "architecture": "shared", |
| "vocab_size": 30000, |
| "fertility_ar": 1.7964830125579598, |
| "fertility_az": 1.6576487239140443, |
| "fertility_overall": 1.7481375381752506, |
| "disparity": 0.0772811586157074, |
| "cpt_ar": 3.1115965796575096, |
| "cpt_az": 3.1602226731778864, |
| "exact_match_ar": 0.0, |
| "exact_match_az": 0.0 |
| }, |
| { |
| "name": "Qwen2.5-Darija", |
| "source": "external_darija", |
| "algorithm": "SentencePiece", |
| "architecture": "shared", |
| "vocab_size": 151643, |
| "fertility_ar": 2.340330300987948, |
| "fertility_az": 2.245717380136819, |
| "fertility_overall": 2.307383781897024, |
| "disparity": 0.04042716569160743, |
| "cpt_ar": 2.3801472507543755, |
| "cpt_az": 2.325374802332435, |
| "exact_match_ar": 1.0, |
| "exact_match_az": 1.0 |
| } |
| ] |