MASRIBERTv2 / training_metadata.json
T0KII's picture
MASRIBERTv2 — continued pretraining (ppl=16.00)
0b182e2 verified
raw
history blame contribute delete
801 Bytes
{
"base_model": "T0KII/MASRIBERT",
"grandparent_model": "UBC-NLP/MARBERTv2",
"version": "v2",
"new_data_sources": [
"faisalq/EFC-mini",
"MBZUAI-Paris/Egyptian-SFT-Mixture"
],
"v1_data_sources": [
"hard",
"ar_res_reviews",
"arbml/TEAD",
"AOC_youm7_comments",
"RestOf_AOC_youm7_comments",
"Egyptian_Tweets",
"TaghreedT",
"TE_Telecom",
"TE_Tweets"
],
"block_size": 64,
"mlm_probability": 0.2,
"whole_word_mask": true,
"learning_rate": 2e-05,
"epochs": 2,
"effective_batch_size": 128,
"train_blocks": 1849729,
"eval_blocks": 18685,
"final_eval_loss": 2.772667407989502,
"final_perplexity": 16.00125902152841,
"intended_downstream_tasks": [
"sentiment_analysis",
"emotion_classification",
"sarcasm_detection"
]
}