{
"history": [
{
"epoch": 1,
"train_sum_loss": 6.272962152862549,
"train_class_loss": 1.0185239918708802,
"train_class_acc": 0.4368,
"val_sum_loss": 5.8008860549926755,
"val_class_loss": 1.1023406219482421,
"val_class_acc": 0.304,
"val_rouge1": 0.17660690248954364,
"val_rouge2": 0.018371370658335634,
"val_rougeL": 0.15689190075772386
},
{
"epoch": 2,
"train_sum_loss": 5.219159950256348,
"train_class_loss": 1.099994719696045,
"train_class_acc": 0.3462,
"val_sum_loss": 5.416218318939209,
"val_class_loss": 1.0967657680511476,
"val_class_acc": 0.386,
"val_rouge1": 0.21779871449217764,
"val_rouge2": 0.028536134623968267,
"val_rougeL": 0.19329056631523484
},
{
"epoch": 3,
"train_sum_loss": 4.404316672515869,
"train_class_loss": 1.0899620735168456,
"train_class_acc": 0.361,
"val_sum_loss": 5.30168932723999,
"val_class_loss": 1.7727555074691772,
"val_class_acc": 0.342,
"val_rouge1": 0.22501668712961082,
"val_rouge2": 0.03326989668183761,
"val_rougeL": 0.20778061653444058
}
],
"config": {
"tokenizer": "t5-small",
"max_input_len": 512,
"max_target_len": 64,
"hidden_dim": 512,
"num_experts": 6,
"top_k": 2,
"batch_size": 8,
"learning_rate": 0.0005,
"num_epochs": 3,
"seed": 42,
"device": "cuda",
"hf_repo": "Deepu1965/bonus2-multitask-moe"
}
}