Upload folder using huggingface_hub
Browse files
- README.md +1 -1
- history.csv +5 -5
- lora_adapters.pt +1 -1
- lora_moe_training.png +2 -2
- metrics.json +23 -23
- model.pt +1 -1
- tokenizer/tokenizer_config.json +2 -1
README.md
CHANGED
|
@@ -22,7 +22,7 @@ Parameter-efficient fine-tuning of Mixture-of-Experts using **LoRA (Low-Rank Ada
|
|
| 22 |
|
| 23 |
- **Validation Accuracy**: 0.6400
|
| 24 |
- **Dataset**: XSum (topic classification)
|
| 25 |
-
- **Training Samples**:
|
| 26 |
|
| 27 |
## LoRA Benefits
|
| 28 |
|
|
|
|
| 22 |
|
| 23 |
- **Validation Accuracy**: 0.6400
|
| 24 |
- **Dataset**: XSum (topic classification)
|
| 25 |
+
- **Training Samples**: 5,000
|
| 26 |
|
| 27 |
## LoRA Benefits
|
| 28 |
|
history.csv
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
epoch,train_loss,train_accuracy,val_loss,val_accuracy
|
| 2 |
-
1,0.
|
| 3 |
-
2,0.
|
| 4 |
-
3,0.
|
| 5 |
-
4,0.
|
| 6 |
-
5,0.
|
|
|
|
| 1 |
epoch,train_loss,train_accuracy,val_loss,val_accuracy
|
| 2 |
+
1,0.8147811661720276,0.6266,0.8106175279617309,0.64
|
| 3 |
+
2,0.8049529413223266,0.6282,0.8057486724853515,0.64
|
| 4 |
+
3,0.79402887840271,0.6384,0.8648435598611832,0.64
|
| 5 |
+
4,0.7893773549079895,0.6438,0.7850593781471252,0.64
|
| 6 |
+
5,0.7901758761405945,0.6414,0.7927370357513428,0.64
|
lora_adapters.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6334282
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aab3694f32c175f877bba6583c7ce94772f58557aa09a47a2da7c9d975c64e37
|
| 3 |
size 6334282
|
lora_moe_training.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
metrics.json
CHANGED
|
@@ -2,37 +2,37 @@
|
|
| 2 |
"history": [
|
| 3 |
{
|
| 4 |
"epoch": 1,
|
| 5 |
-
"train_loss": 0.
|
| 6 |
-
"train_accuracy": 0.
|
| 7 |
-
"val_loss": 0.
|
| 8 |
"val_accuracy": 0.64
|
| 9 |
},
|
| 10 |
{
|
| 11 |
"epoch": 2,
|
| 12 |
-
"train_loss": 0.
|
| 13 |
-
"train_accuracy": 0.
|
| 14 |
-
"val_loss": 0.
|
| 15 |
"val_accuracy": 0.64
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"epoch": 3,
|
| 19 |
-
"train_loss": 0.
|
| 20 |
-
"train_accuracy": 0.
|
| 21 |
-
"val_loss": 0.
|
| 22 |
"val_accuracy": 0.64
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"epoch": 4,
|
| 26 |
-
"train_loss": 0.
|
| 27 |
-
"train_accuracy": 0.
|
| 28 |
-
"val_loss": 0.
|
| 29 |
"val_accuracy": 0.64
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"epoch": 5,
|
| 33 |
-
"train_loss": 0.
|
| 34 |
-
"train_accuracy": 0.
|
| 35 |
-
"val_loss": 0.
|
| 36 |
"val_accuracy": 0.64
|
| 37 |
}
|
| 38 |
],
|
|
@@ -57,13 +57,13 @@
|
|
| 57 |
"total": 55228676
|
| 58 |
},
|
| 59 |
"expert_usage": [
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
]
|
| 69 |
}
|
|
|
|
| 2 |
"history": [
|
| 3 |
{
|
| 4 |
"epoch": 1,
|
| 5 |
+
"train_loss": 0.8147811661720276,
|
| 6 |
+
"train_accuracy": 0.6266,
|
| 7 |
+
"val_loss": 0.8106175279617309,
|
| 8 |
"val_accuracy": 0.64
|
| 9 |
},
|
| 10 |
{
|
| 11 |
"epoch": 2,
|
| 12 |
+
"train_loss": 0.8049529413223266,
|
| 13 |
+
"train_accuracy": 0.6282,
|
| 14 |
+
"val_loss": 0.8057486724853515,
|
| 15 |
"val_accuracy": 0.64
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"epoch": 3,
|
| 19 |
+
"train_loss": 0.79402887840271,
|
| 20 |
+
"train_accuracy": 0.6384,
|
| 21 |
+
"val_loss": 0.8648435598611832,
|
| 22 |
"val_accuracy": 0.64
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"epoch": 4,
|
| 26 |
+
"train_loss": 0.7893773549079895,
|
| 27 |
+
"train_accuracy": 0.6438,
|
| 28 |
+
"val_loss": 0.7850593781471252,
|
| 29 |
"val_accuracy": 0.64
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"epoch": 5,
|
| 33 |
+
"train_loss": 0.7901758761405945,
|
| 34 |
+
"train_accuracy": 0.6414,
|
| 35 |
+
"val_loss": 0.7927370357513428,
|
| 36 |
"val_accuracy": 0.64
|
| 37 |
}
|
| 38 |
],
|
|
|
|
| 57 |
"total": 55228676
|
| 58 |
},
|
| 59 |
"expert_usage": [
|
| 60 |
+
44.994998931884766,
|
| 61 |
+
48.994998931884766,
|
| 62 |
+
456.364990234375,
|
| 63 |
+
263.5,
|
| 64 |
+
714.1749877929688,
|
| 65 |
+
1520.7249755859375,
|
| 66 |
+
810.1400146484375,
|
| 67 |
+
237.10499572753906
|
| 68 |
]
|
| 69 |
}
|
model.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 221009538
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0f535a9b58723ef46475aa6861a36acc30e6249787e5866a7780cb7fd760907
|
| 3 |
size 221009538
|
tokenizer/tokenizer_config.json
CHANGED
|
@@ -41,9 +41,10 @@
|
|
| 41 |
"special": true
|
| 42 |
}
|
| 43 |
},
|
| 44 |
-
"clean_up_tokenization_spaces":
|
| 45 |
"cls_token": "[CLS]",
|
| 46 |
"do_lower_case": true,
|
|
|
|
| 47 |
"mask_token": "[MASK]",
|
| 48 |
"model_max_length": 512,
|
| 49 |
"pad_token": "[PAD]",
|
|
|
|
| 41 |
"special": true
|
| 42 |
}
|
| 43 |
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
"cls_token": "[CLS]",
|
| 46 |
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
"mask_token": "[MASK]",
|
| 49 |
"model_max_length": 512,
|
| 50 |
"pad_token": "[PAD]",
|