Deepu1965 committed on
Commit
fd546d3
·
verified ·
1 Parent(s): 1f7d75c

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ Parameter-efficient fine-tuning of Mixture-of-Experts using **LoRA (Low-Rank Ada
22
 
23
  - **Validation Accuracy**: 0.6400
24
  - **Dataset**: XSum (topic classification)
25
- - **Training Samples**: 4,000
26
 
27
  ## LoRA Benefits
28
 
 
22
 
23
  - **Validation Accuracy**: 0.6400
24
  - **Dataset**: XSum (topic classification)
25
+ - **Training Samples**: 5,000
26
 
27
  ## LoRA Benefits
28
 
history.csv CHANGED
@@ -1,6 +1,6 @@
1
  epoch,train_loss,train_accuracy,val_loss,val_accuracy
2
- 1,0.8074325952529907,0.62525,0.8184478509426117,0.64
3
- 2,0.7937552418708801,0.637,0.7908735847473145,0.64
4
- 3,0.7901616661548615,0.6455,0.798002507686615,0.64
5
- 4,0.7901241521835327,0.6365,0.8332968425750732,0.64
6
- 5,0.7865016897916793,0.6465,0.7994629460573196,0.64
 
1
  epoch,train_loss,train_accuracy,val_loss,val_accuracy
2
+ 1,0.8147811661720276,0.6266,0.8106175279617309,0.64
3
+ 2,0.8049529413223266,0.6282,0.8057486724853515,0.64
4
+ 3,0.79402887840271,0.6384,0.8648435598611832,0.64
5
+ 4,0.7893773549079895,0.6438,0.7850593781471252,0.64
6
+ 5,0.7901758761405945,0.6414,0.7927370357513428,0.64
lora_adapters.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cc75c50d7fd0e92374fc126b34a569515c0b2dca5e282ccd8466ce563c41d31
3
  size 6334282
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab3694f32c175f877bba6583c7ce94772f58557aa09a47a2da7c9d975c64e37
3
  size 6334282
lora_moe_training.png CHANGED

Git LFS Details

  • SHA256: 46131a0ccd7250ffecb578c2f6450d229741d14c7be5ed6de2863e6fa196b542
  • Pointer size: 131 Bytes
  • Size of remote file: 121 kB

Git LFS Details

  • SHA256: 693ba4f30429d43be92338c5fca72abeb911b610e3c2544dcfb165449fa9ba69
  • Pointer size: 131 Bytes
  • Size of remote file: 125 kB
metrics.json CHANGED
@@ -2,37 +2,37 @@
2
  "history": [
3
  {
4
  "epoch": 1,
5
- "train_loss": 0.8074325952529907,
6
- "train_accuracy": 0.62525,
7
- "val_loss": 0.8184478509426117,
8
  "val_accuracy": 0.64
9
  },
10
  {
11
  "epoch": 2,
12
- "train_loss": 0.7937552418708801,
13
- "train_accuracy": 0.637,
14
- "val_loss": 0.7908735847473145,
15
  "val_accuracy": 0.64
16
  },
17
  {
18
  "epoch": 3,
19
- "train_loss": 0.7901616661548615,
20
- "train_accuracy": 0.6455,
21
- "val_loss": 0.798002507686615,
22
  "val_accuracy": 0.64
23
  },
24
  {
25
  "epoch": 4,
26
- "train_loss": 0.7901241521835327,
27
- "train_accuracy": 0.6365,
28
- "val_loss": 0.8332968425750732,
29
  "val_accuracy": 0.64
30
  },
31
  {
32
  "epoch": 5,
33
- "train_loss": 0.7865016897916793,
34
- "train_accuracy": 0.6465,
35
- "val_loss": 0.7994629460573196,
36
  "val_accuracy": 0.64
37
  }
38
  ],
@@ -57,13 +57,13 @@
57
  "total": 55228676
58
  },
59
  "expert_usage": [
60
- 270.3500061035156,
61
- 583.625,
62
- 598.9650268554688,
63
- 359.67999267578125,
64
- 425.7900085449219,
65
- 603.489990234375,
66
- 1022.885009765625,
67
- 231.21499633789062
68
  ]
69
  }
 
2
  "history": [
3
  {
4
  "epoch": 1,
5
+ "train_loss": 0.8147811661720276,
6
+ "train_accuracy": 0.6266,
7
+ "val_loss": 0.8106175279617309,
8
  "val_accuracy": 0.64
9
  },
10
  {
11
  "epoch": 2,
12
+ "train_loss": 0.8049529413223266,
13
+ "train_accuracy": 0.6282,
14
+ "val_loss": 0.8057486724853515,
15
  "val_accuracy": 0.64
16
  },
17
  {
18
  "epoch": 3,
19
+ "train_loss": 0.79402887840271,
20
+ "train_accuracy": 0.6384,
21
+ "val_loss": 0.8648435598611832,
22
  "val_accuracy": 0.64
23
  },
24
  {
25
  "epoch": 4,
26
+ "train_loss": 0.7893773549079895,
27
+ "train_accuracy": 0.6438,
28
+ "val_loss": 0.7850593781471252,
29
  "val_accuracy": 0.64
30
  },
31
  {
32
  "epoch": 5,
33
+ "train_loss": 0.7901758761405945,
34
+ "train_accuracy": 0.6414,
35
+ "val_loss": 0.7927370357513428,
36
  "val_accuracy": 0.64
37
  }
38
  ],
 
57
  "total": 55228676
58
  },
59
  "expert_usage": [
60
+ 44.994998931884766,
61
+ 48.994998931884766,
62
+ 456.364990234375,
63
+ 263.5,
64
+ 714.1749877929688,
65
+ 1520.7249755859375,
66
+ 810.1400146484375,
67
+ 237.10499572753906
68
  ]
69
  }
model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53be695e8fa32b0f3f871d66bcf00394b365fd99e0c747ad7a2db73979d059cd
3
  size 221009538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0f535a9b58723ef46475aa6861a36acc30e6249787e5866a7780cb7fd760907
3
  size 221009538
tokenizer/tokenizer_config.json CHANGED
@@ -41,9 +41,10 @@
41
  "special": true
42
  }
43
  },
44
- "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "do_lower_case": true,
 
47
  "mask_token": "[MASK]",
48
  "model_max_length": 512,
49
  "pad_token": "[PAD]",
 
41
  "special": true
42
  }
43
  },
44
+ "clean_up_tokenization_spaces": false,
45
  "cls_token": "[CLS]",
46
  "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
  "mask_token": "[MASK]",
49
  "model_max_length": 512,
50
  "pad_token": "[PAD]",