End of training

Browse files

Files changed (6) hide show

README.md +34 -34
adapter_config.json +27 -2
adapter_model.bin +2 -2
model.safetensors +2 -2
tokenizer.json +6 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [allenai/OLMo-1B](https://huggingface.co/allenai/OLMo-1B) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1452
 ## Model description
@@ -50,39 +50,39 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.8046        | 0.09  | 10   | 0.6641          |
-| 0.2763        | 0.18  | 20   | 0.1556          |
-| 0.1495        | 0.27  | 30   | 0.1607          |
-| 0.1527        | 0.36  | 40   | 0.1512          |
-| 0.148         | 0.45  | 50   | 0.1490          |
-| 0.1487        | 0.54  | 60   | 0.1498          |
-| 0.1478        | 0.63  | 70   | 0.1489          |
-| 0.1488        | 0.73  | 80   | 0.1495          |
-| 0.1462        | 0.82  | 90   | 0.1497          |
-| 0.1478        | 0.91  | 100  | 0.1485          |
-| 0.1489        | 1.0   | 110  | 0.1505          |
-| 0.1461        | 1.09  | 120  | 0.1490          |
-| 0.1453        | 1.18  | 130  | 0.1512          |
-| 0.1463        | 1.27  | 140  | 0.1500          |
-| 0.1486        | 1.36  | 150  | 0.1461          |
-| 0.1443        | 1.45  | 160  | 0.1488          |
-| 0.1451        | 1.54  | 170  | 0.1471          |
-| 0.1465        | 1.63  | 180  | 0.1459          |
-| 0.1467        | 1.72  | 190  | 0.1507          |
-| 0.1448        | 1.81  | 200  | 0.1499          |
-| 0.1496        | 1.9   | 210  | 0.1465          |
-| 0.1459        | 1.99  | 220  | 0.1497          |
-| 0.146         | 2.08  | 230  | 0.1464          |
-| 0.1401        | 2.18  | 240  | 0.1460          |
-| 0.1427        | 2.27  | 250  | 0.1473          |
-| 0.1439        | 2.36  | 260  | 0.1472          |
-| 0.1421        | 2.45  | 270  | 0.1460          |
-| 0.1408        | 2.54  | 280  | 0.1458          |
-| 0.1406        | 2.63  | 290  | 0.1466          |
-| 0.1442        | 2.72  | 300  | 0.1452          |
-| 0.1436        | 2.81  | 310  | 0.1451          |
-| 0.1426        | 2.9   | 320  | 0.1452          |
-| 0.1438        | 2.99  | 330  | 0.1452          |
 ### Framework versions

 This model is a fine-tuned version of [allenai/OLMo-1B](https://huggingface.co/allenai/OLMo-1B) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0540
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.8099        | 0.09  | 10   | 0.1912          |
+| 0.1798        | 0.18  | 20   | 0.1531          |
+| 0.1494        | 0.27  | 30   | 0.1613          |
+| 0.1557        | 0.36  | 40   | 0.1576          |
+| 0.1505        | 0.45  | 50   | 0.1489          |
+| 0.1502        | 0.54  | 60   | 0.1467          |
+| 0.1486        | 0.63  | 70   | 0.1468          |
+| 0.1478        | 0.73  | 80   | 0.1530          |
+| 0.1418        | 0.82  | 90   | 0.1254          |
+| 0.1393        | 0.91  | 100  | 0.1264          |
+| 0.114         | 1.0   | 110  | 0.0868          |
+| 0.0713        | 1.09  | 120  | 0.0721          |
+| 0.0753        | 1.18  | 130  | 0.1096          |
+| 0.0868        | 1.27  | 140  | 0.0649          |
+| 0.124         | 1.36  | 150  | 0.0621          |
+| 0.058         | 1.45  | 160  | 0.0572          |
+| 0.0688        | 1.54  | 170  | 0.0600          |
+| 0.0626        | 1.63  | 180  | 0.0618          |
+| 0.0673        | 1.72  | 190  | 0.0575          |
+| 0.0579        | 1.81  | 200  | 0.0574          |
+| 0.0592        | 1.9   | 210  | 0.0554          |
+| 0.0577        | 1.99  | 220  | 0.0546          |
+| 0.0568        | 2.08  | 230  | 0.0548          |
+| 0.0807        | 2.18  | 240  | 0.0912          |
+| 0.0728        | 2.27  | 250  | 0.0610          |
+| 0.0629        | 2.36  | 260  | 0.0589          |
+| 0.0554        | 2.45  | 270  | 0.0552          |
+| 0.0523        | 2.54  | 280  | 0.0547          |
+| 0.0544        | 2.63  | 290  | 0.0560          |
+| 0.0551        | 2.72  | 300  | 0.0541          |
+| 0.056         | 2.81  | 310  | 0.0539          |
+| 0.0574        | 2.9   | 320  | 0.0540          |
+| 0.0586        | 2.99  | 330  | 0.0540          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -1,18 +1,43 @@
 {
   "auto_mapping": null,
   "base_model_name_or_path": "allenai/OLMo-1B",
   "bias": "none",
   "enable_lora": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "lora_alpha": 16,
   "lora_dropout": 0.05,
   "merge_weights": false,
   "modules_to_save": null,
-  "peft_type": "LORA",
   "r": 16,
   "target_modules": [
     "attn_out"
   ],
-  "task_type": "CAUSAL_LM"
 }

 {
+  "adaptive_ratio": 0.01,
+  "adaptive_ratio_decay": 1.0,
+  "additive_modeling": false,
   "auto_mapping": null,
   "base_model_name_or_path": "allenai/OLMo-1B",
   "bias": "none",
+  "curr_learning": true,
+  "detached_training": true,
+  "dynamic_adapter_pool": true,
   "enable_lora": null,
+  "encoder_hidden_size": 2048,
   "fan_in_fan_out": false,
+  "hypernetwork": true,
   "inference_mode": true,
+  "input_based_adapter_selection": true,
   "lora_alpha": 16,
   "lora_dropout": 0.05,
   "merge_weights": false,
   "modules_to_save": null,
+  "num_attention_heads": 16,
+  "num_layers": 16,
+  "num_prefix_set": 3,
+  "num_transformer_submodules": 1,
+  "num_virtual_tokens": 30,
+  "number_of_adapter_pre_layer": 8,
+  "ot_diversified_dispatcher": false,
+  "ot_diversified_prefix": false,
+  "peft_type": "PREFIX_MA_LORA",
+  "pool_selective_inference": true,
+  "pool_selective_training": true,
+  "prefix_projection": true,
   "r": 16,
+  "scale": 64,
+  "selective_num": 8,
+  "simple_hidden_matching": false,
+  "simple_instance_matching": true,
   "target_modules": [
     "attn_out"
   ],
+  "task_type": "CAUSAL_LM",
+  "token_dim": 2048
 }

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd7392b0b684e4b27c1356675552fa4f046735d974180a3bf659c74097d1b697
-size 2108942

 version https://git-lfs.github.com/spec/v1
+oid sha256:af04a03c7f8ade6755b08dbb944d25d7535357db501c61546b46a66e094562ad
+size 101036502

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82450d3abb86d29402848cf283275cd633732f5c4fdf16a2d11ec85f5d0175cf
-size 4709168176

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f78a8ee84d1d34a459f88cef3445389313a40085cc2c581db4e64d0b9acca86
+size 4774383804

tokenizer.json CHANGED Viewed

@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 128,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bef2db007a0b7270f540e00c401d12f41aa77bcad42b09bb1c3fad268f8476ad
-size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:d086993ff875ad4162e2b32d5c4f2cfefeddf38d05c9c3a670112179f850766d
+size 5176