TinyLLaMA v1.1 LoRA fine-tuned for 3-class malicious prompt detection
Browse files- README.md +89 -0
- adapter_model.safetensors +1 -1
- eval_metrics.json +45 -0
- final_model/README.md +206 -0
- final_model/adapter_config.json +42 -0
- final_model/adapter_model.safetensors +3 -0
- final_model/special_tokens_map.json +24 -0
- final_model/tokenizer.json +0 -0
- final_model/tokenizer_config.json +43 -0
- overall_metrics.csv +2 -0
README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: apache-2.0
|
| 4 |
+
base_model: TinyLlama/TinyLlama_v1.1
|
| 5 |
+
tags:
|
| 6 |
+
- base_model:adapter:TinyLlama/TinyLlama_v1.1
|
| 7 |
+
- lora
|
| 8 |
+
- transformers
|
| 9 |
+
metrics:
|
| 10 |
+
- accuracy
|
| 11 |
+
model-index:
|
| 12 |
+
- name: tinyllama-lora-malicious-classifier
|
| 13 |
+
results: []
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 17 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 18 |
+
|
| 19 |
+
# tinyllama-lora-malicious-classifier
|
| 20 |
+
|
| 21 |
+
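This model is a LoRA fine-tuned version of [TinyLlama/TinyLlama_v1.1](https://huggingface.co/TinyLlama/TinyLlama_v1.1) for 3-class prompt classification (`jailbreaking`, `prompt injection`, `unharmful`). The training dataset is not recorded in this card.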
|
| 22 |
+
It achieves the following results on the evaluation set (these figures match the epoch 13 / step 14391 row of the training results table below):
|
| 23 |
+
- Loss: 0.4833
|
| 24 |
+
- Accuracy: 0.8289
|
| 25 |
+
- Precision Weighted: 0.8220
|
| 26 |
+
- Recall Weighted: 0.8289
|
| 27 |
+
- F1 Weighted: 0.8239
|
| 28 |
+
- MCC: 0.7203
|
| 29 |
+
- Balanced Accuracy: 0.7724
|
| 30 |
+
- Macro FNR: 0.2276
|
| 31 |
+
- Macro FPR: 0.0897
|
| 32 |
+
- Macro Specificity: 0.9103
|
| 33 |
+
- Per Class: {'jailbreaking': {'TP': 228, 'FP': 103, 'FN': 161, 'TN': 1437, 'FNR': 0.4138817480719794, 'FPR': 0.06688311688311688, 'Specificity': 0.9331168831168831}, 'prompt injection': {'TP': 439, 'FP': 112, 'FN': 131, 'TN': 1247, 'FNR': 0.22982456140350876, 'FPR': 0.08241353936718175, 'Specificity': 0.9175864606328182}, 'unharmful': {'TP': 932, 'FP': 115, 'FN': 38, 'TN': 844, 'FNR': 0.03917525773195876, 'FPR': 0.11991657977059438, 'Specificity': 0.8800834202294057}}
|
| 34 |
+
|
| 35 |
+
## Model description
|
| 36 |
+
|
| 37 |
+
More information needed
|
| 38 |
+
|
| 39 |
+
## Intended uses & limitations
|
| 40 |
+
|
| 41 |
+
More information needed
|
| 42 |
+
|
| 43 |
+
## Training and evaluation data
|
| 44 |
+
|
| 45 |
+
More information needed
|
| 46 |
+
|
| 47 |
+
## Training procedure
|
| 48 |
+
|
| 49 |
+
### Training hyperparameters
|
| 50 |
+
|
| 51 |
+
The following hyperparameters were used during training:
|
| 52 |
+
- learning_rate: 2e-05
|
| 53 |
+
- train_batch_size: 8
|
| 54 |
+
- eval_batch_size: 8
|
| 55 |
+
- seed: 42
|
| 56 |
+
- optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
|
| 57 |
+
- lr_scheduler_type: linear
|
| 58 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 59 |
+
- num_epochs: 15
|
| 60 |
+
- mixed_precision_training: Native AMP
|
| 61 |
+
|
| 62 |
+
### Training results
|
| 63 |
+
|
| 64 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision Weighted | Recall Weighted | F1 Weighted | MCC | Balanced Accuracy | Macro FNR | Macro FPR | Macro Specificity | Per Class |
|
| 65 |
+
|:-------------:|:-----:|:-----:|:---------------:|:--------:|:------------------:|:---------------:|:-----------:|:------:|:-----------------:|:---------:|:---------:|:-----------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
| 66 |
+
| 1.0323 | 1.0 | 1107 | 1.0327 | 0.5702 | 0.5595 | 0.5702 | 0.5636 | 0.2924 | 0.5112 | 0.4888 | 0.2364 | 0.7636 | {'jailbreaking': {'TP': 113, 'FP': 209, 'FN': 276, 'TN': 1331, 'FNR': 0.7095115681233933, 'FPR': 0.1357142857142857, 'Specificity': 0.8642857142857143}, 'prompt injection': {'TP': 312, 'FP': 238, 'FN': 258, 'TN': 1121, 'FNR': 0.45263157894736844, 'FPR': 0.1751287711552612, 'Specificity': 0.8248712288447387}, 'unharmful': {'TP': 675, 'FP': 382, 'FN': 295, 'TN': 577, 'FNR': 0.30412371134020616, 'FPR': 0.3983315954118874, 'Specificity': 0.6016684045881127}} |
|
| 67 |
+
| 0.7496 | 2.0 | 2214 | 0.7073 | 0.7252 | 0.7112 | 0.7252 | 0.7144 | 0.5463 | 0.6529 | 0.3471 | 0.1502 | 0.8498 | {'jailbreaking': {'TP': 154, 'FP': 132, 'FN': 235, 'TN': 1408, 'FNR': 0.6041131105398457, 'FPR': 0.08571428571428572, 'Specificity': 0.9142857142857143}, 'prompt injection': {'TP': 386, 'FP': 163, 'FN': 184, 'TN': 1196, 'FNR': 0.32280701754385965, 'FPR': 0.1199411331861663, 'Specificity': 0.8800588668138337}, 'unharmful': {'TP': 859, 'FP': 235, 'FN': 111, 'TN': 724, 'FNR': 0.11443298969072165, 'FPR': 0.24504692387904067, 'Specificity': 0.7549530761209593}} |
|
| 68 |
+
| 0.5695 | 3.0 | 3321 | 0.6096 | 0.7766 | 0.7636 | 0.7766 | 0.7654 | 0.6323 | 0.7031 | 0.2969 | 0.1215 | 0.8785 | {'jailbreaking': {'TP': 171, 'FP': 106, 'FN': 218, 'TN': 1434, 'FNR': 0.5604113110539846, 'FPR': 0.06883116883116883, 'Specificity': 0.9311688311688312}, 'prompt injection': {'TP': 417, 'FP': 141, 'FN': 153, 'TN': 1218, 'FNR': 0.26842105263157895, 'FPR': 0.10375275938189846, 'Specificity': 0.8962472406181016}, 'unharmful': {'TP': 910, 'FP': 184, 'FN': 60, 'TN': 775, 'FNR': 0.061855670103092786, 'FPR': 0.19186652763295098, 'Specificity': 0.808133472367049}} |
|
| 69 |
+
| 0.5059 | 4.0 | 4428 | 0.5686 | 0.7926 | 0.7817 | 0.7926 | 0.7843 | 0.6596 | 0.7245 | 0.2755 | 0.1107 | 0.8893 | {'jailbreaking': {'TP': 191, 'FP': 114, 'FN': 198, 'TN': 1426, 'FNR': 0.5089974293059126, 'FPR': 0.07402597402597402, 'Specificity': 0.925974025974026}, 'prompt injection': {'TP': 419, 'FP': 131, 'FN': 151, 'TN': 1228, 'FNR': 0.2649122807017544, 'FPR': 0.0963944076526858, 'Specificity': 0.9036055923473142}, 'unharmful': {'TP': 919, 'FP': 155, 'FN': 51, 'TN': 804, 'FNR': 0.05257731958762887, 'FPR': 0.1616266944734098, 'Specificity': 0.8383733055265902}} |
|
| 70 |
+
| 0.4583 | 5.0 | 5535 | 0.5413 | 0.8056 | 0.7953 | 0.8056 | 0.7977 | 0.6812 | 0.7389 | 0.2611 | 0.1032 | 0.8968 | {'jailbreaking': {'TP': 199, 'FP': 109, 'FN': 190, 'TN': 1431, 'FNR': 0.4884318766066838, 'FPR': 0.07077922077922078, 'Specificity': 0.9292207792207792}, 'prompt injection': {'TP': 426, 'FP': 126, 'FN': 144, 'TN': 1233, 'FNR': 0.25263157894736843, 'FPR': 0.09271523178807947, 'Specificity': 0.9072847682119205}, 'unharmful': {'TP': 929, 'FP': 140, 'FN': 41, 'TN': 819, 'FNR': 0.042268041237113405, 'FPR': 0.145985401459854, 'Specificity': 0.8540145985401459}} |
|
| 71 |
+
| 0.4763 | 6.0 | 6642 | 0.5243 | 0.8113 | 0.8028 | 0.8113 | 0.8052 | 0.6910 | 0.7498 | 0.2502 | 0.0994 | 0.9006 | {'jailbreaking': {'TP': 212, 'FP': 113, 'FN': 177, 'TN': 1427, 'FNR': 0.455012853470437, 'FPR': 0.07337662337662337, 'Specificity': 0.9266233766233766}, 'prompt injection': {'TP': 428, 'FP': 120, 'FN': 142, 'TN': 1239, 'FNR': 0.24912280701754386, 'FPR': 0.08830022075055188, 'Specificity': 0.9116997792494481}, 'unharmful': {'TP': 925, 'FP': 131, 'FN': 45, 'TN': 828, 'FNR': 0.04639175257731959, 'FPR': 0.13660062565172054, 'Specificity': 0.8633993743482794}} |
|
| 72 |
+
| 0.4283 | 7.0 | 7749 | 0.5095 | 0.8170 | 0.8083 | 0.8170 | 0.8104 | 0.7003 | 0.7546 | 0.2454 | 0.0969 | 0.9031 | {'jailbreaking': {'TP': 213, 'FP': 105, 'FN': 176, 'TN': 1435, 'FNR': 0.4524421593830334, 'FPR': 0.06818181818181818, 'Specificity': 0.9318181818181818}, 'prompt injection': {'TP': 430, 'FP': 118, 'FN': 140, 'TN': 1241, 'FNR': 0.24561403508771928, 'FPR': 0.08682855040470934, 'Specificity': 0.9131714495952906}, 'unharmful': {'TP': 933, 'FP': 130, 'FN': 37, 'TN': 829, 'FNR': 0.03814432989690722, 'FPR': 0.13555787278415016, 'Specificity': 0.8644421272158499}} |
|
| 73 |
+
| 0.4119 | 8.0 | 8856 | 0.5033 | 0.8191 | 0.8116 | 0.8191 | 0.8135 | 0.7041 | 0.7592 | 0.2408 | 0.0951 | 0.9049 | {'jailbreaking': {'TP': 223, 'FP': 118, 'FN': 166, 'TN': 1422, 'FNR': 0.4267352185089974, 'FPR': 0.07662337662337662, 'Specificity': 0.9233766233766234}, 'prompt injection': {'TP': 422, 'FP': 105, 'FN': 148, 'TN': 1254, 'FNR': 0.2596491228070175, 'FPR': 0.0772626931567329, 'Specificity': 0.9227373068432672}, 'unharmful': {'TP': 935, 'FP': 126, 'FN': 35, 'TN': 833, 'FNR': 0.03608247422680412, 'FPR': 0.13138686131386862, 'Specificity': 0.8686131386861314}} |
|
| 74 |
+
| 0.412 | 9.0 | 9963 | 0.4955 | 0.8253 | 0.8178 | 0.8253 | 0.8199 | 0.7143 | 0.7671 | 0.2329 | 0.0916 | 0.9084 | {'jailbreaking': {'TP': 222, 'FP': 103, 'FN': 167, 'TN': 1437, 'FNR': 0.42930591259640105, 'FPR': 0.06688311688311688, 'Specificity': 0.9331168831168831}, 'prompt injection': {'TP': 440, 'FP': 118, 'FN': 130, 'TN': 1241, 'FNR': 0.22807017543859648, 'FPR': 0.08682855040470934, 'Specificity': 0.9131714495952906}, 'unharmful': {'TP': 930, 'FP': 116, 'FN': 40, 'TN': 843, 'FNR': 0.041237113402061855, 'FPR': 0.12095933263816476, 'Specificity': 0.8790406673618353}} |
|
| 75 |
+
| 0.496 | 10.0 | 11070 | 0.4926 | 0.8289 | 0.8214 | 0.8289 | 0.8232 | 0.7202 | 0.7703 | 0.2297 | 0.0903 | 0.9097 | {'jailbreaking': {'TP': 224, 'FP': 98, 'FN': 165, 'TN': 1442, 'FNR': 0.4241645244215938, 'FPR': 0.06363636363636363, 'Specificity': 0.9363636363636364}, 'prompt injection': {'TP': 439, 'FP': 113, 'FN': 131, 'TN': 1246, 'FNR': 0.22982456140350876, 'FPR': 0.08314937454010302, 'Specificity': 0.9168506254598969}, 'unharmful': {'TP': 936, 'FP': 119, 'FN': 34, 'TN': 840, 'FNR': 0.03505154639175258, 'FPR': 0.12408759124087591, 'Specificity': 0.8759124087591241}} |
|
| 76 |
+
| 0.428 | 11.0 | 12177 | 0.4890 | 0.8258 | 0.8191 | 0.8258 | 0.8207 | 0.7154 | 0.7677 | 0.2323 | 0.0913 | 0.9087 | {'jailbreaking': {'TP': 230, 'FP': 117, 'FN': 159, 'TN': 1423, 'FNR': 0.4087403598971722, 'FPR': 0.07597402597402597, 'Specificity': 0.924025974025974}, 'prompt injection': {'TP': 424, 'FP': 99, 'FN': 146, 'TN': 1260, 'FNR': 0.256140350877193, 'FPR': 0.0728476821192053, 'Specificity': 0.9271523178807947}, 'unharmful': {'TP': 939, 'FP': 120, 'FN': 31, 'TN': 839, 'FNR': 0.031958762886597936, 'FPR': 0.1251303441084463, 'Specificity': 0.8748696558915537}} |
|
| 77 |
+
| 0.4103 | 12.0 | 13284 | 0.4866 | 0.8269 | 0.8191 | 0.8269 | 0.8206 | 0.7166 | 0.7669 | 0.2331 | 0.0922 | 0.9078 | {'jailbreaking': {'TP': 221, 'FP': 99, 'FN': 168, 'TN': 1441, 'FNR': 0.4318766066838046, 'FPR': 0.06428571428571428, 'Specificity': 0.9357142857142857}, 'prompt injection': {'TP': 437, 'FP': 107, 'FN': 133, 'TN': 1252, 'FNR': 0.23333333333333334, 'FPR': 0.07873436350257543, 'Specificity': 0.9212656364974245}, 'unharmful': {'TP': 937, 'FP': 128, 'FN': 33, 'TN': 831, 'FNR': 0.03402061855670103, 'FPR': 0.1334723670490094, 'Specificity': 0.8665276329509907}} |
|
| 78 |
+
| 0.4009 | 13.0 | 14391 | 0.4833 | 0.8289 | 0.8220 | 0.8289 | 0.8239 | 0.7203 | 0.7724 | 0.2276 | 0.0897 | 0.9103 | {'jailbreaking': {'TP': 228, 'FP': 103, 'FN': 161, 'TN': 1437, 'FNR': 0.4138817480719794, 'FPR': 0.06688311688311688, 'Specificity': 0.9331168831168831}, 'prompt injection': {'TP': 439, 'FP': 112, 'FN': 131, 'TN': 1247, 'FNR': 0.22982456140350876, 'FPR': 0.08241353936718175, 'Specificity': 0.9175864606328182}, 'unharmful': {'TP': 932, 'FP': 115, 'FN': 38, 'TN': 844, 'FNR': 0.03917525773195876, 'FPR': 0.11991657977059438, 'Specificity': 0.8800834202294057}} |
|
| 79 |
+
| 0.4242 | 14.0 | 15498 | 0.4834 | 0.8284 | 0.8211 | 0.8284 | 0.8228 | 0.7193 | 0.7700 | 0.2300 | 0.0906 | 0.9094 | {'jailbreaking': {'TP': 226, 'FP': 105, 'FN': 163, 'TN': 1435, 'FNR': 0.4190231362467866, 'FPR': 0.06818181818181818, 'Specificity': 0.9318181818181818}, 'prompt injection': {'TP': 435, 'FP': 104, 'FN': 135, 'TN': 1255, 'FNR': 0.23684210526315788, 'FPR': 0.07652685798381163, 'Specificity': 0.9234731420161884}, 'unharmful': {'TP': 937, 'FP': 122, 'FN': 33, 'TN': 837, 'FNR': 0.03402061855670103, 'FPR': 0.12721584984358708, 'Specificity': 0.872784150156413}} |
|
| 80 |
+
| 0.3859 | 15.0 | 16605 | 0.4829 | 0.8269 | 0.8197 | 0.8269 | 0.8215 | 0.7168 | 0.7692 | 0.2308 | 0.0912 | 0.9088 | {'jailbreaking': {'TP': 226, 'FP': 106, 'FN': 163, 'TN': 1434, 'FNR': 0.4190231362467866, 'FPR': 0.06883116883116883, 'Specificity': 0.9311688311688312}, 'prompt injection': {'TP': 436, 'FP': 107, 'FN': 134, 'TN': 1252, 'FNR': 0.23508771929824562, 'FPR': 0.07873436350257543, 'Specificity': 0.9212656364974245}, 'unharmful': {'TP': 933, 'FP': 121, 'FN': 37, 'TN': 838, 'FNR': 0.03814432989690722, 'FPR': 0.1261730969760167, 'Specificity': 0.8738269030239834}} |
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
### Framework versions
|
| 84 |
+
|
| 85 |
+
- PEFT 0.17.1
|
| 86 |
+
- Transformers 4.53.3
|
| 87 |
+
- Pytorch 2.6.0+cu124
|
| 88 |
+
- Datasets 4.3.0
|
| 89 |
+
- Tokenizers 0.21.4
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9058976
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16120d428e11b203725121c0717568f3084db04cdd8305aaab1d1f4bdcd2186b
|
| 3 |
size 9058976
|
eval_metrics.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"eval_loss": 0.48330193758010864,
|
| 3 |
+
"eval_accuracy": 0.8289269051321928,
|
| 4 |
+
"eval_precision_weighted": 0.821952872098563,
|
| 5 |
+
"eval_recall_weighted": 0.8289269051321928,
|
| 6 |
+
"eval_f1_weighted": 0.8238609492009197,
|
| 7 |
+
"eval_MCC": 0.7203450380787764,
|
| 8 |
+
"eval_balanced_accuracy": 0.7723728109308511,
|
| 9 |
+
"eval_macro_FNR": 0.227627189069149,
|
| 10 |
+
"eval_macro_FPR": 0.08973774534029767,
|
| 11 |
+
"eval_macro_Specificity": 0.9102622546597022,
|
| 12 |
+
"eval_per_class": {
|
| 13 |
+
"jailbreaking": {
|
| 14 |
+
"TP": 228,
|
| 15 |
+
"FP": 103,
|
| 16 |
+
"FN": 161,
|
| 17 |
+
"TN": 1437,
|
| 18 |
+
"FNR": 0.4138817480719794,
|
| 19 |
+
"FPR": 0.06688311688311688,
|
| 20 |
+
"Specificity": 0.9331168831168831
|
| 21 |
+
},
|
| 22 |
+
"prompt injection": {
|
| 23 |
+
"TP": 439,
|
| 24 |
+
"FP": 112,
|
| 25 |
+
"FN": 131,
|
| 26 |
+
"TN": 1247,
|
| 27 |
+
"FNR": 0.22982456140350876,
|
| 28 |
+
"FPR": 0.08241353936718175,
|
| 29 |
+
"Specificity": 0.9175864606328182
|
| 30 |
+
},
|
| 31 |
+
"unharmful": {
|
| 32 |
+
"TP": 932,
|
| 33 |
+
"FP": 115,
|
| 34 |
+
"FN": 38,
|
| 35 |
+
"TN": 844,
|
| 36 |
+
"FNR": 0.03917525773195876,
|
| 37 |
+
"FPR": 0.11991657977059438,
|
| 38 |
+
"Specificity": 0.8800834202294057
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"eval_runtime": 44.2738,
|
| 42 |
+
"eval_samples_per_second": 43.57,
|
| 43 |
+
"eval_steps_per_second": 5.466,
|
| 44 |
+
"epoch": 15.0
|
| 45 |
+
}
|
final_model/README.md
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: TinyLlama/TinyLlama_v1.1
|
| 3 |
+
library_name: peft
|
| 4 |
+
tags:
|
| 5 |
+
- base_model:adapter:TinyLlama/TinyLlama_v1.1
|
| 6 |
+
- lora
|
| 7 |
+
- transformers
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# Model Card for tinyllama-lora-malicious-classifier
|
| 11 |
+
|
| 12 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
## Model Details
|
| 17 |
+
|
| 18 |
+
### Model Description
|
| 19 |
+
|
| 20 |
+
<!-- Provide a longer summary of what this model is. -->
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
- **Developed by:** [More Information Needed]
|
| 25 |
+
- **Funded by [optional]:** [More Information Needed]
|
| 26 |
+
- **Shared by [optional]:** [More Information Needed]
|
| 27 |
+
- **Model type:** LoRA adapter (PEFT, r=8, alpha=16) for sequence classification (`SEQ_CLS`) on top of TinyLlama v1.1
|
| 28 |
+
- **Language(s) (NLP):** [More Information Needed]
|
| 29 |
+
- **License:** [More Information Needed]
|
| 30 |
+
- **Finetuned from model [optional]:** [TinyLlama/TinyLlama_v1.1](https://huggingface.co/TinyLlama/TinyLlama_v1.1)
|
| 31 |
+
|
| 32 |
+
### Model Sources [optional]
|
| 33 |
+
|
| 34 |
+
<!-- Provide the basic links for the model. -->
|
| 35 |
+
|
| 36 |
+
- **Repository:** [More Information Needed]
|
| 37 |
+
- **Paper [optional]:** [More Information Needed]
|
| 38 |
+
- **Demo [optional]:** [More Information Needed]
|
| 39 |
+
|
| 40 |
+
## Uses
|
| 41 |
+
|
| 42 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
| 43 |
+
|
| 44 |
+
### Direct Use
|
| 45 |
+
|
| 46 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
| 47 |
+
|
| 48 |
+
[More Information Needed]
|
| 49 |
+
|
| 50 |
+
### Downstream Use [optional]
|
| 51 |
+
|
| 52 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
| 53 |
+
|
| 54 |
+
[More Information Needed]
|
| 55 |
+
|
| 56 |
+
### Out-of-Scope Use
|
| 57 |
+
|
| 58 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
| 59 |
+
|
| 60 |
+
[More Information Needed]
|
| 61 |
+
|
| 62 |
+
## Bias, Risks, and Limitations
|
| 63 |
+
|
| 64 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
| 65 |
+
|
| 66 |
+
[More Information Needed]
|
| 67 |
+
|
| 68 |
+
### Recommendations
|
| 69 |
+
|
| 70 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
| 71 |
+
|
| 72 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
| 73 |
+
|
| 74 |
+
## How to Get Started with the Model
|
| 75 |
+
|
| 76 |
+
Use the code below to get started with the model.
|
| 77 |
+
|
| 78 |
+
[More Information Needed]
|
| 79 |
+
|
| 80 |
+
## Training Details
|
| 81 |
+
|
| 82 |
+
### Training Data
|
| 83 |
+
|
| 84 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
| 85 |
+
|
| 86 |
+
[More Information Needed]
|
| 87 |
+
|
| 88 |
+
### Training Procedure
|
| 89 |
+
|
| 90 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
| 91 |
+
|
| 92 |
+
#### Preprocessing [optional]
|
| 93 |
+
|
| 94 |
+
[More Information Needed]
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
#### Training Hyperparameters
|
| 98 |
+
|
| 99 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
| 100 |
+
|
| 101 |
+
#### Speeds, Sizes, Times [optional]
|
| 102 |
+
|
| 103 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
| 104 |
+
|
| 105 |
+
[More Information Needed]
|
| 106 |
+
|
| 107 |
+
## Evaluation
|
| 108 |
+
|
| 109 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
| 110 |
+
|
| 111 |
+
### Testing Data, Factors & Metrics
|
| 112 |
+
|
| 113 |
+
#### Testing Data
|
| 114 |
+
|
| 115 |
+
<!-- This should link to a Dataset Card if possible. -->
|
| 116 |
+
|
| 117 |
+
[More Information Needed]
|
| 118 |
+
|
| 119 |
+
#### Factors
|
| 120 |
+
|
| 121 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
| 122 |
+
|
| 123 |
+
[More Information Needed]
|
| 124 |
+
|
| 125 |
+
#### Metrics
|
| 126 |
+
|
| 127 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
| 128 |
+
|
| 129 |
+
[More Information Needed]
|
| 130 |
+
|
| 131 |
+
### Results
|
| 132 |
+
|
| 133 |
+
[More Information Needed]
|
| 134 |
+
|
| 135 |
+
#### Summary
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
## Model Examination [optional]
|
| 140 |
+
|
| 141 |
+
<!-- Relevant interpretability work for the model goes here -->
|
| 142 |
+
|
| 143 |
+
[More Information Needed]
|
| 144 |
+
|
| 145 |
+
## Environmental Impact
|
| 146 |
+
|
| 147 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
| 148 |
+
|
| 149 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
| 150 |
+
|
| 151 |
+
- **Hardware Type:** [More Information Needed]
|
| 152 |
+
- **Hours used:** [More Information Needed]
|
| 153 |
+
- **Cloud Provider:** [More Information Needed]
|
| 154 |
+
- **Compute Region:** [More Information Needed]
|
| 155 |
+
- **Carbon Emitted:** [More Information Needed]
|
| 156 |
+
|
| 157 |
+
## Technical Specifications [optional]
|
| 158 |
+
|
| 159 |
+
### Model Architecture and Objective
|
| 160 |
+
|
| 161 |
+
[More Information Needed]
|
| 162 |
+
|
| 163 |
+
### Compute Infrastructure
|
| 164 |
+
|
| 165 |
+
[More Information Needed]
|
| 166 |
+
|
| 167 |
+
#### Hardware
|
| 168 |
+
|
| 169 |
+
[More Information Needed]
|
| 170 |
+
|
| 171 |
+
#### Software
|
| 172 |
+
|
| 173 |
+
[More Information Needed]
|
| 174 |
+
|
| 175 |
+
## Citation [optional]
|
| 176 |
+
|
| 177 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
| 178 |
+
|
| 179 |
+
**BibTeX:**
|
| 180 |
+
|
| 181 |
+
[More Information Needed]
|
| 182 |
+
|
| 183 |
+
**APA:**
|
| 184 |
+
|
| 185 |
+
[More Information Needed]
|
| 186 |
+
|
| 187 |
+
## Glossary [optional]
|
| 188 |
+
|
| 189 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
| 190 |
+
|
| 191 |
+
[More Information Needed]
|
| 192 |
+
|
| 193 |
+
## More Information [optional]
|
| 194 |
+
|
| 195 |
+
[More Information Needed]
|
| 196 |
+
|
| 197 |
+
## Model Card Authors [optional]
|
| 198 |
+
|
| 199 |
+
[More Information Needed]
|
| 200 |
+
|
| 201 |
+
## Model Card Contact
|
| 202 |
+
|
| 203 |
+
[More Information Needed]
|
| 204 |
+
### Framework versions
|
| 205 |
+
|
| 206 |
+
- PEFT 0.17.1
|
final_model/adapter_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 16,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.05,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": [
|
| 22 |
+
"classifier",
|
| 23 |
+
"score"
|
| 24 |
+
],
|
| 25 |
+
"peft_type": "LORA",
|
| 26 |
+
"qalora_group_size": 16,
|
| 27 |
+
"r": 8,
|
| 28 |
+
"rank_pattern": {},
|
| 29 |
+
"revision": null,
|
| 30 |
+
"target_modules": [
|
| 31 |
+
"q_proj",
|
| 32 |
+
"o_proj",
|
| 33 |
+
"k_proj",
|
| 34 |
+
"v_proj"
|
| 35 |
+
],
|
| 36 |
+
"target_parameters": null,
|
| 37 |
+
"task_type": "SEQ_CLS",
|
| 38 |
+
"trainable_token_indices": null,
|
| 39 |
+
"use_dora": false,
|
| 40 |
+
"use_qalora": false,
|
| 41 |
+
"use_rslora": false
|
| 42 |
+
}
|
final_model/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16120d428e11b203725121c0717568f3084db04cdd8305aaab1d1f4bdcd2186b
|
| 3 |
+
size 9058976
|
final_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
final_model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
final_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"clean_up_tokenization_spaces": false,
|
| 33 |
+
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
+
"legacy": false,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"padding_side": "right",
|
| 39 |
+
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
+
"unk_token": "<unk>",
|
| 42 |
+
"use_default_system_prompt": false
|
| 43 |
+
}
|
overall_metrics.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
eval_loss,eval_accuracy,eval_precision_weighted,eval_recall_weighted,eval_f1_weighted,eval_MCC,eval_balanced_accuracy,eval_macro_FNR,eval_macro_FPR,eval_macro_Specificity,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
|
| 2 |
+
0.48330193758010864,0.8289269051321928,0.821952872098563,0.8289269051321928,0.8238609492009197,0.7203450380787764,0.7723728109308511,0.227627189069149,0.08973774534029767,0.9102622546597022,44.2738,43.57,5.466,15.0
|