Spaces:
Running
on
Zero
Running
on
Zero
Upload 8 files
Browse files- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/checkpoint.pt +3 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/config.json +29 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/config_train.json +53 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/checkpoint.pt +3 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/config.json +166 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/config_train.json +53 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/config.json +8 -0
- data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/stoke_config.json +10 -0
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8ed0c5f360111a0cf03c113e844eb99d5a25966859f2b746cdfecb0641c1b34
|
| 3 |
+
size 8439912
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "meta-llama/Llama-3.2-1B-Instruct",
|
| 3 |
+
"type": "span_classifier",
|
| 4 |
+
"label_map": [
|
| 5 |
+
"no_span",
|
| 6 |
+
"span"
|
| 7 |
+
],
|
| 8 |
+
"learning_rate": 0.001,
|
| 9 |
+
"classifier_dim": 4096,
|
| 10 |
+
"loss_weights": [
|
| 11 |
+
1.0,
|
| 12 |
+
50.0
|
| 13 |
+
],
|
| 14 |
+
"identifier": "B7Ogkftne6",
|
| 15 |
+
"best_f1_validation": 0.5553668737411499,
|
| 16 |
+
"best_f1_validation_classwise": {
|
| 17 |
+
"span": {
|
| 18 |
+
"p": 0.3978736698627472,
|
| 19 |
+
"r": 0.9192339777946472,
|
| 20 |
+
"f": 0.5553668737411499,
|
| 21 |
+
"s": 4804.0
|
| 22 |
+
},
|
| 23 |
+
"macro": {
|
| 24 |
+
"p": 0.3978736698627472,
|
| 25 |
+
"r": 0.9192339777946472,
|
| 26 |
+
"f": 0.5553668737411499
|
| 27 |
+
}
|
| 28 |
+
}
|
| 29 |
+
}
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/config_train.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"path": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa",
|
| 3 |
+
"splits": [
|
| 4 |
+
"train",
|
| 5 |
+
"validation"
|
| 6 |
+
],
|
| 7 |
+
"layers": [
|
| 8 |
+
8,
|
| 9 |
+
9,
|
| 10 |
+
10,
|
| 11 |
+
11,
|
| 12 |
+
12
|
| 13 |
+
],
|
| 14 |
+
"hfcache": "",
|
| 15 |
+
"classifier_dims": [
|
| 16 |
+
4096
|
| 17 |
+
],
|
| 18 |
+
"learning_rates": [
|
| 19 |
+
0.0001,
|
| 20 |
+
5e-05,
|
| 21 |
+
0.0003,
|
| 22 |
+
0.0005,
|
| 23 |
+
0.001
|
| 24 |
+
],
|
| 25 |
+
"cuda": true,
|
| 26 |
+
"n_steps_per_epoch": 500,
|
| 27 |
+
"n_epochs": 30,
|
| 28 |
+
"batch_size": 4,
|
| 29 |
+
"balance_loss": false,
|
| 30 |
+
"loss_weights_span": [
|
| 31 |
+
[
|
| 32 |
+
1.0,
|
| 33 |
+
1.0
|
| 34 |
+
],
|
| 35 |
+
[
|
| 36 |
+
1.0,
|
| 37 |
+
50.0
|
| 38 |
+
],
|
| 39 |
+
[
|
| 40 |
+
1.0,
|
| 41 |
+
100.0
|
| 42 |
+
]
|
| 43 |
+
],
|
| 44 |
+
"time": 1728553144.1331656,
|
| 45 |
+
"config_dataset": {
|
| 46 |
+
"generation_kwargs": {
|
| 47 |
+
"max_new_tokens": 500,
|
| 48 |
+
"repetition_penalty": 1.2
|
| 49 |
+
},
|
| 50 |
+
"model_id": "meta-llama/Llama-3.2-1B-Instruct",
|
| 51 |
+
"flair_model_name": "flair/ner-english-ontonotes-large"
|
| 52 |
+
}
|
| 53 |
+
}
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95f1f6318f661446e7ff38af3e31293ba1f0fc88ed94aee0a8c47ceaf6ec2b1c
|
| 3 |
+
size 33884328
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/config.json
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer": 10,
|
| 3 |
+
"model": "meta-llama/Llama-3.2-1B-Instruct",
|
| 4 |
+
"type": "token_classifier",
|
| 5 |
+
"label_map": [
|
| 6 |
+
"O",
|
| 7 |
+
"CARDINAL",
|
| 8 |
+
"DATE",
|
| 9 |
+
"EVENT",
|
| 10 |
+
"FAC",
|
| 11 |
+
"GPE",
|
| 12 |
+
"LANGUAGE",
|
| 13 |
+
"LAW",
|
| 14 |
+
"LOC",
|
| 15 |
+
"MONEY",
|
| 16 |
+
"NORP",
|
| 17 |
+
"ORDINAL",
|
| 18 |
+
"ORG",
|
| 19 |
+
"PERCENT",
|
| 20 |
+
"PERSON",
|
| 21 |
+
"PRODUCT",
|
| 22 |
+
"QUANTITY",
|
| 23 |
+
"TIME",
|
| 24 |
+
"WORK_OF_ART"
|
| 25 |
+
],
|
| 26 |
+
"learning_rate": 0.001,
|
| 27 |
+
"classifier_dim": 4096,
|
| 28 |
+
"loss_weights": [
|
| 29 |
+
1.0,
|
| 30 |
+
1.0,
|
| 31 |
+
1.0,
|
| 32 |
+
1.0,
|
| 33 |
+
1.0,
|
| 34 |
+
1.0,
|
| 35 |
+
1.0,
|
| 36 |
+
1.0,
|
| 37 |
+
1.0,
|
| 38 |
+
1.0,
|
| 39 |
+
1.0,
|
| 40 |
+
1.0,
|
| 41 |
+
1.0,
|
| 42 |
+
1.0,
|
| 43 |
+
1.0,
|
| 44 |
+
1.0,
|
| 45 |
+
1.0,
|
| 46 |
+
1.0,
|
| 47 |
+
1.0
|
| 48 |
+
],
|
| 49 |
+
"identifier": "ShBuzLjzh9",
|
| 50 |
+
"best_f1_validation": 0.8960928916931152,
|
| 51 |
+
"best_f1_validation_classwise": {
|
| 52 |
+
"CARDINAL": {
|
| 53 |
+
"p": 0.8828781247138977,
|
| 54 |
+
"r": 0.9042496085166931,
|
| 55 |
+
"f": 0.8934361338615417,
|
| 56 |
+
"s": 1859.0
|
| 57 |
+
},
|
| 58 |
+
"DATE": {
|
| 59 |
+
"p": 0.9753617644309998,
|
| 60 |
+
"r": 0.9696733951568604,
|
| 61 |
+
"f": 0.9725092649459839,
|
| 62 |
+
"s": 5144.0
|
| 63 |
+
},
|
| 64 |
+
"EVENT": {
|
| 65 |
+
"p": 0.8030303120613098,
|
| 66 |
+
"r": 0.7194570302963257,
|
| 67 |
+
"f": 0.758949875831604,
|
| 68 |
+
"s": 221.0
|
| 69 |
+
},
|
| 70 |
+
"FAC": {
|
| 71 |
+
"p": 0.7110481858253479,
|
| 72 |
+
"r": 0.7011173367500305,
|
| 73 |
+
"f": 0.7060478329658508,
|
| 74 |
+
"s": 358.0
|
| 75 |
+
},
|
| 76 |
+
"GPE": {
|
| 77 |
+
"p": 0.8975331783294678,
|
| 78 |
+
"r": 0.9113680124282837,
|
| 79 |
+
"f": 0.9043976664543152,
|
| 80 |
+
"s": 1038.0
|
| 81 |
+
},
|
| 82 |
+
"LANGUAGE": {
|
| 83 |
+
"p": 0.9354838728904724,
|
| 84 |
+
"r": 0.8787878751754761,
|
| 85 |
+
"f": 0.90625,
|
| 86 |
+
"s": 66.0
|
| 87 |
+
},
|
| 88 |
+
"LAW": {
|
| 89 |
+
"p": 0.8544303774833679,
|
| 90 |
+
"r": 0.7714285850524902,
|
| 91 |
+
"f": 0.8108107447624207,
|
| 92 |
+
"s": 175.0
|
| 93 |
+
},
|
| 94 |
+
"LOC": {
|
| 95 |
+
"p": 0.6557376980781555,
|
| 96 |
+
"r": 0.6425702571868896,
|
| 97 |
+
"f": 0.6490872502326965,
|
| 98 |
+
"s": 249.0
|
| 99 |
+
},
|
| 100 |
+
"MONEY": {
|
| 101 |
+
"p": 0.9277108311653137,
|
| 102 |
+
"r": 0.9277108311653137,
|
| 103 |
+
"f": 0.9277108311653137,
|
| 104 |
+
"s": 166.0
|
| 105 |
+
},
|
| 106 |
+
"NORP": {
|
| 107 |
+
"p": 0.8764045238494873,
|
| 108 |
+
"r": 0.7852349281311035,
|
| 109 |
+
"f": 0.8283185958862305,
|
| 110 |
+
"s": 298.0
|
| 111 |
+
},
|
| 112 |
+
"ORDINAL": {
|
| 113 |
+
"p": 0.8032786846160889,
|
| 114 |
+
"r": 0.8909090757369995,
|
| 115 |
+
"f": 0.8448275923728943,
|
| 116 |
+
"s": 55.0
|
| 117 |
+
},
|
| 118 |
+
"ORG": {
|
| 119 |
+
"p": 0.8269370198249817,
|
| 120 |
+
"r": 0.8305454254150391,
|
| 121 |
+
"f": 0.8287373185157776,
|
| 122 |
+
"s": 1375.0
|
| 123 |
+
},
|
| 124 |
+
"PERCENT": {
|
| 125 |
+
"p": 0.939130425453186,
|
| 126 |
+
"r": 0.7248322367668152,
|
| 127 |
+
"f": 0.8181818127632141,
|
| 128 |
+
"s": 149.0
|
| 129 |
+
},
|
| 130 |
+
"PERSON": {
|
| 131 |
+
"p": 0.9401294589042664,
|
| 132 |
+
"r": 0.9535011053085327,
|
| 133 |
+
"f": 0.9467681050300598,
|
| 134 |
+
"s": 1828.0
|
| 135 |
+
},
|
| 136 |
+
"PRODUCT": {
|
| 137 |
+
"p": 0.7434554696083069,
|
| 138 |
+
"r": 0.4610389471054077,
|
| 139 |
+
"f": 0.56913822889328,
|
| 140 |
+
"s": 308.0
|
| 141 |
+
},
|
| 142 |
+
"QUANTITY": {
|
| 143 |
+
"p": 0.8620689511299133,
|
| 144 |
+
"r": 0.8796296119689941,
|
| 145 |
+
"f": 0.8707607984542847,
|
| 146 |
+
"s": 540.0
|
| 147 |
+
},
|
| 148 |
+
"TIME": {
|
| 149 |
+
"p": 0.6315789222717285,
|
| 150 |
+
"r": 0.4285714328289032,
|
| 151 |
+
"f": 0.5106382369995117,
|
| 152 |
+
"s": 56.0
|
| 153 |
+
},
|
| 154 |
+
"WORK_OF_ART": {
|
| 155 |
+
"p": 0.8444194197654724,
|
| 156 |
+
"r": 0.8221734166145325,
|
| 157 |
+
"f": 0.8331480026245117,
|
| 158 |
+
"s": 911.0
|
| 159 |
+
},
|
| 160 |
+
"macro": {
|
| 161 |
+
"p": 0.8394787311553955,
|
| 162 |
+
"r": 0.7890443801879883,
|
| 163 |
+
"f": 0.8099843859672546
|
| 164 |
+
}
|
| 165 |
+
}
|
| 166 |
+
}
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/config_train.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"path": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa",
|
| 3 |
+
"splits": [
|
| 4 |
+
"train",
|
| 5 |
+
"validation"
|
| 6 |
+
],
|
| 7 |
+
"layers": [
|
| 8 |
+
8,
|
| 9 |
+
9,
|
| 10 |
+
10,
|
| 11 |
+
11,
|
| 12 |
+
12
|
| 13 |
+
],
|
| 14 |
+
"hfcache": "",
|
| 15 |
+
"classifier_dims": [
|
| 16 |
+
4096
|
| 17 |
+
],
|
| 18 |
+
"learning_rates": [
|
| 19 |
+
0.0001,
|
| 20 |
+
5e-05,
|
| 21 |
+
0.0003,
|
| 22 |
+
0.0005,
|
| 23 |
+
0.001
|
| 24 |
+
],
|
| 25 |
+
"cuda": true,
|
| 26 |
+
"n_steps_per_epoch": 500,
|
| 27 |
+
"n_epochs": 30,
|
| 28 |
+
"batch_size": 4,
|
| 29 |
+
"balance_loss": false,
|
| 30 |
+
"loss_weights_span": [
|
| 31 |
+
[
|
| 32 |
+
1.0,
|
| 33 |
+
1.0
|
| 34 |
+
],
|
| 35 |
+
[
|
| 36 |
+
1.0,
|
| 37 |
+
50.0
|
| 38 |
+
],
|
| 39 |
+
[
|
| 40 |
+
1.0,
|
| 41 |
+
100.0
|
| 42 |
+
]
|
| 43 |
+
],
|
| 44 |
+
"time": 1728553144.1331656,
|
| 45 |
+
"config_dataset": {
|
| 46 |
+
"generation_kwargs": {
|
| 47 |
+
"max_new_tokens": 500,
|
| 48 |
+
"repetition_penalty": 1.2
|
| 49 |
+
},
|
| 50 |
+
"model_id": "meta-llama/Llama-3.2-1B-Instruct",
|
| 51 |
+
"flair_model_name": "flair/ner-english-ontonotes-large"
|
| 52 |
+
}
|
| 53 |
+
}
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/config.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"generation_kwargs": {
|
| 3 |
+
"max_new_tokens": 500,
|
| 4 |
+
"repetition_penalty": 1.2
|
| 5 |
+
},
|
| 6 |
+
"model_id": "meta-llama/Llama-3.2-1B-Instruct",
|
| 7 |
+
"flair_model_name": "flair/ner-english-ontonotes-large"
|
| 8 |
+
}
|
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/stoke_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"default": {
|
| 3 |
+
"classifier_token": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9",
|
| 4 |
+
"classifier_span": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6"
|
| 5 |
+
},
|
| 6 |
+
"basic": {
|
| 7 |
+
"classifier_token": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9",
|
| 8 |
+
"classifier_span": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/lx17UmlUbN"
|
| 9 |
+
}
|
| 10 |
+
}
|