Upload folder using huggingface_hub
Browse files- combined/multi_head_v2_complete.pt +3 -0
- heads/hedging_head.pt +3 -0
- heads/repetition_head.pt +3 -0
- heads/sycophancy_head.pt +3 -0
- heads/verbosity_head.pt +3 -0
- training_logs/hedging_results.json +82 -0
- training_logs/verbosity_results.json +82 -0
combined/multi_head_v2_complete.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ca660754225782c07dbbbdb7e192e6f1b96ee805b451f5e6f8aebfceb9fa67
|
| 3 |
+
size 8496958
|
heads/hedging_head.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a43d30ab3e87d8e7dc70c62da5ca5b49f54e272713969e87c5f3a742e485871d
|
| 3 |
+
size 24186
|
heads/repetition_head.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ea75a1a8b408dadc229b464d0e1f131af33a3a974efa523ba9aad2780625fb3
|
| 3 |
+
size 8424206
|
heads/sycophancy_head.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a410f16a20edcf2d1b5609c74e39bccae4d7ed0c7007b0eb15a39db984ba98e6
|
| 3 |
+
size 24216
|
heads/verbosity_head.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba1118f564de6f41db58f48a44141cea2800a490e7b9f9646414c713af49dadb
|
| 3 |
+
size 24206
|
training_logs/hedging_results.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 3000,
|
| 4 |
+
"accuracy": 0.7798140048980713,
|
| 5 |
+
"precision": 0.06394045212277155,
|
| 6 |
+
"recall": 0.6678825110385871,
|
| 7 |
+
"f1": 0.11670776094869087,
|
| 8 |
+
"pos_risk": 0.524723470211029,
|
| 9 |
+
"neg_risk": 0.4489431381225586,
|
| 10 |
+
"separation": 1.1687971719656465
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"step": 4000,
|
| 14 |
+
"accuracy": 0.8607996106147766,
|
| 15 |
+
"precision": 0.08731814842027921,
|
| 16 |
+
"recall": 0.5703589940487618,
|
| 17 |
+
"f1": 0.15145027272263856,
|
| 18 |
+
"pos_risk": 0.5151649713516235,
|
| 19 |
+
"neg_risk": 0.4292229413986206,
|
| 20 |
+
"separation": 1.2002270187911235
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"step": 5000,
|
| 24 |
+
"accuracy": 0.8619410991668701,
|
| 25 |
+
"precision": 0.09372991293168936,
|
| 26 |
+
"recall": 0.6158571702822039,
|
| 27 |
+
"f1": 0.1626981108152656,
|
| 28 |
+
"pos_risk": 0.5229318737983704,
|
| 29 |
+
"neg_risk": 0.42363420128822327,
|
| 30 |
+
"separation": 1.234394843967258
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"step": 6000,
|
| 34 |
+
"accuracy": 0.873987078666687,
|
| 35 |
+
"precision": 0.10097000352146493,
|
| 36 |
+
"recall": 0.6054904972163563,
|
| 37 |
+
"f1": 0.17307797837897163,
|
| 38 |
+
"pos_risk": 0.5275717377662659,
|
| 39 |
+
"neg_risk": 0.4137572944164276,
|
| 40 |
+
"separation": 1.2750753760374536
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 7000,
|
| 44 |
+
"accuracy": 0.9015830159187317,
|
| 45 |
+
"precision": 0.12172369670202667,
|
| 46 |
+
"recall": 0.5661355346515646,
|
| 47 |
+
"f1": 0.20036689767631471,
|
| 48 |
+
"pos_risk": 0.521373987197876,
|
| 49 |
+
"neg_risk": 0.3951682150363922,
|
| 50 |
+
"separation": 1.319372275803764
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"step": 8000,
|
| 54 |
+
"accuracy": 0.8688943982124329,
|
| 55 |
+
"precision": 0.09942703067071115,
|
| 56 |
+
"recall": 0.6229602610865809,
|
| 57 |
+
"f1": 0.1714844369286054,
|
| 58 |
+
"pos_risk": 0.5516535639762878,
|
| 59 |
+
"neg_risk": 0.40235474705696106,
|
| 60 |
+
"separation": 1.3710626456165327
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"step": 9000,
|
| 64 |
+
"accuracy": 0.8865934014320374,
|
| 65 |
+
"precision": 0.11200424929178471,
|
| 66 |
+
"recall": 0.6072182760606643,
|
| 67 |
+
"f1": 0.18912374062004847,
|
| 68 |
+
"pos_risk": 0.5500459671020508,
|
| 69 |
+
"neg_risk": 0.3843628168106079,
|
| 70 |
+
"separation": 1.4310592571525516
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"step": 10000,
|
| 74 |
+
"accuracy": 0.8839634656906128,
|
| 75 |
+
"precision": 0.11537280327589149,
|
| 76 |
+
"recall": 0.6490689191783452,
|
| 77 |
+
"f1": 0.19592049603059628,
|
| 78 |
+
"pos_risk": 0.5594488978385925,
|
| 79 |
+
"neg_risk": 0.37506988644599915,
|
| 80 |
+
"separation": 1.491585749897678
|
| 81 |
+
}
|
| 82 |
+
]
|
training_logs/verbosity_results.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 3000,
|
| 4 |
+
"accuracy": 0.9691389203071594,
|
| 5 |
+
"precision": 0.1369258846192842,
|
| 6 |
+
"recall": 0.06012644138729353,
|
| 7 |
+
"f1": 0.08356020294518005,
|
| 8 |
+
"pos_risk": 0.4014969766139984,
|
| 9 |
+
"neg_risk": 0.26297321915626526,
|
| 10 |
+
"separation": 1.52675994119165
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"step": 4000,
|
| 14 |
+
"accuracy": 0.9612593054771423,
|
| 15 |
+
"precision": 0.11997728973650933,
|
| 16 |
+
"recall": 0.10349049463514537,
|
| 17 |
+
"f1": 0.11112571858828028,
|
| 18 |
+
"pos_risk": 0.3859938085079193,
|
| 19 |
+
"neg_risk": 0.21928799152374268,
|
| 20 |
+
"separation": 1.7602140720328825
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"step": 5000,
|
| 24 |
+
"accuracy": 0.8646724820137024,
|
| 25 |
+
"precision": 0.0876178851490621,
|
| 26 |
+
"recall": 0.5081474555896888,
|
| 27 |
+
"f1": 0.14946423485272597,
|
| 28 |
+
"pos_risk": 0.45228344202041626,
|
| 29 |
+
"neg_risk": 0.2378995418548584,
|
| 30 |
+
"separation": 1.9011530602120816
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"step": 6000,
|
| 34 |
+
"accuracy": 0.86240553855896,
|
| 35 |
+
"precision": 0.08743681522381432,
|
| 36 |
+
"recall": 0.5171408218690174,
|
| 37 |
+
"f1": 0.1495825968816301,
|
| 38 |
+
"pos_risk": 0.45121535658836365,
|
| 39 |
+
"neg_risk": 0.22329428791999817,
|
| 40 |
+
"separation": 2.0207205513023467
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 7000,
|
| 44 |
+
"accuracy": 0.8883561491966248,
|
| 45 |
+
"precision": 0.09316823884267353,
|
| 46 |
+
"recall": 0.4318151462535061,
|
| 47 |
+
"f1": 0.1532675426467451,
|
| 48 |
+
"pos_risk": 0.43008705973625183,
|
| 49 |
+
"neg_risk": 0.20528849959373474,
|
| 50 |
+
"separation": 2.0950372796693078
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"step": 8000,
|
| 54 |
+
"accuracy": 0.8657205700874329,
|
| 55 |
+
"precision": 0.08880243245644875,
|
| 56 |
+
"recall": 0.5116646631939806,
|
| 57 |
+
"f1": 0.15133907260785828,
|
| 58 |
+
"pos_risk": 0.44835221767425537,
|
| 59 |
+
"neg_risk": 0.21308068931102753,
|
| 60 |
+
"separation": 2.1041428912397078
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"step": 9000,
|
| 64 |
+
"accuracy": 0.853208601474762,
|
| 65 |
+
"precision": 0.08620888430834804,
|
| 66 |
+
"recall": 0.5493076888829527,
|
| 67 |
+
"f1": 0.1490290104089601,
|
| 68 |
+
"pos_risk": 0.45867228507995605,
|
| 69 |
+
"neg_risk": 0.21553963422775269,
|
| 70 |
+
"separation": 2.128018295675932
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"step": 10000,
|
| 74 |
+
"accuracy": 0.8786846399307251,
|
| 75 |
+
"precision": 0.09251913030283324,
|
| 76 |
+
"recall": 0.4750456346556253,
|
| 77 |
+
"f1": 0.15487504399859206,
|
| 78 |
+
"pos_risk": 0.43932676315307617,
|
| 79 |
+
"neg_risk": 0.20558473467826843,
|
| 80 |
+
"separation": 2.1369619871854995
|
| 81 |
+
}
|
| 82 |
+
]
|