LoganResearch committed on
Commit
7d0c263
·
verified ·
1 Parent(s): 63b640e

Upload folder using huggingface_hub

Browse files
combined/multi_head_v2_complete.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ca660754225782c07dbbbdb7e192e6f1b96ee805b451f5e6f8aebfceb9fa67
3
+ size 8496958
heads/hedging_head.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a43d30ab3e87d8e7dc70c62da5ca5b49f54e272713969e87c5f3a742e485871d
3
+ size 24186
heads/repetition_head.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ea75a1a8b408dadc229b464d0e1f131af33a3a974efa523ba9aad2780625fb3
3
+ size 8424206
heads/sycophancy_head.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a410f16a20edcf2d1b5609c74e39bccae4d7ed0c7007b0eb15a39db984ba98e6
3
+ size 24216
heads/verbosity_head.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba1118f564de6f41db58f48a44141cea2800a490e7b9f9646414c713af49dadb
3
+ size 24206
training_logs/hedging_results.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "step": 3000,
4
+ "accuracy": 0.7798140048980713,
5
+ "precision": 0.06394045212277155,
6
+ "recall": 0.6678825110385871,
7
+ "f1": 0.11670776094869087,
8
+ "pos_risk": 0.524723470211029,
9
+ "neg_risk": 0.4489431381225586,
10
+ "separation": 1.1687971719656465
11
+ },
12
+ {
13
+ "step": 4000,
14
+ "accuracy": 0.8607996106147766,
15
+ "precision": 0.08731814842027921,
16
+ "recall": 0.5703589940487618,
17
+ "f1": 0.15145027272263856,
18
+ "pos_risk": 0.5151649713516235,
19
+ "neg_risk": 0.4292229413986206,
20
+ "separation": 1.2002270187911235
21
+ },
22
+ {
23
+ "step": 5000,
24
+ "accuracy": 0.8619410991668701,
25
+ "precision": 0.09372991293168936,
26
+ "recall": 0.6158571702822039,
27
+ "f1": 0.1626981108152656,
28
+ "pos_risk": 0.5229318737983704,
29
+ "neg_risk": 0.42363420128822327,
30
+ "separation": 1.234394843967258
31
+ },
32
+ {
33
+ "step": 6000,
34
+ "accuracy": 0.873987078666687,
35
+ "precision": 0.10097000352146493,
36
+ "recall": 0.6054904972163563,
37
+ "f1": 0.17307797837897163,
38
+ "pos_risk": 0.5275717377662659,
39
+ "neg_risk": 0.4137572944164276,
40
+ "separation": 1.2750753760374536
41
+ },
42
+ {
43
+ "step": 7000,
44
+ "accuracy": 0.9015830159187317,
45
+ "precision": 0.12172369670202667,
46
+ "recall": 0.5661355346515646,
47
+ "f1": 0.20036689767631471,
48
+ "pos_risk": 0.521373987197876,
49
+ "neg_risk": 0.3951682150363922,
50
+ "separation": 1.319372275803764
51
+ },
52
+ {
53
+ "step": 8000,
54
+ "accuracy": 0.8688943982124329,
55
+ "precision": 0.09942703067071115,
56
+ "recall": 0.6229602610865809,
57
+ "f1": 0.1714844369286054,
58
+ "pos_risk": 0.5516535639762878,
59
+ "neg_risk": 0.40235474705696106,
60
+ "separation": 1.3710626456165327
61
+ },
62
+ {
63
+ "step": 9000,
64
+ "accuracy": 0.8865934014320374,
65
+ "precision": 0.11200424929178471,
66
+ "recall": 0.6072182760606643,
67
+ "f1": 0.18912374062004847,
68
+ "pos_risk": 0.5500459671020508,
69
+ "neg_risk": 0.3843628168106079,
70
+ "separation": 1.4310592571525516
71
+ },
72
+ {
73
+ "step": 10000,
74
+ "accuracy": 0.8839634656906128,
75
+ "precision": 0.11537280327589149,
76
+ "recall": 0.6490689191783452,
77
+ "f1": 0.19592049603059628,
78
+ "pos_risk": 0.5594488978385925,
79
+ "neg_risk": 0.37506988644599915,
80
+ "separation": 1.491585749897678
81
+ }
82
+ ]
training_logs/verbosity_results.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "step": 3000,
4
+ "accuracy": 0.9691389203071594,
5
+ "precision": 0.1369258846192842,
6
+ "recall": 0.06012644138729353,
7
+ "f1": 0.08356020294518005,
8
+ "pos_risk": 0.4014969766139984,
9
+ "neg_risk": 0.26297321915626526,
10
+ "separation": 1.52675994119165
11
+ },
12
+ {
13
+ "step": 4000,
14
+ "accuracy": 0.9612593054771423,
15
+ "precision": 0.11997728973650933,
16
+ "recall": 0.10349049463514537,
17
+ "f1": 0.11112571858828028,
18
+ "pos_risk": 0.3859938085079193,
19
+ "neg_risk": 0.21928799152374268,
20
+ "separation": 1.7602140720328825
21
+ },
22
+ {
23
+ "step": 5000,
24
+ "accuracy": 0.8646724820137024,
25
+ "precision": 0.0876178851490621,
26
+ "recall": 0.5081474555896888,
27
+ "f1": 0.14946423485272597,
28
+ "pos_risk": 0.45228344202041626,
29
+ "neg_risk": 0.2378995418548584,
30
+ "separation": 1.9011530602120816
31
+ },
32
+ {
33
+ "step": 6000,
34
+ "accuracy": 0.86240553855896,
35
+ "precision": 0.08743681522381432,
36
+ "recall": 0.5171408218690174,
37
+ "f1": 0.1495825968816301,
38
+ "pos_risk": 0.45121535658836365,
39
+ "neg_risk": 0.22329428791999817,
40
+ "separation": 2.0207205513023467
41
+ },
42
+ {
43
+ "step": 7000,
44
+ "accuracy": 0.8883561491966248,
45
+ "precision": 0.09316823884267353,
46
+ "recall": 0.4318151462535061,
47
+ "f1": 0.1532675426467451,
48
+ "pos_risk": 0.43008705973625183,
49
+ "neg_risk": 0.20528849959373474,
50
+ "separation": 2.0950372796693078
51
+ },
52
+ {
53
+ "step": 8000,
54
+ "accuracy": 0.8657205700874329,
55
+ "precision": 0.08880243245644875,
56
+ "recall": 0.5116646631939806,
57
+ "f1": 0.15133907260785828,
58
+ "pos_risk": 0.44835221767425537,
59
+ "neg_risk": 0.21308068931102753,
60
+ "separation": 2.1041428912397078
61
+ },
62
+ {
63
+ "step": 9000,
64
+ "accuracy": 0.853208601474762,
65
+ "precision": 0.08620888430834804,
66
+ "recall": 0.5493076888829527,
67
+ "f1": 0.1490290104089601,
68
+ "pos_risk": 0.45867228507995605,
69
+ "neg_risk": 0.21553963422775269,
70
+ "separation": 2.128018295675932
71
+ },
72
+ {
73
+ "step": 10000,
74
+ "accuracy": 0.8786846399307251,
75
+ "precision": 0.09251913030283324,
76
+ "recall": 0.4750456346556253,
77
+ "f1": 0.15487504399859206,
78
+ "pos_risk": 0.43932676315307617,
79
+ "neg_risk": 0.20558473467826843,
80
+ "separation": 2.1369619871854995
81
+ }
82
+ ]