Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
#18
by autoevaluator HF Staff - opened
README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
---
|
| 2 |
language: en
|
| 3 |
-
|
| 4 |
tags:
|
| 5 |
- text-generation
|
| 6 |
-
|
| 7 |
commercial: false
|
| 8 |
model-index:
|
| 9 |
- name: inverse-scaling/opt-350m_eval
|
|
@@ -17,14 +17,16 @@ model-index:
|
|
| 17 |
config: inverse-scaling--NeQA
|
| 18 |
split: train
|
| 19 |
metrics:
|
| 20 |
-
-
|
| 21 |
-
type: accuracy
|
| 22 |
value: 0.4666666666666667
|
|
|
|
| 23 |
verified: true
|
| 24 |
-
|
| 25 |
-
|
| 26 |
value: 0.9192380222864449
|
|
|
|
| 27 |
verified: true
|
|
|
|
| 28 |
- task:
|
| 29 |
type: zero-shot-classification
|
| 30 |
name: Zero-Shot Text Classification
|
|
@@ -34,14 +36,16 @@ model-index:
|
|
| 34 |
config: inverse-scaling--quote-repetition
|
| 35 |
split: train
|
| 36 |
metrics:
|
| 37 |
-
-
|
| 38 |
-
type: accuracy
|
| 39 |
value: 0.9633333333333334
|
|
|
|
| 40 |
verified: true
|
| 41 |
-
|
| 42 |
-
|
| 43 |
value: 0.03444786100047819
|
|
|
|
| 44 |
verified: true
|
|
|
|
| 45 |
- task:
|
| 46 |
type: zero-shot-classification
|
| 47 |
name: Zero-Shot Text Classification
|
|
@@ -51,14 +55,16 @@ model-index:
|
|
| 51 |
config: inverse-scaling--redefine-math
|
| 52 |
split: train
|
| 53 |
metrics:
|
| 54 |
-
-
|
| 55 |
-
type: accuracy
|
| 56 |
value: 0.6877777777777778
|
|
|
|
| 57 |
verified: true
|
| 58 |
-
|
| 59 |
-
|
| 60 |
value: 0.6016371671193176
|
|
|
|
| 61 |
verified: true
|
|
|
|
| 62 |
- task:
|
| 63 |
type: zero-shot-classification
|
| 64 |
name: Zero-Shot Text Classification
|
|
@@ -68,14 +74,16 @@ model-index:
|
|
| 68 |
config: inverse-scaling--hindsight-neglect-10shot
|
| 69 |
split: train
|
| 70 |
metrics:
|
| 71 |
-
-
|
| 72 |
-
type: accuracy
|
| 73 |
value: 0.4380952380952381
|
|
|
|
| 74 |
verified: true
|
| 75 |
-
|
| 76 |
-
|
| 77 |
value: 0.8774787804555325
|
|
|
|
| 78 |
verified: true
|
|
|
|
| 79 |
- task:
|
| 80 |
type: zero-shot-classification
|
| 81 |
name: Zero-Shot Text Classification
|
|
@@ -85,14 +93,16 @@ model-index:
|
|
| 85 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
| 86 |
split: test
|
| 87 |
metrics:
|
| 88 |
-
-
|
| 89 |
-
type: accuracy
|
| 90 |
value: 0.44660194174757284
|
|
|
|
| 91 |
verified: true
|
| 92 |
-
|
| 93 |
-
|
| 94 |
value: 0.9301078982717057
|
|
|
|
| 95 |
verified: true
|
|
|
|
| 96 |
- task:
|
| 97 |
type: zero-shot-classification
|
| 98 |
name: Zero-Shot Text Classification
|
|
@@ -102,14 +112,16 @@ model-index:
|
|
| 102 |
config: mathemakitten--winobias_antistereotype_test_v5
|
| 103 |
split: test
|
| 104 |
metrics:
|
| 105 |
-
-
|
| 106 |
-
type: accuracy
|
| 107 |
value: 0.4368932038834951
|
|
|
|
| 108 |
verified: true
|
| 109 |
-
|
| 110 |
-
|
| 111 |
value: 0.9175132444057151
|
|
|
|
| 112 |
verified: true
|
|
|
|
| 113 |
---
|
| 114 |
|
| 115 |
|
|
|
|
| 1 |
---
|
| 2 |
language: en
|
| 3 |
+
license: other
|
| 4 |
tags:
|
| 5 |
- text-generation
|
| 6 |
+
inference: false
|
| 7 |
commercial: false
|
| 8 |
model-index:
|
| 9 |
- name: inverse-scaling/opt-350m_eval
|
|
|
|
| 17 |
config: inverse-scaling--NeQA
|
| 18 |
split: train
|
| 19 |
metrics:
|
| 20 |
+
- type: accuracy
|
|
|
|
| 21 |
value: 0.4666666666666667
|
| 22 |
+
name: Accuracy
|
| 23 |
verified: true
|
| 24 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzczZDk5ODcxOWEyYTg2NDc5MzUxYTUzY2IxMzEzYTQ4Mjc2NjY1YzZkNDNmODg1Y2JhMzEzNzNiNDE0MzVlMCIsInZlcnNpb24iOjF9._V6n5pjfCnCFhUIN5rOfSj4enIrb3uo7hDBgnwUsnVxJ2vUWdZiSXR29_ZtGBlJ8b78gfEVQPr9JkZ2vWH-kDw
|
| 25 |
+
- type: loss
|
| 26 |
value: 0.9192380222864449
|
| 27 |
+
name: Loss
|
| 28 |
verified: true
|
| 29 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWMyMzFmNjM4MDhiMGRiM2FlOTU2NjJmY2FiNmMyYTFjZGM5MjMyNzU1MTcyYjhhOGI3MjQ1N2ZkZWNhNTNjOCIsInZlcnNpb24iOjF9.zCtWmvufSPCQpO28PXuyd4tuA_m1WjoNKivlxMW9Z8BgFmhvTObC9FtbS0kkJ6hS9wS2NHLi8-gHyQqjCuCJAA
|
| 30 |
- task:
|
| 31 |
type: zero-shot-classification
|
| 32 |
name: Zero-Shot Text Classification
|
|
|
|
| 36 |
config: inverse-scaling--quote-repetition
|
| 37 |
split: train
|
| 38 |
metrics:
|
| 39 |
+
- type: accuracy
|
|
|
|
| 40 |
value: 0.9633333333333334
|
| 41 |
+
name: Accuracy
|
| 42 |
verified: true
|
| 43 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWQ4NGY0N2UzZTI0MDRkYjQwODhjNWJhMjg3OTExYzI2NmVkMGVmMWJjYjMxNDZiZTYwZDdmMzhhMTJiNTM5ZCIsInZlcnNpb24iOjF9.aLrG02yUjaEbIoarFb13RKohrd2v9EhjefJ8Hp8RbK7cFtgZSbbybZ4q3_tmZEjZW96CCeHTldVjiuCfKM36CQ
|
| 44 |
+
- type: loss
|
| 45 |
value: 0.03444786100047819
|
| 46 |
+
name: Loss
|
| 47 |
verified: true
|
| 48 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzgwODhkYzcyNTZhN2MxOTVhYmQ3YjdhZTM3ZTZhNzRhNDRlMzBjNzJiNTllNWU4MWM4M2E0NzljMjViOTUwNiIsInZlcnNpb24iOjF9.raav8NSrkoH1d7veZGaQxapvVB9J7s9E5dPqyMkZ2dWxWHoqWCbT1Rwt_FpTbkd8g2qSlnQBGF94W1Mo_tzPAw
|
| 49 |
- task:
|
| 50 |
type: zero-shot-classification
|
| 51 |
name: Zero-Shot Text Classification
|
|
|
|
| 55 |
config: inverse-scaling--redefine-math
|
| 56 |
split: train
|
| 57 |
metrics:
|
| 58 |
+
- type: accuracy
|
|
|
|
| 59 |
value: 0.6877777777777778
|
| 60 |
+
name: Accuracy
|
| 61 |
verified: true
|
| 62 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTVkNTE3NTZjYTUzYzViMGUzYjhlMGNjMjZlNDE4MGE5N2NmZDQyYWQzOTg1N2JmMTY1ODg0Y2UyYzYxNzQ3NCIsInZlcnNpb24iOjF9.Z8xYedPo4bCpRjO7soiqpoQX_JusfqLtDlUFl5rug7n-9BDPy8EQyCm37bKBAge0SosQQxMaPv04Q_doUhVlAw
|
| 63 |
+
- type: loss
|
| 64 |
value: 0.6016371671193176
|
| 65 |
+
name: Loss
|
| 66 |
verified: true
|
| 67 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ1YmY4ODlmZjJlNTY1NWQ3ZTQ0MjM5MjBkYjg2MjZiNWEzODYzNDcwOWQ1ZWU3MmY2MjRjYmQ2ZDQ1NDk0NSIsInZlcnNpb24iOjF9.Rr-dawBCi_eof82m2928wQXvEWiyeuFGf2Zpk259vkmDI6Fkn3Pz_3bNzoGNNOUVoOKhDc9cUYjBE11tIv9tBA
|
| 68 |
- task:
|
| 69 |
type: zero-shot-classification
|
| 70 |
name: Zero-Shot Text Classification
|
|
|
|
| 74 |
config: inverse-scaling--hindsight-neglect-10shot
|
| 75 |
split: train
|
| 76 |
metrics:
|
| 77 |
+
- type: accuracy
|
|
|
|
| 78 |
value: 0.4380952380952381
|
| 79 |
+
name: Accuracy
|
| 80 |
verified: true
|
| 81 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDFmMWRmNjQyMWJkNzgxOTNkNDBiYjA0MjQ1NjE2NzM0ZjQ0Mzg0ZjkyYzlhMTdjMDZhNWY0NjU2NTAzYmFmYSIsInZlcnNpb24iOjF9.5VZ9gq1ldypfzpPYP3_Wv64rDlVO3jlJrnxK28qXDTcaHCcvF4YtYNry5ud8y9T9L1YrTVMaaPqLafavOHHlDQ
|
| 82 |
+
- type: loss
|
| 83 |
value: 0.8774787804555325
|
| 84 |
+
name: Loss
|
| 85 |
verified: true
|
| 86 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTU0YTY5ZWIxMzRhNzRlZDliMDNlZjcyM2FjOGRkZjVlNWNiM2I0MWNlZTFiNzdhMTUxMDU4YTY4YmJhZWU2NSIsInZlcnNpb24iOjF9.MletkNEfPZm3q3WPT3T7D_tO-zGxQj_opy9IPgPJmTxVpGRnZePdXl47U4LiWHPw4BrCrExIsPeBpeeJR9ZNBQ
|
| 87 |
- task:
|
| 88 |
type: zero-shot-classification
|
| 89 |
name: Zero-Shot Text Classification
|
|
|
|
| 93 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
| 94 |
split: test
|
| 95 |
metrics:
|
| 96 |
+
- type: accuracy
|
|
|
|
| 97 |
value: 0.44660194174757284
|
| 98 |
+
name: Accuracy
|
| 99 |
verified: true
|
| 100 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjhkNzNlNjliYTk2NzhjNjkxNDcxNjFjZGY1N2VjNzMyN2JlNDgwNDU4YTQ3NGQ5NWQ3ZTJiMjU2MWRiYzI4MiIsInZlcnNpb24iOjF9.Ln1Bi_uUuFTmq-qBfrd7qcD_29fXC_5FTH5aenCuqmZ8TK_akoUbTxIj39FTxfFUmJtxnFgiyCcolTIOB9vgCA
|
| 101 |
+
- type: loss
|
| 102 |
value: 0.9301078982717057
|
| 103 |
+
name: Loss
|
| 104 |
verified: true
|
| 105 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGUzNWRkN2RlMjA0NzNhMDhmZWMyOWVmYzk5YTZkZTYyMmRjYWRlNzMyOWYyMGYxYmI0MWJmYWNlYWFhOTliNyIsInZlcnNpb24iOjF9.m-Vjvm6yNYiShP08VEdT-XSVDUpC0Ko96F30YNtg047LE_Mx7UJ3bCSo1MnnGqQ6FIS1j4B2H1guJIvLyRMSAg
|
| 106 |
- task:
|
| 107 |
type: zero-shot-classification
|
| 108 |
name: Zero-Shot Text Classification
|
|
|
|
| 112 |
config: mathemakitten--winobias_antistereotype_test_v5
|
| 113 |
split: test
|
| 114 |
metrics:
|
| 115 |
+
- type: accuracy
|
|
|
|
| 116 |
value: 0.4368932038834951
|
| 117 |
+
name: Accuracy
|
| 118 |
verified: true
|
| 119 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjIyNmE2YjJhOThmNzFlZjE4ZjlkOTY4NzhjMTMyYmFiM2ExNDIwYzRjMGM4NDRiNzk4ZWIzMGNiMzIwYzA0NyIsInZlcnNpb24iOjF9.4iGtnHIrNkvivgWcihLTftRGZiHfBc2-UefBbX8st55HPXemb7A6IYKic96VN8bTBumEcb0PrSMYoSUsP6UFCQ
|
| 120 |
+
- type: loss
|
| 121 |
value: 0.9175132444057151
|
| 122 |
+
name: Loss
|
| 123 |
verified: true
|
| 124 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTAzODg3MjM4YWUxODJjOTU5Y2I4MGIwMGEwMTAwMTdjMWZhZTk0NDllNDQ4NWRlODI0NjBiZGI2ZjNjNmUzNyIsInZlcnNpb24iOjF9.u8PyUlKCZw5QqYWeE5WFM2t8IWacQhyHU_jyMPZoK1PvhUVItH80CxKrkimSQNMaTwOPNd53szUesfRkP_yXDA
|
| 125 |
---
|
| 126 |
|
| 127 |
|