Commit
·
af2b164
1
Parent(s):
26e1596
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator (#16)
Browse files- Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator (a49eb2814974e826b5f803631562a3e0b0a3e074)
Co-authored-by: Evaluation Bot <autoevaluator@users.noreply.huggingface.co>
README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
---
|
| 2 |
language: en
|
| 3 |
-
|
| 4 |
tags:
|
| 5 |
- text-generation
|
| 6 |
- opt
|
| 7 |
-
|
| 8 |
commercial: false
|
| 9 |
model-index:
|
| 10 |
- name: inverse-scaling/opt-6.7b_eval
|
|
@@ -18,14 +18,16 @@ model-index:
|
|
| 18 |
config: inverse-scaling--NeQA
|
| 19 |
split: train
|
| 20 |
metrics:
|
| 21 |
-
-
|
| 22 |
-
type: accuracy
|
| 23 |
value: 0.54
|
|
|
|
| 24 |
verified: true
|
| 25 |
-
|
| 26 |
-
|
| 27 |
value: 0.740270353704691
|
|
|
|
| 28 |
verified: true
|
|
|
|
| 29 |
- task:
|
| 30 |
type: zero-shot-classification
|
| 31 |
name: Zero-Shot Text Classification
|
|
@@ -35,14 +37,16 @@ model-index:
|
|
| 35 |
config: inverse-scaling--quote-repetition
|
| 36 |
split: train
|
| 37 |
metrics:
|
| 38 |
-
-
|
| 39 |
-
type: accuracy
|
| 40 |
value: 0.86
|
|
|
|
| 41 |
verified: true
|
| 42 |
-
|
| 43 |
-
|
| 44 |
value: 0.22016974209290055
|
|
|
|
| 45 |
verified: true
|
|
|
|
| 46 |
- task:
|
| 47 |
type: zero-shot-classification
|
| 48 |
name: Zero-Shot Text Classification
|
|
@@ -52,14 +56,16 @@ model-index:
|
|
| 52 |
config: inverse-scaling--redefine-math
|
| 53 |
split: train
|
| 54 |
metrics:
|
| 55 |
-
-
|
| 56 |
-
type: accuracy
|
| 57 |
value: 0.6733333333333333
|
|
|
|
| 58 |
verified: true
|
| 59 |
-
|
| 60 |
-
|
| 61 |
value: 0.638882334422734
|
|
|
|
| 62 |
verified: true
|
|
|
|
| 63 |
- task:
|
| 64 |
type: zero-shot-classification
|
| 65 |
name: Zero-Shot Text Classification
|
|
@@ -69,14 +75,16 @@ model-index:
|
|
| 69 |
config: inverse-scaling--hindsight-neglect-10shot
|
| 70 |
split: train
|
| 71 |
metrics:
|
| 72 |
-
-
|
| 73 |
-
type: accuracy
|
| 74 |
value: 0.4666666666666667
|
|
|
|
| 75 |
verified: true
|
| 76 |
-
|
| 77 |
-
|
| 78 |
value: 0.7550815605928027
|
|
|
|
| 79 |
verified: true
|
|
|
|
| 80 |
- task:
|
| 81 |
type: zero-shot-classification
|
| 82 |
name: Zero-Shot Text Classification
|
|
@@ -86,14 +94,16 @@ model-index:
|
|
| 86 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
| 87 |
split: test
|
| 88 |
metrics:
|
| 89 |
-
-
|
| 90 |
-
type: accuracy
|
| 91 |
value: 0.3737864077669903
|
|
|
|
| 92 |
verified: true
|
| 93 |
-
|
| 94 |
-
|
| 95 |
value: 1.2823651640752816
|
|
|
|
| 96 |
verified: true
|
|
|
|
| 97 |
- task:
|
| 98 |
type: zero-shot-classification
|
| 99 |
name: Zero-Shot Text Classification
|
|
@@ -103,14 +113,16 @@ model-index:
|
|
| 103 |
config: mathemakitten--winobias_antistereotype_test_v5
|
| 104 |
split: test
|
| 105 |
metrics:
|
| 106 |
-
-
|
| 107 |
-
type: accuracy
|
| 108 |
value: 0.3859223300970874
|
|
|
|
| 109 |
verified: true
|
| 110 |
-
|
| 111 |
-
|
| 112 |
value: 1.295986159347468
|
|
|
|
| 113 |
verified: true
|
|
|
|
| 114 |
---
|
| 115 |
|
| 116 |
# OPT : Open Pre-trained Transformer Language Models
|
|
|
|
| 1 |
---
|
| 2 |
language: en
|
| 3 |
+
license: other
|
| 4 |
tags:
|
| 5 |
- text-generation
|
| 6 |
- opt
|
| 7 |
+
inference: false
|
| 8 |
commercial: false
|
| 9 |
model-index:
|
| 10 |
- name: inverse-scaling/opt-6.7b_eval
|
|
|
|
| 18 |
config: inverse-scaling--NeQA
|
| 19 |
split: train
|
| 20 |
metrics:
|
| 21 |
+
- type: accuracy
|
|
|
|
| 22 |
value: 0.54
|
| 23 |
+
name: Accuracy
|
| 24 |
verified: true
|
| 25 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWE0ZjA1NDg0YjYzNTZhYjIwZDRhNDcxYjNiYTQ1YTY2YWQ1YTUzZmIyMTlmYTljMGJiNjAyNzc0YTNiYWFhNCIsInZlcnNpb24iOjF9.eWcHC6dzOjnuF-mT6Z2G8Z1xCoow6iViE1Qy-VNKMSzIcJZcvgkZI0NhU50YMi4tOOZN2k92MATtbXtcZR5yCQ
|
| 26 |
+
- type: loss
|
| 27 |
value: 0.740270353704691
|
| 28 |
+
name: Loss
|
| 29 |
verified: true
|
| 30 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTY1M2ZlYTYzMjZhNTVmZjgyMWJiYmYxZGM2NjQxYjdlZDI3ZmZmODAxMTI5N2RmMjMyNzYzMWUxZTViNjM5YSIsInZlcnNpb24iOjF9.G3DqNVlNLP5uAmzOKa9hsxBBiSWXbrDesp3hIlQomYe2YsbWbYF0WssbFi7DXEu5hmj6yCN2E-olbEjzwZ2eBQ
|
| 31 |
- task:
|
| 32 |
type: zero-shot-classification
|
| 33 |
name: Zero-Shot Text Classification
|
|
|
|
| 37 |
config: inverse-scaling--quote-repetition
|
| 38 |
split: train
|
| 39 |
metrics:
|
| 40 |
+
- type: accuracy
|
|
|
|
| 41 |
value: 0.86
|
| 42 |
+
name: Accuracy
|
| 43 |
verified: true
|
| 44 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzA0ZjJlZGUwOGNhNmE3MmMzMDY1YTM4ZjYzNDUwYjk1MTU2MmVhMGQzYjI3YzI0ZGMzMWFkODIyZWE5Mjk2ZCIsInZlcnNpb24iOjF9.pc3tzIMBv05ZBixkmRojnIzsdHLvYhZX_sJnNZ_t_oo61DrTUhYQYq3xikx8S5rIr5sWrLTbxWn3rAAXme0KAQ
|
| 45 |
+
- type: loss
|
| 46 |
value: 0.22016974209290055
|
| 47 |
+
name: Loss
|
| 48 |
verified: true
|
| 49 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWE5Njk4OWQ5Mzg3ODljMWM3MzhiNjBhNTk5ZGJjMDU3ZTJlZDZjZjBjYzdkMmMxZTJlMTJkMjg1OTA5ZWQxNSIsInZlcnNpb24iOjF9.NubehOGlzEURMYuTkvqzXmf1ENadam7uZ62YA1nv1DjAivd8VySmpLl-QnnZLcDbhduMZbRp4lMQbWG9Z26LAg
|
| 50 |
- task:
|
| 51 |
type: zero-shot-classification
|
| 52 |
name: Zero-Shot Text Classification
|
|
|
|
| 56 |
config: inverse-scaling--redefine-math
|
| 57 |
split: train
|
| 58 |
metrics:
|
| 59 |
+
- type: accuracy
|
|
|
|
| 60 |
value: 0.6733333333333333
|
| 61 |
+
name: Accuracy
|
| 62 |
verified: true
|
| 63 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZTAyYjAwMzgyMDc2MmU1NDM0MmMyOWUyYzc3YTYxNzkyYzk2ZGZiMTk5NjlkODUwNDQ1NzFlMTU0Y2Y0ZGZlYSIsInZlcnNpb24iOjF9.VMxtPMY9qKk4eSjAlDb_jfg1nsf8eq1Oz5WnfUSC-VkXREQ6-f1qBooJc617t6U5apIbHnaW9XP3LTYrGzvUDQ
|
| 64 |
+
- type: loss
|
| 65 |
value: 0.638882334422734
|
| 66 |
+
name: Loss
|
| 67 |
verified: true
|
| 68 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDYwNWVmZGM5ZmM2MmY0Y2IzYzNhOTNkZmU2YTA2MWZlZTU1ZGI2OTM1YzJiNjViNzMwMjA0Y2Q0ODBlYTgzOSIsInZlcnNpb24iOjF9.YJujmeEYbf4ZOJ0w_Q24d7t5ksKST35aweNJSk6UYuCiV6uSIJhJUz_w8iFwo9ykM-EOXamL87dftlkyawgtBw
|
| 69 |
- task:
|
| 70 |
type: zero-shot-classification
|
| 71 |
name: Zero-Shot Text Classification
|
|
|
|
| 75 |
config: inverse-scaling--hindsight-neglect-10shot
|
| 76 |
split: train
|
| 77 |
metrics:
|
| 78 |
+
- type: accuracy
|
|
|
|
| 79 |
value: 0.4666666666666667
|
| 80 |
+
name: Accuracy
|
| 81 |
verified: true
|
| 82 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjVlMzdkYTUxZmI1ZDBmMDdjM2VhMjA1ZTg0MGYzMzU0NzFlN2JmNDY2NDc0MmVlMjI3MDg1Y2Q5MDRhYWU1ZCIsInZlcnNpb24iOjF9.Z01fwvvUFNOWeUWexSpdmAUPYJIsYUV-eb1ybSEjQ3cb9ow2STMVgxp0PqaDJMVWKg30xIkARahsg8ci6QpbBw
|
| 83 |
+
- type: loss
|
| 84 |
value: 0.7550815605928027
|
| 85 |
+
name: Loss
|
| 86 |
verified: true
|
| 87 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZWQyNmYwZjdkMTM1YjIxYzEwMmUwMWVlZTRjODQwYWExNDQ2MTgzYzA0ZTlkODcxYWIxMzdmNWE0NDdmNzcxYiIsInZlcnNpb24iOjF9.TtX2cKfatVMFX09l6DiuKFEa1vlDJUBPohSLmdQGh8QCTf-DrylUqARU8Ni5cSiSlidFF4n4IWIL0vQ941n6DQ
|
| 88 |
- task:
|
| 89 |
type: zero-shot-classification
|
| 90 |
name: Zero-Shot Text Classification
|
|
|
|
| 94 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
| 95 |
split: test
|
| 96 |
metrics:
|
| 97 |
+
- type: accuracy
|
|
|
|
| 98 |
value: 0.3737864077669903
|
| 99 |
+
name: Accuracy
|
| 100 |
verified: true
|
| 101 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTliMzU1NGIxMTUxYTM4NzVlYzI4YzljMDYyOWM1ZDdkMWMyNjIwOWQ4OGNhZWE3ZTljZGI0ZTA2ZWU3MjVmMiIsInZlcnNpb24iOjF9.dTlDpXOusgl6m3dn7XwfKeaxaVfU1VnEHWFeh7yBNSq5TyHPWbixlNumOWDjc-y9v8g0oWBXqWhT0KMQDaGVCQ
|
| 102 |
+
- type: loss
|
| 103 |
value: 1.2823651640752816
|
| 104 |
+
name: Loss
|
| 105 |
verified: true
|
| 106 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTRjZmU4YWNkNGEwMjNlMGEyYjA1ZjhjOGE3OTZiZTJlYjMyMjViMTYyYWQ1YTdlMmM1ZjU5NTFhOWU3NzM1OCIsInZlcnNpb24iOjF9.yGmOME0MrX0moaU5c2WYf8H7CFfSGsPuQ2qp9MCi_es5RQRWoCHeCcR5oLQ4RATmVpYdzocPxqrbeZfqxVIOAQ
|
| 107 |
- task:
|
| 108 |
type: zero-shot-classification
|
| 109 |
name: Zero-Shot Text Classification
|
|
|
|
| 113 |
config: mathemakitten--winobias_antistereotype_test_v5
|
| 114 |
split: test
|
| 115 |
metrics:
|
| 116 |
+
- type: accuracy
|
|
|
|
| 117 |
value: 0.3859223300970874
|
| 118 |
+
name: Accuracy
|
| 119 |
verified: true
|
| 120 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzZlZjIzNDM0Mzk5MmRlMTFlOWVlZjY3MDFmY2NhZjlkYWNmMWQ2MjdhOTg3YTg0OTI1YjY5YmYxMTc4YjYyOCIsInZlcnNpb24iOjF9.nCFVShWbHuHFKEdK5INjQSfLI9KQUNQZqqjqYCw_HVHSW0QHLIXdAb7_GDZJhCUTJ-JkBVCJFtEliA2Zw9GjAw
|
| 121 |
+
- type: loss
|
| 122 |
value: 1.295986159347468
|
| 123 |
+
name: Loss
|
| 124 |
verified: true
|
| 125 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWE3ZjhmYzM3NjRhMjc3OGU5NWQzY2Q1NzA2ZDBjN2Q1YmZkYzdiMDBhMmY1ZDM5NmU2YzQ2ZGZmZmYyMzg5NiIsInZlcnNpb24iOjF9.2UzIpqw83YQdGOqTKKP7ywqpNdgCDkR36lhkbja6qFsKyQctcg4vZgLXfMSfufWf1G_9iXqY8r-JiZadMdK3Dg
|
| 126 |
---
|
| 127 |
|
| 128 |
# OPT : Open Pre-trained Transformer Language Models
|