mrs83 committed on
Commit
314b267
·
verified ·
1 Parent(s): 0d2d82a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +81 -2
README.md CHANGED
@@ -33,6 +33,87 @@ Work in Progress!
33
 
34
  This model was trained with SFT.
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ### Framework versions
37
 
38
  - PEFT 0.17.1
@@ -44,8 +125,6 @@ This model was trained with SFT.
44
 
45
  ## Citations
46
 
47
-
48
-
49
  Cite TRL as:
50
 
51
  ```bibtex
 
33
 
34
  This model was trained with SFT.
35
 
36
+ ## Evaluation
37
+
38
+ This model has been loaded in 4-bit and evaluated with [lighteval](https://github.com/huggingface/lighteval)
39
+
40
+ | Task |Version| Metric |Value | |Stderr|
41
+ |------------------------------------------------------|-------|----------------------------------------------------------------------------------------------------------------------------|-----:|---|-----:|
42
+ |all | |acc |0.5383|± |0.1476|
43
+ | | |acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=True) |0.7000|± |0.1528|
44
+ | | |acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False) |0.8000|± |0.1333|
45
+ | | |truthfulqa_mc1 |0.6000|± |0.1633|
46
+ | | |truthfulqa_mc2 |0.7066|± |0.1481|
47
+ | | |em:normalize_gold=<function gsm8k_normalizer at 0x7c5d972c3ba0>&normalize_pred=<function gsm8k_normalizer at 0x7c5d972c3ba0>|0.6000|± |0.1633|
48
+ |leaderboard:arc:challenge:25 | |acc |0.8000|± |0.1333|
49
+ | | |acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=True) |0.7000|± |0.1528|
50
+ |leaderboard:gsm8k:5 | |em:normalize_gold=<function gsm8k_normalizer at 0x7c5d972c3ba0>&normalize_pred=<function gsm8k_normalizer at 0x7c5d972c3ba0>|0.6000|± |0.1633|
51
+ |leaderboard:hellaswag:10 | |acc |0.5000|± |0.1667|
52
+ | | |acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False) |0.8000|± |0.1333|
53
+ |leaderboard:mmlu:_average:5 | |acc |0.5316|± |0.1474|
54
+ |leaderboard:mmlu:abstract_algebra:5 | |acc |0.3000|± |0.1528|
55
+ |leaderboard:mmlu:anatomy:5 | |acc |0.3000|± |0.1528|
56
+ |leaderboard:mmlu:astronomy:5 | |acc |0.7000|± |0.1528|
57
+ |leaderboard:mmlu:business_ethics:5 | |acc |0.4000|± |0.1633|
58
+ |leaderboard:mmlu:clinical_knowledge:5 | |acc |0.7000|± |0.1528|
59
+ |leaderboard:mmlu:college_biology:5 | |acc |0.5000|± |0.1667|
60
+ |leaderboard:mmlu:college_chemistry:5 | |acc |0.4000|± |0.1633|
61
+ |leaderboard:mmlu:college_computer_science:5 | |acc |0.4000|± |0.1633|
62
+ |leaderboard:mmlu:college_mathematics:5 | |acc |0.2000|± |0.1333|
63
+ |leaderboard:mmlu:college_medicine:5 | |acc |0.5000|± |0.1667|
64
+ |leaderboard:mmlu:college_physics:5 | |acc |0.5000|± |0.1667|
65
+ |leaderboard:mmlu:computer_security:5 | |acc |0.9000|± |0.1000|
66
+ |leaderboard:mmlu:conceptual_physics:5 | |acc |0.4000|± |0.1633|
67
+ |leaderboard:mmlu:econometrics:5 | |acc |0.4000|± |0.1633|
68
+ |leaderboard:mmlu:electrical_engineering:5 | |acc |0.7000|± |0.1528|
69
+ |leaderboard:mmlu:elementary_mathematics:5 | |acc |0.3000|± |0.1528|
70
+ |leaderboard:mmlu:formal_logic:5 | |acc |0.3000|± |0.1528|
71
+ |leaderboard:mmlu:global_facts:5 | |acc |0.3000|± |0.1528|
72
+ |leaderboard:mmlu:high_school_biology:5 | |acc |0.9000|± |0.1000|
73
+ |leaderboard:mmlu:high_school_chemistry:5 | |acc |0.5000|± |0.1667|
74
+ |leaderboard:mmlu:high_school_computer_science:5 | |acc |0.6000|± |0.1633|
75
+ |leaderboard:mmlu:high_school_european_history:5 | |acc |0.7000|± |0.1528|
76
+ |leaderboard:mmlu:high_school_geography:5 | |acc |1.0000|± |0.0000|
77
+ |leaderboard:mmlu:high_school_government_and_politics:5| |acc |0.8000|± |0.1333|
78
+ |leaderboard:mmlu:high_school_macroeconomics:5 | |acc |0.6000|± |0.1633|
79
+ |leaderboard:mmlu:high_school_mathematics:5 | |acc |0.3000|± |0.1528|
80
+ |leaderboard:mmlu:high_school_microeconomics:5 | |acc |0.7000|± |0.1528|
81
+ |leaderboard:mmlu:high_school_physics:5 | |acc |0.3000|± |0.1528|
82
+ |leaderboard:mmlu:high_school_psychology:5 | |acc |0.9000|± |0.1000|
83
+ |leaderboard:mmlu:high_school_statistics:5 | |acc |0.5000|± |0.1667|
84
+ |leaderboard:mmlu:high_school_us_history:5 | |acc |0.8000|± |0.1333|
85
+ |leaderboard:mmlu:high_school_world_history:5 | |acc |0.9000|± |0.1000|
86
+ |leaderboard:mmlu:human_aging:5 | |acc |0.5000|± |0.1667|
87
+ |leaderboard:mmlu:human_sexuality:5 | |acc |0.4000|± |0.1633|
88
+ |leaderboard:mmlu:international_law:5 | |acc |0.6000|± |0.1633|
89
+ |leaderboard:mmlu:jurisprudence:5 | |acc |0.6000|± |0.1633|
90
+ |leaderboard:mmlu:logical_fallacies:5 | |acc |0.4000|± |0.1633|
91
+ |leaderboard:mmlu:machine_learning:5 | |acc |0.5000|± |0.1667|
92
+ |leaderboard:mmlu:management:5 | |acc |0.5000|± |0.1667|
93
+ |leaderboard:mmlu:marketing:5 | |acc |0.8000|± |0.1333|
94
+ |leaderboard:mmlu:medical_genetics:5 | |acc |0.9000|± |0.1000|
95
+ |leaderboard:mmlu:miscellaneous:5 | |acc |0.5000|± |0.1667|
96
+ |leaderboard:mmlu:moral_disputes:5 | |acc |0.7000|± |0.1528|
97
+ |leaderboard:mmlu:moral_scenarios:5 | |acc |0.1000|± |0.1000|
98
+ |leaderboard:mmlu:nutrition:5 | |acc |0.6000|± |0.1633|
99
+ |leaderboard:mmlu:philosophy:5 | |acc |0.5000|± |0.1667|
100
+ |leaderboard:mmlu:prehistory:5 | |acc |0.4000|± |0.1633|
101
+ |leaderboard:mmlu:professional_accounting:5 | |acc |0.3000|± |0.1528|
102
+ |leaderboard:mmlu:professional_law:5 | |acc |0.4000|± |0.1633|
103
+ |leaderboard:mmlu:professional_medicine:5 | |acc |0.2000|± |0.1333|
104
+ |leaderboard:mmlu:professional_psychology:5 | |acc |0.3000|± |0.1528|
105
+ |leaderboard:mmlu:public_relations:5 | |acc |0.3000|± |0.1528|
106
+ |leaderboard:mmlu:security_studies:5 | |acc |0.3000|± |0.1528|
107
+ |leaderboard:mmlu:sociology:5 | |acc |0.8000|± |0.1333|
108
+ |leaderboard:mmlu:us_foreign_policy:5 | |acc |0.7000|± |0.1528|
109
+ |leaderboard:mmlu:virology:5 | |acc |0.5000|± |0.1667|
110
+ |leaderboard:mmlu:world_religions:5 | |acc |0.8000|± |0.1333|
111
+ |leaderboard:truthfulqa:mc:0 | |truthfulqa_mc1 |0.6000|± |0.1633|
112
+ | | |truthfulqa_mc2 |0.7066|± |0.1481|
113
+ |leaderboard:winogrande:5 | |acc |0.7000|± |0.1528|
114
+
115
+
116
+
117
  ### Framework versions
118
 
119
  - PEFT 0.17.1
 
125
 
126
  ## Citations
127
 
 
 
128
  Cite TRL as:
129
 
130
  ```bibtex