EnricoFermi committed on
Commit
c8371e7
·
verified ·
1 Parent(s): b099f13

Upload eval/calibrated_eval_results.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. eval/calibrated_eval_results.json +26 -0
eval/calibrated_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "complete",
3
+ "benchmark": "humaneval",
4
+ "anchor": {
5
+ "model": "Qwen/Qwen2.5-Coder-7B",
6
+ "published": {
7
+ "score": 61.6,
8
+ "metric": "pass@1",
9
+ "source": "Qwen2.5-Coder Technical Report Table 5, arXiv:2409.12186"
10
+ },
11
+ "measured_scores": {
12
+ "humaneval": 62.2,
13
+ "humaneval_plus": 53.7
14
+ },
15
+ "delta": 0.6
16
+ },
17
+ "model_under_test": {
18
+ "path": "/home/joel/forge_v2_qwen7b_compensated_kl",
19
+ "measured_scores": {
20
+ "humaneval": 61.0,
21
+ "humaneval_plus": 53.0
22
+ }
23
+ },
24
+ "tolerance": 3.0,
25
+ "calibration_passed": true
26
+ }