narcolepticchicken committed on
Commit
6a7c356
·
verified ·
1 Parent(s): 799bf90

Upload reports/humaneval_real_results.json

Browse files
Files changed (1) hide show
  1. reports/humaneval_real_results.json +26 -0
reports/humaneval_real_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
3
+ "date": "2026-05-07",
4
+ "benchmark": "openai/openai_humaneval",
5
+ "num_problems": 164,
6
+ "pass1_short_tokens": 128,
7
+ "pass2_long_tokens": 1024,
8
+ "pass1_passed": 103,
9
+ "pass1_failed": 61,
10
+ "pass1_accuracy": 0.6280,
11
+ "pass1_tokens": 12859,
12
+ "pass2_passed": 20,
13
+ "pass2_failed": 41,
14
+ "pass2_tokens": 8184,
15
+ "final_passed": 123,
16
+ "final_accuracy": 0.7500,
17
+ "total_tokens": 21043,
18
+ "baseline_tokens": 167936,
19
+ "savings_percent": 87.5,
20
+ "failure_analysis": {
21
+ "syntax_errors_from_truncation": "majority of remaining failures",
22
+ "genuine_assert_errors": "~20% of failures",
23
+ "note": "Raising short tokens from 128 to 256 would likely push pass@1 to 80%+"
24
+ },
25
+ "notes": "Completion format (no chat template), stop-token trimming at \\nclass \\ndef \\n# \\nif __name__ \\nprint(, clean_body strips leading/trailing blank lines"
26
+ }