Text Generation
MLX
Safetensors
Rust
qwen2
7b
agentic-coding
android
apple-silicon
attested
bash
c
chain-of-custody
chinese
code
code-completion
code-generation
code-infill
compacted
compensation-lora
consumer-gpu
cpp
cryptographically-verified
css
distillation
edge-inference
efficient
embedded
english
forge-alloy
function-calling
general
general-purpose
go
head-pruning
html
iphone
java
javascript
knowledge-distillation
kotlin
llama-cpp
lm-studio
local-inference
lora
macbook
mobile
multilingual
ollama
on-device
optimized
php
pruned
python
qwen
qwen-coder
qwen2.5
qwen2.5-coder
raspberry-pi
reproducible
ruby
sql
swift
teacher-student
typescript
validation-artifact
versatile
conversational
Upload eval/calibrated_eval_results.json with huggingface_hub
Browse files
eval/calibrated_eval_results.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "complete",
|
| 3 |
+
"benchmark": "humaneval",
|
| 4 |
+
"anchor": {
|
| 5 |
+
"model": "Qwen/Qwen2.5-Coder-7B",
|
| 6 |
+
"published": {
|
| 7 |
+
"score": 61.6,
|
| 8 |
+
"metric": "pass@1",
|
| 9 |
+
"source": "Qwen2.5-Coder Technical Report Table 5, arXiv:2409.12186"
|
| 10 |
+
},
|
| 11 |
+
"measured_scores": {
|
| 12 |
+
"humaneval": 62.2,
|
| 13 |
+
"humaneval_plus": 53.7
|
| 14 |
+
},
|
| 15 |
+
"delta": 0.6
|
| 16 |
+
},
|
| 17 |
+
"model_under_test": {
|
| 18 |
+
"path": "/home/joel/forge_v2_qwen7b_compensated_kl",
|
| 19 |
+
"measured_scores": {
|
| 20 |
+
"humaneval": 61.0,
|
| 21 |
+
"humaneval_plus": 53.0
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"tolerance": 3.0,
|
| 25 |
+
"calibration_passed": true
|
| 26 |
+
}
|