---
# AUP Benchmark Data
# Format: task -> method -> list of [rho, accuracy] pairs
# rho: parallelism (tokens per forward); 1.0 = one token per forward (sequential baseline)
# accuracy: model accuracy in percent (0-100 scale; e.g. 74.1 = 74.1%)
# Model metadata: type (AR/dLLM), foundation model, link
# _meta: per-method metadata; keys here match the method keys used under each task below.
_meta:
  Qwen-2.5-7B-it:
    type: AR
    foundation: Qwen-2.5-7B-it
    link: https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
  LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/ML-GSAI/LLaDA
  Fast-dLLM-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/NVlabs/Fast-dLLM
  D2F-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/zhijie-group/Discrete-Diffusion-Forcing
  dParallel-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/czg1225/dParallel
  d3LLM-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/hao-ai-lab/d3llm

# Per-task results. Each method lists one or more [rho, accuracy] measurements;
# methods with a single entry were measured only at rho = 1.0.
GSM8K-CoT:
  Qwen-2.5-7B-it:
    - [1.0, 74.1]
  LLaDA:
    - [1.0, 72.55]
  Fast-dLLM-LLaDA:
    - [1.0, 74.79]
    - [2.77, 74.68]
  D2F-LLaDA:
    - [1.0, 74.98]
    - [2.88, 74.39]
  dParallel-LLaDA:
    - [1.0, 74.0]
    - [5.14, 72.63]
  d3LLM-LLaDA:
    - [1.0, 74.02]
    - [9.11, 73.09]

MATH:
  Qwen-2.5-7B-it:
    - [1.0, 41.15]
  LLaDA:
    - [1.0, 32.2]
  Fast-dLLM-LLaDA:
    - [1.0, 32.1]
    - [1.97, 30.82]
  D2F-LLaDA:
    - [1.0, 29.1]
    - [2.66, 28.94]
  dParallel-LLaDA:
    - [1.0, 32.0]
    - [3.17, 30.18]
  d3LLM-LLaDA:
    - [1.0, 32.76]
    - [5.74, 30.36]

MBPP:
  Qwen-2.5-7B-it:
    - [1.0, 63.6]
  LLaDA:
    - [1.0, 41.72]
  Fast-dLLM-LLaDA:
    - [1.0, 41.58]
    - [2.13, 38.6]
  D2F-LLaDA:
    - [1.0, 39.10]
    - [2.13, 39.00]
  dParallel-LLaDA:
    - [1.0, 41.62]
    - [2.35, 40.0]
  d3LLM-LLaDA:
    - [1.0, 42.0]
    - [4.21, 40.60]

HumanEval:
  Qwen-2.5-7B-it:
    - [1.0, 67.73]
  LLaDA:
    - [1.0, 38.28]
  Fast-dLLM-LLaDA:
    - [1.0, 38.16]
    - [2.56, 37.8]
  D2F-LLaDA:
    - [1.0, 41.02]
    - [2.69, 40.64]
  dParallel-LLaDA:
    - [1.0, 39.68]
    - [4.93, 39.02]
  d3LLM-LLaDA:
    - [1.0, 39.8]
    - [5.95, 39.63]

Long-GSM8K:
  Qwen-2.5-7B-it:
    - [1.0, 82.56]
  LLaDA:
    - [1.0, 78.58]
  Fast-dLLM-LLaDA:
    - [1.0, 78.45]
    - [2.45, 78.01]
  D2F-LLaDA:
    - [1.0, 76.00]
    - [2.7, 75.66]
  dParallel-LLaDA:
    - [1.0, 79.15]
    - [4.49, 76.65]
  d3LLM-LLaDA:
    - [1.0, 78.32]
    - [6.95, 74.22]