File size: 2,253 Bytes
d473371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# AUP Benchmark Data
# Format: task -> method -> list of (rho, accuracy) pairs
# rho: parallelism (tokens per forward)

# Model metadata: type (AR/dLLM), foundation model, link
# Model registry. The leading underscore marks this as a metadata
# section rather than a task section; each key here must match a
# method key used under the task sections below.
_meta:
  # Autoregressive (AR) baseline.
  Qwen-2.5-7B-it:
    type: AR
    foundation: Qwen-2.5-7B-it
    link: https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
  # Base diffusion LLM (dLLM).
  LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/ML-GSAI/LLaDA
  # The remaining entries are dLLM methods built on the same
  # LLaDA-8B-it foundation; see each link for method details.
  Fast-dLLM-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/NVlabs/Fast-dLLM
  D2F-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/zhijie-group/Discrete-Diffusion-Forcing
  dParallel-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/czg1225/dParallel
  d3LLM-LLaDA:
    type: dLLM
    foundation: LLaDA-8B-it
    link: https://github.com/hao-ai-lab/d3llm

# GSM8K with chain-of-thought prompting.
# Each entry is [rho, accuracy]: rho is parallelism (tokens per
# forward pass; 1.0 = fully sequential decoding), accuracy is in
# percent. Methods with a single entry were measured at rho=1.0 only.
GSM8K-CoT:
  Qwen-2.5-7B-it:
  - [1.0, 74.1]
  LLaDA:
  - [1.0, 72.55]
  Fast-dLLM-LLaDA:
  - [1.0, 74.79]
  - [2.77, 74.68]
  D2F-LLaDA:
  - [1.0, 74.98]
  - [2.88, 74.39]
  dParallel-LLaDA:
  - [1.0, 74.0]
  - [5.14, 72.63]
  d3LLM-LLaDA:
  - [1.0, 74.02]
  - [9.11, 73.09]
# MATH benchmark. Entries are [rho, accuracy%]; rho is tokens decoded
# per forward pass (1.0 = sequential baseline).
MATH:
  Qwen-2.5-7B-it:
  - [1.0, 41.15]
  LLaDA:
  - [1.0, 32.2]
  Fast-dLLM-LLaDA:
  - [1.0, 32.1]
  - [1.97, 30.82]
  D2F-LLaDA:
  - [1.0, 29.1]
  - [2.66, 28.94]
  dParallel-LLaDA:
  - [1.0, 32.0]
  - [3.17, 30.18]
  d3LLM-LLaDA:
  - [1.0, 32.76]
  - [5.74, 30.36]
# MBPP (code generation). Entries are [rho, accuracy%]; rho is tokens
# decoded per forward pass (1.0 = sequential baseline).
MBPP:
  Qwen-2.5-7B-it:
  - [1.0, 63.6]
  LLaDA:
  - [1.0, 41.72]
  Fast-dLLM-LLaDA:
  - [1.0, 41.58]
  - [2.13, 38.6]
  D2F-LLaDA:
  - [1.0, 39.10]
  - [2.13, 39.00]
  dParallel-LLaDA:
  - [1.0, 41.62]
  - [2.35, 40.0]
  d3LLM-LLaDA:
  - [1.0, 42.0]
  - [4.21, 40.60]
# HumanEval (code generation). Entries are [rho, accuracy%]; rho is
# tokens decoded per forward pass (1.0 = sequential baseline).
HumanEval:
  Qwen-2.5-7B-it:
  - [1.0, 67.73]
  LLaDA:
  - [1.0, 38.28]
  Fast-dLLM-LLaDA:
  - [1.0, 38.16]
  - [2.56, 37.8]
  D2F-LLaDA:
  - [1.0, 41.02]
  - [2.69, 40.64]
  dParallel-LLaDA:
  - [1.0, 39.68]
  - [4.93, 39.02]
  d3LLM-LLaDA:
  - [1.0, 39.8]
  - [5.95, 39.63]
# Long-GSM8K — presumably a long-generation/long-context GSM8K
# variant; NOTE(review): confirm definition with the benchmark source.
# Entries are [rho, accuracy%]; rho is tokens decoded per forward
# pass (1.0 = sequential baseline).
Long-GSM8K:
  Qwen-2.5-7B-it:
  - [1.0, 82.56]
  LLaDA:
  - [1.0, 78.58]
  Fast-dLLM-LLaDA:
  - [1.0, 78.45]
  - [2.45, 78.01]
  D2F-LLaDA:
  - [1.0, 76.00]
  - [2.7, 75.66]
  dParallel-LLaDA:
  - [1.0, 79.15]
  - [4.49, 76.65]
  d3LLM-LLaDA:
  - [1.0, 78.32]
  - [6.95, 74.22]