Update README.md
Browse files
README.md
CHANGED
|
@@ -124,19 +124,33 @@ def generate(
|
|
| 124 |
):
|
| 125 |
device = model.device
|
| 126 |
mask_id = tokenizer.mask_token_id
|
| 127 |
-
bos_id = tokenizer.bos_token_id
|
| 128 |
pad_id = tokenizer.pad_token_id
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
num_blocks = math.ceil(max_new_tokens / block_size)
|
| 136 |
steps_per_block = math.ceil(steps / num_blocks)
|
| 137 |
generated = 0
|
| 138 |
|
| 139 |
while generated < max_new_tokens:
|
|
|
|
|
|
|
| 140 |
T_prefix = x.size(1)
|
| 141 |
offset = T_prefix % block_size
|
| 142 |
room = block_size if offset == 0 else block_size - offset
|
|
@@ -148,7 +162,6 @@ def generate(
|
|
| 148 |
|
| 149 |
out = model(x, attention_mask=attn_pfx, position_ids=pos_pfx, use_cache=True)
|
| 150 |
cond_past = out.past_key_values
|
| 151 |
-
prefix_logits = out.logits[:, -1:, :]
|
| 152 |
|
| 153 |
if cfg_scale > 0:
|
| 154 |
un_x = x.clone()
|
|
@@ -159,6 +172,7 @@ def generate(
|
|
| 159 |
uncond_past = None
|
| 160 |
|
| 161 |
block = torch.full((B, cur_len), mask_id, device=device, dtype=torch.long)
|
|
|
|
| 162 |
x = torch.cat([x, block], dim=1)
|
| 163 |
T_total = x.size(1)
|
| 164 |
|
|
@@ -191,39 +205,69 @@ def generate(
|
|
| 191 |
logits, x_blk, m_blk, num_transfer[:, t], temperature, remasking
|
| 192 |
)
|
| 193 |
x[:, T_prefix:T_total] = x_blk_new
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
| 197 |
|
| 198 |
generated += cur_len
|
|
|
|
|
|
|
| 199 |
|
| 200 |
return x
|
| 201 |
|
| 202 |
|
| 203 |
-
device = "cuda"
|
| 204 |
model = AutoModelForMaskedLM.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", dtype=torch.bfloat16, trust_remote_code=True).to(device).eval()
|
| 205 |
tokenizer = AutoTokenizer.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", trust_remote_code=True)
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
]
|
| 212 |
-
prompt = tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False)
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
```
|
| 220 |
|
| 221 |
## Generation Parameters
|
| 222 |
|
| 223 |
| Parameter | Description | Default |
|
| 224 |
| ---------------- | ---------------------------------------------------------------------------------------------- | -------- |
|
| 225 |
-
| `max_new_tokens` | Number of tokens to generate |
|
| 226 |
-
| `steps` | Number of diffusion denoising iterations |
|
| 227 |
| `temperature` | Sampling temperature; set to `0.0` for deterministic generation | 0.0 |
|
| 228 |
| `block_size` | Token block size used during iterative denoising | 32 |
|
| 229 |
| `cfg_scale` | Classifier-free guidance scale controlling instruction adherence (higher = more deterministic) | 0.0 |
|
|
@@ -236,7 +280,7 @@ Follow the Github repo's demo script [examples/a2d/bd3lm/chat.py](https://github
|
|
| 236 |
```shell
|
| 237 |
python -u examples/a2d/bd3lm/chat.py \
|
| 238 |
--model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 \
|
| 239 |
-
--
|
| 240 |
```
|
| 241 |
|
| 242 |
## Evaluation
|
|
@@ -279,7 +323,8 @@ python -u examples/a2d/bd3lm/chat.py \
|
|
| 279 |
|
| 280 |
To automatically evaluate Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 on all benchmarks, run:
|
| 281 |
```shell
|
| 282 |
-
bash examples/a2d/
|
|
|
|
| 283 |
--model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1
|
| 284 |
```
|
| 285 |
|
|
|
|
| 124 |
):
|
| 125 |
device = model.device
|
| 126 |
mask_id = tokenizer.mask_token_id
|
|
|
|
| 127 |
pad_id = tokenizer.pad_token_id
|
| 128 |
+
if pad_id is None:
|
| 129 |
+
pad_id = tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.mask_token_id
|
| 130 |
|
| 131 |
+
if isinstance(prompt, torch.Tensor):
|
| 132 |
+
x = prompt.to(device).long()
|
| 133 |
+
else:
|
| 134 |
+
if isinstance(prompt[0], (list, tuple)):
|
| 135 |
+
max_len = max(len(p) for p in prompt)
|
| 136 |
+
x = torch.full((len(prompt), max_len), pad_id, device=device, dtype=torch.long)
|
| 137 |
+
for i, p in enumerate(prompt):
|
| 138 |
+
x[i, : len(p)] = torch.tensor(p, device=device)
|
| 139 |
+
else:
|
| 140 |
+
x = torch.tensor(prompt, device=device).long()
|
| 141 |
+
if x.dim() == 1:
|
| 142 |
+
x = x.unsqueeze(0)
|
| 143 |
+
|
| 144 |
+
B = x.size(0)
|
| 145 |
+
finished = torch.zeros(B, dtype=torch.bool, device=device)
|
| 146 |
|
| 147 |
num_blocks = math.ceil(max_new_tokens / block_size)
|
| 148 |
steps_per_block = math.ceil(steps / num_blocks)
|
| 149 |
generated = 0
|
| 150 |
|
| 151 |
while generated < max_new_tokens:
|
| 152 |
+
if finished.all():
|
| 153 |
+
break
|
| 154 |
T_prefix = x.size(1)
|
| 155 |
offset = T_prefix % block_size
|
| 156 |
room = block_size if offset == 0 else block_size - offset
|
|
|
|
| 162 |
|
| 163 |
out = model(x, attention_mask=attn_pfx, position_ids=pos_pfx, use_cache=True)
|
| 164 |
cond_past = out.past_key_values
|
|
|
|
| 165 |
|
| 166 |
if cfg_scale > 0:
|
| 167 |
un_x = x.clone()
|
|
|
|
| 172 |
uncond_past = None
|
| 173 |
|
| 174 |
block = torch.full((B, cur_len), mask_id, device=device, dtype=torch.long)
|
| 175 |
+
block[finished] = pad_id
|
| 176 |
x = torch.cat([x, block], dim=1)
|
| 177 |
T_total = x.size(1)
|
| 178 |
|
|
|
|
| 205 |
logits, x_blk, m_blk, num_transfer[:, t], temperature, remasking
|
| 206 |
)
|
| 207 |
x[:, T_prefix:T_total] = x_blk_new
|
| 208 |
+
if tokenizer.eos_token_id is not None:
|
| 209 |
+
finished |= (x_blk_new == tokenizer.eos_token_id).any(dim=1)
|
| 210 |
+
if finished.all():
|
| 211 |
+
break
|
| 212 |
|
| 213 |
generated += cur_len
|
| 214 |
+
if finished.all():
|
| 215 |
+
break
|
| 216 |
|
| 217 |
return x
|
| 218 |
|
| 219 |
|
| 220 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 221 |
model = AutoModelForMaskedLM.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", dtype=torch.bfloat16, trust_remote_code=True).to(device).eval()
|
| 222 |
tokenizer = AutoTokenizer.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", trust_remote_code=True)
|
| 223 |
|
| 224 |
+
prompts = [
|
| 225 |
+
[
|
| 226 |
+
{"role": "system", "content": "You are a helpful AI assistant."},
|
| 227 |
+
{"role": "user", "content": "Implement a BFS traversal in Python with clear inline comments."},
|
| 228 |
+
],
|
| 229 |
+
[
|
| 230 |
+
{"role": "system", "content": "You are a helpful AI assistant."},
|
| 231 |
+
{"role": "user", "content": "Write a concise pytest that checks a Fibonacci implementation."},
|
| 232 |
+
],
|
| 233 |
]
|
|
|
|
| 234 |
|
| 235 |
+
encoded = [tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=True) for m in prompts]
|
| 236 |
+
prompt_lens = [len(e) for e in encoded]
|
| 237 |
+
max_len = max(prompt_lens)
|
| 238 |
+
pad_id = tokenizer.pad_token_id
|
| 239 |
+
if pad_id is None:
|
| 240 |
+
pad_id = tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.mask_token_id
|
| 241 |
+
input_ids = torch.full((len(encoded), max_len), pad_id, dtype=torch.long)
|
| 242 |
+
for i, ids in enumerate(encoded):
|
| 243 |
+
input_ids[i, : len(ids)] = torch.tensor(ids, dtype=torch.long)
|
| 244 |
+
input_ids = input_ids.to(device)
|
| 245 |
+
|
| 246 |
+
max_new_tokens = 128
|
| 247 |
+
text = generate(
|
| 248 |
+
model,
|
| 249 |
+
tokenizer,
|
| 250 |
+
input_ids,
|
| 251 |
+
steps=128,
|
| 252 |
+
max_new_tokens=max_new_tokens,
|
| 253 |
+
block_size=32,
|
| 254 |
+
temperature=0.0,
|
| 255 |
+
cfg_scale=0.0,
|
| 256 |
+
remasking="low_confidence",
|
| 257 |
+
)
|
| 258 |
|
| 259 |
+
new_tokens = [text[i, prompt_lens[i] : prompt_lens[i] + max_new_tokens].tolist() for i in range(len(prompt_lens))]
|
| 260 |
+
for idx, decoded in enumerate(tokenizer.batch_decode(new_tokens, skip_special_tokens=False)):
|
| 261 |
+
print(f"\n[Sample {idx}]")
|
| 262 |
+
print(decoded)
|
| 263 |
```
|
| 264 |
|
| 265 |
## Generation Parameters
|
| 266 |
|
| 267 |
| Parameter | Description | Default |
|
| 268 |
| ---------------- | ---------------------------------------------------------------------------------------------- | -------- |
|
| 269 |
+
| `max_new_tokens` | Maximum number of new tokens to generate (generation stops early once every sequence in the batch has produced EOS) | 128 |
|
| 270 |
+
| `steps` | Total number of diffusion denoising iterations, divided across the generation blocks | 128 |
|
| 271 |
| `temperature` | Sampling temperature; set to `0.0` for deterministic generation | 0.0 |
|
| 272 |
| `block_size` | Token block size used during iterative denoising | 32 |
|
| 273 |
| `cfg_scale` | Classifier-free guidance scale controlling instruction adherence (higher = more deterministic) | 0.0 |
|
|
|
|
| 280 |
```shell
|
| 281 |
python -u examples/a2d/bd3lm/chat.py \
|
| 282 |
--model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 \
|
| 283 |
+
--chat_template True
|
| 284 |
```
|
| 285 |
|
| 286 |
## Evaluation
|
|
|
|
| 323 |
|
| 324 |
To automatically evaluate Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 on all benchmarks, run:
|
| 325 |
```shell
|
| 326 |
+
bash examples/a2d/bd3lm/eval.sh \
|
| 327 |
+
--model_type coder \
|
| 328 |
--model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1
|
| 329 |
```
|
| 330 |
|