OnAnOrange committed on
Commit
d6f109e
·
verified ·
1 Parent(s): d412e98

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +68 -23
README.md CHANGED
@@ -124,19 +124,33 @@ def generate(
124
  ):
125
  device = model.device
126
  mask_id = tokenizer.mask_token_id
127
- bos_id = tokenizer.bos_token_id
128
  pad_id = tokenizer.pad_token_id
 
 
129
 
130
- prompt = torch.tensor(prompt, device=device).long()
131
- B = 1
132
- T0 = len(prompt)
133
- x = prompt
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  num_blocks = math.ceil(max_new_tokens / block_size)
136
  steps_per_block = math.ceil(steps / num_blocks)
137
  generated = 0
138
 
139
  while generated < max_new_tokens:
 
 
140
  T_prefix = x.size(1)
141
  offset = T_prefix % block_size
142
  room = block_size if offset == 0 else block_size - offset
@@ -148,7 +162,6 @@ def generate(
148
 
149
  out = model(x, attention_mask=attn_pfx, position_ids=pos_pfx, use_cache=True)
150
  cond_past = out.past_key_values
151
- prefix_logits = out.logits[:, -1:, :]
152
 
153
  if cfg_scale > 0:
154
  un_x = x.clone()
@@ -159,6 +172,7 @@ def generate(
159
  uncond_past = None
160
 
161
  block = torch.full((B, cur_len), mask_id, device=device, dtype=torch.long)
 
162
  x = torch.cat([x, block], dim=1)
163
  T_total = x.size(1)
164
 
@@ -191,39 +205,69 @@ def generate(
191
  logits, x_blk, m_blk, num_transfer[:, t], temperature, remasking
192
  )
193
  x[:, T_prefix:T_total] = x_blk_new
194
-
195
- if (x_blk_new == tokenizer.eos_token_id).any():
196
- break
 
197
 
198
  generated += cur_len
 
 
199
 
200
  return x
201
 
202
 
203
- device = "cuda"
204
  model = AutoModelForMaskedLM.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", dtype=torch.bfloat16, trust_remote_code=True).to(device).eval()
205
  tokenizer = AutoTokenizer.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", trust_remote_code=True)
206
 
207
- prompt = "Lily can run 12 kilometers per hour for 4 hours. After that, she runs 6 kilometers per hour. How many kilometers can she run in 8 hours?"
208
- m = [
209
- {"role": "system", "content": "You are a helpful AI assistant."},
210
- {"role": "user", "content": prompt}
 
 
 
 
 
211
  ]
212
- prompt = tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False)
213
 
214
- input_ids = tokenizer(prompt)["input_ids"]
215
- input_ids = torch.tensor(input_ids).to(device).unsqueeze(0)
216
- text = generate(model,tokenizer, input_ids, steps=256, max_new_tokens=256, block_size=32, temperature=0.0, cfg_scale=0.0, remasking="low_confidence")
217
- print(tokenizer.batch_decode(text[:, input_ids.shape[1]:], skip_special_tokens=False)[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
 
 
 
 
219
  ```
220
 
221
  ## Generation Parameters
222
 
223
  | Parameter | Description | Default |
224
  | ---------------- | ---------------------------------------------------------------------------------------------- | -------- |
225
- | `max_new_tokens` | Number of tokens to generate | 256 |
226
- | `steps` | Number of diffusion denoising iterations | 256 |
227
  | `temperature` | Sampling temperature; set to `0.0` for deterministic generation | 0.0 |
228
  | `block_size` | Token block size used during iterative denoising | 32 |
229
  | `cfg_scale` | Classifier-free guidance scale controlling instruction adherence (higher = more deterministic) | 0.0 |
@@ -236,7 +280,7 @@ Follow the Github repo's demo script [examples/a2d/bd3lm/chat.py](https://github
236
  ```shell
237
  python -u examples/a2d/bd3lm/chat.py \
238
  --model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 \
239
- --chat True
240
  ```
241
 
242
  ## Evaluation
@@ -279,7 +323,8 @@ python -u examples/a2d/bd3lm/chat.py \
279
 
280
  To automatically evaluate Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 on all benchmarks, run:
281
  ```shell
282
- bash examples/a2d/eval_bd3lm.sh \
 
283
  --model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1
284
  ```
285
 
 
124
  ):
125
  device = model.device
126
  mask_id = tokenizer.mask_token_id
 
127
  pad_id = tokenizer.pad_token_id
128
+ if pad_id is None:
129
+ pad_id = tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.mask_token_id
130
 
131
+ if isinstance(prompt, torch.Tensor):
132
+ x = prompt.to(device).long()
133
+ else:
134
+ if isinstance(prompt[0], (list, tuple)):
135
+ max_len = max(len(p) for p in prompt)
136
+ x = torch.full((len(prompt), max_len), pad_id, device=device, dtype=torch.long)
137
+ for i, p in enumerate(prompt):
138
+ x[i, : len(p)] = torch.tensor(p, device=device)
139
+ else:
140
+ x = torch.tensor(prompt, device=device).long()
141
+ if x.dim() == 1:
142
+ x = x.unsqueeze(0)
143
+
144
+ B = x.size(0)
145
+ finished = torch.zeros(B, dtype=torch.bool, device=device)
146
 
147
  num_blocks = math.ceil(max_new_tokens / block_size)
148
  steps_per_block = math.ceil(steps / num_blocks)
149
  generated = 0
150
 
151
  while generated < max_new_tokens:
152
+ if finished.all():
153
+ break
154
  T_prefix = x.size(1)
155
  offset = T_prefix % block_size
156
  room = block_size if offset == 0 else block_size - offset
 
162
 
163
  out = model(x, attention_mask=attn_pfx, position_ids=pos_pfx, use_cache=True)
164
  cond_past = out.past_key_values
 
165
 
166
  if cfg_scale > 0:
167
  un_x = x.clone()
 
172
  uncond_past = None
173
 
174
  block = torch.full((B, cur_len), mask_id, device=device, dtype=torch.long)
175
+ block[finished] = pad_id
176
  x = torch.cat([x, block], dim=1)
177
  T_total = x.size(1)
178
 
 
205
  logits, x_blk, m_blk, num_transfer[:, t], temperature, remasking
206
  )
207
  x[:, T_prefix:T_total] = x_blk_new
208
+ if tokenizer.eos_token_id is not None:
209
+ finished |= (x_blk_new == tokenizer.eos_token_id).any(dim=1)
210
+ if finished.all():
211
+ break
212
 
213
  generated += cur_len
214
+ if finished.all():
215
+ break
216
 
217
  return x
218
 
219
 
220
+ device = "cuda" if torch.cuda.is_available() else "cpu"
221
  model = AutoModelForMaskedLM.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", dtype=torch.bfloat16, trust_remote_code=True).to(device).eval()
222
  tokenizer = AutoTokenizer.from_pretrained("dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1", trust_remote_code=True)
223
 
224
+ prompts = [
225
+ [
226
+ {"role": "system", "content": "You are a helpful AI assistant."},
227
+ {"role": "user", "content": "Implement a BFS traversal in Python with clear inline comments."},
228
+ ],
229
+ [
230
+ {"role": "system", "content": "You are a helpful AI assistant."},
231
+ {"role": "user", "content": "Write a concise pytest that checks a Fibonacci implementation."},
232
+ ],
233
  ]
 
234
 
235
+ encoded = [tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=True) for m in prompts]
236
+ prompt_lens = [len(e) for e in encoded]
237
+ max_len = max(prompt_lens)
238
+ pad_id = tokenizer.pad_token_id
239
+ if pad_id is None:
240
+ pad_id = tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.mask_token_id
241
+ input_ids = torch.full((len(encoded), max_len), pad_id, dtype=torch.long)
242
+ for i, ids in enumerate(encoded):
243
+ input_ids[i, : len(ids)] = torch.tensor(ids, dtype=torch.long)
244
+ input_ids = input_ids.to(device)
245
+
246
+ max_new_tokens = 128
247
+ text = generate(
248
+ model,
249
+ tokenizer,
250
+ input_ids,
251
+ steps=128,
252
+ max_new_tokens=max_new_tokens,
253
+ block_size=32,
254
+ temperature=0.0,
255
+ cfg_scale=0.0,
256
+ remasking="low_confidence",
257
+ )
258
 
259
+ new_tokens = [text[i, prompt_lens[i] : prompt_lens[i] + max_new_tokens].tolist() for i in range(len(prompt_lens))]
260
+ for idx, decoded in enumerate(tokenizer.batch_decode(new_tokens, skip_special_tokens=False)):
261
+ print(f"\n[Sample {idx}]")
262
+ print(decoded)
263
  ```
264
 
265
  ## Generation Parameters
266
 
267
  | Parameter | Description | Default |
268
  | ---------------- | ---------------------------------------------------------------------------------------------- | -------- |
269
+ | `max_new_tokens` | Number of tokens to generate | 128 |
270
+ | `steps` | Number of diffusion denoising iterations | 128 |
271
  | `temperature` | Sampling temperature; set to `0.0` for deterministic generation | 0.0 |
272
  | `block_size` | Token block size used during iterative denoising | 32 |
273
  | `cfg_scale` | Classifier-free guidance scale controlling instruction adherence (higher = more deterministic) | 0.0 |
 
280
  ```shell
281
  python -u examples/a2d/bd3lm/chat.py \
282
  --model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 \
283
+ --chat_template True
284
  ```
285
 
286
  ## Evaluation
 
323
 
324
  To automatically evaluate Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1 on all benchmarks, run:
325
  ```shell
326
+ bash examples/a2d/bd3lm/eval.sh \
327
+ --model_type coder \
328
  --model_name_or_path dllm-collection/Qwen2.5-Coder-0.5B-Instruct-diffusion-bd3lm-v0.1
329
  ```
330