Update vlm_inference.py
Browse files
- vlm_inference.py +0 -5
vlm_inference.py
CHANGED
|
@@ -166,10 +166,6 @@ def vlm_infer_stream(
|
|
| 166 |
logits = model.llm.vocab_projection(x)[:, -1, :] / temperature
|
| 167 |
probs = F.softmax(logits, dim=-1)
|
| 168 |
next_token = torch.multinomial(probs, 1)
|
| 169 |
-
if int(next_token.item()) in stop_ids:
|
| 170 |
-
print("DEBUG first token in stop_ids")
|
| 171 |
-
yield "[EOS]"
|
| 172 |
-
return
|
| 173 |
|
| 174 |
acc, last = [], ""
|
| 175 |
|
|
@@ -177,7 +173,6 @@ def vlm_infer_stream(
|
|
| 177 |
# sampled from prefill
|
| 178 |
tid = int(next_token.item())
|
| 179 |
if tid in stop_ids:
|
| 180 |
-
yield "[EOS]"
|
| 181 |
break
|
| 182 |
|
| 183 |
|
|
|
|
| 166 |
logits = model.llm.vocab_projection(x)[:, -1, :] / temperature
|
| 167 |
probs = F.softmax(logits, dim=-1)
|
| 168 |
next_token = torch.multinomial(probs, 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
acc, last = [], ""
|
| 171 |
|
|
|
|
| 173 |
# sampled from prefill
|
| 174 |
tid = int(next_token.item())
|
| 175 |
if tid in stop_ids:
|
|
|
|
| 176 |
break
|
| 177 |
|
| 178 |
|