Spaces:
Runtime error
Runtime error
Update vlm.py
Browse files
vlm.py
CHANGED
@@ -87,7 +87,7 @@ def stream_response(messages: list[dict]):
     generation_kwargs = dict(
         inputs,
         streamer=streamer,
-        max_new_tokens=
+        max_new_tokens=2_048,
         do_sample=False
     )

@@ -120,7 +120,7 @@ def get_response(messages: list[dict]):
     input_len = inputs["input_ids"].shape[-1]

     with torch.inference_mode():
-        generation = model.generate(**inputs, max_new_tokens=
+        generation = model.generate(**inputs, max_new_tokens=2_048, do_sample=False)
         generation = generation[0][input_len:]

     decoded = processor.decode(generation, skip_special_tokens=True)