Commit ·
4eec729
1
Parent(s): aa68cd8
fixed stream
Browse files
app.py
CHANGED
|
@@ -75,6 +75,9 @@ def chat_fn(message, history, temperature, top_p, top_k):
|
|
| 75 |
"<assistant>\n"
|
| 76 |
)
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
for chunk in vlm_infer_stream(
|
| 79 |
model=model_gpu,
|
| 80 |
image_tensor=image_tensor,
|
|
@@ -84,13 +87,15 @@ def chat_fn(message, history, temperature, top_p, top_k):
|
|
| 84 |
top_p=top_p if top_p > 0 else None,
|
| 85 |
top_k=top_k if top_k > 0 else None,
|
| 86 |
):
|
| 87 |
-
|
|
|
|
| 88 |
|
| 89 |
model_gpu.to("cpu")
|
| 90 |
torch.cuda.empty_cache()
|
| 91 |
|
| 92 |
|
| 93 |
|
|
|
|
| 94 |
# =====================================================
|
| 95 |
# UI (ChatInterface, multimodal)
|
| 96 |
# =====================================================
|
|
|
|
| 75 |
"<assistant>\n"
|
| 76 |
)
|
| 77 |
|
| 78 |
+
# ★ Key point: accumulate the chunks and yield the running output
|
| 79 |
+
output = ""
|
| 80 |
+
|
| 81 |
for chunk in vlm_infer_stream(
|
| 82 |
model=model_gpu,
|
| 83 |
image_tensor=image_tensor,
|
|
|
|
| 87 |
top_p=top_p if top_p > 0 else None,
|
| 88 |
top_k=top_k if top_k > 0 else None,
|
| 89 |
):
|
| 90 |
+
output += chunk
|
| 91 |
+
yield output
|
| 92 |
|
| 93 |
model_gpu.to("cpu")
|
| 94 |
torch.cuda.empty_cache()
|
| 95 |
|
| 96 |
|
| 97 |
|
| 98 |
+
|
| 99 |
# =====================================================
|
| 100 |
# UI (ChatInterface, multimodal)
|
| 101 |
# =====================================================
|