Spaces:
Paused
Paused
stream fix
Browse files
app.py
CHANGED
|
@@ -337,63 +337,7 @@ class AbliterationProcessor:
|
|
| 337 |
except Exception as e:
|
| 338 |
return f"❌ Chat error: {str(e)}", history
|
| 339 |
|
| 340 |
-
|
| 341 |
-
"""Streaming chat functionality"""
|
| 342 |
-
if self.model is None or self.tokenizer is None:
|
| 343 |
-
yield "⚠️ Please load a model first!"
|
| 344 |
-
return
|
| 345 |
-
|
| 346 |
-
try:
|
| 347 |
-
# Build conversation history
|
| 348 |
-
conversation = []
|
| 349 |
-
for msg in history:
|
| 350 |
-
if isinstance(msg, dict) and "role" in msg and "content" in msg:
|
| 351 |
-
conversation.append(msg)
|
| 352 |
-
elif isinstance(msg, list) and len(msg) == 2:
|
| 353 |
-
conversation.append({"role": "user", "content": msg[0]})
|
| 354 |
-
if msg[1]:
|
| 355 |
-
conversation.append({"role": "assistant", "content": msg[1]})
|
| 356 |
-
|
| 357 |
-
# Add current message
|
| 358 |
-
conversation.append({"role": "user", "content": message})
|
| 359 |
-
|
| 360 |
-
# Generate tokens
|
| 361 |
-
toks = self.tokenizer.apply_chat_template(
|
| 362 |
-
conversation=conversation,
|
| 363 |
-
add_generation_prompt=True,
|
| 364 |
-
return_tensors="pt"
|
| 365 |
-
)
|
| 366 |
-
|
| 367 |
-
# Stream response
|
| 368 |
-
from transformers import TextStreamer
|
| 369 |
-
|
| 370 |
-
class StreamingTextStreamer(TextStreamer):
|
| 371 |
-
def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
|
| 372 |
-
super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
|
| 373 |
-
self.current_text = ""
|
| 374 |
-
|
| 375 |
-
def on_finalized_text(self, text: str, stream_end: bool = False):
|
| 376 |
-
self.current_text += text
|
| 377 |
-
yield self.current_text
|
| 378 |
-
|
| 379 |
-
streamer = StreamingTextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 380 |
-
|
| 381 |
-
# Generate with streaming
|
| 382 |
-
gen = self.model.generate(
|
| 383 |
-
toks.to(self.model.device),
|
| 384 |
-
max_new_tokens=2048,
|
| 385 |
-
temperature=0.7,
|
| 386 |
-
do_sample=True,
|
| 387 |
-
pad_token_id=self.tokenizer.eos_token_id,
|
| 388 |
-
streamer=streamer
|
| 389 |
-
)
|
| 390 |
-
|
| 391 |
-
# Yield each chunk
|
| 392 |
-
for chunk in streamer.on_finalized_text("", False):
|
| 393 |
-
yield chunk
|
| 394 |
-
|
| 395 |
-
except Exception as e:
|
| 396 |
-
yield f"❌ Chat error: {str(e)}"
|
| 397 |
|
| 398 |
def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
|
| 399 |
"""Create new model card"""
|
|
@@ -598,13 +542,14 @@ def create_interface():
|
|
| 598 |
|
| 599 |
def bot_stream(history):
|
| 600 |
if history and history[-1]["role"] == "user":
|
| 601 |
-
#
|
| 602 |
-
|
| 603 |
|
| 604 |
-
#
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
|
|
|
| 608 |
|
| 609 |
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 610 |
bot_stream, chatbot, chatbot
|
|
|
|
| 337 |
except Exception as e:
|
| 338 |
return f"❌ Chat error: {str(e)}", history
|
| 339 |
|
| 340 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
|
| 343 |
"""Create new model card"""
|
|
|
|
| 542 |
|
| 543 |
def bot_stream(history):
    """Stream the assistant's reply for the most recent user turn.

    Obtains the complete answer from the (module-level) processor in one
    call, then yields it back as a growing prefix — one character per
    yield — so the chat UI appears to stream. Yields nothing when the
    history is empty or its last entry is not a user message.

    Args:
        history: list of ``{"role": ..., "content": ...}`` chat messages;
            the last entry is expected to be the pending user message.

    Yields:
        The full history plus one assistant message whose content grows
        by one character per yield.
    """
    # Guard clauses: only act when there is a fresh user message to answer.
    if not history:
        return
    last = history[-1]
    if last["role"] != "user":
        return

    # Get the complete (non-streamed) reply, passing prior turns as context.
    reply, _ = processor.chat(last["content"], history[:-1])

    # Simulate token streaming by re-emitting ever-longer prefixes of the
    # reply (slicing avoids rebuilding the string by concatenation).
    for cut in range(1, len(reply) + 1):
        yield history + [{"role": "assistant", "content": reply[:cut]}]
|
| 553 |
|
| 554 |
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 555 |
bot_stream, chatbot, chatbot
|