Spaces:
Paused
Paused
stream fix
Browse files
app.py
CHANGED
|
@@ -337,63 +337,7 @@ class AbliterationProcessor:
|
|
| 337 |
except Exception as e:
|
| 338 |
return f"❌ Chat error: {str(e)}", history
|
| 339 |
|
| 340 |
-
|
| 341 |
-
"""Streaming chat functionality"""
|
| 342 |
-
if self.model is None or self.tokenizer is None:
|
| 343 |
-
yield "⚠️ Please load a model first!"
|
| 344 |
-
return
|
| 345 |
-
|
| 346 |
-
try:
|
| 347 |
-
# Build conversation history
|
| 348 |
-
conversation = []
|
| 349 |
-
for msg in history:
|
| 350 |
-
if isinstance(msg, dict) and "role" in msg and "content" in msg:
|
| 351 |
-
conversation.append(msg)
|
| 352 |
-
elif isinstance(msg, list) and len(msg) == 2:
|
| 353 |
-
conversation.append({"role": "user", "content": msg[0]})
|
| 354 |
-
if msg[1]:
|
| 355 |
-
conversation.append({"role": "assistant", "content": msg[1]})
|
| 356 |
-
|
| 357 |
-
# Add current message
|
| 358 |
-
conversation.append({"role": "user", "content": message})
|
| 359 |
-
|
| 360 |
-
# Generate tokens
|
| 361 |
-
toks = self.tokenizer.apply_chat_template(
|
| 362 |
-
conversation=conversation,
|
| 363 |
-
add_generation_prompt=True,
|
| 364 |
-
return_tensors="pt"
|
| 365 |
-
)
|
| 366 |
-
|
| 367 |
-
# Stream response
|
| 368 |
-
from transformers import TextStreamer
|
| 369 |
-
|
| 370 |
-
class StreamingTextStreamer(TextStreamer):
|
| 371 |
-
def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
|
| 372 |
-
super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
|
| 373 |
-
self.current_text = ""
|
| 374 |
-
|
| 375 |
-
def on_finalized_text(self, text: str, stream_end: bool = False):
|
| 376 |
-
self.current_text += text
|
| 377 |
-
yield self.current_text
|
| 378 |
-
|
| 379 |
-
streamer = StreamingTextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 380 |
-
|
| 381 |
-
# Generate with streaming
|
| 382 |
-
gen = self.model.generate(
|
| 383 |
-
toks.to(self.model.device),
|
| 384 |
-
max_new_tokens=2048,
|
| 385 |
-
temperature=0.7,
|
| 386 |
-
do_sample=True,
|
| 387 |
-
pad_token_id=self.tokenizer.eos_token_id,
|
| 388 |
-
streamer=streamer
|
| 389 |
-
)
|
| 390 |
-
|
| 391 |
-
# Yield each chunk
|
| 392 |
-
for chunk in streamer.on_finalized_text("", False):
|
| 393 |
-
yield chunk
|
| 394 |
-
|
| 395 |
-
except Exception as e:
|
| 396 |
-
yield f"❌ Chat error: {str(e)}"
|
| 397 |
|
| 398 |
def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
|
| 399 |
"""Create new model card"""
|
|
@@ -598,13 +542,14 @@ def create_interface():
|
|
| 598 |
|
| 599 |
def bot_stream(history):
|
| 600 |
if history and history[-1]["role"] == "user":
|
| 601 |
-
#
|
| 602 |
-
|
| 603 |
|
| 604 |
-
#
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
|
|
|
| 608 |
|
| 609 |
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 610 |
bot_stream, chatbot, chatbot
|
|
|
|
| 337 |
except Exception as e:
|
| 338 |
return f"❌ Chat error: {str(e)}", history
|
| 339 |
|
| 340 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
|
| 343 |
"""Create new model card"""
|
|
|
|
| 542 |
|
| 543 |
def bot_stream(history):
    """Stream the assistant's reply for the most recent user turn.

    Obtains the complete answer from the (module-level) processor in one
    call, then yields it back as a growing prefix — one character per
    yield — so the chat UI appears to stream. Yields nothing when the
    history is empty or its last entry is not a user message.

    Args:
        history: list of ``{"role": ..., "content": ...}`` chat messages;
            the last entry is expected to be the pending user message.

    Yields:
        The full history plus one assistant message whose content grows
        by one character per yield.
    """
    # Guard clauses: only act when there is a fresh user message to answer.
    if not history:
        return
    last = history[-1]
    if last["role"] != "user":
        return

    # Get the complete (non-streamed) reply, passing prior turns as context.
    reply, _ = processor.chat(last["content"], history[:-1])

    # Simulate token streaming by re-emitting ever-longer prefixes of the
    # reply (slicing avoids rebuilding the string by concatenation).
    for cut in range(1, len(reply) + 1):
        yield history + [{"role": "assistant", "content": reply[:cut]}]
|
| 553 |
|
| 554 |
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 555 |
bot_stream, chatbot, chatbot
|