Brianpuz committed on
Commit 8c92c76 · 1 Parent(s): fbb861f
Files changed (1)
  1. app.py +80 -18
app.py CHANGED
@@ -316,23 +316,69 @@ class AbliterationProcessor:
                 return_tensors="pt"
             )
 
-            # Generate response with streaming like abliterated_optimized.py
-            from transformers import TextStreamer
+            # Generate response without streaming for now (will be handled by Gradio)
+            gen = self.model.generate(
+                toks.to(self.model.device),
+                max_new_tokens=2048,
+                temperature=0.7,
+                do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id
+            )
+
+            # Decode response
+            decoded = self.tokenizer.batch_decode(
+                gen[0][len(toks[0]):],
+                skip_special_tokens=True
+            )
+
+            response = "".join(decoded).strip()
+            return response, history + [[message, response]]
+
+        except Exception as e:
+            return f"❌ Chat error: {str(e)}", history
+
+    def chat_stream(self, message, history):
+        """Streaming chat functionality"""
+        if self.model is None or self.tokenizer is None:
+            yield "⚠️ Please load a model first!"
+            return
+
+        try:
+            # Build conversation history
+            conversation = []
+            for msg in history:
+                if isinstance(msg, dict) and "role" in msg and "content" in msg:
+                    conversation.append(msg)
+                elif isinstance(msg, list) and len(msg) == 2:
+                    conversation.append({"role": "user", "content": msg[0]})
+                    if msg[1]:
+                        conversation.append({"role": "assistant", "content": msg[1]})
+
+            # Add current message
+            conversation.append({"role": "user", "content": message})
 
-            # Create a custom streamer that captures all output
-            captured_output = []
+            # Generate tokens
+            toks = self.tokenizer.apply_chat_template(
+                conversation=conversation,
+                add_generation_prompt=True,
+                return_tensors="pt"
+            )
 
-            class CustomStreamer(TextStreamer):
+            # Stream response
+            from transformers import TextStreamer
+
+            class StreamingTextStreamer(TextStreamer):
                 def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
                     super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.captured = []
+                    self.current_text = ""
 
                 def on_finalized_text(self, text: str, stream_end: bool = False):
-                    self.captured.append(text)
-                    super().on_finalized_text(text, stream_end)
+                    self.current_text += text
+                    yield self.current_text
 
-            streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
+            streamer = StreamingTextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
 
+            # Generate with streaming
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=2048,
@@ -342,12 +388,12 @@ class AbliterationProcessor:
                 streamer=streamer
             )
 
-            # Get the complete response from streamer
-            response = "".join(streamer.captured).strip()
-            return response, history + [[message, response]]
-
+            # Yield each chunk
+            for chunk in streamer.on_finalized_text("", False):
+                yield chunk
+
         except Exception as e:
-            return f"❌ Chat error: {str(e)}", history
+            yield f"❌ Chat error: {str(e)}"
 
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
@@ -540,16 +586,32 @@ def create_interface():
 
     def bot(history):
         if history and history[-1]["role"] == "user":
-            response, _ = processor.chat(history[-1]["content"], history[:-1])
-            history.append({"role": "assistant", "content": response})
+            # Start with empty assistant message
+            history.append({"role": "assistant", "content": ""})
+
+            # Get the full response
+            response, _ = processor.chat(history[-2]["content"], history[:-2])
+
+            # Update the assistant message with the full response
+            history[-1]["content"] = response
         return history
 
+    def bot_stream(history):
+        if history and history[-1]["role"] == "user":
+            # Start with empty assistant message
+            history.append({"role": "assistant", "content": ""})
+
+            # Get streaming response
+            for response_chunk in processor.chat_stream(history[-2]["content"], history[:-2]):
+                history[-1]["content"] = response_chunk
+                yield history
+
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, chatbot, chatbot
+        bot_stream, chatbot, chatbot
     )
 
     send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, chatbot, chatbot
+        bot_stream, chatbot, chatbot
     )
 
     clear.click(lambda: [], None, chatbot, queue=False)
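For reference, a self-contained sketch (not from this commit) of the event wiring that bot_stream relies on: a generator callback chained with .then() pushes each yielded history back into the Chatbot. The role/content dicts used above imply a messages-format Chatbot, so the sketch constructs one explicitly; the component names mirror create_interface, but the demo itself and its user handler are illustrative:

import time
import gradio as gr

def user(message, history):
    # Minimal user handler (the app's own implementation is not part of this diff):
    # append the user turn and clear the textbox.
    return "", history + [{"role": "user", "content": message}]

def bot_stream(history):
    # Stand-in for processor.chat_stream: emits a growing assistant message.
    history.append({"role": "assistant", "content": ""})
    for word in "this reply is streamed word by word".split():
        history[-1]["content"] += word + " "
        time.sleep(0.1)
        yield history          # each yield re-renders the Chatbot

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_stream, chatbot, chatbot
    )

if __name__ == "__main__":
    demo.launch()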