Spaces:

Antigma
/

Abliteration

Paused

App Files Files Community

Brianpuz committed on Jul 1, 2025

Commit

ece0734

1 Parent(s): 1eeb055

try

Browse files

Files changed (1) hide show

app.py +25 -32

app.py CHANGED Viewed

@@ -316,28 +316,38 @@ class AbliterationProcessor:
                 return_tensors="pt"
             )
-            # Generate response without streaming for now (will be handled by Gradio)
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=2048,
                 temperature=0.7,
                 do_sample=True,
-                pad_token_id=self.tokenizer.eos_token_id
-            )
-            # Decode response
-            decoded = self.tokenizer.batch_decode(
-                gen[0][len(toks[0]):],
-                skip_special_tokens=True
             )
-            response = "".join(decoded).strip()
             return response, history + [[message, response]]
         except Exception as e:
             return f"❌ Chat error: {str(e)}", history
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
@@ -530,33 +540,16 @@ def create_interface():
         def bot(history):
             if history and history[-1]["role"] == "user":
-                # Start with empty assistant message
-                history.append({"role": "assistant", "content": ""})
-                # Get the full response
-                response, _ = processor.chat(history[-2]["content"], history[:-2])
-                # Update the assistant message with the full response
-                history[-1]["content"] = response
-            return history
-        def bot_stream(history):
-            if history and history[-1]["role"] == "user":
-                # Get the full response first
                 response, _ = processor.chat(history[-1]["content"], history[:-1])
-                # Simulate streaming by yielding partial responses character by character
-                partial_response = ""
-                for char in response:
-                    partial_response += char
-                    yield history + [{"role": "assistant", "content": partial_response}]
         msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-            bot_stream, chatbot, chatbot
         )
         send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-            bot_stream, chatbot, chatbot
         )
         clear.click(lambda: [], None, chatbot, queue=False)

                 return_tensors="pt"
             )
+            # Generate response with streaming like abliterated_optimized.py
+            from transformers import TextStreamer
+            # Create a custom streamer that captures all output
+            captured_output = []
+            class CustomStreamer(TextStreamer):
+                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
+                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
+                    self.captured = []
+                def on_finalized_text(self, text: str, stream_end: bool = False):
+                    self.captured.append(text)
+                    super().on_finalized_text(text, stream_end)
+            streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=2048,
                 temperature=0.7,
                 do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id,
+                streamer=streamer
             )
+            # Get the complete response from streamer
+            response = "".join(streamer.captured).strip()
             return response, history + [[message, response]]
         except Exception as e:
             return f"❌ Chat error: {str(e)}", history
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
         def bot(history):
             if history and history[-1]["role"] == "user":
                 response, _ = processor.chat(history[-1]["content"], history[:-1])
+                history.append({"role": "assistant", "content": response})
+            return history
         msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+            bot, chatbot, chatbot
         )
         send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+            bot, chatbot, chatbot
         )
         clear.click(lambda: [], None, chatbot, queue=False)