add debug print
app.py CHANGED
@@ -288,12 +288,18 @@ class AbliterationProcessor:
             modified_weight = down_proj_weight - scale_factor * torch.matmul(projection_matrix, down_proj_weight)
             layer.mlp.down_proj.weight.data = modified_weight

-    def chat(self, message, history, max_new_tokens=2048):
+    def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
         """Chat functionality"""
+        print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")
+
         if self.model is None or self.tokenizer is None:
+            print("DEBUG: Model or tokenizer not loaded")
             return "⚠️ Please load a model first!", history

         try:
+            print(f"DEBUG: Processing message: {message[:100]}...")
+            print(f"DEBUG: History length: {len(history)}")
+
             # Build conversation history
             conversation = []
             for msg in history:
@@ -308,13 +314,16 @@ class AbliterationProcessor:

             # Add current message
             conversation.append({"role": "user", "content": message})
+            print(f"DEBUG: Conversation length: {len(conversation)}")

             # Generate tokens
+            print("DEBUG: Generating tokens...")
             toks = self.tokenizer.apply_chat_template(
                 conversation=conversation,
                 add_generation_prompt=True,
                 return_tensors="pt"
             )
+            print(f"DEBUG: Input tokens shape: {toks.shape}")

             # Generate response with streaming like abliterated_optimized.py
             from transformers import TextStreamer
@@ -328,25 +337,36 @@ class AbliterationProcessor:
                     self.captured = []

                 def on_finalized_text(self, text: str, stream_end: bool = False):
+                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
                     self.captured.append(text)
                     super().on_finalized_text(text, stream_end)

             streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
-                temperature=
+                temperature=temperature,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
                 streamer=streamer
             )

+            print(f"DEBUG: Generation completed, output shape: {gen.shape}")
+            print(f"DEBUG: Streamer captured {len(streamer.captured)} text chunks")
+
             # Get the complete response from streamer
             response = "".join(streamer.captured).strip()
+            print(f"DEBUG: Final response length: {len(response)}")
+            print(f"DEBUG: Response preview: {response[:200]}...")
+
             return response, history + [[message, response]]

         except Exception as e:
+            print(f"DEBUG: Exception occurred: {str(e)}")
+            import traceback
+            traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history

 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
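Note: the generation path in the hunks above reduces to the pattern below. This is a minimal, self-contained sketch, assuming `model` and `tokenizer` are an already-loaded causal LM and its tokenizer; the helper name `generate_reply` and its defaults are illustrative and not part of app.py. The point is that a TextStreamer subclass can both print chunks live and keep them for the returned reply, and that the UI-selected temperature is threaded through to model.generate() instead of being hard-coded.

# Sketch: capture-while-streaming with temperature passed through (illustrative names).
from transformers import TextStreamer

class CapturingStreamer(TextStreamer):
    """Prints chunks as they stream and also keeps them for the returned reply."""
    def __init__(self, tokenizer, **kwargs):
        super().__init__(tokenizer, **kwargs)
        self.captured = []

    def on_finalized_text(self, text: str, stream_end: bool = False):
        self.captured.append(text)                   # keep the chunk
        super().on_finalized_text(text, stream_end)  # still print it live

def generate_reply(model, tokenizer, conversation, max_new_tokens=2048, temperature=0.7):
    toks = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    streamer = CapturingStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    model.generate(
        toks.to(model.device),
        max_new_tokens=max_new_tokens,
        temperature=temperature,  # user-controlled instead of hard-coded
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    return "".join(streamer.captured).strip()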
@@ -472,16 +492,6 @@ def create_interface():
                 org_token.render()
                 private_repo.render()

-                gr.Markdown("### 💬 Chat Settings")
-                max_new_tokens = gr.Number(
-                    value=2048,
-                    label="Max New Tokens",
-                    minimum=1,
-                    maximum=8192,
-                    step=1,
-                    info="Maximum number of tokens to generate in chat responses"
-                )
-
                 process_btn = gr.Button("🚀 Start Processing", variant="primary")
                 process_output = gr.Markdown(label="Processing Result")
                 process_image = gr.Image(show_label=False)
@@ -506,25 +516,48 @@ def create_interface():

             # Chat tab
             with gr.TabItem("💬 Chat Test"):
-                chatbot = gr.Chatbot(
-                    label="Chat Window",
-                    height=400,
-                    type="messages"
-                )
-                msg = gr.Textbox(
-                    label="Input Message",
-                    placeholder="Enter your question...",
-                    lines=3
-                )
                 with gr.Row():
-
-
+                    with gr.Column(scale=3):
+                        chatbot = gr.Chatbot(
+                            label="Chat Window",
+                            height=400,
+                            type="messages"
+                        )
+                        msg = gr.Textbox(
+                            label="Input Message",
+                            placeholder="Enter your question...",
+                            lines=3
+                        )
+                        with gr.Row():
+                            send_btn = gr.Button("📤 Send", variant="primary")
+                            clear = gr.Button("🗑️ Clear Chat")
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### ⚙️ Chat Settings")
+                        max_new_tokens = gr.Number(
+                            value=2048,
+                            label="Max New Tokens",
+                            minimum=1,
+                            maximum=8192,
+                            step=1,
+                            info="Maximum number of tokens to generate"
+                        )
+                        temperature = gr.Slider(
+                            minimum=0.1,
+                            maximum=2.0,
+                            value=0.7,
+                            step=0.1,
+                            label="Temperature",
+                            info="Higher values = more creative, Lower values = more focused"
+                        )

                 gr.Markdown("""
                 **Usage Tips:**
                 - Load a model first, then you can start chatting
                 - The processed model will have reduced refusal behavior
                 - You can test various sensitive questions
+                - Adjust Max New Tokens to control response length
+                - Adjust Temperature to control creativity
                 """)

                 # Bind events
@@ -548,18 +581,18 @@ def create_interface():
                 def user(user_message, history):
                     return "", history + [{"role": "user", "content": user_message}]

-                def bot(history, max_new_tokens):
+                def bot(history, max_new_tokens, temperature):
                     if history and history[-1]["role"] == "user":
-                        response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens)
+                        response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
                         history.append({"role": "assistant", "content": response})
                     return history

                 msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-                    bot, [chatbot, max_new_tokens], chatbot
+                    bot, [chatbot, max_new_tokens, temperature], chatbot
                 )

                 send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-                    bot, [chatbot, max_new_tokens], chatbot
+                    bot, [chatbot, max_new_tokens, temperature], chatbot
                 )

                 clear.click(lambda: [], None, chatbot, queue=False)
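For context on the event wiring in the last hunk: `msg.submit(...)` and `send_btn.click(...)` each chain two steps, where `user` echoes the turn immediately (hence queue=False) and `bot` then generates with whatever values the sidebar components currently hold, passed positionally via the `inputs` list. The self-contained sketch below illustrates that flow with a dummy processor standing in for `AbliterationProcessor`; the stub and its reply text are illustrative only, not the app's code.

# Sketch of the chained Gradio event wiring (dummy processor; illustrative only).
import gradio as gr

class DummyProcessor:
    # Stand-in for AbliterationProcessor.chat so the sketch runs on its own.
    def chat(self, message, history, max_new_tokens, temperature):
        return f"echo ({max_new_tokens} tokens, T={temperature}): {message}", history

processor = DummyProcessor()

def user(user_message, history):
    # Step 1 (queue=False): show the user's turn immediately and clear the textbox.
    return "", history + [{"role": "user", "content": user_message}]

def bot(history, max_new_tokens, temperature):
    # Step 2: components listed in `inputs` arrive here positionally,
    # so the Number/Slider values reach the processor without globals.
    if history and history[-1]["role"] == "user":
        response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
        history.append({"role": "assistant", "content": response})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages", height=400)
    msg = gr.Textbox(lines=3)
    send_btn = gr.Button("Send")
    clear = gr.Button("Clear")
    max_new_tokens = gr.Number(value=2048, minimum=1, maximum=8192, step=1, label="Max New Tokens")
    temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, max_new_tokens, temperature], chatbot
    )
    send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, max_new_tokens, temperature], chatbot
    )
    clear.click(lambda: [], None, chatbot, queue=False)

demo.launch()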