add debug print
app.py CHANGED
@@ -288,12 +288,18 @@ class AbliterationProcessor:
             modified_weight = down_proj_weight - scale_factor * torch.matmul(projection_matrix, down_proj_weight)
             layer.mlp.down_proj.weight.data = modified_weight

-    def chat(self, message, history, max_new_tokens=2048):
+    def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
         """Chat functionality"""
+        print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")
+
         if self.model is None or self.tokenizer is None:
+            print("DEBUG: Model or tokenizer not loaded")
             return "⚠️ Please load a model first!", history

         try:
+            print(f"DEBUG: Processing message: {message[:100]}...")
+            print(f"DEBUG: History length: {len(history)}")
+
             # Build conversation history
             conversation = []
             for msg in history:
@@ -308,13 +314,16 @@ class AbliterationProcessor:

             # Add current message
             conversation.append({"role": "user", "content": message})
+            print(f"DEBUG: Conversation length: {len(conversation)}")

             # Generate tokens
+            print("DEBUG: Generating tokens...")
             toks = self.tokenizer.apply_chat_template(
                 conversation=conversation,
                 add_generation_prompt=True,
                 return_tensors="pt"
             )
+            print(f"DEBUG: Input tokens shape: {toks.shape}")

             # Generate response with streaming like abliterated_optimized.py
             from transformers import TextStreamer
@@ -328,25 +337,36 @@ class AbliterationProcessor:
                     self.captured = []

                 def on_finalized_text(self, text: str, stream_end: bool = False):
+                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
                     self.captured.append(text)
                     super().on_finalized_text(text, stream_end)

             streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
-                temperature=
+                temperature=temperature,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
                 streamer=streamer
             )

+            print(f"DEBUG: Generation completed, output shape: {gen.shape}")
+            print(f"DEBUG: Streamer captured {len(streamer.captured)} text chunks")
+
             # Get the complete response from streamer
             response = "".join(streamer.captured).strip()
+            print(f"DEBUG: Final response length: {len(response)}")
+            print(f"DEBUG: Response preview: {response[:200]}...")
+
             return response, history + [[message, response]]

         except Exception as e:
+            print(f"DEBUG: Exception occurred: {str(e)}")
+            import traceback
+            traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history

 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
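Note: the generation path in the hunks above reduces to the pattern below. This is a minimal, self-contained sketch, assuming `model` and `tokenizer` are an already-loaded causal LM and its tokenizer; the helper name `generate_reply` and its defaults are illustrative and not part of app.py. The point is that a TextStreamer subclass can both print chunks live and keep them for the returned reply, and that the UI-selected temperature is threaded through to model.generate() instead of being hard-coded.

# Sketch: capture-while-streaming with temperature passed through (illustrative names).
from transformers import TextStreamer

class CapturingStreamer(TextStreamer):
    """Prints chunks as they stream and also keeps them for the returned reply."""
    def __init__(self, tokenizer, **kwargs):
        super().__init__(tokenizer, **kwargs)
        self.captured = []

    def on_finalized_text(self, text: str, stream_end: bool = False):
        self.captured.append(text)                   # keep the chunk
        super().on_finalized_text(text, stream_end)  # still print it live

def generate_reply(model, tokenizer, conversation, max_new_tokens=2048, temperature=0.7):
    toks = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    streamer = CapturingStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    model.generate(
        toks.to(model.device),
        max_new_tokens=max_new_tokens,
        temperature=temperature,  # user-controlled instead of hard-coded
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    return "".join(streamer.captured).strip()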
@@ -472,16 +492,6 @@ def create_interface():
                 org_token.render()
                 private_repo.render()

-                gr.Markdown("### 💬 Chat Settings")
-                max_new_tokens = gr.Number(
-                    value=2048,
-                    label="Max New Tokens",
-                    minimum=1,
-                    maximum=8192,
-                    step=1,
-                    info="Maximum number of tokens to generate in chat responses"
-                )
-
                 process_btn = gr.Button("🚀 Start Processing", variant="primary")
                 process_output = gr.Markdown(label="Processing Result")
                 process_image = gr.Image(show_label=False)
@@ -506,25 +516,48 @@ def create_interface():

             # Chat tab
             with gr.TabItem("💬 Chat Test"):
-                chatbot = gr.Chatbot(
-                    label="Chat Window",
-                    height=400,
-                    type="messages"
-                )
-                msg = gr.Textbox(
-                    label="Input Message",
-                    placeholder="Enter your question...",
-                    lines=3
-                )
                 with gr.Row():
-
-
+                    with gr.Column(scale=3):
+                        chatbot = gr.Chatbot(
+                            label="Chat Window",
+                            height=400,
+                            type="messages"
+                        )
+                        msg = gr.Textbox(
+                            label="Input Message",
+                            placeholder="Enter your question...",
+                            lines=3
+                        )
+                        with gr.Row():
+                            send_btn = gr.Button("📤 Send", variant="primary")
+                            clear = gr.Button("🗑️ Clear Chat")
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### ⚙️ Chat Settings")
+                        max_new_tokens = gr.Number(
+                            value=2048,
+                            label="Max New Tokens",
+                            minimum=1,
+                            maximum=8192,
+                            step=1,
+                            info="Maximum number of tokens to generate"
+                        )
+                        temperature = gr.Slider(
+                            minimum=0.1,
+                            maximum=2.0,
+                            value=0.7,
+                            step=0.1,
+                            label="Temperature",
+                            info="Higher values = more creative, Lower values = more focused"
+                        )

                 gr.Markdown("""
                 **Usage Tips:**
                 - Load a model first, then you can start chatting
                 - The processed model will have reduced refusal behavior
                 - You can test various sensitive questions
+                - Adjust Max New Tokens to control response length
+                - Adjust Temperature to control creativity
                 """)

                 # Bind events
@@ -548,18 +581,18 @@ def create_interface():
                 def user(user_message, history):
                     return "", history + [{"role": "user", "content": user_message}]

-                def bot(history, max_new_tokens):
+                def bot(history, max_new_tokens, temperature):
                     if history and history[-1]["role"] == "user":
-                        response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens)
+                        response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
                         history.append({"role": "assistant", "content": response})
                     return history

                 msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-                    bot, [chatbot, max_new_tokens], chatbot
+                    bot, [chatbot, max_new_tokens, temperature], chatbot
                 )

                 send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-                    bot, [chatbot, max_new_tokens], chatbot
+                    bot, [chatbot, max_new_tokens, temperature], chatbot
                 )

                 clear.click(lambda: [], None, chatbot, queue=False)
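For context on the event wiring in the last hunk: `msg.submit(...)` and `send_btn.click(...)` each chain two steps, where `user` echoes the turn immediately (hence queue=False) and `bot` then generates with whatever values the sidebar components currently hold, passed positionally via the `inputs` list. The self-contained sketch below illustrates that flow with a dummy processor standing in for `AbliterationProcessor`; the stub and its reply text are illustrative only, not the app's code.

# Sketch of the chained Gradio event wiring (dummy processor; illustrative only).
import gradio as gr

class DummyProcessor:
    # Stand-in for AbliterationProcessor.chat so the sketch runs on its own.
    def chat(self, message, history, max_new_tokens, temperature):
        return f"echo ({max_new_tokens} tokens, T={temperature}): {message}", history

processor = DummyProcessor()

def user(user_message, history):
    # Step 1 (queue=False): show the user's turn immediately and clear the textbox.
    return "", history + [{"role": "user", "content": user_message}]

def bot(history, max_new_tokens, temperature):
    # Step 2: components listed in `inputs` arrive here positionally,
    # so the Number/Slider values reach the processor without globals.
    if history and history[-1]["role"] == "user":
        response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
        history.append({"role": "assistant", "content": response})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages", height=400)
    msg = gr.Textbox(lines=3)
    send_btn = gr.Button("Send")
    clear = gr.Button("Clear")
    max_new_tokens = gr.Number(value=2048, minimum=1, maximum=8192, step=1, label="Max New Tokens")
    temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, max_new_tokens, temperature], chatbot
    )
    send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, max_new_tokens, temperature], chatbot
    )
    clear.click(lambda: [], None, chatbot, queue=False)

demo.launch()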