app.py
CHANGED
@@ -289,7 +289,7 @@ class AbliterationProcessor:
         layer.mlp.down_proj.weight.data = modified_weight
 
     def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
-        """Chat functionality"""
+        """Chat functionality with streaming output"""
         print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")
 
         if self.model is None or self.tokenizer is None:
@@ -325,25 +325,14 @@ class AbliterationProcessor:
             )
             print(f"DEBUG: Input tokens shape: {toks.shape}")
 
-            # Generate response with streaming
-
-            # Create a custom streamer that captures all output
-            captured_output = []
-
-            class CustomStreamer(TextStreamer):
-                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
-                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.captured = []
-
-                def on_finalized_text(self, text: str, stream_end: bool = False):
-                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
-                    self.captured.append(text)
-                    super().on_finalized_text(text, stream_end)
-
+            # Generate response with streaming
+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")
+
+            # Use TextStreamer to show output in real-time
+            from transformers import TextStreamer
+            streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+            # Generate with streamer to show output in console
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
@@ -353,21 +342,20 @@ class AbliterationProcessor:
                 streamer=streamer
             )
 
-            response = "".join(streamer.captured).strip()
-            print(f"DEBUG: Final response length: {len(response)}")
-            print(f"DEBUG: Response preview: {response[:200]}...")
-
-            return
+            # Decode the generated tokens
+            generated_text = self.tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
+            print(f"DEBUG: Generated text length: {len(generated_text)}")
+            print(f"DEBUG: Generated text preview: {generated_text[:200]}...")
+
+            return generated_text, history + [[message, generated_text]]
 
         except Exception as e:
             print(f"DEBUG: Exception occurred: {str(e)}")
             import traceback
             traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history
 
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
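The hunks above replace the CustomStreamer subclass, which captured text in its on_finalized_text override, with the stock transformers TextStreamer, which only echoes tokens to the console as they are generated. The full response is then recovered independently of the streamer, by slicing the prompt tokens off the tensor that generate returns and decoding the remainder. A minimal standalone sketch of that pattern; the "gpt2" checkpoint and the prompt are placeholders, not taken from this Space:

from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# TextStreamer prints new tokens to stdout but keeps no copy of them,
# so the response is reconstructed afterwards from the returned tensor.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

toks = tokenizer("The quick brown fox", return_tensors="pt").input_ids
gen = model.generate(
    toks,
    max_new_tokens=32,
    streamer=streamer,
    pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
)

# Decode only the newly generated tokens, as the diff does
response = tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
print(f"captured {len(response)} characters")

Decoding from the returned tensor decouples console streaming from response capture, which is what the removed CustomStreamer buffering did by hand. The interface-side change follows below.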
@@ -577,12 +565,13 @@ def create_interface():
         outputs=[process_output, process_image]
     )
 
-    # Chat functionality
+    # Chat functionality with streaming
    def user(user_message, history):
         return "", history + [{"role": "user", "content": user_message}]
 
     def bot(history, max_new_tokens, temperature):
         if history and history[-1]["role"] == "user":
+            # Get complete response first
             response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
             history.append({"role": "assistant", "content": response})
         return history
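The bot handler keeps the chat history in Gradio's messages format (a list of {"role": ..., "content": ...} dicts) and uses only the first element of processor.chat's return value, discarding the pair-style history that chat now builds. A sketch of how this user/bot pair is typically wired to a messages-format Chatbot; the submit/then chain, the slider components, and the stand-in chat function are assumptions, since the actual event hookup is outside this diff:

import gradio as gr

def stand_in_chat(message, history, max_new_tokens, temperature):
    # Placeholder for processor.chat; returns (response, updated_history)
    return f"echo: {message}", history

def user(user_message, history):
    return "", history + [{"role": "user", "content": user_message}]

def bot(history, max_new_tokens, temperature):
    if history and history[-1]["role"] == "user":
        response, _ = stand_in_chat(history[-1]["content"], history[:-1],
                                    max_new_tokens, temperature)
        history.append({"role": "assistant", "content": response})
    return history

with gr.Blocks() as demo:
    # type="messages" matches the role/content dicts used above
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    max_tokens = gr.Slider(1, 4096, value=2048, label="max_new_tokens")
    temp = gr.Slider(0.0, 2.0, value=0.7, label="temperature")
    msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
        bot, [chatbot, max_tokens, temp], chatbot
    )

demo.launch()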