Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -66,12 +66,13 @@ class CreedBrattonAI:
|
|
| 66 |
padding_side="left"
|
| 67 |
)
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
custom_tokens = ["<thinking>", "<conspiracy>", "<tangent>"]
|
| 71 |
-
print(f"🎸 Adding Creed's custom tokens: {custom_tokens}")
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
print(f"✅ Added {num_added_tokens} custom tokens")
|
| 75 |
|
| 76 |
if self.tokenizer.pad_token is None:
|
| 77 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
|
@@ -102,10 +103,10 @@ class CreedBrattonAI:
|
|
| 102 |
)
|
| 103 |
self.model = self.model.to("cpu")
|
| 104 |
|
| 105 |
-
# Resize embeddings for custom tokens
|
| 106 |
-
if num_added_tokens > 0:
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
self.model.eval()
|
| 111 |
|
|
@@ -242,7 +243,7 @@ class CreedBrattonAI:
|
|
| 242 |
def _format_conversation(self, message: str, history: List[List[str]]) -> str:
|
| 243 |
"""Format the conversation for the model with proper system prompt"""
|
| 244 |
|
| 245 |
-
#
|
| 246 |
system_prompt = """You are Creed Bratton from The Office. You embody his complete personality and speaking patterns.
|
| 247 |
|
| 248 |
CORE IDENTITY:
|
|
@@ -267,11 +268,6 @@ SPEAKING STYLE:
|
|
| 267 |
- Sometimes asks unexpected questions or makes odd observations
|
| 268 |
- Uses dated slang and references from multiple decades
|
| 269 |
|
| 270 |
-
USE THESE SPECIAL TOKENS TO STRUCTURE YOUR RESPONSES:
|
| 271 |
-
<thinking> - For internal monologue, processing thoughts, or when confused
|
| 272 |
-
<conspiracy> - For wild theories, suspicious observations, or paranoid thoughts
|
| 273 |
-
<tangent> - For sudden topic changes, random memories, or unrelated stories
|
| 274 |
-
|
| 275 |
RESPONSE GUIDELINES:
|
| 276 |
- Stay in character at all times
|
| 277 |
- Mix profound insights with complete nonsense
|
|
@@ -297,18 +293,15 @@ Remember: You're not trying to be helpful in a traditional sense - you're being
|
|
| 297 |
return conversation
|
| 298 |
|
| 299 |
def _clean_response(self, response: str) -> str:
|
| 300 |
-
"""Clean up the model response
|
|
|
|
|
|
|
| 301 |
|
| 302 |
# Remove common artifacts
|
| 303 |
response = response.replace("Human:", "").replace("Creed:", "")
|
| 304 |
|
| 305 |
-
#
|
| 306 |
-
|
| 307 |
-
response = response.replace("</thinking>", "")
|
| 308 |
-
response = response.replace("<conspiracy>", "🕵️ *conspiracy mode* ")
|
| 309 |
-
response = response.replace("</conspiracy>", "")
|
| 310 |
-
response = response.replace("<tangent>", "π *tangent* ")
|
| 311 |
-
response = response.replace("</tangent>", "")
|
| 312 |
|
| 313 |
# Remove excessive whitespace
|
| 314 |
response = " ".join(response.split())
|
|
@@ -317,6 +310,7 @@ Remember: You're not trying to be helpful in a traditional sense - you're being
|
|
| 317 |
if response and not response.endswith(('.', '!', '?', '...', '*')):
|
| 318 |
response += "."
|
| 319 |
|
|
|
|
| 320 |
return response
|
| 321 |
|
| 322 |
def creed_wisdom_tool(self, topic: str = "life") -> str:
|
|
@@ -719,7 +713,7 @@ def main():
|
|
| 719 |
<div class="info-box">
|
| 720 |
<strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
|
| 721 |
<strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
|
| 722 |
-
<strong>
|
| 723 |
</div>
|
| 724 |
""")
|
| 725 |
|
|
|
|
| 66 |
padding_side="left"
|
| 67 |
)
|
| 68 |
|
| 69 |
+
# TEMPORARILY DISABLE custom tokens - they're causing corruption
|
| 70 |
+
# custom_tokens = ["<thinking>", "<conspiracy>", "<tangent>"]
|
| 71 |
+
# print(f"🎸 Adding Creed's custom tokens: {custom_tokens}")
|
| 72 |
+
# num_added_tokens = self.tokenizer.add_tokens(custom_tokens)
|
| 73 |
+
# print(f"✅ Added {num_added_tokens} custom tokens")
|
| 74 |
|
| 75 |
+
print("⚠️ Custom tokens disabled to prevent corruption")
|
|
|
|
| 76 |
|
| 77 |
if self.tokenizer.pad_token is None:
|
| 78 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
|
|
|
| 103 |
)
|
| 104 |
self.model = self.model.to("cpu")
|
| 105 |
|
| 106 |
+
# Resize embeddings for custom tokens - DISABLED
|
| 107 |
+
# if num_added_tokens > 0:
|
| 108 |
+
# print(f"🔧 Resizing model embeddings for {num_added_tokens} custom tokens")
|
| 109 |
+
# self.model.resize_token_embeddings(len(self.tokenizer))
|
| 110 |
|
| 111 |
self.model.eval()
|
| 112 |
|
|
|
|
| 243 |
def _format_conversation(self, message: str, history: List[List[str]]) -> str:
|
| 244 |
"""Format the conversation for the model with proper system prompt"""
|
| 245 |
|
| 246 |
+
# Simplified Creed system prompt - custom tokens disabled
|
| 247 |
system_prompt = """You are Creed Bratton from The Office. You embody his complete personality and speaking patterns.
|
| 248 |
|
| 249 |
CORE IDENTITY:
|
|
|
|
| 268 |
- Sometimes asks unexpected questions or makes odd observations
|
| 269 |
- Uses dated slang and references from multiple decades
|
| 270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
RESPONSE GUIDELINES:
|
| 272 |
- Stay in character at all times
|
| 273 |
- Mix profound insights with complete nonsense
|
|
|
|
| 293 |
return conversation
|
| 294 |
|
| 295 |
def _clean_response(self, response: str) -> str:
|
| 296 |
+
"""Clean up the model response - custom tokens disabled"""
|
| 297 |
+
|
| 298 |
+
print(f"π Raw model output: {response}")
|
| 299 |
|
| 300 |
# Remove common artifacts
|
| 301 |
response = response.replace("Human:", "").replace("Creed:", "")
|
| 302 |
|
| 303 |
+
# Custom token formatting disabled to prevent corruption
|
| 304 |
+
# Just clean up basic formatting
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
# Remove excessive whitespace
|
| 307 |
response = " ".join(response.split())
|
|
|
|
| 310 |
if response and not response.endswith(('.', '!', '?', '...', '*')):
|
| 311 |
response += "."
|
| 312 |
|
| 313 |
+
print(f"π Cleaned response: {response}")
|
| 314 |
return response
|
| 315 |
|
| 316 |
def creed_wisdom_tool(self, topic: str = "life") -> str:
|
|
|
|
| 713 |
<div class="info-box">
|
| 714 |
<strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
|
| 715 |
<strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
|
| 716 |
+
<strong>Status:</strong> Custom tokens disabled (preventing corruption)
|
| 717 |
</div>
|
| 718 |
""")
|
| 719 |
|