Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -192,6 +192,19 @@ class CreedBrattonAI:
|
|
| 192 |
# Generate response using GPU function
|
| 193 |
response = self.generate_response_gpu(conversation)
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
# Stream the response word by word for effect
|
| 196 |
words = response.split()
|
| 197 |
current_response = ""
|
|
@@ -203,54 +216,28 @@ class CreedBrattonAI:
|
|
| 203 |
|
| 204 |
except Exception as e:
|
| 205 |
print(f"❌ Error generating response: {e}")
|
| 206 |
-
yield
|
| 207 |
|
| 208 |
def _format_conversation(self, message: str, history: List[List[str]]) -> str:
|
| 209 |
"""Format the conversation for the model with proper system prompt"""
|
| 210 |
|
| 211 |
-
#
|
| 212 |
-
system_prompt = """You are Creed Bratton from The Office.
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
-
|
| 216 |
-
-
|
| 217 |
-
-
|
| 218 |
-
-
|
| 219 |
-
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
SPEAKING STYLE:
|
| 230 |
-
- Direct, matter-of-fact tone even when discussing bizarre topics
|
| 231 |
-
- Short, punchy statements mixed with rambling tangents
|
| 232 |
-
- References to specific past experiences (mung beans, quarry, Sheriff's station)
|
| 233 |
-
- Sometimes asks unexpected questions or makes odd observations
|
| 234 |
-
- Uses dated slang and references from multiple decades
|
| 235 |
-
|
| 236 |
-
USE THESE SPECIAL TOKENS TO SHOW YOUR INTERNAL PROCESS:
|
| 237 |
-
<thinking>Use this when processing information, considering options, or when confused about something</thinking>
|
| 238 |
-
<conspiracy>Use this for wild theories, suspicious observations, or paranoid thoughts</conspiracy>
|
| 239 |
-
<tangent>Use this for sudden topic changes, random memories, or completely unrelated stories</tangent>
|
| 240 |
-
|
| 241 |
-
EXAMPLE:
|
| 242 |
-
Human: What's your advice for success?
|
| 243 |
-
Creed: <thinking>Success... what does that even mean? Money? Avoiding jail? I've been successful at both.</thinking> Well, first thing you gotta do is always keep three forms of ID on you. <conspiracy>The government tracks everything these days, but if you rotate your identities every few months, they lose the trail.</conspiracy> <tangent>Speaking of success, did I ever tell you about the time I sold beet seeds to Dwight for fifty bucks each? Told him they were magic beans from Guatemala.</tangent> Bottom line - trust no one, especially yourself.
|
| 244 |
-
|
| 245 |
-
RESPONSE GUIDELINES:
|
| 246 |
-
- Use the thinking tokens to show your internal monologue
|
| 247 |
-
- Stay in character at all times
|
| 248 |
-
- Mix profound insights with complete nonsense
|
| 249 |
-
- Reference past criminal activities casually
|
| 250 |
-
- Make unexpected connections between topics
|
| 251 |
-
- Be confident about obviously false statements
|
| 252 |
-
|
| 253 |
-
Remember: You're being Creed Bratton - show us how your mind works!
|
| 254 |
|
| 255 |
"""
|
| 256 |
|
|
@@ -326,6 +313,41 @@ Remember: You're being Creed Bratton - show us how your mind works!
|
|
| 326 |
|
| 327 |
return final_response
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
def cleanup_gpu_memory(self):
|
| 330 |
"""Clean up GPU memory if using CUDA"""
|
| 331 |
if self.device == "cuda" and torch.cuda.is_available():
|
|
@@ -698,7 +720,7 @@ def main():
|
|
| 698 |
<strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
|
| 699 |
<strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
|
| 700 |
<strong>Tokens:</strong> <thinking>, <conspiracy>, <tangent><br>
|
| 701 |
-
<strong>Mode:</strong> ZeroGPU optimized
|
| 702 |
</div>
|
| 703 |
""")
|
| 704 |
|
|
@@ -755,6 +777,32 @@ def main():
|
|
| 755 |
with gr.Row(elem_classes="tools-area"):
|
| 756 |
gr.HTML('<div class="tools-title">🛠️ MCP Tools</div>')
|
| 757 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
with gr.Row():
|
| 759 |
with gr.Column():
|
| 760 |
wisdom_topic = gr.Textbox(
|
|
|
|
| 192 |
# Generate response using GPU function
|
| 193 |
response = self.generate_response_gpu(conversation)
|
| 194 |
|
| 195 |
+
# Double-check coherence and fall back if needed
|
| 196 |
+
if not self._is_coherent(response):
|
| 197 |
+
print("🔄 Response failed coherence check, trying simpler generation...")
|
| 198 |
+
if not hasattr(self, '_fallback_attempted'):
|
| 199 |
+
self._fallback_attempted = True
|
| 200 |
+
fallback_response = self._try_base_model(conversation)
|
| 201 |
+
if self._is_coherent(fallback_response):
|
| 202 |
+
response = fallback_response
|
| 203 |
+
else:
|
| 204 |
+
response = self._get_fallback_response()
|
| 205 |
+
else:
|
| 206 |
+
response = self._get_fallback_response()
|
| 207 |
+
|
| 208 |
# Stream the response word by word for effect
|
| 209 |
words = response.split()
|
| 210 |
current_response = ""
|
|
|
|
| 216 |
|
| 217 |
except Exception as e:
|
| 218 |
print(f"❌ Error generating response: {e}")
|
| 219 |
+
yield self._get_fallback_response()
|
| 220 |
|
| 221 |
def _format_conversation(self, message: str, history: List[List[str]]) -> str:
|
| 222 |
"""Format the conversation for the model with proper system prompt"""
|
| 223 |
|
| 224 |
+
# Simplified Creed system prompt for better coherence
|
| 225 |
+
system_prompt = """You are Creed Bratton from The Office. Respond in character.
|
| 226 |
+
|
| 227 |
+
You are a quirky older man who:
|
| 228 |
+
- Worked at Dunder Mifflin in quality assurance
|
| 229 |
+
- Has a mysterious past and tells strange stories
|
| 230 |
+
- Lives by the quarry
|
| 231 |
+
- Was in a 1960s band called The Grass Roots
|
| 232 |
+
- Often says unexpected or bizarre things
|
| 233 |
+
- Speaks in a matter-of-fact way about odd topics
|
| 234 |
+
|
| 235 |
+
Keep responses conversational and coherent. Use these special tokens occasionally:
|
| 236 |
+
<thinking>for internal thoughts</thinking>
|
| 237 |
+
<conspiracy>for suspicious theories</conspiracy>
|
| 238 |
+
<tangent>for random stories</tangent>
|
| 239 |
+
|
| 240 |
+
Be eccentric but understandable.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
"""
|
| 243 |
|
|
|
|
| 313 |
|
| 314 |
return final_response
|
| 315 |
|
| 316 |
+
def _try_base_model(self, conversation: str) -> str:
    """Generate a reply with a minimal prompt as a fallback.

    Builds a short prompt from the text between the last ``Human:`` marker
    and the following ``Creed:`` marker in ``conversation``, samples up to
    100 new tokens with conservative settings, and returns only the newly
    generated text.

    Args:
        conversation: The formatted conversation string.

    Returns:
        The stripped generated reply, or ``self._get_fallback_response()``
        if anything in the generation path raises.
    """
    moved_to_gpu = False
    try:
        # Keep only the most recent human turn for the simplified prompt.
        last_turn = conversation.split('Human:')[-1].split('Creed:')[0].strip()
        simple_prompt = f"You are Creed from The Office. Respond in character.\n\nHuman: {last_turn}\nCreed:"

        inputs = self.tokenizer.encode(simple_prompt, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = inputs.to("cuda")
            # Set the flag before the move so a failure part-way through
            # still triggers the restore in ``finally``.
            moved_to_gpu = True
            self.model = self.model.to("cuda")

        with torch.no_grad():
            outputs = self.model.generate(
                inputs,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.6,  # Very conservative
                top_p=0.8,
                repetition_penalty=1.3,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        # Slice off the prompt at token level. Slicing the decoded string by
        # the decoded prompt's length is unreliable because decoding the
        # prompt alone may not yield the same number of characters as the
        # prompt prefix of the full decode.
        prompt_len = inputs.shape[-1]
        new_tokens = outputs[0][prompt_len:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    except Exception as e:
        print(f"❌ Base model fallback failed: {e}")
        return self._get_fallback_response()

    finally:
        # Move back to CPU even when generation failed, so the model is not
        # left on the GPU after an error.
        if moved_to_gpu:
            self.model = self.model.to("cpu")
|
| 350 |
+
|
| 351 |
def cleanup_gpu_memory(self):
|
| 352 |
"""Clean up GPU memory if using CUDA"""
|
| 353 |
if self.device == "cuda" and torch.cuda.is_available():
|
|
|
|
| 720 |
<strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
|
| 721 |
<strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
|
| 722 |
<strong>Tokens:</strong> <thinking>, <conspiracy>, <tangent><br>
|
| 723 |
+
<strong>Mode:</strong> ZeroGPU optimized + Coherence validation
|
| 724 |
</div>
|
| 725 |
""")
|
| 726 |
|
|
|
|
| 777 |
with gr.Row(elem_classes="tools-area"):
|
| 778 |
gr.HTML('<div class="tools-title">🛠️ MCP Tools</div>')
|
| 779 |
|
| 780 |
+
with gr.Row():
|
| 781 |
+
with gr.Column():
|
| 782 |
+
wisdom_topic = gr.Textbox(
|
| 783 |
+
label="Wisdom Topic",
|
| 784 |
+
placeholder="life, business, relationships..."
|
| 785 |
+
)
|
| 786 |
+
wisdom_output = gr.Textbox(
|
| 787 |
+
label="Creed's Response",
|
| 788 |
+
interactive=False,
|
| 789 |
+
lines=3
|
| 790 |
+
)
|
| 791 |
+
wisdom_btn = gr.Button("Ask Creed", variant="primary")
|
| 792 |
+
|
| 793 |
+
with gr.Column():
|
| 794 |
+
story_situation = gr.Textbox(
|
| 795 |
+
label="Story Request",
|
| 796 |
+
placeholder="Tell me about..."
|
| 797 |
+
)
|
| 798 |
+
story_output = gr.Textbox(
|
| 799 |
+
label="Creed's Story",
|
| 800 |
+
interactive=False,
|
| 801 |
+
lines=3
|
| 802 |
+
)
|
| 803 |
+
story_btn = gr.Button("Get Story", variant="primary")
|
| 804 |
+
gr.HTML('<div class="tools-title">🛠️ MCP Tools</div>')
|
| 805 |
+
|
| 806 |
with gr.Row():
|
| 807 |
with gr.Column():
|
| 808 |
wisdom_topic = gr.Textbox(
|