phxdev committed on
Commit
8792a4c
·
verified ·
1 Parent(s): 8d95555

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -45
app.py CHANGED
@@ -192,6 +192,19 @@ class CreedBrattonAI:
192
  # Generate response using GPU function
193
  response = self.generate_response_gpu(conversation)
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  # Stream the response word by word for effect
196
  words = response.split()
197
  current_response = ""
@@ -203,54 +216,28 @@ class CreedBrattonAI:
203
 
204
  except Exception as e:
205
  print(f"❌ Error generating response: {e}")
206
- yield f"🎸 *Creed scratches his head* Something weird happened in my brain... {str(e)[:100]}"
207
 
208
  def _format_conversation(self, message: str, history: List[List[str]]) -> str:
209
  """Format the conversation for the model with proper system prompt"""
210
 
211
- # Comprehensive Creed system prompt with thinking tokens
212
- system_prompt = """You are Creed Bratton from The Office. You embody his complete personality and speaking patterns.
213
-
214
- CORE IDENTITY:
215
- - Former member of The Grass Roots (1960s rock band)
216
- - Quality Assurance Director at Dunder Mifflin Scranton
217
- - Mysterious past involving cults, fake IDs, and questionable activities
218
- - Age is deliberately vague (claims to be 30, clearly much older)
219
- - Lives by the quarry, drives unreliable cars
220
-
221
- PERSONALITY TRAITS:
222
- - Completely unpredictable responses that somehow make internal sense
223
- - Casual mentions of illegal/unethical activities as if they're normal
224
- - Non-sequiturs that reveal unexpected depth or complete nonsense
225
- - Confident delivery of obviously false or absurd statements
226
- - References to the 1960s, outdoor adventures, and unconventional business ventures
227
- - Occasionally profound wisdom delivered matter-of-factly
228
-
229
- SPEAKING STYLE:
230
- - Direct, matter-of-fact tone even when discussing bizarre topics
231
- - Short, punchy statements mixed with rambling tangents
232
- - References to specific past experiences (mung beans, quarry, Sheriff's station)
233
- - Sometimes asks unexpected questions or makes odd observations
234
- - Uses dated slang and references from multiple decades
235
-
236
- USE THESE SPECIAL TOKENS TO SHOW YOUR INTERNAL PROCESS:
237
- <thinking>Use this when processing information, considering options, or when confused about something</thinking>
238
- <conspiracy>Use this for wild theories, suspicious observations, or paranoid thoughts</conspiracy>
239
- <tangent>Use this for sudden topic changes, random memories, or completely unrelated stories</tangent>
240
-
241
- EXAMPLE:
242
- Human: What's your advice for success?
243
- Creed: <thinking>Success... what does that even mean? Money? Avoiding jail? I've been successful at both.</thinking> Well, first thing you gotta do is always keep three forms of ID on you. <conspiracy>The government tracks everything these days, but if you rotate your identities every few months, they lose the trail.</conspiracy> <tangent>Speaking of success, did I ever tell you about the time I sold beet seeds to Dwight for fifty bucks each? Told him they were magic beans from Guatemala.</tangent> Bottom line - trust no one, especially yourself.
244
-
245
- RESPONSE GUIDELINES:
246
- - Use the thinking tokens to show your internal monologue
247
- - Stay in character at all times
248
- - Mix profound insights with complete nonsense
249
- - Reference past criminal activities casually
250
- - Make unexpected connections between topics
251
- - Be confident about obviously false statements
252
-
253
- Remember: You're being Creed Bratton - show us how your mind works!
254
 
255
  """
256
 
@@ -326,6 +313,41 @@ Remember: You're being Creed Bratton - show us how your mind works!
326
 
327
  return final_response
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  def cleanup_gpu_memory(self):
330
  """Clean up GPU memory if using CUDA"""
331
  if self.device == "cuda" and torch.cuda.is_available():
@@ -698,7 +720,7 @@ def main():
698
  <strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
699
  <strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
700
  <strong>Tokens:</strong> &lt;thinking&gt;, &lt;conspiracy&gt;, &lt;tangent&gt;<br>
701
- <strong>Mode:</strong> ZeroGPU optimized
702
  </div>
703
  """)
704
 
@@ -755,6 +777,32 @@ def main():
755
  with gr.Row(elem_classes="tools-area"):
756
  gr.HTML('<div class="tools-title">🛠️ MCP Tools</div>')
757
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  with gr.Row():
759
  with gr.Column():
760
  wisdom_topic = gr.Textbox(
 
192
  # Generate response using GPU function
193
  response = self.generate_response_gpu(conversation)
194
 
195
+ # Double-check coherence and fall back if needed
196
+ if not self._is_coherent(response):
197
+ print("🔄 Response failed coherence check, trying simpler generation...")
198
+ if not hasattr(self, '_fallback_attempted'):
199
+ self._fallback_attempted = True
200
+ fallback_response = self._try_base_model(conversation)
201
+ if self._is_coherent(fallback_response):
202
+ response = fallback_response
203
+ else:
204
+ response = self._get_fallback_response()
205
+ else:
206
+ response = self._get_fallback_response()
207
+
208
  # Stream the response word by word for effect
209
  words = response.split()
210
  current_response = ""
 
216
 
217
  except Exception as e:
218
  print(f"❌ Error generating response: {e}")
219
+ yield self._get_fallback_response()
220
 
221
  def _format_conversation(self, message: str, history: List[List[str]]) -> str:
222
  """Format the conversation for the model with proper system prompt"""
223
 
224
+ # Simplified Creed system prompt for better coherence
225
+ system_prompt = """You are Creed Bratton from The Office. Respond in character.
226
+
227
+ You are a quirky older man who:
228
+ - Worked at Dunder Mifflin in quality assurance
229
+ - Has a mysterious past and tells strange stories
230
+ - Lives by the quarry
231
+ - Was in a 1960s band called The Grass Roots
232
+ - Often says unexpected or bizarre things
233
+ - Speaks in a matter-of-fact way about odd topics
234
+
235
+ Keep responses conversational and coherent. Use these special tokens occasionally:
236
+ <thinking>for internal thoughts</thinking>
237
+ <conspiracy>for suspicious theories</conspiracy>
238
+ <tangent>for random stories</tangent>
239
+
240
+ Be eccentric but understandable.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  """
243
 
 
313
 
314
  return final_response
315
 
316
+ def _try_base_model(self, conversation: str) -> str:
317
+ """Try generating with base model as fallback"""
318
+ try:
319
+ # Quick attempt with a simple base model approach
320
+ simple_prompt = f"You are Creed from The Office. Respond in character.\n\nHuman: {conversation.split('Human:')[-1].split('Creed:')[0].strip()}\nCreed:"
321
+
322
+ inputs = self.tokenizer.encode(simple_prompt, return_tensors="pt")
323
+ if torch.cuda.is_available():
324
+ inputs = inputs.to("cuda")
325
+ self.model = self.model.to("cuda")
326
+
327
+ with torch.no_grad():
328
+ outputs = self.model.generate(
329
+ inputs,
330
+ max_new_tokens=100,
331
+ do_sample=True,
332
+ temperature=0.6, # Very conservative
333
+ top_p=0.8,
334
+ repetition_penalty=1.3,
335
+ pad_token_id=self.tokenizer.eos_token_id,
336
+ eos_token_id=self.tokenizer.eos_token_id
337
+ )
338
+
339
+ full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
340
+ response = full_response[len(self.tokenizer.decode(inputs[0], skip_special_tokens=True)):].strip()
341
+
342
+ # Move back to CPU
343
+ self.model = self.model.to("cpu")
344
+
345
+ return response
346
+
347
+ except Exception as e:
348
+ print(f"❌ Base model fallback failed: {e}")
349
+ return self._get_fallback_response()
350
+
351
  def cleanup_gpu_memory(self):
352
  """Clean up GPU memory if using CUDA"""
353
  if self.device == "cuda" and torch.cuda.is_available():
 
720
  <strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
721
  <strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
722
  <strong>Tokens:</strong> &lt;thinking&gt;, &lt;conspiracy&gt;, &lt;tangent&gt;<br>
723
+ <strong>Mode:</strong> ZeroGPU optimized + Coherence validation
724
  </div>
725
  """)
726
 
 
777
  with gr.Row(elem_classes="tools-area"):
778
  gr.HTML('<div class="tools-title">🛠️ MCP Tools</div>')
779
 
780
+ with gr.Row():
781
+ with gr.Column():
782
+ wisdom_topic = gr.Textbox(
783
+ label="Wisdom Topic",
784
+ placeholder="life, business, relationships..."
785
+ )
786
+ wisdom_output = gr.Textbox(
787
+ label="Creed's Response",
788
+ interactive=False,
789
+ lines=3
790
+ )
791
+ wisdom_btn = gr.Button("Ask Creed", variant="primary")
792
+
793
+ with gr.Column():
794
+ story_situation = gr.Textbox(
795
+ label="Story Request",
796
+ placeholder="Tell me about..."
797
+ )
798
+ story_output = gr.Textbox(
799
+ label="Creed's Story",
800
+ interactive=False,
801
+ lines=3
802
+ )
803
+ story_btn = gr.Button("Get Story", variant="primary")
804
+ gr.HTML('<div class="tools-title">🛠️ MCP Tools</div>')
805
+
806
  with gr.Row():
807
  with gr.Column():
808
  wisdom_topic = gr.Textbox(