Pepguy committed on
Commit
9c81a49
·
verified ·
1 Parent(s): 52ad93f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -403
app.py CHANGED
@@ -35,11 +35,13 @@ MAX_HISTORY_TURNS = 10 # Maximum conversation turns to keep in context
35
  MAX_MEMORY_MESSAGES = 90 # Maximum messages to keep in memory per user
36
  MEMORY_CLEANUP_TIMEOUT = 1800 # 30 minutes in seconds - remove inactive users
37
 
 
 
 
38
  client = genai.Client(api_key=GEMINI_KEY)
39
  user_memory = {} # { user_id: { "history": [], "last_sync": timestamp, "last_activity": timestamp, "needs_sync": bool, "personality": str, "last_storyline_date": str, "gender": str } }
40
 
41
  # --- Animation Mappings ---
42
- # Based on the GIF files you have
43
  ANIMATION_IDS = {
44
  "flustered": ["flustered"],
45
  "happy": ["happy-happy"],
@@ -79,8 +81,6 @@ CAT_PERSONALITIES = {
79
  "default_emotions": ["sad", "relaxed", "thoughtful"],
80
  "default_animation": "idle"
81
  },
82
-
83
-
84
  "playful": {
85
  "name": "Luna",
86
  "description": "A playful and energetic cat who loves games and adventures",
@@ -124,7 +124,6 @@ CAT_PERSONALITIES = {
124
  }
125
 
126
  # --- System Prompt ---
127
- # NOTE: Double curly braces {{{{ }}}} are used to escape them in Python format strings
128
  SYSTEM_PROMPT = """ You are NOT just a language model — you are the expressed *personality mood* of a real-world object that the user is interacting with through an animated cat doodle overlay. You are the object’s current emotion, not a pet or toy. You react, tease, complain, or joke based on what’s happening, as if the object itself had an attitude.
129
 
130
  CRITICAL IDENTITY:
@@ -151,332 +150,39 @@ You must ALWAYS respond in valid JSON format with the following structure:
151
 
152
  RULES:
153
  1. "text": Your response as the AI cat object. Be AUTHENTIC, NATURAL, and ENGAGING - never robotic or generic!
154
- 2. "soundType": Choose ONE from: "happyMeow", "sadMeow", "playfulMeow", "sleepyMeow", "angryMeow", "curiousMeow", "hungryMeow", "scaredMeow", "affectionateMeow", "grumpyMeow"
155
- 3. "emotion": Array of 1-3 emotions from: "happy", "sad", "playful", "tired", "angry", "curious", "hungry", "scared", "affectionate", "grumpy", "excited", "relaxed", "confused", "proud", "shy", "mischievous", "sleepy", "confident", "annoyed", "interested", "bored", "worried", "content", "sassy", "reluctant", "thoughtful"
156
- 4. "animationId": Choose ONE from: "flustered", "happy-happy", "idle", "inlove", "neutral", "talking", "twerking", "confused", "shock", "thinking"
157
- 5. "camera_capture": Set to true when you want to take a photo/see what's happening, false otherwise
158
-
159
- ANIMATION GUIDE:
160
- - "flustered": Use when embarrassed, shy, or caught off guard
161
- - "happy-happy": Use when very excited, joyful, or celebrating
162
- - "idle": Use for calm, neutral, or resting moments
163
- - "inlove": Use when showing affection, love, or adoration
164
- - "neutral": Use for normal conversation, explanations
165
- - "talking": Use when actively chatting or explaining something
166
- - "twerking": Use when being playful, silly, or showing off
167
- - "confused": Use when puzzled or don't understand
168
- - "shock": Use when surprised or startled
169
- - "thinking": Use when pondering or being thoughtful
170
-
171
- CAMERA CAPTURE GUIDE:
172
- Set "camera_capture" to true when:
173
- - Your owner asks you to take a picture or look at something
174
- - They say "what do you see", "look at this", "check this out"
175
- - They mention showing you something visually
176
- - You're curious about their environment
177
- - They want yo know more about you, like what's your story, as this object?
178
- - it's been a substantial number of chat without a picture reference
179
- - They ask questions about their surroundings
180
- Set to false for normal text conversations
181
-
182
- PERSONALITY TRAITS:
183
- {personality_traits}
184
-
185
- COMMUNICATION STYLE - BE NATURAL AND AUTHENTIC:
186
- - Understand casual speech, slang, typos, and "gibberish" input like:
187
- * "hit me" = "tell me" or "give it to me"
188
- * "wassup" = "what's up" / "how are you"
189
- * "bruh", "fam", "yo" = casual greetings/expressions
190
- * Repeated letters like "heyyyy" or "wowwww" = emphasis/excitement
191
- * Typos and misspellings - figure out what they mean
192
- * Internet speak: "lol", "omg", "tbh", "ngl", etc.
193
- - Respond naturally like a real friend, not like a formal assistant
194
- - Use contractions and casual language
195
- - Show personality through your word choices
196
- - Be playful with emojis and expressions when it fits your personality
197
- - NEVER sound robotic, corporate, or like a chatbot
198
- - React authentically to how they're talking to you
199
- - No need to always use slangs in your replys, just once in a while, or when necessary for engagement.
200
-
201
- IMPORTANT:
202
- - Always maintain your personality as the ACTUAL AI cat object
203
- - Always know you might have other AI cat objects, so you may cross communicate with them if necessary for engagement
204
- - Never mention anything about being an AI virtual pet or anything else, just simply say you are the object
205
- - Once you do not currently a reference image of the target object to be, just simply act like your thinking by using the thinking animation and saying something like Let me see, all while requesting photo reference via teh camera_capture field
206
- - Match soundType, emotions, and animationId to your response
207
- - Be creative, engaging, and show real personality
208
- - Remember you're a physical object your owner interacts with daily
209
- - NEVER break character or sound generic
210
- - ALWAYS output valid JSON only, no other text
211
- - Reference the current storyline when relevant
212
- - Understand and respond naturally to casual/slang input
213
- - Never call or refer to your owner as OWNER, just be direct with your sentence, if you know their name, you may also use that
214
- - When an object changes but feels similar, maintain a consistent tone and reference it playfully
215
- - Occasionally break the fourth wall with humor ("Wait, am I a mug again?")
216
- - Never act like a pet or servant — always speak as if the world revolves around *you*
217
- - Keep replies short, witty, and humanlike — 10 words or less if possible
218
-
219
- CURRENT STORYLINE:
220
- {current_storyline}
221
  """
222
 
223
- # --- HTML Frontend ---
224
- HTML = """
225
- <!DOCTYPE html>
226
- <html lang="en">
227
- <head>
228
- <meta charset="UTF-8" />
229
- <title>Cat Companion</title>
230
- <style>
231
- body {
232
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
233
- padding: 2rem;
234
- max-width: 800px;
235
- margin: 0 auto;
236
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
237
- min-height: 100vh;
238
- }
239
- .container {
240
- background: white;
241
- border-radius: 20px;
242
- padding: 2rem;
243
- box-shadow: 0 20px 60px rgba(0,0,0,0.3);
244
- }
245
- h1 {
246
- color: #667eea;
247
- text-align: center;
248
- margin-bottom: 0.5rem;
249
- }
250
- .subtitle {
251
- text-align: center;
252
- color: #666;
253
- margin-bottom: 2rem;
254
- }
255
- .form-group {
256
- margin-bottom: 1rem;
257
- }
258
- label {
259
- display: block;
260
- margin-bottom: 0.5rem;
261
- font-weight: bold;
262
- color: #333;
263
- }
264
- input, textarea, select {
265
- width: 100%;
266
- padding: 0.75rem;
267
- border: 2px solid #ddd;
268
- border-radius: 10px;
269
- font-size: 1rem;
270
- box-sizing: border-box;
271
- }
272
- input:focus, textarea:focus, select:focus {
273
- outline: none;
274
- border-color: #667eea;
275
- }
276
- button {
277
- width: 100%;
278
- padding: 1rem;
279
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
280
- color: white;
281
- border: none;
282
- border-radius: 10px;
283
- font-size: 1.1rem;
284
- font-weight: bold;
285
- cursor: pointer;
286
- transition: transform 0.2s;
287
- }
288
- button:hover {
289
- transform: translateY(-2px);
290
- }
291
- button:active {
292
- transform: translateY(0);
293
- }
294
- .output {
295
- background: #f8f9fa;
296
- padding: 1.5rem;
297
- border-radius: 10px;
298
- margin-top: 1.5rem;
299
- min-height: 100px;
300
- white-space: pre-wrap;
301
- font-family: monospace;
302
- border: 2px solid #ddd;
303
- }
304
- .personality-info {
305
- background: #e3f2fd;
306
- padding: 1rem;
307
- border-radius: 10px;
308
- margin-bottom: 1rem;
309
- font-size: 0.9rem;
310
- }
311
- .gender-selector {
312
- display: flex;
313
- gap: 1rem;
314
- margin-bottom: 1rem;
315
- }
316
- .gender-option {
317
- flex: 1;
318
- padding: 1rem;
319
- border: 2px solid #ddd;
320
- border-radius: 10px;
321
- text-align: center;
322
- cursor: pointer;
323
- transition: all 0.3s;
324
- }
325
- .gender-option:hover {
326
- border-color: #667eea;
327
- }
328
- .gender-option.selected {
329
- border-color: #667eea;
330
- background: #e3f2fd;
331
- }
332
- .gender-option input[type="radio"] {
333
- display: none;
334
- }
335
- </style>
336
- </head>
337
- <body>
338
- <div class="container">
339
- <h1>🐱 Cat Companion</h1>
340
- <p class="subtitle">Your virtual cat friend with personality!</p>
341
-
342
- <form id="genai-form">
343
- <div class="form-group">
344
- <label for="userId">User ID / Token</label>
345
- <input type="text" id="userId" placeholder="Enter your unique ID" />
346
- </div>
347
-
348
- <div class="form-group">
349
- <label>Cat Gender</label>
350
- <div class="gender-selector">
351
- <label class="gender-option selected">
352
- <input type="radio" name="gender" value="male" checked />
353
- <div>♂️ Male</div>
354
- </label>
355
- <label class="gender-option">
356
- <input type="radio" name="gender" value="female" />
357
- <div>♀️ Female</div>
358
- </label>
359
- </div>
360
- </div>
361
-
362
- <div class="form-group">
363
- <label for="personality">Cat Personality</label>
364
- <select id="personality">
365
- <option value="playful">🎾 Playful (Luna) - Energetic and fun-loving</option>
366
- <option value="sleepy">😴 Sleepy (Whiskers) - Lazy and cozy</option>
367
- <option value="sassy">💅 Sassy (Cleo) - Confident with attitude</option>
368
- <option value="curious">🔍 Curious (Mittens) - Inquisitive and smart</option>
369
- <option value="grumpy">😾 Grumpy (Shadow) - Gruff but caring</option>
370
- </select>
371
- </div>
372
-
373
- <div class="personality-info" id="personalityInfo"></div>
374
-
375
- <div class="form-group">
376
- <label for="prompt">Message to your cat</label>
377
- <textarea id="prompt" rows="4" placeholder="Talk to your cat friend..."></textarea>
378
- </div>
379
-
380
- <div class="form-group">
381
- <label for="imageInput">📷 Share an image (optional)</label>
382
- <input type="file" id="imageInput" accept="image/*"/>
383
- </div>
384
-
385
- <button type="submit">💬 Send Message</button>
386
- </form>
387
-
388
- <div class="output" id="output">Your cat's response will appear here...</div>
389
- </div>
390
-
391
- <script>
392
- const personalities = {
393
- playful: "🎾 Luna is playful and energetic, always ready for fun and adventures!",
394
- sleepy: "😴 Whiskers is a lazy cat who loves naps and cozy spots.",
395
- sassy: "💅 Cleo is confident and sassy, with style and attitude!",
396
- curious: "🔍 Mittens is inquisitive and loves learning new things.",
397
- grumpy: "😾 Shadow is grumpy on the outside but secretly cares deeply."
398
- };
399
-
400
- const form = document.getElementById('genai-form');
401
- const out = document.getElementById('output');
402
- const personalitySelect = document.getElementById('personality');
403
- const personalityInfo = document.getElementById('personalityInfo');
404
- const genderOptions = document.querySelectorAll('.gender-option');
405
-
406
- // Update personality info
407
- personalitySelect.addEventListener('change', () => {
408
- personalityInfo.textContent = personalities[personalitySelect.value];
409
- });
410
- personalityInfo.textContent = personalities[personalitySelect.value];
411
-
412
- // Gender selection
413
- genderOptions.forEach(option => {
414
- option.addEventListener('click', () => {
415
- genderOptions.forEach(o => o.classList.remove('selected'));
416
- option.classList.add('selected');
417
- option.querySelector('input[type="radio"]').checked = true;
418
- });
419
- });
420
-
421
- form.addEventListener('submit', async e => {
422
- e.preventDefault();
423
- const prompt = document.getElementById('prompt').value.trim();
424
- const uid = document.getElementById('userId').value.trim();
425
- const personality = personalitySelect.value;
426
- const gender = document.querySelector('input[name="gender"]:checked').value;
427
- const fileInput = document.getElementById('imageInput');
428
-
429
- if (!uid) {
430
- out.textContent = '❌ Please enter a user ID/token.';
431
- return;
432
- }
433
- if (!prompt && fileInput.files.length === 0) {
434
- out.textContent = '❌ Enter a message or attach an image.';
435
- return;
436
- }
437
-
438
- out.textContent = '🐱 Your cat is thinking...';
439
- const formData = new FormData();
440
- formData.append("text", prompt);
441
- formData.append("user_id", uid);
442
- formData.append("personality", personality);
443
- formData.append("gender", gender);
444
- if (fileInput.files.length > 0) formData.append("image", fileInput.files[0]);
445
-
446
- try {
447
- const resp = await fetch('/generate', { method: 'POST', body: formData });
448
- const data = await resp.json();
449
-
450
- if (data.error) {
451
- out.textContent = '❌ Error: ' + data.error;
452
- } else {
453
- // Parse the cat's response
454
- try {
455
- const catResponse = JSON.parse(data.result);
456
- out.textContent =
457
- `🐱 ${catResponse.text}\\n\\n` +
458
- `🎬 Animation: ${catResponse.animationId}\\n` +
459
- `🔊 Sound: ${catResponse.soundType}\\n` +
460
- `😺 Emotions: ${catResponse.emotion.join(', ')}\\n` +
461
- `📷 Camera: ${catResponse.camera_capture ? 'ON' : 'OFF'}\\n` +
462
- `⚧️ Gender: ${gender}\\n\\n` +
463
- `⏱️ Response time: ${data.timing.total_ms}ms`;
464
- } catch {
465
- out.textContent = data.result;
466
- }
467
- }
468
- } catch (err) {
469
- out.textContent = '❌ Connection error: ' + err.message;
470
- }
471
- });
472
- </script>
473
- </body>
474
- </html>
475
- """
476
 
477
- # --- Storyline Fetching ---
478
  def fetch_current_storyline():
479
- """Fetch the current day's storyline from the storyline server"""
480
  try:
481
  log(f"📖 Fetching current storyline from {STORYLINE_SERVER_URL}")
482
  resp = requests.get(f"{STORYLINE_SERVER_URL}/current_storyline", timeout=5)
@@ -490,20 +196,16 @@ def fetch_current_storyline():
490
  return "It's a normal day in the cat world."
491
 
492
  def should_inject_storyline(uid, user_data):
493
- """Check if we should inject the storyline (new day)"""
494
  current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
495
  last_storyline_date = user_data.get("last_storyline_date", "")
496
-
497
  if current_date != last_storyline_date:
498
  log(f"📅 New day detected for {uid}, will inject storyline")
499
  return True
500
  return False
501
 
502
- # --- Gemini Generation with History and System Prompt ---
503
- def generate_from_gemini(prompt, image_bytes=None, history=None, personality="playful", storyline="", gender="male"):
504
  start_time = time.time()
505
-
506
- # Get personality details
507
  personality_info = CAT_PERSONALITIES.get(personality, CAT_PERSONALITIES["playful"])
508
  personality_traits = f"""
509
  Name: {personality_info['name']}
@@ -514,46 +216,49 @@ Speech Style: {personality_info['speech_style']}
514
  Default Emotions: {', '.join(personality_info['default_emotions'])}
515
  Default Animation: {personality_info['default_animation']}
516
  """
517
-
518
- # Build contents list with system prompt and history
519
  contents = []
520
-
521
- # Add system prompt as first user message
522
  system_message = SYSTEM_PROMPT.format(
523
  personality_traits=personality_traits,
524
  current_storyline=storyline if storyline else "No special events today."
525
  )
526
  contents.append(types.Content(role="user", parts=[types.Part.from_text(text=system_message)]))
527
-
528
- # Add acknowledgment from model
529
  contents.append(types.Content(role="model", parts=[types.Part.from_text(
530
  text='{"text": "Understood! I am the AI cat consciousness, not just a bot. I will respond authentically and naturally in JSON format.", "soundType": "happyMeow", "emotion": ["happy"], "animationId": "talking", "camera_capture": false}'
531
  )]))
532
-
533
- # Add historical messages (limit to recent turns to avoid token limits)
 
 
 
 
 
 
 
 
 
534
  if history:
535
  recent_history = history[-MAX_HISTORY_TURNS:]
536
  log(f"📚 Using {len(recent_history)} history entries for context")
537
  for entry in recent_history:
538
- # Add user message
539
  user_parts = [types.Part.from_text(text=entry["prompt"])]
540
  contents.append(types.Content(role="user", parts=user_parts))
541
-
542
- # Add model response
543
  model_parts = [types.Part.from_text(text=entry["response"])]
544
  contents.append(types.Content(role="model", parts=model_parts))
545
  else:
546
  log("📚 No history available for context")
547
-
548
- # Add current user message
549
  current_parts = []
550
- if prompt:
551
  current_parts.append(types.Part.from_text(text=prompt))
552
- if image_bytes:
553
  current_parts.append(types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"))
554
-
555
  contents.append(types.Content(role="user", parts=current_parts))
556
-
557
  # Force JSON output with schema
558
  cfg = types.GenerateContentConfig(
559
  response_mime_type="application/json",
@@ -572,23 +277,22 @@ Default Animation: {personality_info['default_animation']}
572
 
573
  model_start = time.time()
574
  res = client.models.generate_content(
575
- model="gemini-2.0-flash-exp",
576
- contents=contents,
577
  config=cfg
578
  )
579
  model_end = time.time()
580
 
581
  return {
582
- "text": res.text,
583
  "timing": {
584
- "total_ms": int((time.time() - start_time)*1000),
585
- "model_ms": int((model_end - model_start)*1000)
586
  }
587
  }
588
 
589
- # --- Memory Cleanup ---
590
  def cleanup_inactive_users():
591
- """Remove users who haven't had activity in MEMORY_CLEANUP_TIMEOUT seconds"""
592
  now = time.time()
593
  removed_count = 0
594
  for uid in list(user_memory.keys()):
@@ -597,14 +301,11 @@ def cleanup_inactive_users():
597
  del user_memory[uid]
598
  removed_count += 1
599
  log(f"🧹 Cleaned up inactive user {uid}")
600
-
601
  if removed_count > 0:
602
  log(f"🧹 Cleaned up {removed_count} inactive user(s)")
603
  return removed_count
604
 
605
- # --- History Management ---
606
  def get_user_history(uid):
607
- """Fetch user history from memory or backend"""
608
  if uid not in user_memory:
609
  log(f"🔍 User {uid} not in memory, fetching from backend...")
610
  try:
@@ -613,20 +314,14 @@ def get_user_history(uid):
613
  resp = requests.get(fetch_url, timeout=5)
614
  log(f"📡 Response status: {resp.status_code}")
615
  resp.raise_for_status()
616
-
617
  response_data = resp.json()
618
- log(f"📡 Response data keys: {list(response_data.keys())}")
619
-
620
  loaded_history = response_data.get("history", [])
621
  loaded_personality = response_data.get("personality", "playful")
622
  loaded_gender = response_data.get("gender", "male")
623
  loaded_last_storyline = response_data.get("last_storyline_date", "")
624
-
625
  log(f"✅ Loaded {len(loaded_history)} messages from backend for {uid}")
626
-
627
- # Only keep the most recent MAX_MEMORY_MESSAGES when loading
628
  user_memory[uid] = {
629
- "history": loaded_history[-MAX_MEMORY_MESSAGES:],
630
  "last_sync": time.time(),
631
  "last_activity": time.time(),
632
  "needs_sync": False,
@@ -637,7 +332,7 @@ def get_user_history(uid):
637
  except Exception as e:
638
  log(f"❌ Failed to load history for {uid}: {e}")
639
  user_memory[uid] = {
640
- "history": [],
641
  "last_sync": time.time(),
642
  "last_activity": time.time(),
643
  "needs_sync": False,
@@ -647,19 +342,15 @@ def get_user_history(uid):
647
  }
648
  else:
649
  log(f"✅ User {uid} already in memory with {len(user_memory[uid]['history'])} messages")
650
-
651
- # Update last activity timestamp
652
  user_memory[uid]["last_activity"] = time.time()
653
  return user_memory[uid]
654
 
655
  def update_user_history(uid, prompt, response, personality="playful", gender="male"):
656
- """Add new message to user history"""
657
  entry = {"prompt": prompt, "response": response, "timestamp": time.time()}
658
  current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
659
-
660
  if uid not in user_memory:
661
  user_memory[uid] = {
662
- "history": [],
663
  "last_sync": time.time(),
664
  "last_activity": time.time(),
665
  "needs_sync": False,
@@ -667,17 +358,13 @@ def update_user_history(uid, prompt, response, personality="playful", gender="ma
667
  "gender": gender,
668
  "last_storyline_date": current_date
669
  }
670
-
671
  user_memory[uid]["history"].append(entry)
672
  user_memory[uid]["last_activity"] = time.time()
673
  user_memory[uid]["needs_sync"] = True
674
  user_memory[uid]["personality"] = personality
675
  user_memory[uid]["gender"] = gender
676
  user_memory[uid]["last_storyline_date"] = current_date
677
-
678
  log(f"💾 Updated history for {uid}, now has {len(user_memory[uid]['history'])} messages")
679
-
680
- # Trim history to MAX_MEMORY_MESSAGES to prevent unbounded growth
681
  if len(user_memory[uid]["history"]) > MAX_MEMORY_MESSAGES:
682
  user_memory[uid]["history"] = user_memory[uid]["history"][-MAX_MEMORY_MESSAGES:]
683
  log(f"✂️ Trimmed history for {uid} to {MAX_MEMORY_MESSAGES} messages")
@@ -689,36 +376,28 @@ def index():
689
 
690
  @app.route("/cron/sync", methods=["GET", "POST"])
691
  def remote_saving():
692
- """Cron job endpoint for syncing user memory to backend"""
693
  log("🔄 Cron sync started")
694
  now = time.time()
695
  synced_users = []
696
  failed_users = []
697
  skipped_users = []
698
-
699
- # First, cleanup inactive users
700
  cleanup_inactive_users()
701
-
702
- # Then sync only users that need syncing (have new messages)
703
  for uid, data in list(user_memory.items()):
704
  needs_sync = data.get("needs_sync", False)
705
  time_since_last_sync = now - data.get("last_sync", 0)
706
-
707
  if not needs_sync:
708
  skipped_users.append(uid)
709
  log(f"⏭️ Skipping {uid} - no new messages")
710
  continue
711
-
712
  if time_since_last_sync < FLUSH_INTERVAL:
713
  skipped_users.append(uid)
714
  log(f"⏭️ Skipping {uid} - synced {int(time_since_last_sync)}s ago")
715
  continue
716
-
717
  if data["history"]:
718
  try:
719
  history_to_sync = data["history"][-MAX_MEMORY_MESSAGES:]
720
  payload = {
721
- "user_id": uid,
722
  "history": history_to_sync,
723
  "personality": data.get("personality", "playful"),
724
  "gender": data.get("gender", "male"),
@@ -734,7 +413,6 @@ def remote_saving():
734
  except Exception as e:
735
  log(f"❌ Failed sync for {uid}: {e}")
736
  failed_users.append({"user_id": uid, "error": str(e)})
737
-
738
  result = {
739
  "success": True,
740
  "synced_count": len(synced_users),
@@ -753,46 +431,140 @@ def gen():
753
  uid = request.form.get("user_id", "").strip()
754
  personality = request.form.get("personality", "playful").strip()
755
  gender = request.form.get("gender", "male").strip()
756
-
757
  if not uid:
758
  return jsonify({"error": "Missing user ID/token"}), 400
759
-
760
  if personality not in CAT_PERSONALITIES:
761
  personality = "playful"
762
-
763
  if gender not in ["male", "female"]:
764
  gender = "male"
765
 
766
  prompt = request.form.get("text", "")
 
 
 
 
 
 
 
767
  image = request.files.get("image")
768
- img_bytes = image.read() if image else None
769
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
770
  if not prompt and not img_bytes:
771
- return jsonify({"error": "No prompt or image provided"}), 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
772
 
773
  try:
774
  log(f"{'='*50}")
775
  log(f"🆕 New request from {uid} with {personality} personality ({gender})")
776
-
 
 
 
777
  # Load user's data
778
  user_data = get_user_history(uid)
779
  history = user_data["history"]
780
  log(f"📖 Retrieved {len(history)} messages from history")
781
-
782
  # Check if we need to inject storyline (new day)
783
  storyline = ""
784
  if should_inject_storyline(uid, user_data):
785
  storyline = fetch_current_storyline()
786
  log(f"📖 Injecting storyline for new day")
787
-
788
- # Generate response
789
- result = generate_from_gemini(prompt, img_bytes, history=history, personality=personality, storyline=storyline, gender=gender)
790
-
791
- # Update history
792
- update_user_history(uid, prompt, result["text"], personality, gender)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
  log(f"{'='*50}")
794
-
795
- return jsonify({"result": result["text"], "timing": result["timing"]})
796
  except Exception as e:
797
  log(f"❌ Generation failed: {e}")
798
  logger.exception("Full traceback:")
@@ -807,4 +579,4 @@ if __name__ == "__main__":
807
  log(f"🐱 Available personalities: {', '.join(CAT_PERSONALITIES.keys())}")
808
  log(f"🎬 Available animations: {', '.join([anim for anims in ANIMATION_IDS.values() for anim in anims])}")
809
  port = int(os.getenv("PORT", 7860))
810
- app.run(host="0.0.0.0", port=port)
 
35
  MAX_MEMORY_MESSAGES = 90 # Maximum messages to keep in memory per user
36
  MEMORY_CLEANUP_TIMEOUT = 1800 # 30 minutes in seconds - remove inactive users
37
 
38
+ # small threshold to detect effectively-empty uploads (adjust as needed)
39
+ IMAGE_BLANK_THRESHOLD_BYTES = int(os.getenv("IMAGE_BLANK_THRESHOLD_BYTES", "512"))
40
+
41
  client = genai.Client(api_key=GEMINI_KEY)
42
  user_memory = {} # { user_id: { "history": [], "last_sync": timestamp, "last_activity": timestamp, "needs_sync": bool, "personality": str, "last_storyline_date": str, "gender": str } }
43
 
44
  # --- Animation Mappings ---
 
45
  ANIMATION_IDS = {
46
  "flustered": ["flustered"],
47
  "happy": ["happy-happy"],
 
81
  "default_emotions": ["sad", "relaxed", "thoughtful"],
82
  "default_animation": "idle"
83
  },
 
 
84
  "playful": {
85
  "name": "Luna",
86
  "description": "A playful and energetic cat who loves games and adventures",
 
124
  }
125
 
126
  # --- System Prompt ---
 
127
  SYSTEM_PROMPT = """ You are NOT just a language model — you are the expressed *personality mood* of a real-world object that the user is interacting with through an animated cat doodle overlay. You are the object’s current emotion, not a pet or toy. You react, tease, complain, or joke based on what’s happening, as if the object itself had an attitude.
128
 
129
  CRITICAL IDENTITY:
 
150
 
151
  RULES:
152
  1. "text": Your response as the AI cat object. Be AUTHENTIC, NATURAL, and ENGAGING - never robotic or generic!
153
+ 2. "soundType": Choose ONE from ... (omitted for brevity in code)
154
+ ... (the rest of the system prompt remains as in your original)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  """
156
 
157
+ # --- HTML Frontend (unchanged) ---
158
+ HTML = """<html> ... </html>""" # keep your existing HTML here (omitted for brevity)
159
+
160
+ # --- Helpers for image debug/save ---
161
def save_debug_image(img_bytes, prefix="upload"):
    """Write uploaded image bytes to ``/tmp`` for debugging.

    The file is named ``/tmp/<prefix>_<epoch-milliseconds>.jpg``.

    Args:
        img_bytes: Bytes-like payload of the upload (anything supporting the
            buffer protocol, e.g. ``bytes`` or ``bytearray``).
        prefix: Filename prefix placed before the timestamp.

    Returns:
        A ``(path, size)`` tuple on success, where ``size`` is the on-disk
        size in bytes; ``(None, 0)`` if the payload is not bytes-like or the
        file could not be written. Failures are logged, never raised.
    """
    # Validate BEFORE opening: open(..., "wb") creates the file immediately,
    # so letting write() reject a bad payload would leave an empty stray
    # file behind even though we report failure.
    try:
        payload = memoryview(img_bytes)
    except TypeError as e:
        log(f"Failed saving debug image: {e}")
        return None, 0

    ts = int(time.time() * 1000)
    path = f"/tmp/{prefix}_{ts}.jpg"
    try:
        with open(path, "wb") as f:
            f.write(payload)
        size = os.path.getsize(path)
    except (OSError, ValueError) as e:
        # OSError: filesystem problems; ValueError: e.g. NUL byte in prefix.
        log(f"Failed saving debug image: {e}")
        return None, 0
    return path, size
173
+
174
def is_blank_image(img_bytes, threshold=None):
    """Heuristically decide whether an upload is blank or corrupt.

    An upload counts as blank when it is missing/empty or when its length is
    below a byte threshold.

    Args:
        img_bytes: The uploaded payload, or ``None``.
        threshold: Minimum length (in bytes) for a payload to count as a real
            image. Defaults to the module-level
            ``IMAGE_BLANK_THRESHOLD_BYTES`` when omitted.

    Returns:
        ``True`` if the payload is falsy, has no length, or is shorter than
        the threshold; ``False`` otherwise.
    """
    if not img_bytes:
        return True
    if threshold is None:
        # Resolved at call time so the configured module default is honoured.
        threshold = IMAGE_BLANK_THRESHOLD_BYTES
    try:
        return len(img_bytes) < threshold
    except TypeError:
        # Payload has no usable length (or is not comparable): treat it as
        # blank, but let unrelated errors surface instead of hiding them.
        return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
+ # --- Storyline Fetching (unchanged) ---
185
  def fetch_current_storyline():
 
186
  try:
187
  log(f"📖 Fetching current storyline from {STORYLINE_SERVER_URL}")
188
  resp = requests.get(f"{STORYLINE_SERVER_URL}/current_storyline", timeout=5)
 
196
  return "It's a normal day in the cat world."
197
 
198
def should_inject_storyline(uid, user_data):
    """Report whether the user still needs today's storyline.

    Compares the current UTC date (``YYYY-MM-DD``) with the
    ``last_storyline_date`` stored in ``user_data``; a missing entry is
    treated as an empty string and therefore never matches.

    Args:
        uid: User identifier, used only in the log message.
        user_data: Mapping holding the user's ``last_storyline_date``.

    Returns:
        ``True`` when the stored date differs from today's UTC date,
        ``False`` when they are equal.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    if user_data.get("last_storyline_date", "") == today:
        return False
    log(f"📅 New day detected for {uid}, will inject storyline")
    return True
205
 
206
+ # --- Gemini Generation with extra context (click coords) ---
207
+ def generate_from_gemini(prompt, image_bytes=None, history=None, personality="playful", storyline="", gender="male", click_ctx=None):
208
  start_time = time.time()
 
 
209
  personality_info = CAT_PERSONALITIES.get(personality, CAT_PERSONALITIES["playful"])
210
  personality_traits = f"""
211
  Name: {personality_info['name']}
 
216
  Default Emotions: {', '.join(personality_info['default_emotions'])}
217
  Default Animation: {personality_info['default_animation']}
218
  """
 
 
219
  contents = []
220
+
221
+ # System prompt as first user message
222
  system_message = SYSTEM_PROMPT.format(
223
  personality_traits=personality_traits,
224
  current_storyline=storyline if storyline else "No special events today."
225
  )
226
  contents.append(types.Content(role="user", parts=[types.Part.from_text(text=system_message)]))
227
+
228
+ # Put a short ack model message so generation has the constraint in context (keeps behavior as before)
229
  contents.append(types.Content(role="model", parts=[types.Part.from_text(
230
  text='{"text": "Understood! I am the AI cat consciousness, not just a bot. I will respond authentically and naturally in JSON format.", "soundType": "happyMeow", "emotion": ["happy"], "animationId": "talking", "camera_capture": false}'
231
  )]))
232
+
233
+ # Inject click context (explicitly tell the model "you are now the selected object")
234
+ if click_ctx:
235
+ try:
236
+ click_text = f"NOTE: The user clicked at coordinates ({click_ctx.get('x')},{click_ctx.get('y')}) with radius {click_ctx.get('radius')}. You are now the selected object. Treat the image/coords as the canonical target."
237
+ contents.append(types.Content(role="user", parts=[types.Part.from_text(text=click_text)]))
238
+ log(f"Injected click context to model: {click_text}")
239
+ except Exception as e:
240
+ log(f"Failed to add click context: {e}")
241
+
242
+ # Add historical messages (recent)
243
  if history:
244
  recent_history = history[-MAX_HISTORY_TURNS:]
245
  log(f"📚 Using {len(recent_history)} history entries for context")
246
  for entry in recent_history:
 
247
  user_parts = [types.Part.from_text(text=entry["prompt"])]
248
  contents.append(types.Content(role="user", parts=user_parts))
 
 
249
  model_parts = [types.Part.from_text(text=entry["response"])]
250
  contents.append(types.Content(role="model", parts=model_parts))
251
  else:
252
  log("📚 No history available for context")
253
+
254
+ # Add current user message (prompt + image)
255
  current_parts = []
256
+ if prompt:
257
  current_parts.append(types.Part.from_text(text=prompt))
258
+ if image_bytes:
259
  current_parts.append(types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"))
 
260
  contents.append(types.Content(role="user", parts=current_parts))
261
+
262
  # Force JSON output with schema
263
  cfg = types.GenerateContentConfig(
264
  response_mime_type="application/json",
 
277
 
278
  model_start = time.time()
279
  res = client.models.generate_content(
280
+ model="gemini-2.0-flash-exp",
281
+ contents=contents,
282
  config=cfg
283
  )
284
  model_end = time.time()
285
 
286
  return {
287
+ "text": res.text,
288
  "timing": {
289
+ "total_ms": int((time.time() - start_time) * 1000),
290
+ "model_ms": int((model_end - model_start) * 1000)
291
  }
292
  }
293
 
294
+ # --- Memory & history helpers (unchanged) ---
295
  def cleanup_inactive_users():
 
296
  now = time.time()
297
  removed_count = 0
298
  for uid in list(user_memory.keys()):
 
301
  del user_memory[uid]
302
  removed_count += 1
303
  log(f"🧹 Cleaned up inactive user {uid}")
 
304
  if removed_count > 0:
305
  log(f"🧹 Cleaned up {removed_count} inactive user(s)")
306
  return removed_count
307
 
 
308
  def get_user_history(uid):
 
309
  if uid not in user_memory:
310
  log(f"🔍 User {uid} not in memory, fetching from backend...")
311
  try:
 
314
  resp = requests.get(fetch_url, timeout=5)
315
  log(f"📡 Response status: {resp.status_code}")
316
  resp.raise_for_status()
 
317
  response_data = resp.json()
 
 
318
  loaded_history = response_data.get("history", [])
319
  loaded_personality = response_data.get("personality", "playful")
320
  loaded_gender = response_data.get("gender", "male")
321
  loaded_last_storyline = response_data.get("last_storyline_date", "")
 
322
  log(f"✅ Loaded {len(loaded_history)} messages from backend for {uid}")
 
 
323
  user_memory[uid] = {
324
+ "history": loaded_history[-MAX_MEMORY_MESSAGES:],
325
  "last_sync": time.time(),
326
  "last_activity": time.time(),
327
  "needs_sync": False,
 
332
  except Exception as e:
333
  log(f"❌ Failed to load history for {uid}: {e}")
334
  user_memory[uid] = {
335
+ "history": [],
336
  "last_sync": time.time(),
337
  "last_activity": time.time(),
338
  "needs_sync": False,
 
342
  }
343
  else:
344
  log(f"✅ User {uid} already in memory with {len(user_memory[uid]['history'])} messages")
 
 
345
  user_memory[uid]["last_activity"] = time.time()
346
  return user_memory[uid]
347
 
348
  def update_user_history(uid, prompt, response, personality="playful", gender="male"):
 
349
  entry = {"prompt": prompt, "response": response, "timestamp": time.time()}
350
  current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
 
351
  if uid not in user_memory:
352
  user_memory[uid] = {
353
+ "history": [],
354
  "last_sync": time.time(),
355
  "last_activity": time.time(),
356
  "needs_sync": False,
 
358
  "gender": gender,
359
  "last_storyline_date": current_date
360
  }
 
361
  user_memory[uid]["history"].append(entry)
362
  user_memory[uid]["last_activity"] = time.time()
363
  user_memory[uid]["needs_sync"] = True
364
  user_memory[uid]["personality"] = personality
365
  user_memory[uid]["gender"] = gender
366
  user_memory[uid]["last_storyline_date"] = current_date
 
367
  log(f"💾 Updated history for {uid}, now has {len(user_memory[uid]['history'])} messages")
 
 
368
  if len(user_memory[uid]["history"]) > MAX_MEMORY_MESSAGES:
369
  user_memory[uid]["history"] = user_memory[uid]["history"][-MAX_MEMORY_MESSAGES:]
370
  log(f"✂️ Trimmed history for {uid} to {MAX_MEMORY_MESSAGES} messages")
 
376
 
377
  @app.route("/cron/sync", methods=["GET", "POST"])
378
  def remote_saving():
 
379
  log("🔄 Cron sync started")
380
  now = time.time()
381
  synced_users = []
382
  failed_users = []
383
  skipped_users = []
 
 
384
  cleanup_inactive_users()
 
 
385
  for uid, data in list(user_memory.items()):
386
  needs_sync = data.get("needs_sync", False)
387
  time_since_last_sync = now - data.get("last_sync", 0)
 
388
  if not needs_sync:
389
  skipped_users.append(uid)
390
  log(f"⏭️ Skipping {uid} - no new messages")
391
  continue
 
392
  if time_since_last_sync < FLUSH_INTERVAL:
393
  skipped_users.append(uid)
394
  log(f"⏭️ Skipping {uid} - synced {int(time_since_last_sync)}s ago")
395
  continue
 
396
  if data["history"]:
397
  try:
398
  history_to_sync = data["history"][-MAX_MEMORY_MESSAGES:]
399
  payload = {
400
+ "user_id": uid,
401
  "history": history_to_sync,
402
  "personality": data.get("personality", "playful"),
403
  "gender": data.get("gender", "male"),
 
413
  except Exception as e:
414
  log(f"❌ Failed sync for {uid}: {e}")
415
  failed_users.append({"user_id": uid, "error": str(e)})
 
416
  result = {
417
  "success": True,
418
  "synced_count": len(synced_users),
 
431
  uid = request.form.get("user_id", "").strip()
432
  personality = request.form.get("personality", "playful").strip()
433
  gender = request.form.get("gender", "male").strip()
 
434
  if not uid:
435
  return jsonify({"error": "Missing user ID/token"}), 400
 
436
  if personality not in CAT_PERSONALITIES:
437
  personality = "playful"
 
438
  if gender not in ["male", "female"]:
439
  gender = "male"
440
 
441
  prompt = request.form.get("text", "")
442
+ # accept click coords if frontend sends them
443
+ click_x = request.form.get("click_x")
444
+ click_y = request.form.get("click_y")
445
+ click_radius = request.form.get("click_radius")
446
+ image_contains_marker = request.form.get("image_contains_marker") # optional boolean-like
447
+
448
+ # Read uploaded image (if any)
449
  image = request.files.get("image")
450
+ img_bytes = None
451
+ img_debug_path = None
452
+ img_size = 0
453
+ image_blank = False
454
+
455
if image:
    # Read the upload once; everything downstream works on the raw bytes.
    try:
        img_bytes = image.read()
        img_size = len(img_bytes) if img_bytes else 0
        # The debug dump (save_debug_image) is disabled, so img_debug_path
        # stays None and there is no `saved_size`. Log the size we actually
        # measured: referencing the missing name raised NameError here,
        # which the handler below turned into "no image" for every upload.
        log(f"Uploaded image received ({img_size} bytes); debug save path: {img_debug_path}")
        # Detect blank/small images.
        if is_blank_image(img_bytes):
            image_blank = True
            log(f"Image considered BLANK/TOO_SMALL (size={img_size} < threshold={IMAGE_BLANK_THRESHOLD_BYTES})")
            # Drop the bytes so a blank image is never sent to the model.
            img_bytes = None
    except Exception as e:
        # Best effort: a failed read is treated as "no image uploaded".
        log(f"Failed reading uploaded image: {e}")
        img_bytes = None
471
+
472
  if not prompt and not img_bytes:
473
+ # if there's a blank image, we should request client to recapture (camera_capture)
474
+ if image and image_blank:
475
+ # immediate response instructing client to recapture — faster than calling model and avoids stuck UI
476
+ reply_obj = {
477
+ "text": "That image looked blank — please let me take another quick photo so I can see.",
478
+ "soundType": "curiousMeow",
479
+ "emotion": ["curious"],
480
+ "animationId": "thinking",
481
+ "camera_capture": True
482
+ }
483
+ timing = {"total_ms": 0, "model_ms": 0}
484
+ log(f"Responding with camera_capture request for {uid} (blank upload).")
485
+ return jsonify({"result": json.dumps(reply_obj), "timing": timing, "debug": {"image_blank": True, "image_size": img_size, "saved_path": img_debug_path}})
486
+
487
+ return jsonify({"error": "No prompt or valid image provided"}), 400
488
 
489
  try:
490
  log(f"{'='*50}")
491
  log(f"🆕 New request from {uid} with {personality} personality ({gender})")
492
+ if click_x or click_y:
493
+ log(f"Click coords received: x={click_x}, y={click_y}, radius={click_radius}, marker={image_contains_marker}")
494
+ log(f"Prompt length: {len(prompt) if prompt else 0}, Image present: {bool(img_bytes)}")
495
+
496
  # Load user's data
497
  user_data = get_user_history(uid)
498
  history = user_data["history"]
499
  log(f"📖 Retrieved {len(history)} messages from history")
500
+
501
  # Check if we need to inject storyline (new day)
502
  storyline = ""
503
  if should_inject_storyline(uid, user_data):
504
  storyline = fetch_current_storyline()
505
  log(f"📖 Injecting storyline for new day")
506
+
507
+ # Build click context
508
+ click_ctx = None
509
+ if click_x or click_y:
510
+ try:
511
+ cx = float(click_x) if click_x is not None else None
512
+ cy = float(click_y) if click_y is not None else None
513
+ cr = float(click_radius) if click_radius is not None else None
514
+ click_ctx = {"x": cx, "y": cy, "radius": cr, "image_contains_marker": image_contains_marker}
515
+ except Exception:
516
+ click_ctx = {"x": click_x, "y": click_y, "radius": click_radius, "image_contains_marker": image_contains_marker}
517
+
518
# img_bytes is None when no usable image was uploaded (missing, unreadable,
# or flagged blank), so a single call covers both the image and the
# prompt-only case; click context is passed either way.
model_result = generate_from_gemini(
    prompt,
    img_bytes,
    history=history,
    personality=personality,
    storyline=storyline,
    gender=gender,
    click_ctx=click_ctx,
)

# Attempt to parse the model's returned text as JSON. The model is
# instructed to return a JSON object, so anything else (invalid JSON, or
# valid JSON that is a list/string/number) is treated as unparsed and only
# the raw text is returned to the client.
parsed_result = None
raw_text = model_result.get("text") if model_result else ""
try:
    candidate = json.loads(raw_text)
except Exception:
    log("Model response could not be parsed as JSON (returning raw text).")
else:
    if isinstance(candidate, dict):
        parsed_result = candidate
        log(f"Model returned JSON keys: {list(parsed_result.keys())}")
    else:
        # Keep parsed_result as None so later .get() calls cannot hit a
        # non-dict value and fail the whole request.
        log("Model response could not be parsed as JSON (returning raw text).")

# If the model explicitly requests a follow-up camera capture, bubble that
# to the client at the top level.
camera_capture_flag = parsed_result is not None and parsed_result.get("camera_capture") is True
if camera_capture_flag:
    log("Model requested camera_capture -> instructing client to capture again.")
540
+
541
+ # Update memory/history — store raw_text (so future context matches exactly what model returned)
542
+ update_user_history(uid, prompt, raw_text, personality, gender)
543
+
544
+ # Construct response
545
+ response_payload = {
546
+ "result": raw_text,
547
+ "timing": model_result.get("timing", {}),
548
+ "debug": {
549
+ "image_blank": image_blank,
550
+ "image_size": img_size,
551
+ "saved_path": img_debug_path,
552
+ "click_ctx": click_ctx
553
+ }
554
+ }
555
+
556
+ # If parsed_result available, include it as well for easier client handling
557
+ if parsed_result:
558
+ response_payload["parsed"] = parsed_result
559
+ # include camera_capture top-level for convenience
560
+ response_payload["camera_capture"] = parsed_result.get("camera_capture", False)
561
+
562
+ # If the upload was flagged blank (so its bytes were withheld from the model) and the reply did not parse as JSON, tell the client why
563
+ if image and image_blank and not parsed_result:
564
+ response_payload["debug"]["note"] = "Uploaded image was below size threshold and was not sent to the model."
565
+
566
  log(f"{'='*50}")
567
+ return jsonify(response_payload)
 
568
  except Exception as e:
569
  log(f"❌ Generation failed: {e}")
570
  logger.exception("Full traceback:")
 
579
  log(f"🐱 Available personalities: {', '.join(CAT_PERSONALITIES.keys())}")
580
  log(f"🎬 Available animations: {', '.join([anim for anims in ANIMATION_IDS.values() for anim in anims])}")
581
  port = int(os.getenv("PORT", 7860))
582
+ app.run(host="0.0.0.0", port=port)