File size: 35,105 Bytes
13435ce
5b8e1c7
 
 
 
 
f17b795
947d81e
5b8e1c7
 
0bb20c9
 
5b8e1c7
0bb20c9
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
f7cc4cf
 
 
 
 
52970f5
 
f7cc4cf
 
 
900b8d5
f7cc4cf
 
 
52970f5
900b8d5
f7cc4cf
 
 
 
 
 
 
 
 
 
 
900b8d5
f7cc4cf
 
 
 
 
900b8d5
52970f5
f5baf8e
 
900b8d5
 
 
 
 
 
 
 
 
52970f5
900b8d5
 
 
 
 
 
 
f7cc4cf
4ca7e80
 
900b8d5
 
 
 
52970f5
5b8e1c7
 
 
 
 
4dde0fb
5b8e1c7
f17b795
 
 
 
5b8e1c7
 
 
 
 
0bb20c9
 
 
 
 
 
5b8e1c7
 
0bb20c9
5b8e1c7
 
0bb20c9
 
 
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100254d
 
 
5b8e1c7
 
9c0eb85
 
5b8e1c7
63424e9
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f17b795
5b8e1c7
 
f17b795
5b8e1c7
 
 
f17b795
5b8e1c7
 
f17b795
5b8e1c7
 
 
f17b795
5b8e1c7
 
f17b795
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c397d8
099286f
 
 
96fa7c0
 
 
74ee77c
96fa7c0
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ea3799
 
 
 
 
 
 
5b8e1c7
3c397d8
5b8e1c7
3c397d8
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc4367d
 
 
 
 
 
5b8e1c7
3c397d8
 
 
5b8e1c7
 
 
 
 
 
 
 
 
13435ce
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13435ce
 
 
 
 
6171e65
13435ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d496c7
13435ce
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc4367d
 
 
 
 
 
 
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4dde0fb
 
5b8e1c7
4dde0fb
5b8e1c7
 
 
 
 
ac9a89a
5b8e1c7
 
4dde0fb
 
 
 
 
5b8e1c7
 
4dde0fb
5b8e1c7
 
 
 
 
 
 
 
 
4dde0fb
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
4dde0fb
5b8e1c7
 
 
 
 
 
 
 
 
 
 
4dde0fb
5b8e1c7
 
 
 
 
 
 
 
4dde0fb
5b8e1c7
 
 
 
 
 
 
 
 
 
 
3e8e1da
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
b355886
 
 
 
 
f46bdc4
b355886
900b8d5
b355886
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e7f737
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63424e9
 
 
 
 
 
 
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c397d8
5b8e1c7
4e7f737
 
 
 
 
 
 
 
 
 
 
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ea3799
5b8e1c7
 
 
 
 
 
 
 
 
 
3ea3799
5b8e1c7
3ea3799
 
 
 
 
5b8e1c7
 
 
3ea3799
5b8e1c7
 
b355886
 
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63424e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7f6d8b
 
 
 
fc39adf
 
63424e9
5b8e1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667b209
5b8e1c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
from google.api_core import exceptions
from flask import Flask, request, render_template, redirect, url_for, session, make_response, render_template_string
from flask_socketio import SocketIO, join_room, leave_room, send
from pymongo import MongoClient
from datetime import datetime, timedelta
import random
from pathlib import Path
import numpy as np
import time
import math
import os
import json
import google.auth
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession
from vertexai.tuning import sft
from vertexai.generative_models import GenerativeModel
import re
import concurrent.futures
from google import genai
from google.genai.types import GenerateContentConfig, HttpOptions
from text_corruption import corrupt
from humanizing import humanize
from quote_removal import remove_quotes
from weird_char_removal import remove_weird_characters
from duplicate_detection import duplicate_check

# Database backup code
from huggingface_hub import upload_folder
from huggingface_hub import HfApi
from huggingface_hub import login
from datetime import datetime


class datasetHandler():
    """Back up a MongoDB database to a Hugging Face dataset repository.

    A backup is a ``mongodump`` of one database, written to a local staging
    directory and uploaded as a folder named ``mongoDump_<timestamp>``.
    The timestamp is fixed when the handler is constructed.

    Args:
        hf_token: Hugging Face access token used for ``login`` and ``HfApi``.
        mongoDB_name: Name of the MongoDB database to dump.
        max_dumps: Number of dump folders to keep before ``clean`` starts
            deleting the oldest one.
    """

    # Hub repository holding the dumps and the local staging directory.
    REPO_ID = "ProjectFrozone/MongoDBDumps"
    REPO_TYPE = "dataset"
    DUMP_DIR = "/tmp/mongoDBContents"
    DUMP_PREFIX = "mongoDump_"

    def __init__(self, hf_token, mongoDB_name, max_dumps=10):
        login(hf_token)
        self.api = HfApi(token=hf_token)
        self.DB_name = mongoDB_name
        self.timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.max_dumps = max_dumps

    def make_dump(self):
        """Run ``mongodump`` for the configured database.

        Returns:
            0 on success, 1 if the tool is missing or exits with an error.
        """
        import subprocess

        try:
            # An argument list (no shell) keeps the database name from being
            # interpreted by a shell; check=True surfaces a non-zero exit
            # status, which os.system() silently ignored.
            subprocess.run(
                ["mongodump", "--db", str(self.DB_name), "--out", self.DUMP_DIR],
                check=True,
            )
            return 0
        except (OSError, subprocess.SubprocessError) as e:
            print(e)
            return 1

    def upload_dump(self):
        """Upload the staging directory as ``mongoDump_<timestamp>``.

        Returns:
            0 on success, 1 if the upload raised.
        """
        try:
            upload_folder(
                folder_path=self.DUMP_DIR,
                path_in_repo=f"{self.DUMP_PREFIX}{self.timestamp}",
                repo_id=self.REPO_ID,
                repo_type=self.REPO_TYPE,
            )
            return 0
        except Exception as e:
            print(e)
            return 1

    def list_dumps(self):
        """List the dump folders currently stored in the repository.

        Returns:
            A ``(dirs, dates)`` tuple of parallel lists sorted oldest first:
            the unique top-level ``mongoDump_*`` folder names and the
            timestamp part of each name.
        """
        all_files = self.api.list_repo_files(
            repo_id=self.REPO_ID, repo_type=self.REPO_TYPE
        )
        # One entry per dump folder rather than one per file, so the result
        # can be compared against max_dumps. Root-level files are skipped.
        dirs = sorted({
            path.split("/", 1)[0]
            for path in all_files
            if "/" in path and path.startswith(self.DUMP_PREFIX)
        })
        dates = [name[len(self.DUMP_PREFIX):] for name in dirs]
        return (dirs, dates)

    def delete_dump(self, dump_name):
        """Delete one dump folder from the repository."""
        self.api.delete_folder(
            repo_id=self.REPO_ID,
            path_in_repo=f"{dump_name}",
            repo_type=self.REPO_TYPE,
            commit_message=f"Deleted {dump_name}",
        )

    def cleanup_dataset(self, dirs, dates):
        """Delete the oldest dump when more than ``max_dumps`` exist.

        At most one folder is removed per call.

        Returns:
            A human-readable status string.
        """
        if len(dates) > self.max_dumps:
            # The timestamp format sorts chronologically as text, so the
            # smallest folder name is the oldest dump.
            to_remove = min(dirs)
            self.delete_dump(to_remove)
            return f"Deleted {to_remove}"
        return "Nothing to delete"

    def dump_db(self):
        """Dump the database and upload it; skip the upload if the dump failed."""
        if self.make_dump() != 0:
            # Uploading now would re-publish whatever stale files are left
            # in the staging directory from an earlier run.
            print("mongodump failed - skipping upload")
            return
        self.upload_dump()

    def clean(self):
        """Apply the retention limit and print the outcome."""
        dirs, dates = self.list_dumps()
        print(self.cleanup_dataset(dirs, dates))
# End database backup code

# Tunable controls for bot behaviour.
# Number of most recent chat messages appended to a bot's inference prompt.
CHAT_CONTEXT = 20 #how many messages from chat history to append to inference prompt
# Duplicate detection only runs on responses longer than this many characters,
#minimum number of chars where we start checking for duplicate messages
DUP_LEN = 25 #since short messages may reasonably be the same
# Probability that trailing punctuation is stripped from a bot response.
REMOVE_PUNC_RATE = .8 #how often to remove final punctuation

# Directory alignment
# BASE_DIR is the directory containing this file; PROJECT_ROOT is its parent,
# which is where the data/ prompt files are loaded from.
BASE_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = BASE_DIR.parent

# Flask application and its Socket.IO wrapper.
app = Flask(__name__)
# The secret key signs the session cookie, so it should not live in source
# control. Set FLASK_SECRET_KEY in the deployment environment; the previous
# hard-coded value is kept only as a fallback so existing setups keep working.
app.config["SECRET_KEY"] = os.environ.get("FLASK_SECRET_KEY", "supersecretkey")
socketio = SocketIO(app)

# Setup for Vertex API calls
# The service-account key is supplied as a JSON string in the
# GOOGLE_SERVICE_ACCOUNT_JSON environment variable (a missing variable raises
# KeyError at import time).
serviceAccount_json = os.environ["GOOGLE_SERVICE_ACCOUNT_JSON"]
serviceAccount_info = json.loads(serviceAccount_json)

# Credentials scoped for Google Cloud Platform access.
credentials = service_account.Credentials.from_service_account_info(
    serviceAccount_info,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
# NOTE(review): no use of google_session is visible in this part of the file -
# confirm whether it is still needed.
google_session = AuthorizedSession(credentials)

# google-genai client in Vertex AI mode; the project is required, the location
# defaults to us-central1 when VERTEX_LOCATION is unset.
vertex_client = genai.Client(
        vertexai=True,
        project=os.environ["GOOGLE_CLOUD_PROJECT"],
        location=os.environ.get("VERTEX_LOCATION", "us-central1"),
        credentials=credentials,
)
# The string literal below is never assigned or executed: it only preserves the
# earlier tuning-job based bot setup for reference.
"""
#original lines before separating system instructions and prompts
# Initialize the bots
pirate_tuning_job_name = f"projects/frozone-475719/locations/us-central1/tuningJobs/3296615187565510656"
tuning_job_frobot = f"projects/frozone-475719/locations/us-central1/tuningJobs/1280259296294076416"
tuning_job_hotbot = f"projects/frozone-475719/locations/us-central1/tuningJobs/4961166390611410944"
tuning_job_coolbot = f"projects/frozone-475719/locations/us-central1/tuningJobs/4112237860852072448"

hottj = sft.SupervisedTuningJob(tuning_job_hotbot)
cooltj = sft.SupervisedTuningJob(tuning_job_coolbot)
frotj = sft.SupervisedTuningJob(tuning_job_frobot)
# Create the bot models

hotbot = GenerativeModel(hottj.tuned_model_endpoint_name)
coolbot = GenerativeModel(cooltj.tuned_model_endpoint_name)
frobot = GenerativeModel(frotj.tuned_model_endpoint_name)
"""
#change to endpoints
# Each bot is identified by an endpoint resource name; these strings are passed
# as the `model` argument when a response is generated.
hotbot = "projects/700531062565/locations/us-central1/endpoints/6225523347153747968"
coolbot = "projects/700531062565/locations/us-central1/endpoints/1700531621553242112"
frobot = "projects/700531062565/locations/us-central1/endpoints/2951406418055397376"

# MongoDB setup
# Local MongoDB instance; `rooms` holds one document per chat room and
# `feedback` one document per room for feedback.
client = MongoClient("mongodb://127.0.0.1:27017/")
db = client["huggingFaceData"]
rooms_collection = db.rooms
feedback_collection = db.feedback

# List of fruits to choose display names from
FRUIT_NAMES = ["blueberry", "strawberry", "orange", "cherry"]
# Display name -> single-letter alias used in the text sent to the bots.
# "watermelon" is the sender of the initial post in every room.
aliases = {"watermelon":"W", "apple":"L", "banana":"B", "blueberry":"C", "strawberry":"D", "orange":"E", "grape":"G", "cherry":"H"}
# Letter alias -> display name, used to turn bot output back into names.
reverse_aliases = { value:key for key,value in aliases.items() }
# List of discussion topics
# Each entry has:
#   "title" - the identifier stored on the room and used to look the topic up,
#   "text"  - the topic description that is prepended to the bot prompts,
#   "post"  - the initial message posted in the room by "watermelon".
TOPICS_LIST = [
    {
        "title": "Abortion",
        "text": "Since the Supreme Court overturned Roe vs. Wade in 2022, there has been an increase in patients crossing state lines to receive abortions in less restrictive states. Pro-choice advocates argue that these restrictions exacerbate unequal access to healthcare due to financial strain and other factors and believe that a patient should be able to make personal medical decisions about their own body and future. Pro-life advocates argue that abortion legislation should be left to the states and believe that abortion is amoral and tantamount to murder. Both sides disagree on how to handle cases of rape, incest, terminal medical conditions, and risks to the mother’s life and health. What stance do you take on abortion and why?",
        "post": "Idk its hard bc both sides have good points. People should be able to make their own decisions about their own body but theres also moral stuff to think about too you know"
    },
    {
        "title": "Gun Rights/Control",
        "text": "Gun rights advocates argue that the right to bear arms is a protected second amendment right necessary for self-defense. Meanwhile, gun control advocates argue that stricter regulations are necessary to reduce gun violence. Potential reforms include stricter background checks, banning assault weapons, enacting red flag laws, and increasing the minimum age to purchase a gun. What stance do you take on gun rights vs. gun control and why?",
        "post": "i think people should be able to own guns but there has to be some check like background stuff so crazy people dont get them"
    },
    {
        "title": "Education and Trans Students",
        "text": "Laws and policies affecting trans people are highly contested, especially those involving education. Several states have passed laws restricting the use of preferred pronouns and names in schools, limiting transgender athletes' ability to participate in sports, and banning books containing LGBTQ+ content from school libraries. How do you think decisions on school policies regarding trans students should be made and why?",
        "post": "I dont think its that big a deal to use different pronouns but also trans athletes should be playing with the gender they were born as. I know thats an unpopular opinion but its the only way its fair."
    },
    {
        "title": "Immigration and ICE Activity",
        "text": "The current year has seen an increase in ICE (U.S. Immigration and Customs Enforcement) activity, including raids at workplaces, courthouses, schools, churches, and hospitals. Some argue that ICE is going too far and is violating the Constitutional due process rights of both immigrants and citizens. Others argue that these actions are necessary to maintain national security and enforce immigration law. What stance do you take on recent ICE activity and why?",
        "post": "I think ice is doing their job they're literally immigration enforcement. It sucks but if you come here illegally youre going to face the consequence."
    },
    {
        "title": "Universal Healthcare",
        "text": "Some argue that universal healthcare is necessary to ensure everyone has access to lifesaving medical treatments and a minimum standard of living, regardless of income or employment. Others argue that the choice of how to access healthcare is a private responsibility and that it is more efficient for the government to limit intervention. What stance do you take on government involvement in providing healthcare and why?",
        "post": "I think people should handle their own healthcare. the government is slow plus competition means more innovation. i dont trust the idea of one size fits all"
    }
] 

# Prompt and instruction texts are loaded once at import time. Each bot has a
# main prompt (under data/prompts) and a system instruction (under
# data/inference_instructions). The files are read as UTF-8 explicitly so the
# result does not depend on the platform's default encoding.

# FroBot Main Prompt
with open(PROJECT_ROOT / "data" / "prompts" / "frobot_prompt_main.txt", encoding="utf-8") as f:
    FROBOT_PROMPT = f.read()
# Instructions
with open(PROJECT_ROOT / "data" / "inference_instructions" / "frobot_instructions_main.txt", encoding="utf-8") as f:
    FROBOT_INSTRUCT = f.read()

# HotBot Prompt
with open(PROJECT_ROOT / "data" / "prompts" / "hotbot_prompt_main.txt", encoding="utf-8") as h:
    HOTBOT_PROMPT = h.read()
# Instructions
with open(PROJECT_ROOT / "data" / "inference_instructions" / "hotbot_instructions_main.txt", encoding="utf-8") as h:
    HOTBOT_INSTRUCT = h.read()

# CoolBot Prompt
with open(PROJECT_ROOT / "data" / "prompts" / "coolbot_prompt_main.txt", encoding="utf-8") as c:
    COOLBOT_PROMPT = c.read()
# Instructions
with open(PROJECT_ROOT / "data" / "inference_instructions" / "coolbot_instructions_main.txt", encoding="utf-8") as c:
    COOLBOT_INSTRUCT = c.read()

# Pick display names for the participants of a room
def choose_names(n):
    """Return ``n`` distinct fruit names sampled at random from FRUIT_NAMES."""
    picked = random.sample(FRUIT_NAMES, k=n)
    return picked

# Put the topic header at the front of a prompt, replacing any earlier header
def _with_topic_header(prompt, topic_info):
    """Return ``prompt`` prefixed with exactly one topic header.

    Headers left by earlier calls are stripped first, so calling this for
    every new room does not make the prompt grow or carry a previous topic.
    """
    header = (
        f"The topic of this chat is: {topic_info['title']}\n"
        f"The description of this topic is: {topic_info['text']}\n"
    )
    base_prompt = re.sub(
        r"\A(?:The topic of this chat is: [^\n]*\n"
        r"The description of this topic is: [^\n]*\n)+",
        "",
        prompt,
    )
    return header + base_prompt

# Send initial watermelon post
def send_initial_post(room_id, delay):
    """Post the topic's opening message as "watermelon" and start the bots.

    Waits ``delay`` seconds, looks up the room's topic, stores the topic's
    initial post in the room's message history, emits it to the room and
    starts a background round of bot calls. Does nothing if the room or
    its topic cannot be found.

    Args:
        room_id: ``_id`` of the room document.
        delay: Seconds to wait before posting.
    """
    global FROBOT_PROMPT
    global COOLBOT_PROMPT
    global HOTBOT_PROMPT

    time.sleep(delay)
    # Get the initial post for this topic
    room_doc = rooms_collection.find_one({"_id": room_id})
    if not room_doc:
        return
    topic_title = room_doc["topic"]
    topic_info = next((t for t in TOPICS_LIST if t["title"] == topic_title), None)
    if not topic_info:
        return
    initialPost = topic_info["post"]
    # Add the topic text to the bots' prompts.
    # NOTE(review): the prompts are module-level globals read by every bot
    # round, so rooms running at the same time with different topics share
    # whichever topic was set last. Fixing that needs per-room prompts.
    FROBOT_PROMPT = _with_topic_header(FROBOT_PROMPT, topic_info)
    COOLBOT_PROMPT = _with_topic_header(COOLBOT_PROMPT, topic_info)
    HOTBOT_PROMPT = _with_topic_header(HOTBOT_PROMPT, topic_info)
    print(FROBOT_PROMPT)
    print(HOTBOT_PROMPT)
    print(COOLBOT_PROMPT)
    # Store the initial post in the database
    db_msg = {
        "sender": "watermelon",
        "message": initialPost,
        "timestamp": datetime.utcnow()
    }
    rooms_collection.update_one(
        {"_id": room_id},
        {"$push": {"messages": db_msg}}
    )
    # Send to the client (must use emit when in background thread)
    socketio.emit("message", {"sender": "watermelon", "message": initialPost}, to=room_id)

    #send to the bots
    socketio.start_background_task(ask_bot_round, room_id)

# Announce that a participant has entered the room
def send_bot_joined(room_id, bot_name, delay):
    """Wait ``delay`` seconds, then emit a sender-less join notice to the room."""
    time.sleep(delay)
    announcement = {"sender": "", "message": f"{bot_name} has entered the chat"}
    socketio.emit("message", announcement, to=room_id)

# Tell the room who is taking part in the chat
def send_bot_names_message(room_id, bot_names):
    """Emit a sender-less roster message; nothing is sent for an empty list."""
    if len(bot_names) > 0:
        roster = ', '.join(bot_names)
        notice = {"sender": "", "message": f"This chat currently contains {roster}, and watermelon."}
        socketio.emit("message", notice, to=room_id)

# Re-prompt the bots whenever the room has been quiet for too long
def user_inactivity_tracker(room_id, timeout_seconds=120,randomNorm = (0,15)):
    """Poll a room every 5 seconds and start a bot round after inactivity.

    The timeout is jittered once, at start-up, by a normal draw with mean
    ``randomNorm[0]`` and standard deviation ``randomNorm[1]``. The loop
    returns when the room no longer exists or is flagged as ended.
    """
    print(f"Started user inactivity tracker for Room ID#{room_id}")
    jitter_mean = randomNorm[0]
    jitter_std = randomNorm[1]
    timeout_seconds += np.random.normal(jitter_mean, jitter_std)
    room_filter = {"_id": room_id}
    while True:
        current_room = rooms_collection.find_one(room_filter)
        if not current_room or current_room.get("ended", False):
            # The chat is over (or the room is gone): stop tracking.
            print(f"User inactivity tracker stopping for Room ID#{room_id}")
            return
        last_seen = current_room.get("last_activity")
        if last_seen and datetime.utcnow() - last_seen > timedelta(seconds=timeout_seconds):
            print(f"User has been inactive in Room ID#{room_id} - triggering new round of bot calls.")
            socketio.start_background_task(ask_bot_round, room_id)
            # Reset the clock so the same quiet period does not trigger
            # another round on the next poll.
            rooms_collection.update_one(
                room_filter,
                {"$set": {"last_activity": datetime.utcnow()}}
            )
        time.sleep(5) # poll interval

def let_to_name(room_id, text):
    """Replace stand-alone letter aliases in ``text`` with display names.

    Only letters that alias a name in FRUIT_NAMES or "watermelon" are
    replaced. ``room_id`` is accepted but not used.
    """
    result = str(text)
    known_letters = {aliases[fruit] for fruit in (FRUIT_NAMES + ["watermelon"])}
    # Single capital letters that appear as whole words in the text.
    standalone = set(re.findall(r"\b[A-Z]\b", result))
    for letter in standalone & known_letters:
        result = re.sub(r"\b" + letter + r"\b", reverse_aliases[letter], result)
    return result

def name_to_let(room_id, text):
    """Replace display names in ``text`` with their letter aliases.

    Every whole-word occurrence of a name in FRUIT_NAMES or "watermelon"
    is replaced, ignoring case. ``room_id`` is accepted but not used.

    The previous version only substituted a name when its lowercase form
    also occurred somewhere in the text, so a message containing only
    "Blueberry" kept the name even though the substitution itself is
    case-insensitive.
    """
    result = str(text)
    for name in FRUIT_NAMES + ["watermelon"]:
        result = re.sub(r"\b" + name + r"\b", aliases[name], result, flags=re.I)
    return result

def replace_semicolons(text, probability=0.80):
    """Return ``text`` with each ';' turned into ',' with the given probability.

    One random draw is made per semicolon; other characters are copied
    unchanged.
    """
    return ''.join(
        ',' if (ch == ';' and random.random() <= probability) else ch
        for ch in text
    )

def get_last_paragraph(text):
    """Return the stripped text after the last newline of the stripped input.

    Input without a newline is returned stripped.
    """
    trimmed = text.strip()
    # rpartition yields ("", "", trimmed) when there is no newline.
    _, _, tail = trimmed.rpartition("\n")
    return tail.strip()

def get_response_delay(response):
    """Return how many seconds to wait before sending ``response``.

    The delay is a 5 second base, plus 0.12 seconds per character, plus a
    random component, capped at 150 seconds. The random component is the
    maximum of one uniform draw on [0, 30) per four characters, so longer
    responses tend to get a larger value.

    At least one draw is always made: the previous version took ``max()``
    of an empty array and raised ValueError for responses shorter than
    four characters.
    """
    base_delay = 5 # standard delay for thinking
    per_character_delay = 0.12
    max_delay = 150 # cap of 2.5 minutes so the bots don't take too long

    draws = max(1, len(response) // 4)
    jitter = float(np.random.uniform(0, 30, draws).max())

    total_delay = base_delay + per_character_delay * len(response) + jitter
    return min(total_delay, max_delay)

# Append a bot message to a room's history unless the chat is over
def _store_bot_message(room_id, bot_display_name, message):
    """Store ``message`` for the bot; return False if the chat has ended.

    The room is re-read first so nothing is written to a room that has
    disappeared or been flagged as ended.
    """
    room_doc = rooms_collection.find_one({"_id": room_id})
    if not room_doc or room_doc.get("ended", False):
        return False
    bot_message = {
        "sender": bot_display_name,
        "message": message,
        "timestamp": datetime.utcnow()
    }
    rooms_collection.update_one(
        {"_id": room_id},
        {"$push": {"messages": bot_message}}
    )
    return True

# Ask a bot for its response, store in DB, and send to client
def ask_bot(room_id, bot, bot_display_name, initial_prompt, instruct_prompt , wait_time = 1):
    """Query one bot, post-process its answer, store it and emit it.

    Args:
        room_id: ``_id`` of the room document.
        bot: Model/endpoint name passed to the Vertex client, or None.
        bot_display_name: Fruit name the bot speaks as in this room.
        initial_prompt: Prompt text; "<RE>" is replaced by the bot's letter.
        instruct_prompt: System instruction sent with the request.
        wait_time: Seconds to sleep before retrying after a 429; doubled
            on every retry.

    Returns:
        True if the bot's turn counts as a pass (an explicit "(pass)", an
        empty reply, an error, or a detected duplicate) and was recorded.
        False if a response was sent, if the chat has ended, or if ``bot``
        is None.
    """
    error_text = "ERROR in bot response - treated as a (pass)"

    # Prevents crashing if bot model did not load
    if bot is None:
        return False
    # Get the full chat room history
    room_doc = rooms_collection.find_one({"_id": room_id})
    # Do not proceed if the chat has ended
    if not room_doc or room_doc.get("ended", False):
        return False
    history = room_doc["messages"]
    bot_letter = aliases[bot_display_name]

    # Build the LLM prompt from the most recent messages
    prompt = re.sub(r"<RE>", bot_letter, initial_prompt)
    context = [] # raw message texts, used for the duplicate check
    for message in history[-CHAT_CONTEXT:]:
        prompt += f"{aliases[message['sender']]}: {message['message']}\n"
        context.append(message['message'])

    prompt = name_to_let(room_id, prompt) #sub fruit names to letters to give to bots

    print("\n")
    print("=================================prompt")
    print(prompt)

    # Get the bot's response
    try:
        response = vertex_client.models.generate_content(
                model = bot,
                contents = prompt,
                config=GenerateContentConfig(
                    system_instruction = [instruct_prompt]
                ),
            )
        parsed_response = response.candidates[0].content.parts[0].text.strip()

    # Deal with rate limit issues
    except exceptions.TooManyRequests:
        print("429 Rate Limit Exceeded")
        socketio.sleep(wait_time)
        wait_time *= 2

        # Stop retrying once the backoff gets too long (also bounds recursion)
        if wait_time > 32:
            print("Rate Limit Exceeded and Exponential Backoff Too Long")
            print("Treating this bot's response as a pass.")
            return _store_bot_message(room_id, bot_display_name, error_text)

        return ask_bot(room_id, bot, bot_display_name, initial_prompt, instruct_prompt , wait_time = wait_time)

    except Exception as e:
        print("Error in bot response: ", e)
        print("Treating this bot's response as a pass.")
        return _store_bot_message(room_id, bot_display_name, error_text)

    #remove bot formatting like <i></i> <b></b> that will render on the page
    parsed_response = re.sub(r"<([a-zA-Z]+)>(?=.*</\1>)", "", parsed_response)
    parsed_response = re.sub(r"</([a-zA-Z]+)>", "", parsed_response)
    #fix any escaped \\n --> \n so they are actual newlines
    parsed_response = re.sub(r"\\n", "\n", parsed_response).strip()
    #remove bot heading ("C: ...")
    parsed_response = re.sub(r"\b" + bot_letter + r"\b:\s?", '', parsed_response)

    # Only keep the last paragraph of frobot responses
    if bot == frobot:
        print("=========== OG FROBOT RESPONSE")
        print(parsed_response)
        parsed_response = get_last_paragraph(parsed_response)
        print("=========== LAST PARAGRAPH")
        print(parsed_response)

    # Check for if the bot passed (i.e. response = "(pass)")
    if ("(pass)" in parsed_response) or (parsed_response == ""):
        # A pass is recorded in the database, but not sent to the client
        if not _store_bot_message(room_id, bot_display_name, parsed_response):
            return False
        print("PASSED")
        return True

    #sub letters for names, so if the bot addressed A -> Apple
    named_response = let_to_name(room_id, parsed_response)
    #remove encapsulating quotes
    no_quotes = remove_quotes(named_response)
    #humanize the response (remove obvious AI formatting styles)
    humanized_response = humanize(no_quotes)
    #replace most semicolons
    less_semicolons_response = replace_semicolons(humanized_response)
    #corrupt the response (add some typos and misspellings)
    corrupted_response = corrupt(less_semicolons_response, misspell_aug_p=0.01, typo_aug_p=0.005)
    #remove weird chars
    final_response = remove_weird_characters(corrupted_response)
    #remove trailing punctuation % of the time
    if random.random() < REMOVE_PUNC_RATE:
        final_response = re.sub(r'[^\w\s]+$', '', final_response)

    #check that there are no recent duplicate messages
    if len(final_response) > DUP_LEN and duplicate_check(final_response, context):
        print("****DUPLICATE MESSAGE DETECTED")
        print("Treating this bot's response as a pass.")
        # A duplicate is a pass, so report it as one (True) once recorded.
        # Returning False here made the caller believe a bot had responded
        # and stopped it from re-prompting.
        return _store_bot_message(
            room_id,
            bot_display_name,
            f"DUPLICATE message detected - treated as a (pass) : {final_response}",
        )

    print("\n")
    print("=================================response")
    print(final_response)

    # Add latency/wait time for bot responses
    delay = get_response_delay(final_response)
    print(delay)
    time.sleep(delay)

    # Store the response (fruit names, so a page reload shows proper names);
    # nothing is stored or sent if the chat ended while waiting.
    if not _store_bot_message(room_id, bot_display_name, final_response):
        return False

    # Send the bot's response to the client
    socketio.emit("message", {"sender": bot_display_name, "message": final_response}, to=room_id)
    return False

def ask_bot_round(room_id):
    """Prompt all three bots in parallel until at least one of them responds.

    Each pass of the loop re-reads the room and stops if it is missing or
    ended. If every bot reports a pass, the round is repeated after a
    2 second pause.
    """
    while True:
        room_doc = rooms_collection.find_one({"_id": room_id})
        if not room_doc or room_doc.get("ended", False):
            return

        # (endpoint, room key holding the display name, prompt, instructions)
        lineup = (
            (frobot, "FroBot_name", FROBOT_PROMPT, FROBOT_INSTRUCT),
            (hotbot, "HotBot_name", HOTBOT_PROMPT, HOTBOT_INSTRUCT),
            (coolbot, "CoolBot_name", COOLBOT_PROMPT, COOLBOT_INSTRUCT),
        )
        with concurrent.futures.ThreadPoolExecutor() as pool:
            futures = [
                pool.submit(ask_bot, room_id, endpoint, room_doc[name_key], prompt_text, instructions)
                for endpoint, name_key, prompt_text, instructions in lineup
            ]
        # Leaving the with-block waits for all three calls to finish.
        results = [job.result() for job in futures]

        print("Raw pass check results: ", results)
        everyone_passed = all(results)
        if not everyone_passed:
            print("At least one bot responded. Not re-prompting.\n")
            return

        print("All bots passed. Re-prompting for responses.\n")
        time.sleep(2)  # pause between rounds to avoid spamming the bots
 
#background task which periodically backs up the database to the dataset repo
def backup_mongo(time):
    """Dump, upload and prune database backups forever.

    Args:
        time: Seconds to sleep between backup cycles. The name shadows the
            ``time`` module inside this function; it is kept so existing
            callers are unaffected.
    """
    while True:
        try:
            token = os.getenv("HF_TOKEN")
            handler = datasetHandler(token, 'huggingFaceData')
            handler.dump_db()
            handler.clean()
        except Exception as e:
            # One failed cycle (network error, bad token, ...) must not end
            # the loop, otherwise backups stop until the server restarts.
            print(f"Database backup failed: {e}")
        socketio.sleep(time)

# Build the routes
#disabled landing
#@app.route('/', methods=["GET"])
def landing():
    """Render the landing page (currently not registered as a route)."""
    page = render_template('landing.html')
    return page
#disabled waiting
#@app.route('/wait', methods=["GET"])
def waiting():
    """Render the waiting page (currently not registered as a route)."""
    page = render_template('waiting.html')
    return page
#changed /chat -> /
@app.route('/', methods=["GET", "POST"])
def home():
    """Show the start page and record the participant's Prolific ID.

    GET renders the page, pre-filling the ID from the ``PROLIFIC_PID``
    query parameter or the session. POST stores the submitted ID in the
    session and redirects to the topic list; a missing or blank ID
    re-renders the page with an error.
    """
    from urllib.parse import quote

    # ID passed along from Qualtrics, falling back to the one in the session
    prolific_pid = request.args.get("PROLIFIC_PID") or session.get('user_id') or ''
    # The ID comes straight from the request, so it is percent-encoded
    # before being placed in the survey URL.
    link = (
        "https://umw.qualtrics.com/jfe/form/SV_08v26NssCOwZTP8"
        f"?PROLIFIC_PID={quote(str(prolific_pid), safe='')}"
    )

    if request.method == "POST":
        user_id = (request.form.get('name') or '').strip()
        if not user_id:
            # Pass the survey link here too, so the page renders the same
            # way as on a normal GET.
            return render_template(
                'home.html',
                error="Prolific ID is required",
                prolific_pid=prolific_pid,
                feedback_form_url=link,
            )
        session['user_id'] = user_id
        return redirect(url_for('topics'))

    return render_template('home.html', prolific_pid=prolific_pid, feedback_form_url=link)

@app.route('/topics', methods=["GET", "POST"])
def topics():
    """Show the topic list, or send a returning user back to their room.

    Users without an ID in the session are redirected to the start page.
    """
    user_id = session.get('user_id')
    if not user_id:
        return redirect(url_for('home'))

    existing_room = db.rooms.find_one({"user_id":user_id})
    if not existing_room:
        # Mark the page as non-cacheable so the browser does not store it
        page = make_response( render_template('topics.html', topics=TOPICS_LIST) )
        page.headers['Cache-Control'] = 'no-store'
        return page

    # The user already has a room: restore the session values room() needs
    session['room'] = existing_room['_id']
    session['display_name'] = existing_room['user_name']
    return redirect(url_for('room'))

@app.route('/choose', methods=["POST"])
def choose():
    """Create a chat room for the chosen topic and send the user to it.

    Redirects to ``home`` when no ``user_id`` is in the session and to
    ``topics`` when the submitted topic is missing or not in ``TOPICS_LIST``.
    If the user already owns a room, no new room is created; the session is
    pointed at the existing one instead. Otherwise a new room id is taken
    from the ``room_id`` counter, display names are picked, and matching
    room and feedback documents are inserted.
    """
    user_id = session.get('user_id')
    if not user_id:
        return redirect(url_for('home'))

    # Guard against duplicate rooms: a re-submitted form (double click,
    # back button) must not create a second room for the same user, since
    # topics() and post_survey() look rooms up by user_id alone.
    existing_room = rooms_collection.find_one({"user_id": user_id})
    if existing_room:
        session['room'] = existing_room['_id']
        session['display_name'] = existing_room['user_name']
        return redirect(url_for('room'))

    topic = request.form.get('topic')
    if not topic:
        return redirect(url_for('topics'))
    topic_info = next((t for t in TOPICS_LIST if t["title"] == topic), None)
    if topic_info is None:
        return redirect(url_for('topics'))

    # Get next room id (and add one)
    counter = db.counters.find_one_and_update(
        {"_id": "room_id"},
        {"$inc": {"seq": 1}},  # increment seq by 1
        upsert=True,  # create if missing
        return_document=True,  # return the document after the increment
    )
    room_id = counter["seq"]

    # Pick fruit display names: one for the user, one per bot.
    fruit_names = choose_names(4)
    user_name = fruit_names[0]
    frobot_name = fruit_names[1]
    hotbot_name = fruit_names[2]
    coolbot_name = fruit_names[3]

    # One timestamp so the room and feedback documents agree exactly.
    now = datetime.utcnow()

    # Create the new room in the database
    rooms_collection.insert_one({
        "_id": room_id,
        "topic": topic_info['title'],
        # creation date/time
        "created_at": now,
        # user identity
        "user_id": user_id,
        "user_name": user_name,
        # bot names
        "FroBot_name": frobot_name,
        "HotBot_name": hotbot_name,
        "CoolBot_name": coolbot_name,
        # flags needed for handling refreshes
        "initialPostsSent": False,
        "inactivity_tracker_started": False,
        # empty message history
        "messages": [],
        # last time user sent a message
        "last_activity": now,
        # flag for if the user aborts
        "aborted": False,
        # flag for if the chat has ended
        "ended": False,
        "ended_at": None,
    })
    # Create the new feedback in the database
    feedback_collection.insert_one({
        "_id": room_id,
        # creation date/time
        "created_at": now,
        # user identity
        "user_id": user_id,
    })

    session['room'] = room_id
    session['display_name'] = user_name
    return redirect(url_for('room'))

@app.route('/room')
def room():
    """Render the chat room stored in the session.

    Redirects to ``home`` when the session has no room/display name or the
    room document cannot be found, and to ``topics`` when the room's topic is
    not in ``TOPICS_LIST``. Otherwise renders ``room.html`` with the room's
    visible message history, the bot names and the ``ended`` flag. When a
    ``user_id`` is in the session, a feedback-form URL carrying it is passed
    to the template as well.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    room_id = session.get('room')
    display_name = session.get('display_name')
    if not room_id or not display_name:
        return redirect(url_for('home'))

    room_doc = rooms_collection.find_one({"_id": room_id})
    if not room_doc:
        return redirect(url_for('home'))

    topic_info = next(
        (t for t in TOPICS_LIST if t["title"] == room_doc["topic"]), None
    )
    if topic_info is None:
        return redirect(url_for('topics'))

    # Hide "pass" messages from the rendered history.
    # NOTE(review): this is a plain substring match, so any message that
    # merely contains "pass" (e.g. "password") is hidden too - confirm that
    # this is intended.
    nonpass_messages = [
        m for m in room_doc["messages"]
        if "pass" not in (m.get("message") or "")
    ]

    # Build the template arguments once so both variants stay in sync.
    template_args = {
        "room": room_id,
        "topic_info": topic_info,
        "user": display_name,
        "messages": nonpass_messages,
        "FroBot_name": room_doc["FroBot_name"],
        "HotBot_name": room_doc["HotBot_name"],
        "CoolBot_name": room_doc["CoolBot_name"],
        "ended": room_doc["ended"],
    }

    user_id = session.get('user_id')
    if user_id:
        # The ID was typed in by the user: percent-encode it so it cannot
        # break or extend the query string.
        template_args["feedback_form_url"] = (
            "https://umw.qualtrics.com/jfe/form/SV_08v26NssCOwZTP8?PROLIFIC_PID="
            + quote(str(user_id), safe='')
        )

    return render_template("room.html", **template_args)
  
@app.route("/abort", methods=["POST"])
def abort_room():
    """Flag the session's room as aborted.

    Returns:
        ``("OK", 200)`` after setting ``aborted`` to True on the room, or
        ``("Error: No room in session.", 400)`` when the session has no room.
    """
    room_id = session.get("room")
    if room_id:
        aborted_flag = {"$set": {"aborted": True}}
        rooms_collection.update_one({"_id": room_id}, aborted_flag)
        return ("OK", 200)
    return ("Error: No room in session.", 400)

@app.route("/post_survey", methods=["POST", "GET"])
def post_survey():
    """Mark the user's chat as ended and redirect to the post-chat survey.

    Re-renders ``home.html`` with an error when the session has no
    ``user_id`` or no room exists for that user. Otherwise sets ``ended`` and
    ``ended_at`` on the room and redirects to the Qualtrics survey, passing
    the Prolific ID and the three bot display names as query parameters.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote, urlencode

    user_id = session.get('user_id')
    if not user_id:
        return render_template('home.html', error="Enter your Prolific ID.")

    info = db.rooms.find_one(
        {"user_id": user_id},
        {'FroBot_name': 1, 'HotBot_name': 1, 'CoolBot_name': 1},
    )
    if not info:
        return render_template('home.html', error="Enter your ID.")

    # Store in the DB that this chat has been ended. Target the exact
    # document that was just read so the ended room is always the one whose
    # bot names are sent to the survey.
    db.rooms.update_one(
        {"_id": info["_id"]},
        {"$set": {"ended": True, "ended_at": datetime.utcnow()}}
    )

    # Percent-encode every value: the Prolific ID is user-supplied, and
    # unescaped '&', '#' or spaces would corrupt the query string.
    query = urlencode(
        {
            "PROLIFIC_PID": user_id,
            "CName": info['CoolBot_name'],
            "FName": info['FroBot_name'],
            "HName": info['HotBot_name'],
        },
        quote_via=quote,
    )
    survey_link = f"https://umw.qualtrics.com/jfe/form/SV_eIIbPlJ2D9k4zKC?{query}"

    return redirect(survey_link)

# Build the SocketIO event handlers

# True once the periodic backup loop has been started in this process, so
# that new rooms do not each spawn another infinite backup task.
_backup_task_started = False


@socketio.on('connect')
def handle_connect():
    """Join the client to its room and kick off the room's background tasks.

    Does nothing when the session lacks a display name or room, or when the
    room document is missing. Otherwise the client joins the SocketIO room.
    The first time a room is connected to (``initialPostsSent`` is false),
    this also:

    * starts the task that announces the bots' names,
    * starts the task that sends the initial post after a delay,
    * starts the periodic database backup (only once per process),
    * records ``initialPostsSent`` on the room, and
    * starts the user inactivity tracker if it has not been started yet.
    """
    global _backup_task_started

    name = session.get('display_name')
    room = session.get('room')
    if not name or not room:
        return
    room_doc = rooms_collection.find_one({"_id": room})
    if not room_doc:
        return
    join_room(room)

    # On a reconnect/refresh the room has already been initialised.
    if room_doc.get("initialPostsSent", False):
        return

    # A single message listing all participants' names replaces the former
    # per-participant "<name> has entered the chat" messages.
    socketio.start_background_task(send_bot_names_message, room,
                                   [room_doc['CoolBot_name'], room_doc['FroBot_name'],
                                    room_doc['HotBot_name']])

    # Start background task to send the initial watermelon post after a short delay
    socketio.start_background_task(send_initial_post, room, 10)

    # Start the background backup dataset task. backup_mongo loops forever,
    # so starting it for every new room would pile up concurrent backup
    # loops; one per process is enough.
    if not _backup_task_started:
        _backup_task_started = True
        socketio.start_background_task(backup_mongo, 120)

    rooms_collection.update_one(
        {"_id": room},
        {"$set": {"initialPostsSent": True}}
    )

    # Start user inactivity tracker
    if not room_doc.get("inactivity_tracker_started", False):
        rooms_collection.update_one(
            {"_id": room},
            {
                "$set": {
                    "inactivity_tracker_started": True,
                    "last_activity": datetime.utcnow()
                }
            }
        )
        socketio.start_background_task(user_inactivity_tracker, room)

@socketio.on('message')
def handle_message(payload):
    """Store and broadcast a chat message, then ask the bots to respond.

    The message is ignored when the session has no room/display name, when
    the room is missing or has ended, when the payload is not a dict with a
    string ``message`` field, or when the text is empty after stripping.

    Args:
        payload: Client-supplied event data; expected to be a dict with a
            ``message`` string.
    """
    room = session.get('room')
    name = session.get('display_name')
    if not room or not name:
        return

    # Stop message processing if the chat has ended
    room_doc = rooms_collection.find_one({"_id": room})
    if not room_doc or room_doc.get("ended", False):
        return

    # The payload comes straight from the client, so validate its shape
    # instead of failing with AttributeError on e.g. a bare string.
    if not isinstance(payload, dict):
        return
    raw_text = payload.get("message", "")
    if not isinstance(raw_text, str):
        return
    text = raw_text.strip()
    if not text:
        return  # ignore empty messages

    # One timestamp for both the message and the activity marker.
    now = datetime.utcnow()

    # Client-visible message (no datetime)
    client_message = {
        "sender": name,
        "message": text
    }
    # Database-only message (with datetime)
    db_message = {
        "sender": name,
        "message": text,
        "timestamp": now
    }
    # Store the full version in the database
    rooms_collection.update_one(
        {"_id": room},
        {
            "$push": {"messages": db_message},
            "$set": {"last_activity": now}
        }
    )
    # Send only the client version (no datetime)
    send(client_message, to=room)

    # Ask each bot for a response
    socketio.start_background_task(ask_bot_round, room)


@socketio.on('feedback_given')
def handle_message(payload):
    """Append a piece of user feedback to the room's feedback document.

    NOTE(review): this function shares its Python name with the 'message'
    event handler defined earlier in the file; the name is kept here for
    interface stability, but renaming it (e.g. ``handle_feedback``) would
    be clearer.

    Args:
        payload: Client-supplied event data; expected to be a dict with a
            ``feedback`` string.

    Returns:
        ``{'status': 'True'}`` when the feedback document was modified,
        ``{'status': 'False'}`` when the update changed nothing, or ``None``
        when the request was ignored (no session room/name, malformed
        payload, or empty text).
    """
    room = session.get('room')
    name = session.get('display_name')
    if not room or not name:
        return

    # The payload comes straight from the client, so validate its shape
    # instead of failing with AttributeError on e.g. a bare string.
    if not isinstance(payload, dict):
        return
    raw_text = payload.get("feedback", "")
    if not isinstance(raw_text, str):
        return
    text = raw_text.strip()
    if not text:
        return  # ignore empty text

    # Database-only message (with datetime)
    db_feedback = {
        "message": text,
        "timestamp": datetime.utcnow()
    }

    print(db_feedback)
    # Store the full version in the database
    result = feedback_collection.update_one(
        {"_id": room},
        {"$push": {"feedback_responses": db_feedback}}
    )
    print(result)

    # Both outcomes use the same 'status' key (the failure branch used to be
    # misspelled, so clients could not read it).
    if result.modified_count > 0:
        return {'status': 'True'}
    return {'status': 'False'}

@socketio.on('disconnect')
def handle_disconnect():
    """Announce the user's departure to the room and leave it.

    Does nothing when the session holds no room.
    """
    room = session.get("room")
    name = session.get("display_name")

    if not room:
        return

    # An empty sender is used for this system-style notice.
    farewell = {
        "sender": "",
        "message": f"{name} has left the chat"
    }
    send(farewell, to=room)
    leave_room(room)


if __name__ == "__main__":
    # Script entry point: report the SocketIO async mode, then serve the app
    # on all interfaces at port 7860.
    print("Async mode:", socketio.async_mode)
    # NOTE(review): debug=True combined with host 0.0.0.0 - confirm this is
    # intended for the deployed environment.
    run_options = {"host": '0.0.0.0', "port": 7860, "debug": True}
    socketio.run(app, **run_options)