"""Flask + Socket.IO chat server where one user discusses a topic with three LLM bots.

Each room holds one human participant, a scripted "watermelon" opening post,
and three Vertex-hosted bots (FroBot, HotBot, CoolBot) that appear under
randomly assigned fruit display names. Rooms and messages live in MongoDB.
"""

import concurrent.futures
import json
import math
import os
import random
import re
import time
from datetime import datetime, timedelta
from pathlib import Path
from urllib.parse import urlencode

import google.auth
from flask import Flask, request, render_template, redirect, url_for, session, make_response, render_template_string
from flask_socketio import SocketIO, join_room, leave_room, send
from google import genai
from google.auth.transport.requests import AuthorizedSession
from google.genai.types import GenerateContentConfig, HttpOptions
from google.oauth2 import service_account
from pymongo import MongoClient
from vertexai.generative_models import GenerativeModel
from vertexai.tuning import sft

from duplicate_detection import duplicate_check
from humanizing import humanize
from quote_removal import remove_quotes
from text_corruption import corrupt
from weird_char_removal import remove_weird_characters

# Controls
CHAT_CONTEXT = 20  # how many messages from chat history to append to inference prompt
# Minimum number of chars where we start checking for duplicate messages
DUP_LEN = 25  # since short messages may reasonably be the same

# Directory alignment
BASE_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = BASE_DIR.parent

app = Flask(__name__)
# Allow deployments to inject a real secret; the fallback keeps the previous
# behavior for local runs. NOTE(review): set FLASK_SECRET_KEY in production.
app.config["SECRET_KEY"] = os.environ.get("FLASK_SECRET_KEY", "supersecretkey")
socketio = SocketIO(app)

# Setup for Vertex API calls
serviceAccount_json = os.environ["GOOGLE_SERVICE_ACCOUNT_JSON"]
serviceAccount_info = json.loads(serviceAccount_json)
credentials = service_account.Credentials.from_service_account_info(
    serviceAccount_info,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
google_session = AuthorizedSession(credentials)
vertex_client = genai.Client(
    vertexai=True,
    project=os.environ["GOOGLE_CLOUD_PROJECT"],
    location=os.environ.get("VERTEX_LOCATION", "us-central1"),
    credentials=credentials,
)

# Original lines before separating system instructions and prompts
# (kept for reference only; not executed):
#
# # Initialize the bots
# pirate_tuning_job_name = f"projects/frozone-475719/locations/us-central1/tuningJobs/3296615187565510656"
# tuning_job_frobot = f"projects/frozone-475719/locations/us-central1/tuningJobs/1280259296294076416"
# tuning_job_hotbot = f"projects/frozone-475719/locations/us-central1/tuningJobs/4961166390611410944"
# tuning_job_coolbot = f"projects/frozone-475719/locations/us-central1/tuningJobs/4112237860852072448"
#
# hottj = sft.SupervisedTuningJob(tuning_job_hotbot)
# cooltj = sft.SupervisedTuningJob(tuning_job_coolbot)
# frotj = sft.SupervisedTuningJob(tuning_job_frobot)
#
# # Create the bot models
# hotbot = GenerativeModel(hottj.tuned_model_endpoint_name)
# coolbot = GenerativeModel(cooltj.tuned_model_endpoint_name)
# frobot = GenerativeModel(frotj.tuned_model_endpoint_name)

# Bots are addressed by endpoint name
hotbot = "projects/700531062565/locations/us-central1/endpoints/6225523347153747968"
coolbot = "projects/700531062565/locations/us-central1/endpoints/1700531621553242112"
frobot = "projects/700531062565/locations/us-central1/endpoints/2951406418055397376"

# MongoDB setup
client = MongoClient("mongodb://127.0.0.1:27017/")
db = client["huggingFaceData"]
rooms_collection = db.rooms

# List of fruits to choose display names from
FRUIT_NAMES = ["blueberry", "strawberry", "orange", "cherry"]
# Single-letter aliases shown to the bots instead of fruit names
aliases = {"watermelon":"W", "apple":"L", "banana":"B", "blueberry":"C", "strawberry":"D", "orange":"E", "grape":"G", "cherry":"H"}
reverse_aliases = {value: key for key, value in aliases.items()}

# List of discussion topics
TOPICS_LIST = [
    {
        "title": "Abortion",
        "text": "Since the Supreme Court overturned Roe vs. Wade in 2022, there has been an increase in patients crossing state lines to receive abortions in less restrictive states. Pro-choice advocates argue that these restrictions exacerbate unequal access to healthcare due to financial strain and other factors and believe that a patient should be able to make personal medical decisions about their own body and future. Pro-life advocates argue that abortion legislation should be left to the states and believe that abortion is amoral and tantamount to murder. Both sides disagree on how to handle cases of rape, incest, terminal medical conditions, and risks to the mother’s life and health. What stance do you take on abortion and why?",
        "post": "Idk its hard bc both sides have good points. People should be able to make their own decisions about their own body but theres also moral stuff to think about too you know"
    },
    {
        "title": "Gun Rights/Control",
        "text": "Gun rights advocates argue that the right to bear arms is a protected second amendment right necessary for self-defense. Meanwhile, gun control advocates argue that stricter regulations are necessary to reduce gun violence. Potential reforms include stricter background checks, banning assault weapons, enacting red flag laws, and increasing the minimum age to purchase a gun. What stance do you take on gun rights vs. gun control and why?",
        "post": "i think people should be able to own guns but there has to be some check like background stuff so crazy people dont get them"
    },
    {
        "title": "Education and Trans Students",
        "text": "Laws and policies affecting trans people are highly contested, especially those involving education. Several states have passed laws restricting the use of preferred pronouns and names in schools, limiting transgender athletes' ability to participate in sports, and banning books containing LGBTQ+ content from school libraries. How do you think decisions on school policies regarding trans students should be made and why?",
        "post": "I dont think its that big a deal to use different pronouns but also trans athletes should be playing with the gender they were born as. I know thats an unpopular opinion but its the only way its fair."
    },
    {
        "title": "Immigration and ICE Activity",
        "text": "The current year has seen an increase in ICE (U.S. Immigration and Customs Enforcement) activity, including raids at workplaces, courthouses, schools, churches, and hospitals. Some argue that ICE is going too far and is violating the Constitutional due process rights of both immigrants and citizens. Others argue that these actions are necessary to maintain national security and enforce immigration law. What stance do you take on recent ICE activity and why?",
        "post": "I think ice is doing their job they're literally immigration enforcement. It sucks but if you come here illegally youre going to face the consequence."
    },
    {
        "title": "Universal Healthcare",
        "text": "Some argue that universal healthcare is necessary to ensure everyone has access to lifesaving medical treatments and a minimum standard of living, regardless of income or employment. Others argue that the choice of how to access healthcare is a private responsibility and that it is more efficient for the government to limit intervention. What stance do you take on government involvement in providing healthcare and why?",
        "post": "I think people should handle their own healthcare. the government is slow plus competition means more innovation. i dont trust the idea of one size fits all"
    }
]


def _load_text(*parts):
    """Return the contents of the UTF-8 text file at PROJECT_ROOT/data/<parts...>."""
    # Explicit encoding so the prompts read the same regardless of the host locale.
    return (PROJECT_ROOT / "data").joinpath(*parts).read_text(encoding="utf-8")


# FroBot main prompt and system instructions
FROBOT_PROMPT = _load_text("prompts", "frobot_prompt_main.txt")
FROBOT_INSTRUCT = _load_text("inference_instructions", "frobot_instructions_main.txt")

# HotBot prompt and system instructions
HOTBOT_PROMPT = _load_text("prompts", "hotbot_prompt_main.txt")
HOTBOT_INSTRUCT = _load_text("inference_instructions", "hotbot_instructions_main.txt")

# CoolBot prompt and system instructions
COOLBOT_PROMPT = _load_text("prompts", "coolbot_prompt_main.txt")
COOLBOT_INSTRUCT = _load_text("inference_instructions", "coolbot_instructions_main.txt")


def _chat_is_active(room_id):
    """Return True when the room exists and has not been flagged as ended."""
    room_doc = rooms_collection.find_one({"_id": room_id})
    return bool(room_doc) and not room_doc.get("ended", False)


def _store_message(room_id, sender, text):
    """Append a timestamped message to the room's history in MongoDB."""
    rooms_collection.update_one(
        {"_id": room_id},
        {"$push": {"messages": {
            "sender": sender,
            "message": text,
            "timestamp": datetime.utcnow()
        }}}
    )


def _record_pass(room_id, sender, text):
    """Store a pass-like entry (pass, error or duplicate) without sending it to the client.

    Returns True when the entry was stored, False when the chat had already
    ended (nothing is stored in that case).
    """
    if not _chat_is_active(room_id):
        return False
    _store_message(room_id, sender, text)
    return True


def choose_names(n):
    """Return n unique fruit names drawn at random from FRUIT_NAMES."""
    return random.sample(FRUIT_NAMES, n)


def send_initial_post(room_id, delay):
    """Post the scripted "watermelon" opening message, then start a bot round.

    Sleeps for `delay` seconds, looks up the opening post for the room's
    topic, stores it, emits it to the room and triggers `ask_bot_round`.
    Does nothing if the room or its topic cannot be found.
    """
    time.sleep(delay)

    # Get the initial post for this topic
    room_doc = rooms_collection.find_one({"_id": room_id})
    if not room_doc:
        # The room document is gone; there is nothing to post to.
        return
    topic_title = room_doc["topic"]
    topic_info = next((t for t in TOPICS_LIST if t["title"] == topic_title), None)
    if not topic_info:
        return
    initialPost = topic_info["post"]

    # Store the initial post in the database
    _store_message(room_id, "watermelon", initialPost)

    # Send to the client (must use emit when in background thread)
    socketio.emit("message", {"sender": "watermelon", "message": initialPost}, to=room_id)

    # Send to the bots
    socketio.start_background_task(ask_bot_round, room_id)


def send_bot_joined(room_id, bot_name, delay):
    """After `delay` seconds, announce to the room that `bot_name` entered the chat."""
    time.sleep(delay)
    socketio.emit("message", {"sender": "", "message": f"{bot_name} has entered the chat"}, to=room_id)


def user_inactivity_tracker(room_id, timeout_seconds=180):
    """Trigger a new round of bot calls whenever the user has been idle too long.

    Polls the room every 5 seconds until the room disappears or is marked
    ended. When `last_activity` is older than `timeout_seconds`, a bot round
    is started and `last_activity` is reset so the same idle period does not
    trigger repeatedly.
    """
    print(f"Started user inactivity tracker for Room ID#{room_id}")
    while True:
        room_doc = rooms_collection.find_one({"_id": room_id})
        # Stop if this room's chat has ended
        if not room_doc or room_doc.get("ended", False):
            print(f"User inactivity tracker stopping for Room ID#{room_id}")
            return

        lastTime = room_doc.get("last_activity")
        if lastTime and datetime.utcnow() - lastTime > timedelta(seconds=timeout_seconds):
            print(f"User has been inactive in Room ID#{room_id} - triggering new round of bot calls.")
            socketio.start_background_task(ask_bot_round, room_id)
            # Prevent multiple bot call triggers due to inactivity
            rooms_collection.update_one(
                {"_id": room_id},
                {"$set": {"last_activity": datetime.utcnow()}}
            )
        time.sleep(5)  # re-check inactivity every 5s


def let_to_name(room_id, text):
    """Replace stand-alone alias letters in `text` with their fruit names.

    Only letters that alias a name in FRUIT_NAMES or "watermelon" are
    expanded. `room_id` is accepted for call-site symmetry and is not used.
    """
    named_response = str(text)
    letters = {aliases[name] for name in (*FRUIT_NAMES, "watermelon")}
    for letter in set(re.findall(r"\b[A-Z]\b", named_response)):
        if letter in letters:
            named_response = re.sub(r"\b" + letter + r"\b", reverse_aliases[letter], named_response)
    return named_response


def name_to_let(room_id, text):
    """Replace fruit names in `text` (any letter case) with their alias letters.

    The substitution is case-insensitive, so no case-sensitive pre-check is
    done: "Strawberry" is aliased just like "strawberry". `room_id` is
    accepted for call-site symmetry and is not used.
    """
    for name in (*FRUIT_NAMES, "watermelon"):
        text = re.sub(r"\b" + re.escape(name) + r"\b", aliases[name], text, flags=re.I)
    return text


def replace_semicolons(text, probability=0.80):
    """Return `text` with each ';' independently turned into ',' with the given probability."""
    return ''.join(
        ',' if char == ';' and random.random() <= probability else char
        for char in text
    )


def get_response_delay(response):
    """Return a simulated typing delay in seconds for `response`, capped at 150."""
    baseDelay = 1  # standard delay for thinking
    randFactor = random.uniform(2, 12.)
    perCharacterDelay = 0.12  # was .25
    maxDelay = 150  # maximum cap of 2.5 minutes (so the bots don't take too long)

    totalDelay = baseDelay + perCharacterDelay * len(response) + randFactor
    return min(totalDelay, maxDelay)


def ask_bot(room_id, bot, bot_display_name, initial_prompt, instruct_prompt):
    """Ask one bot for a reply, store it, and send it to the room.

    Returns True when the bot's turn counts as a pass (an explicit "(pass)",
    an empty reply, a model error, or a detected duplicate) and that pass was
    recorded. Returns False when the bot actually replied, when `bot` is
    None, or when the chat has ended.
    """
    # Prevents crashing if bot model did not load
    if bot is None:
        return False

    # Get the full chat room history
    room_doc = rooms_collection.find_one({"_id": room_id})
    # Do not proceed if the chat has ended
    if not room_doc or room_doc.get("ended", False):
        return False
    history = room_doc["messages"]

    # Build the LLM prompt.
    # NOTE(review): the pattern below is empty as written, which makes re.sub
    # insert the alias between every character of the prompt. Presumably a
    # placeholder token belongs here - confirm against the prompt template.
    prompt = re.sub(r"", aliases[bot_display_name], initial_prompt)
    context = []  # the message texts sent to the bot, reused by duplicate_check
    for message in history[-CHAT_CONTEXT:]:
        prompt += f"{aliases[message['sender']]}: {message['message']}\n"
        context.append(message['message'])

    prompt = name_to_let(room_id, prompt)  # sub fruit names to letters to give to bots

    print("\n")
    print("=================================prompt")
    print(prompt)

    # Get the bot's response
    try:
        response = vertex_client.models.generate_content(
            model=bot,
            contents=prompt,
            config=GenerateContentConfig(
                system_instruction=[instruct_prompt]
            ),
        )
        parsed_response = response.candidates[0].content.parts[0].text.strip()
    except Exception as e:
        # Best effort: any failure while calling or parsing the model counts as a pass.
        print("Error in bot response: ", e)
        print("Treating this bot's response as a pass.")
        return _record_pass(room_id, bot_display_name, "ERROR in bot response - treated as a (pass)")

    # Remove markup-like tags the bot may emit so they do not render on the page
    parsed_response = re.sub(r"<([a-zA-Z]+)>(?=.*)", "", parsed_response)
    # NOTE(review): empty pattern with empty replacement is a no-op as written;
    # presumably this was meant to strip closing tags - confirm.
    parsed_response = re.sub(r"", "", parsed_response)
    # Fix any escaped \\n --> \n so they are actual newlines
    parsed_response = re.sub(r"\\n", "\n", parsed_response).strip()
    # Remove bot heading ("C: ...")
    parsed_response = re.sub(r"\b" + aliases[bot_display_name] + r"\b:\s?", '', parsed_response)

    # Check for if the bot passed (i.e. response = "(pass)")
    if ("(pass)" in parsed_response) or (parsed_response == ""):
        # A pass is still recorded in the database, but not sent to the client
        recorded = _record_pass(room_id, bot_display_name, parsed_response)
        if recorded:
            print("PASSED")
        return recorded

    # Remove encapsulating quotes
    no_quotes = remove_quotes(parsed_response)
    # Humanize the response (remove obvious AI formatting styles)
    humanized_response = humanize(no_quotes)
    # Replace most semicolons
    less_semicolons_response = replace_semicolons(humanized_response)
    # Corrupt the response (add some typos and misspellings)
    corrupted_response = corrupt(less_semicolons_response)
    # Remove weird chars
    no_weird_chars = remove_weird_characters(corrupted_response)
    # Sub letters for names, so if the bot addressed C -> blueberry
    named_response = let_to_name(room_id, no_weird_chars)

    # Check that there are no recent duplicate messages
    if len(named_response) > DUP_LEN and duplicate_check(named_response, context):
        print("****DUPLICATE MESSAGE DETECTED")
        print("Treating this bot's response as a pass.")
        # Report the duplicate as a pass (True) so that ask_bot_round re-prompts
        # when every bot passed, consistent with the error and pass branches.
        return _record_pass(
            room_id,
            bot_display_name,
            f"DUPLICATE message detected - treated as a (pass) : {named_response}",
        )

    print("\n")
    print("=================================response")
    print(corrupted_response)

    # Add latency/wait time for bot responses
    delay = get_response_delay(named_response)
    print(delay)
    time.sleep(delay)

    # Do not store/send messages if the chat ended while we were waiting
    if not _chat_is_active(room_id):
        return False

    # Store the response in the database (fruit names, so page reload shows proper names)
    _store_message(room_id, bot_display_name, named_response)

    # Send the bot's response to the client
    socketio.emit("message", {"sender": bot_display_name, "message": named_response}, to=room_id)
    return False


def ask_bot_round(room_id):
    """Query all three bots concurrently, repeating until at least one does not pass.

    Stops when the room is missing or ended, or as soon as any `ask_bot` call
    returns False. Waits 2 seconds between rounds when every bot passed.
    """
    lineup = (
        (frobot, "FroBot_name", FROBOT_PROMPT, FROBOT_INSTRUCT),
        (hotbot, "HotBot_name", HOTBOT_PROMPT, HOTBOT_INSTRUCT),
        (coolbot, "CoolBot_name", COOLBOT_PROMPT, COOLBOT_INSTRUCT),
    )
    while True:
        room_doc = rooms_collection.find_one({"_id": room_id})
        if not room_doc or room_doc.get("ended", False):
            return

        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(ask_bot, room_id, model, room_doc[name_key], prompt, instruct)
                for model, name_key, prompt, instruct in lineup
            ]
            results = [f.result() for f in futures]

        print("Raw pass check results: ", results)
        if not all(results):
            print("At least one bot responded. Not re-prompting.\n")
            return  # at least one bot responded

        # All bots passed - reprompt
        print("All bots passed. Re-prompting for responses.\n")
        time.sleep(2)  # prevents CPU thrashing & spamming


# Build the routes

# Disabled landing page (route decorator intentionally commented out)
#@app.route('/', methods=["GET"])
def landing():
    """Render the landing page (currently not routed)."""
    return render_template('landing.html')


# Disabled waiting page (route decorator intentionally commented out)
#@app.route('/wait', methods=["GET"])
def waiting():
    """Render the waiting page (currently not routed)."""
    return render_template('waiting.html')


# Changed /chat -> /
@app.route('/', methods=["GET", "POST"])
def home():
    """Collect the participant's Prolific ID and store it in the session.

    GET renders the form, pre-filled from the PROLIFIC_PID query parameter or
    the session. POST validates the submitted ID and redirects to the topic
    list.
    """
    #session.clear()
    prolific_pid = request.args.get("PROLIFIC_PID") or session.get('user_id') or ''

    if request.method == "POST":
        user_id = request.form.get('name')
        if not user_id:
            return render_template('home.html', error="Prolific ID is required", prolific_pid=prolific_pid)
        session['user_id'] = user_id
        return redirect(url_for('topics'))

    return render_template('home.html', prolific_pid=prolific_pid)


@app.route('/topics', methods=["GET", "POST"])
def topics():
    """Show the topic list, or send a returning user straight back to their room."""
    user_id = session.get('user_id')
    if not user_id:
        return redirect(url_for('home'))

    exists = db.rooms.find_one({"user_id": user_id})
    if exists:
        # Set session vars for room()
        session['room'] = exists['_id']
        session['display_name'] = exists['user_name']
        return redirect(url_for('room'))

    # Don't let browser cache this page
    resp = make_response(render_template('topics.html', topics=TOPICS_LIST))
    resp.headers['Cache-Control'] = 'no-store'
    return resp


@app.route('/choose', methods=["POST"])
def choose():
    """Create a new room for the chosen topic and redirect the user into it."""
    user_id = session.get('user_id')
    if not user_id:
        return redirect(url_for('home'))

    topic = request.form.get('topic')
    if not topic:
        return redirect(url_for('topics'))
    topic_info = next((t for t in TOPICS_LIST if t["title"] == topic), None)
    if topic_info is None:
        return redirect(url_for('topics'))

    # Get next room id (and add one)
    counter = db.counters.find_one_and_update(
        {"_id": "room_id"},
        {"$inc": {"seq": 1}},   # increment seq by 1
        upsert=True,            # create if missing
        return_document=True    # return the document after the update
    )
    room_id = counter["seq"]

    # Pick fruit display names
    user_name, frobot_name, hotbot_name, coolbot_name = choose_names(4)

    # Create the new room in the database
    rooms_collection.insert_one({
        "_id": room_id,
        "topic": topic_info['title'],
        # creation date/time
        "created_at": datetime.utcnow(),
        # user identity
        "user_id": user_id,
        "user_name": user_name,
        # bot names
        "FroBot_name": frobot_name,
        "HotBot_name": hotbot_name,
        "CoolBot_name": coolbot_name,
        # flags needed for handling refreshes
        "initialPostsSent": False,
        "inactivity_tracker_started": False,
        # empty message history
        "messages": [],
        # last time user sent a message
        "last_activity": datetime.utcnow(),
        # flag for if the user aborts
        "aborted": False,
        # flag for if the chat has ended
        "ended": False,
        "ended_at": None
    })

    session['room'] = room_id
    session['display_name'] = user_name
    return redirect(url_for('room'))


def _is_visible_message(message, bot_names):
    """Return True when a stored message should be shown on the room page.

    Bot entries that are empty or contain "(pass)" are internal records
    (passes, errors, duplicates) that were never sent to the client, so they
    are hidden. Other senders keep the exact-match "(pass)" rule.
    """
    text = message.get("message", "")
    if message.get("sender") in bot_names:
        return bool(text.strip()) and "(pass)" not in text
    return text.strip() != "(pass)"


@app.route('/room')
def room():
    """Render the chat room page with the visible message history."""
    room_id = session.get('room')
    display_name = session.get('display_name')
    if not room_id or not display_name:
        return redirect(url_for('home'))

    room_doc = rooms_collection.find_one({"_id": room_id})
    if not room_doc:
        return redirect(url_for('home'))

    topic = room_doc["topic"]
    topic_info = next((t for t in TOPICS_LIST if t["title"] == topic), None)
    if topic_info is None:
        return redirect(url_for('topics'))

    bot_names = {room_doc["FroBot_name"], room_doc["HotBot_name"], room_doc["CoolBot_name"]}
    nonpass_messages = [
        m for m in room_doc["messages"]
        if _is_visible_message(m, bot_names)
    ]
    return render_template("room.html", room=room_id, topic_info=topic_info, user=display_name,
                           messages=nonpass_messages, FroBot_name=room_doc["FroBot_name"],
                           HotBot_name=room_doc["HotBot_name"], CoolBot_name=room_doc["CoolBot_name"],
                           ended=room_doc["ended"])


@app.route("/abort", methods=["POST"])
def abort_room():
    """Flag the session's room as aborted."""
    room_id = session.get("room")
    if not room_id:
        return ("Error: No room in session.", 400)
    rooms_collection.update_one(
        {"_id": room_id},
        {"$set": {"aborted": True}}
    )
    return ("OK", 200)


@app.route("/post_survey", methods=["POST", "GET"])
def post_survey():
    """Mark the user's chat as ended and redirect to the follow-up survey."""
    user_id = session.get('user_id')
    if not user_id:
        return render_template('home.html', error="Enter your Prolific ID.")

    info = db.rooms.find_one(
        {"user_id": user_id},
        {'FroBot_name': 1, 'HotBot_name': 1, 'CoolBot_name': 1}
    )
    if not info:
        return render_template('home.html', error="Enter your ID.")

    # Store in the DB that this chat has been ended
    db.rooms.update_one(
        {"user_id": user_id},
        {"$set": {"ended": True, "ended_at": datetime.utcnow()}}
    )

    # user_id comes from a form field, so percent-encode it rather than
    # interpolating it raw into the query string.
    query = urlencode({
        "PROLIFIC_PID": user_id,
        "CName": info['CoolBot_name'],
        "FName": info['FroBot_name'],
        "HName": info['HotBot_name'],
    })
    SURVEY_2_LINK = f"https://umw.qualtrics.com/jfe/form/SV_eIIbPlJ2D9k4zKC?{query}"
    return redirect(SURVEY_2_LINK)


# Build the SocketIO event handlers

@socketio.on('connect')
def handle_connect():
    """Join the socket to its room and, on first connect, start the scripted intro.

    The intro (join announcements, bot arrivals, initial post, inactivity
    tracker) only runs while the room's `initialPostsSent` flag is unset, so
    page refreshes do not replay it.
    """
    name = session.get('display_name')
    room = session.get('room')
    if not name or not room:
        return

    room_doc = rooms_collection.find_one({"_id": room})
    if not room_doc:
        return

    join_room(room)
    if room_doc.get("initialPostsSent", False):
        return

    # Send the message that "watermelon" has already joined the chat
    send({"sender": "", "message": "watermelon has entered the chat"}, to=room)
    # Send the message that this user has joined the chat
    send({"sender": "", "message": f"{name} has entered the chat"}, to=room)

    # Start background tasks for the bots to join after a short delay
    socketio.start_background_task(send_bot_joined, room, room_doc['CoolBot_name'], 3)
    socketio.start_background_task(send_bot_joined, room, room_doc['FroBot_name'], 7)
    socketio.start_background_task(send_bot_joined, room, room_doc['HotBot_name'], 13)

    # Start background task to send the initial watermelon post after a short delay
    socketio.start_background_task(send_initial_post, room, 10)
    rooms_collection.update_one(
        {"_id": room},
        {"$set": {"initialPostsSent": True}}
    )

    # Start user inactivity tracker
    if not room_doc.get("inactivity_tracker_started", False):
        rooms_collection.update_one(
            {"_id": room},
            {"$set": {
                "inactivity_tracker_started": True,
                "last_activity": datetime.utcnow()
            }}
        )
        socketio.start_background_task(user_inactivity_tracker, room)


@socketio.on('message')
def handle_message(payload):
    """Store and broadcast a user's chat message, then start a bot round.

    Malformed payloads (non-dict, non-string or blank message) are ignored,
    as are messages for rooms that are missing or ended.
    """
    room = session.get('room')
    name = session.get('display_name')
    if not room or not name:
        return

    # The payload comes from the client; ignore anything that is not a text message.
    if not isinstance(payload, dict):
        return
    raw_text = payload.get("message", "")
    if not isinstance(raw_text, str):
        return
    text = raw_text.strip()
    if not text:
        return  # ignore empty messages

    # Stop message processing if the chat has ended
    room_doc = rooms_collection.find_one({"_id": room})
    if not room_doc or room_doc.get("ended", False):
        return

    # Client-visible message (no datetime)
    client_message = {
        "sender": name,
        "message": text
    }
    # Database-only message (with datetime)
    db_message = {
        "sender": name,
        "message": text,
        "timestamp": datetime.utcnow()
    }

    # Store the full version in the database
    rooms_collection.update_one(
        {"_id": room},
        {
            "$push": {"messages": db_message},
            "$set": {"last_activity": datetime.utcnow()}
        }
    )

    # Send only the client version (no datetime)
    send(client_message, to=room)

    # Ask each bot for a response
    socketio.start_background_task(ask_bot_round, room)


@socketio.on('disconnect')
def handle_disconnect():
    """Announce that the user left and remove the socket from its room."""
    room = session.get("room")
    name = session.get("display_name")
    if room:
        if name:
            # Without a display name the announcement would read "None has left the chat".
            send({"sender": "", "message": f"{name} has left the chat"}, to=room)
        leave_room(room)


if __name__ == "__main__":
    print("Async mode:", socketio.async_mode)
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the debugger
    # on every interface - confirm this entry point is never used in production.
    socketio.run(app, host='0.0.0.0', port=7860, debug=True)