bethanie05's picture
Change database name and give explicit MongoDB connection address
9c0eb85 verified
from flask import Flask, request, render_template, redirect, url_for, session, make_response, render_template_string
from flask_socketio import SocketIO, join_room, leave_room, send
from pymongo import MongoClient
from datetime import datetime, timedelta
import random
from pathlib import Path
import time
import math
import os
import json
import google.auth
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession
from vertexai.tuning import sft
from vertexai.generative_models import GenerativeModel
import re
import concurrent.futures
from google import genai
from google.genai.types import GenerateContentConfig, HttpOptions
from text_corruption import corrupt
from humanizing import humanize
from quote_removal import remove_quotes
from weird_char_removal import remove_weird_characters
from duplicate_detection import duplicate_check
#controls
CHAT_CONTEXT = 20 #how many messages from chat history to append to inference prompt
#minimum number of chars where we start checking for duplicate messages
DUP_LEN = 25 #since short messages may reasonably be the same
# Directory alignment
BASE_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = BASE_DIR.parent
app = Flask(__name__)
app.config["SECRET_KEY"] = "supersecretkey"
socketio = SocketIO(app)
# Setup for Vertex API calls
serviceAccount_json = os.environ["GOOGLE_SERVICE_ACCOUNT_JSON"]
serviceAccount_info = json.loads(serviceAccount_json)
credentials = service_account.Credentials.from_service_account_info(
serviceAccount_info,
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
google_session = AuthorizedSession(credentials)
vertex_client = genai.Client(
vertexai=True,
project=os.environ["GOOGLE_CLOUD_PROJECT"],
location=os.environ.get("VERTEX_LOCATION", "us-central1"),
credentials=credentials,
)
"""
#original lines before separating system instructions and prompts
# Initialize the bots
pirate_tuning_job_name = f"projects/frozone-475719/locations/us-central1/tuningJobs/3296615187565510656"
tuning_job_frobot = f"projects/frozone-475719/locations/us-central1/tuningJobs/1280259296294076416"
tuning_job_hotbot = f"projects/frozone-475719/locations/us-central1/tuningJobs/4961166390611410944"
tuning_job_coolbot = f"projects/frozone-475719/locations/us-central1/tuningJobs/4112237860852072448"
hottj = sft.SupervisedTuningJob(tuning_job_hotbot)
cooltj = sft.SupervisedTuningJob(tuning_job_coolbot)
frotj = sft.SupervisedTuningJob(tuning_job_frobot)
# Create the bot models
hotbot = GenerativeModel(hottj.tuned_model_endpoint_name)
coolbot = GenerativeModel(cooltj.tuned_model_endpoint_name)
frobot = GenerativeModel(frotj.tuned_model_endpoint_name)
"""
#change to endpoints
hotbot = "projects/700531062565/locations/us-central1/endpoints/6225523347153747968"
coolbot = "projects/700531062565/locations/us-central1/endpoints/1700531621553242112"
frobot = "projects/700531062565/locations/us-central1/endpoints/2951406418055397376"
# MongoDB setup
client = MongoClient("mongodb://127.0.0.1:27017/")
db = client["huggingFaceData"]
rooms_collection = db.rooms
# List of fruits to choose display names from
FRUIT_NAMES = ["blueberry", "strawberry", "orange", "cherry"]
aliases = {"watermelon":"W", "apple":"L", "banana":"B", "blueberry":"C", "strawberry":"D", "orange":"E", "grape":"G", "cherry":"H"}
reverse_aliases = { value:key for key,value in aliases.items() }
# List of discussion topics
TOPICS_LIST = [
{
"title": "Abortion",
"text": "Since the Supreme Court overturned Roe vs. Wade in 2022, there has been an increase in patients crossing state lines to receive abortions in less restrictive states. Pro-choice advocates argue that these restrictions exacerbate unequal access to healthcare due to financial strain and other factors and believe that a patient should be able to make personal medical decisions about their own body and future. Pro-life advocates argue that abortion legislation should be left to the states and believe that abortion is amoral and tantamount to murder. Both sides disagree on how to handle cases of rape, incest, terminal medical conditions, and risks to the mother’s life and health. What stance do you take on abortion and why?",
"post": "Idk its hard bc both sides have good points. People should be able to make their own decisions about their own body but theres also moral stuff to think about too you know"
},
{
"title": "Gun Rights/Control",
"text": "Gun rights advocates argue that the right to bear arms is a protected second amendment right necessary for self-defense. Meanwhile, gun control advocates argue that stricter regulations are necessary to reduce gun violence. Potential reforms include stricter background checks, banning assault weapons, enacting red flag laws, and increasing the minimum age to purchase a gun. What stance do you take on gun rights vs. gun control and why?",
"post": "i think people should be able to own guns but there has to be some check like background stuff so crazy people dont get them"
},
{
"title": "Education and Trans Students",
"text": "Laws and policies affecting trans people are highly contested, especially those involving education. Several states have passed laws restricting the use of preferred pronouns and names in schools, limiting transgender athletes' ability to participate in sports, and banning books containing LGBTQ+ content from school libraries. How do you think decisions on school policies regarding trans students should be made and why?",
"post": "I dont think its that big a deal to use different pronouns but also trans athletes should be playing with the gender they were born as. I know thats an unpopular opinion but its the only way its fair."
},
{
"title": "Immigration and ICE Activity",
"text": "The current year has seen an increase in ICE (U.S. Immigration and Customs Enforcement) activity, including raids at workplaces, courthouses, schools, churches, and hospitals. Some argue that ICE is going too far and is violating the Constitutional due process rights of both immigrants and citizens. Others argue that these actions are necessary to maintain national security and enforce immigration law. What stance do you take on recent ICE activity and why?",
"post": "I think ice is doing their job they're literally immigration enforcement. It sucks but if you come here illegally youre going to face the consequence."
},
{
"title": "Universal Healthcare",
"text": "Some argue that universal healthcare is necessary to ensure everyone has access to lifesaving medical treatments and a minimum standard of living, regardless of income or employment. Others argue that the choice of how to access healthcare is a private responsibility and that it is more efficient for the government to limit intervention. What stance do you take on government involvement in providing healthcare and why?",
"post": "I think people should handle their own healthcare. the government is slow plus competition means more innovation. i dont trust the idea of one size fits all"
}
]
# FroBot Main Prompt
with open(PROJECT_ROOT / "data" / "prompts" / "frobot_prompt_main.txt") as f:
FROBOT_PROMPT = f.read()
# Instructions
with open(PROJECT_ROOT / "data" / "inference_instructions" / "frobot_instructions_main.txt") as f:
FROBOT_INSTRUCT = f.read()
# HotBot Prompt
with open(PROJECT_ROOT / "data" / "prompts" / "hotbot_prompt_main.txt") as h:
HOTBOT_PROMPT = h.read()
# Instructions
with open(PROJECT_ROOT / "data" / "inference_instructions" / "hotbot_instructions_main.txt") as h:
HOTBOT_INSTRUCT = h.read()
# CoolBot Prompt
with open(PROJECT_ROOT / "data" / "prompts" / "coolbot_prompt_main.txt") as c:
COOLBOT_PROMPT = c.read()
# Instructions
with open(PROJECT_ROOT / "data" / "inference_instructions" / "coolbot_instructions_main.txt") as c:
COOLBOT_INSTRUCT = c.read()
# Randomly select fruits to use for display names
def choose_names(n):
# Return n unique random fruit names
return random.sample(FRUIT_NAMES, n)
# Send initial watermelon post
def send_initial_post(room_id, delay):
# Wait 1 second before sending
time.sleep(delay)
# Get the inital post for this topic
room_doc = rooms_collection.find_one({"_id": room_id})
topic_title = room_doc["topic"]
topic_info = next((t for t in TOPICS_LIST if t["title"] == topic_title), None)
if not topic_info:
return
initialPost = topic_info["post"]
# Store the initial post in the database
db_msg = {
"sender": "watermelon",
"message": initialPost,
"timestamp": datetime.utcnow()
}
rooms_collection.update_one(
{"_id": room_id},
{"$push": {"messages": db_msg}}
)
# Send to the client (must use emit when in background thread)
socketio.emit("message", {"sender": "watermelon", "message": initialPost}, to=room_id)
#send to the bots
socketio.start_background_task(ask_bot_round, room_id)
# Send message that a bot joined the room
def send_bot_joined(room_id, bot_name, delay):
# Wait 1 second before sending
time.sleep(delay)
socketio.emit("message", {"sender": "", "message": f"{bot_name} has entered the chat"}, to=room_id)
# Trigger a round of bot calls if user has been inactive for a while
def user_inactivity_tracker(room_id, timeout_seconds=180):
print(f"Started user inactivity tracker for Room ID#{room_id}")
while True:
room_doc = rooms_collection.find_one({"_id": room_id})
# Stop if this room's chat has ended
if not room_doc or room_doc.get("ended", False):
print(f"User inactivity tracker stopping for Room ID#{room_id}")
return
lastTime = room_doc.get("last_activity")
if lastTime:
if datetime.utcnow() - lastTime > timedelta(seconds=timeout_seconds):
print(f"User has been inactive in Room ID#{room_id} - triggering new round of bot calls.")
socketio.start_background_task(ask_bot_round, room_id)
# Prevent multiple bot call triggers due to inactivity
rooms_collection.update_one(
{"_id": room_id},
{"$set": {"last_activity": datetime.utcnow()}}
)
time.sleep(5) # re-check inactivity every 5s
def let_to_name(room_id, text):
named_response = str(text)
letters = [aliases[name] for name in (FRUIT_NAMES + ["watermelon"])] # makes a copy, rather than directly modifying
for letter in set(re.findall(r"\b[A-Z]\b", named_response)):
if letter in letters:
named_response = re.sub(r"\b" + letter + r"\b", reverse_aliases[letter], named_response)
return named_response
def name_to_let(room_id, text):
named_response = str(text)
names = FRUIT_NAMES + ["watermelon"] # makes a copy, rather than directly modifying
for name in names:
if name in text:
text = re.sub(r"\b" + name + r"\b", aliases[name], text, flags=re.I)
return text
def replace_semicolons(text, probability=0.80):
modified_text = []
for char in text:
if char == ';' and random.random() <= probability:
modified_text.append(',')
else:
modified_text.append(char)
return ''.join(modified_text)
def get_response_delay(response):
baseDelay = 1 # standard delay for thinking
randFactor = random.uniform(2, 12.)
perCharacterDelay = 0.12
# was .25 -> average speed: 3.33 characters/second = 0.3
maxDelay = 150 # maximum cap of 2.5 minutes (so the bots don't take too long)
# Add total delay
totalDelay = baseDelay + perCharacterDelay * len(response) + randFactor
return min(totalDelay, maxDelay)
# Ask a bot for its response, store in DB, and send to client
# Returns true if the bot passed
def ask_bot(room_id, bot, bot_display_name, initial_prompt, instruct_prompt):
# Prevents crashing if bot model did not load
if bot is None:
return False
# Get the full chat room history
room_doc = rooms_collection.find_one({"_id": room_id})
# Do not proceed if the chat has ended
if not room_doc or room_doc.get("ended", False):
return False
history = room_doc["messages"]
# Build the LLM prompt
prompt = re.sub(r"<RE>", aliases[bot_display_name], initial_prompt)
context = list() #get the context sent to bot for duplicate_check
for message in history[-CHAT_CONTEXT:]:
prompt += f"{aliases[message['sender']]}: {message['message']}\n"
context.append(message['message'])
prompt = name_to_let(room_id, prompt) #sub fruit names to letters to give to bots
print("\n")
print("=================================prompt")
print(prompt)
# Get the bot's response
try:
response = vertex_client.models.generate_content(
model = bot,
contents = prompt,
config=GenerateContentConfig(
system_instruction = [instruct_prompt]
),
)
parsed_response = response.candidates[0].content.parts[0].text.strip()
except Exception as e:
print("Error in bot response: ", e)
print("Treating this bot's response as a pass.")
# Do not store/send messages if the chat has ended
room_doc = rooms_collection.find_one({"_id": room_id})
if not room_doc or room_doc.get("ended", False):
return False
# Store the error response in the database
bot_message = {
"sender": bot_display_name,
"message": "ERROR in bot response - treated as a (pass)",
"timestamp": datetime.utcnow()
}
rooms_collection.update_one(
{"_id": room_id},
{"$push": {"messages": bot_message}}
)
return True
#remove bot formatting like <i></i> <b></b> that will render on the page
parsed_response = re.sub(r"<([a-zA-Z]+)>(?=.*</\1>)", "", parsed_response)
parsed_response = re.sub(r"</([a-zA-Z]+)>", "", parsed_response)
#fix any escaped \\n --> \n so they are actual newlines
parsed_response = re.sub(r"\\n", "\n", parsed_response).strip()
#remove bot heading ("C: ...")
if re.search(r"\b" + aliases[bot_display_name] + r"\b:",
parsed_response):
parsed_response = re.sub(r"\b"
+ aliases[bot_display_name]
+ r"\b:\s?", '', parsed_response)
# Check for if the bot passed (i.e. response = "(pass)")
if ("(pass)" in parsed_response) or (parsed_response == ""):
# Do not store/send messages if the chat has ended
room_doc = rooms_collection.find_one({"_id": room_id})
if not room_doc or room_doc.get("ended", False):
return False
# Store the pass in the database
bot_message = {
"sender": bot_display_name,
"message": parsed_response,
"timestamp": datetime.utcnow()
}
rooms_collection.update_one(
{"_id": room_id},
{"$push": {"messages": bot_message}}
)
print("PASSED")
return True # a pass is still recorded in the database, but not sent to the client
#remove encapsulating quotes
no_quotes = remove_quotes(parsed_response)
#humanize the response (remove obvious AI formatting styles)
humanized_response = humanize(no_quotes)
#replace most semicolons
less_semicolons_response = replace_semicolons(humanized_response)
#corrupt the response (add some typos and misspellings)
corrupted_response = corrupt(less_semicolons_response)
#remove weird chars
no_weird_chars = remove_weird_characters(corrupted_response)
#sub letters for names, so if the bot addressed A -> Apple
named_response = let_to_name(room_id, no_weird_chars)
#check that there are no reccent duplicate messages
if len(named_response) > DUP_LEN and duplicate_check(named_response, context):
print("****DUPLICATE MESSAGE DETECTED")
print("Treating this bot's response as a pass.")
# Do not store/send messages if the chat has ended
room_doc = rooms_collection.find_one({"_id": room_id})
if not room_doc or room_doc.get("ended", False):
return False
# Store the error response in the database
bot_message = {
"sender": bot_display_name,
"message": f"DUPLICATE message detected - treated as a (pass) : {named_response}",
"timestamp": datetime.utcnow()
}
rooms_collection.update_one(
{"_id": room_id},
{"$push": {"messages": bot_message}}
)
return False
print("\n")
print("=================================response")
print(corrupted_response)
# Add latency/wait time for bot responses
delay = get_response_delay(named_response);
print(delay)
time.sleep(delay)
# Do not store/send messages if the chat has ended
room_doc = rooms_collection.find_one({"_id": room_id})
if not room_doc or room_doc.get("ended", False):
return False
# Store the response in the database
bot_message = {
"sender": bot_display_name,
"message": named_response, #save fruits in db so page reload shows proper names
"timestamp": datetime.utcnow()
}
rooms_collection.update_one(
{"_id": room_id},
{"$push": {"messages": bot_message}}
)
# Send the bot's response to the client
socketio.emit("message", {"sender": bot_display_name, "message": named_response}, to=room_id)
return False
def ask_bot_round(room_id):
while True:
room_doc = rooms_collection.find_one({"_id": room_id})
if not room_doc or room_doc.get("ended", False):
return
with concurrent.futures.ThreadPoolExecutor() as exec:
futures = [
exec.submit(ask_bot, room_id, frobot, room_doc["FroBot_name"], FROBOT_PROMPT, FROBOT_INSTRUCT),
exec.submit(ask_bot, room_id, hotbot, room_doc["HotBot_name"], HOTBOT_PROMPT, HOTBOT_INSTRUCT),
exec.submit(ask_bot, room_id, coolbot, room_doc["CoolBot_name"], COOLBOT_PROMPT, COOLBOT_INSTRUCT)
]
results = [f.result() for f in futures]
print("Raw pass check results: ", results)
if not all(results):
print("At least one bot responded. Not re-prompting.\n")
return # at least one bot responded
# All bots passed - reprompt
print("All bots passed. Re-prompting for responses.\n")
time.sleep(2) # prevents CPU thrashing & spamming
# Build the routes
#disabled landing
#@app.route('/', methods=["GET"])
def landing():
return render_template('landing.html')
#disabled waiting
#@app.route('/wait', methods=["GET"])
def waiting():
return render_template('waiting.html')
#changed /chat -> /
@app.route('/', methods=["GET", "POST"])
def home():
#session.clear()
#get PROLIFIC_PID from qualtrics
#test if user_id in session
prolific_pid = request.args.get("PROLIFIC_PID") or session.get('user_id') or ''
if request.method == "POST":
user_id = request.form.get('name')
if not user_id:
return render_template('home.html', error="Prolific ID is required", prolific_pid=prolific_pid)
session['user_id'] = user_id
return redirect(url_for('topics'))
else:
return render_template('home.html',prolific_pid=prolific_pid)
@app.route('/topics', methods=["GET", "POST"])
def topics():
user_id = session.get('user_id')
if not user_id:
return redirect(url_for('home'))
exists = db.rooms.find_one({"user_id":user_id})
if exists:
#set session vars for room()
session['room'] = exists['_id']
session['display_name'] = exists['user_name']
return redirect(url_for('room'))
#don't let browser cache this page
resp = make_response( render_template('topics.html', topics=TOPICS_LIST) )
resp.headers['Cache-Control'] = 'no-store'
return resp
@app.route('/choose', methods=["POST"])
def choose():
user_id = session.get('user_id')
if not user_id:
return redirect(url_for('home'))
topic = request.form.get('topic')
if not topic:
return redirect(url_for('topics'))
topic_info = next((t for t in TOPICS_LIST if t["title"] == topic), None)
if topic_info is None:
return redirect(url_for('topics'))
# Get next room id (and add one)
counter = db.counters.find_one_and_update(
{"_id": "room_id"},
{"$inc": {"seq": 1}}, # increment seq by 1
upsert=True, # create if missing
return_document=True
)
room_id = counter["seq"]
# Pick fruit display names
fruit_names = choose_names(4)
user_name = fruit_names[0]
frobot_name = fruit_names[1]
hotbot_name = fruit_names[2]
coolbot_name = fruit_names[3]
# Create the new room in the database
rooms_collection.insert_one({
"_id": room_id,
"topic": topic_info['title'],
# creation date/time
"created_at": datetime.utcnow(),
# user identity
"user_id": user_id,
"user_name": user_name,
# bot names
"FroBot_name": frobot_name,
"HotBot_name": hotbot_name,
"CoolBot_name": coolbot_name,
# flags needed for handling refreshes
"initialPostsSent": False,
"inactivity_tracker_started": False,
# empty message history
"messages": [],
# last time user sent a message
"last_activity": datetime.utcnow(),
# flag for if the user aborts
"aborted": False,
# flag for if the chat has ended
"ended": False,
"ended_at": None
})
session['room'] = room_id
session['display_name'] = user_name
return redirect(url_for('room'))
@app.route('/room')
def room():
room_id = session.get('room')
display_name = session.get('display_name')
if not room_id or not display_name:
return redirect(url_for('home'))
room_doc = rooms_collection.find_one({"_id": room_id})
if not room_doc:
return redirect(url_for('home'))
topic = room_doc["topic"]
topic_info = next((t for t in TOPICS_LIST if t["title"] == topic), None)
if topic_info is None:
return redirect(url_for('topics'))
nonpass_messages = [
m for m in room_doc["messages"]
if m.get("message", "").strip() != "(pass)"
]
return render_template("room.html", room=room_id, topic_info=topic_info, user=display_name, messages=nonpass_messages, FroBot_name=room_doc["FroBot_name"], HotBot_name=room_doc["HotBot_name"], CoolBot_name=room_doc["CoolBot_name"], ended=room_doc["ended"])
@app.route("/abort", methods=["POST"])
def abort_room():
room_id = session.get("room")
if not room_id:
return ("Error: No room in session.", 400)
rooms_collection.update_one(
{"_id": room_id},
{"$set": {"aborted": True}}
)
return ("OK", 200)
@app.route("/post_survey", methods=["POST", "GET"])
def post_survey():
user_id = session.get('user_id')
if not user_id:
return render_template('home.html', error="Enter your Prolific ID.")
info = db.rooms.find_one({"user_id":user_id}, {'FroBot_name':1,
'HotBot_name':1,
'CoolBot_name':1} )
if not info:
return render_template('home.html', error="Enter your ID.")
# Store in the DB that this chat has been ended
db.rooms.update_one(
{"user_id":user_id},
{"$set": {"ended": True, "ended_at": datetime.utcnow()}}
)
CName = info['CoolBot_name']
FName = info['FroBot_name']
HName = info['HotBot_name']
SURVEY_2_LINK = f"https://umw.qualtrics.com/jfe/form/SV_eIIbPlJ2D9k4zKC?PROLIFIC_PID={user_id}&CName={CName}&FName={FName}&HName={HName}"
return redirect(SURVEY_2_LINK)
# Build the SocketIO event handlers
@socketio.on('connect')
def handle_connect():
name = session.get('display_name')
room = session.get('room')
if not name or not room:
return
room_doc = rooms_collection.find_one({"_id": room})
if not room_doc:
return
join_room(room)
if (room_doc.get("initialPostsSent", False)):
return
# Send the message that "watermelon" has already joined the chat
send({
"sender": "",
"message": "watermelon has entered the chat"
}, to=room)
# Send the message that this user has joined the chat
send({
"sender": "",
"message": f"{name} has entered the chat"
}, to=room)
# Start background tasks for the bots to join after a short delay
socketio.start_background_task(send_bot_joined, room, room_doc['CoolBot_name'], 3)
socketio.start_background_task(send_bot_joined, room, room_doc['FroBot_name'], 7)
socketio.start_background_task(send_bot_joined, room, room_doc['HotBot_name'], 13)
# Start background task to send the initial watermelon post after a short delay
socketio.start_background_task(send_initial_post, room, 10)
rooms_collection.update_one(
{"_id": room},
{"$set": {"initialPostsSent": True}}
)
# Start user inactivity tracker
if not room_doc.get("inactivity_tracker_started", False):
rooms_collection.update_one(
{"_id": room},
{
"$set": {
"inactivity_tracker_started": True,
"last_activity": datetime.utcnow()
}
}
)
socketio.start_background_task(user_inactivity_tracker, room)
@socketio.on('message')
def handle_message(payload):
room = session.get('room')
name = session.get('display_name')
if not room or not name:
return
# Stop message processing if the chat has ended
room_doc = rooms_collection.find_one({"_id": room})
if not room_doc or room_doc.get("ended", False):
return
text = payload.get("message", "").strip()
if not text:
return # ignore empty messages
# Client-visible message (no datetime)
client_message = {
"sender": name,
"message": text
}
# Database-only message (with datetime)
db_message = {
"sender": name,
"message": text,
"timestamp": datetime.utcnow()
}
# Store the full version in the database
rooms_collection.update_one(
{"_id": room},
{
"$push": {"messages": db_message},
"$set": {"last_activity": datetime.utcnow()}
}
)
# Send only the client version (no datetime)
send(client_message, to=room)
# Ask each bot for a response
socketio.start_background_task(ask_bot_round, room)
@socketio.on('disconnect')
def handle_disconnect():
room = session.get("room")
name = session.get("display_name")
if room:
send({
"sender": "",
"message": f"{name} has left the chat"
}, to=room)
leave_room(room)
if __name__ == "__main__":
print("Async mode:", socketio.async_mode)
socketio.run(app, host='0.0.0.0', port=7860, debug=True)