# Messaging_system_variant_B / messaging_main_test.py
# Danialebrat's picture
# - updating requirements.txt
# 0c9d257
import html
import json
import os
import pandas as pd
from snowflake.snowpark import Session
from bs4 import BeautifulSoup
from Messaging_system.Permes import Permes
import streamlit as st
from Messaging_system.SnowFlakeConnection import SnowFlakeConn
from Messaging_system.context_validator import Validator
# --------------------------------------------------------------
def load_config_(file_path):
    """
    Load a JSON configuration file from the local file system.

    :param file_path: local path to the JSON file
    :return: the parsed JSON content, as returned by ``json.load``
    """
    # Decode explicitly as UTF-8 so that non-ASCII characters in the config do not
    # depend on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
# --------------------------------------------------------------
def clean_html_tags(users_df):
    """
    Run ``clean_text`` over every cell of every column of the given DataFrame.

    The columns are overwritten on the DataFrame that was passed in, and that
    same object is returned.

    :param users_df: pandas DataFrame to clean
    :return: users_df, with each column replaced by its cleaned version
    """
    for column_name in users_df.columns:
        # Element-wise cleaning; clean_text returns non-string cells unchanged
        cleaned_column = users_df[column_name].apply(clean_text)
        users_df[column_name] = cleaned_column
    return users_df
# --------------------------------------------------------------
def clean_text(text):
    """
    Strip HTML from a single value.

    :param text: any cell value
    :return: for strings, the text with HTML entities unescaped and markup removed;
        any non-string value is returned unchanged
    """
    # Non-string cells pass through unchanged
    if not isinstance(text, str):
        return text
    # Unescape HTML entities first, then parse and keep only the text content
    unescaped = html.unescape(text)
    return BeautifulSoup(unescaped, "html.parser").get_text()
# =============================================================
def get_credential(key):
    """
    Look up a credential by name, trying Streamlit secrets first and the
    process environment second.

    :param key: name of the secret / environment variable
    :return: the value from ``st.secrets`` when it is present and truthy, otherwise
        ``os.getenv(key)`` (which is None when the variable is not set)
    """
    try:
        secret_value = st.secrets.get(key)
    except FileNotFoundError:
        # Streamlit raises a FileNotFoundError-derived error when no secrets file
        # exists (e.g. when this file is run as a plain script); fall back to the
        # environment instead of failing before os.getenv is ever consulted.
        secret_value = None
    return secret_value or os.getenv(key)
# --------------------------------------------------------------
def filter_validated_users(users):
    """
    Filters the input DataFrame, keeping only the rows flagged as valid.

    Parameters:
    users (DataFrame): A pandas DataFrame with a 'valid' column. Values may be the
        strings 'True' / 'False' (case and surrounding whitespace are ignored) or
        real booleans. Any other value, including missing values, is treated as
        not valid.

    Returns:
    DataFrame: A new DataFrame containing only the valid rows, with a fresh
        0..n-1 index and a boolean 'valid' column. The input DataFrame is not
        modified.

    Raises:
    KeyError: if the DataFrame has no 'valid' column.
    """
    # Compare on a normalized string form so both 'True'/'False' strings and real
    # booleans are understood. Unrecognized or missing values become False rather
    # than NaN, which would make the boolean indexing below raise.
    is_valid = users['valid'].astype(str).str.strip().str.lower().eq('true')
    # assign() returns a copy, so the caller's 'valid' column is left untouched
    filtered_users = users.assign(valid=is_valid)[is_valid]
    # Reset the index so the result is numbered consecutively
    return filtered_users.reset_index(drop=True)
# --------------------------------------------------------------
if __name__ == "__main__":
    # Manual run: load the sample users and the messaging config, open a Snowflake
    # session, generate personalized messages with Permes and write them to
    # Singeo_camp.csv.

    # path to sample data
    # path = "Data/Singeo_Camp.csv"
    path = "Data/Test_users.csv"

    # loading sample data
    users = pd.read_csv(path)
    # users = clean_html_tags(users)

    config_file_path = 'Config_files/message_system_config.json'
    config_file = load_config_(config_file_path)

    # Credentials come from Streamlit secrets or the environment (see get_credential)
    openai_api_key = get_credential("OPENAI_API")
    conn = dict(
        user=get_credential("snowflake_user"),
        password=get_credential("snowflake_password"),
        account=get_credential("snowflake_account"),
        role=get_credential("snowflake_role"),
        database=get_credential("snowflake_database"),
        warehouse=get_credential("snowflake_warehouse"),
        schema=get_credential("snowflake_schema"),
    )

    # --------------------
    # #Do we need to validate user-generated context?
    # user_generated_context = True
    # input_validator = Validator(api_key=openai_api_key)
    # input_validator.set_validator_instructions()
    # users = input_validator.validate_dataframe(dataframe=users, target_column="forum_content")
    # users = filter_validated_users(users)
    # --------------------

    session = Session.builder.configs(conn).create()
    try:
        brand = "singeo"
        identifier_column = "email"
        snowflake = SnowFlakeConn(session=session, brand=brand)
        # users = snowflake.get_users_in_campaign(brand=brand)

        segment_info = """Student who haven't practiced for a few days"""

        # sample inputs
        # NOTE: CTA, additional_instructions, involve_last_interaction and the two
        # selected_*_features values are defined here but are not passed to
        # create_personalize_messages below.
        CTA = """The goal is to tell them to practice singing"""
        # additional_instructions = """Include weeks_since _last_interaction in the message if you can create a better message to re-engage the user."""
        additional_instructions = None
        recsys_contents = ["workout", "course", "quick_tips"]

        # number_of_samples = users.shape[0]
        number_of_samples = 10
        # number of messages to generate
        number_of_messages = 3

        # The example texts below are kept flush-left so the literal content of
        # each string stays exactly as written.
        ex1 = """
Header: Sing your heart out
Message: Your next lesson is waiting. Get back to singing today!
Header: Here Comes The Sun
Message: A quick practice session will light up your day. Let’s get right back at it.
Header: Ain’t No Mountain High Enough
Message: Daily practice makes you unstoppable. Time to build your skills!
"""
        sample_example = ex1

        ex2 = """
Header: Get Back On Track!
Message: Join thousands of singers in reaching their goals. Take a lesson today!
Header: It’s Been A While
Message: We haven’t seen you in a bit. Slide back into your practice sessions now.
Header: It Only Takes 10 Minutes
Message: Build your momentum and get you back into the groove!
Header: Let’s Sing!
Message: Haven’t practiced yet? This will get you going.
"""

        ex3 = """
Header: We Miss Your Singing!
Message: You haven’t practiced for 25 days. It’s time to dive back in!
Header: Lost In Your Singing Journey?
Message: These lessons are curated just for you. Start singing today!
"""

        # Examples for the follow-up messages, keyed by the integers 2 and 3
        subsequent_examples = {
            2: ex2,
            3: ex3,
        }

        involve_recsys_result = True
        involve_last_interaction = False
        # messaging_mode = "recommend_playlist"
        platform = "push"
        selected_source_features = None
        selected_input_features = None
        segment_name = "no_recent_activity"

        permes = Permes()
        # o3-mini o1-mini o4-mini o1
        users_message = permes.create_personalize_messages(
            session=session,
            model="gpt-4.1-mini",
            users=users,
            brand=brand,
            config_file=config_file,
            openai_api_key=openai_api_key,
            segment_info=segment_info,
            number_of_samples=number_of_samples,
            number_of_messages=number_of_messages,
            subsequent_examples=subsequent_examples,
            platform=platform,
            involve_recsys_result=involve_recsys_result,
            identifier_column=identifier_column,
            recsys_contents=recsys_contents,
            sample_example=sample_example,
            segment_name=segment_name,
            personalization=True,
        )

        users_message.to_csv("Singeo_camp.csv", encoding='utf-8-sig', index=False)
    finally:
        # Always release the Snowflake session, even when message generation fails
        session.close()