import html
import json
import os
import pandas as pd
from snowflake.snowpark import Session
from bs4 import BeautifulSoup
from Messaging_system.Permes import Permes
import streamlit as st
from Messaging_system.SnowFlakeConnection import SnowFlakeConn
from Messaging_system.context_validator import Validator
# --------------------------------------------------------------
def load_config_(file_path):
    """
    Loads a configuration JSON file from the local space (mostly used for the messaging system /
    Snowflake connection parameters).
    :param file_path: local path to the JSON file
    :return: the parsed JSON content (dict or list, depending on the file)
    :raises FileNotFoundError: if file_path does not exist
    :raises json.JSONDecodeError: if the file is not valid JSON
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
# --------------------------------------------------------------
def clean_html_tags(users_df):
    """
    Remove HTML tags and entities from every column of a Pandas DataFrame.
    Each cell is passed through clean_text, which only rewrites string values, so non-string
    cells keep their original value.
    :param users_df: Pandas DataFrame to preprocess (modified in place)
    :return: the same users_df object, with cleaned cells
    """
    for column_name in users_df.columns:
        # Clean cell by cell, then write the cleaned column back onto the frame
        cleaned_column = users_df[column_name].apply(clean_text)
        users_df[column_name] = cleaned_column
    return users_df
# --------------------------------------------------------------
def clean_text(text):
    """
    Convert a single HTML fragment into plain text.
    :param text: cell value; anything that is not a str is returned unchanged
    :return: the text with HTML entities unescaped and markup removed, or the original value
    """
    # Non-string values pass through untouched
    if not isinstance(text, str):
        return text
    # Unescape entities first so escaped markup (e.g. "&lt;b&gt;") is parsed as tags as well
    unescaped = html.unescape(text)
    parsed = BeautifulSoup(unescaped, "html.parser")
    return parsed.get_text()
# =============================================================
def get_credential(key):
    """
    Look up a credential, preferring Streamlit secrets and falling back to environment variables.
    :param key: name of the secret / environment variable
    :return: the Streamlit secret when it is set to a truthy value, otherwise os.getenv(key)
             (None when neither source defines it)
    """
    try:
        secret = st.secrets.get(key)
    except FileNotFoundError:
        # Streamlit raises FileNotFoundError (StreamlitSecretNotFoundError in newer releases is a
        # subclass of it) when no secrets.toml exists, e.g. when this file is run as a plain
        # script. Without this guard the environment-variable fallback could never be reached.
        secret = None
    return secret or os.getenv(key)
# --------------------------------------------------------------
def filter_validated_users(users):
    """
    Filters the input DataFrame, keeping only the rows whose 'valid' column is true.

    A row is kept when its 'valid' value is the boolean True or a string equal to 'True'
    (surrounding whitespace and letter case are ignored). Every other value, including
    'False', missing values and unexpected text, is treated as not valid instead of raising.

    Parameters:
    users (DataFrame): A pandas DataFrame with a 'valid' column containing 'True'/'False'
                       strings (or booleans). The input frame is not modified.

    Returns:
    DataFrame: A new DataFrame with only the valid rows, a boolean 'valid' column and a
               fresh 0..n-1 index.

    Raises:
    KeyError: if the DataFrame has no 'valid' column.
    """
    # Normalise through str so that both True and 'True' compare equal to "true";
    # missing values never match and are therefore dropped.
    normalized = users['valid'].astype(str).str.strip().str.lower()
    is_valid = normalized.eq('true')

    # Work on a copy so the caller's DataFrame keeps its original 'valid' values
    filtered_users = users.loc[is_valid].copy()
    # Every remaining row is valid; expose the flag as a real boolean like before
    filtered_users['valid'] = True

    # Reset the index so the result is numbered 0..n-1
    return filtered_users.reset_index(drop=True)
# --------------------------------------------------------------
if __name__ == "__main__":
    # Sample run: load a CSV of test users, generate personalized messages for them through
    # Permes, and write the result to a CSV file in the working directory.

    # path to sample data
    # path = "Data/Singeo_Camp.csv"
    path = "Data/Test_users.csv"

    # loading sample data
    users = pd.read_csv(path)
    # users = clean_html_tags(users)

    config_file_path = 'Config_files/message_system_config.json'
    config_file = load_config_(config_file_path)

    # Credentials come from Streamlit secrets or environment variables (see get_credential)
    openai_api_key = get_credential("OPENAI_API")
    conn = dict(
        user=get_credential("snowflake_user"),
        password=get_credential("snowflake_password"),
        account=get_credential("snowflake_account"),
        role=get_credential("snowflake_role"),
        database=get_credential("snowflake_database"),
        warehouse=get_credential("snowflake_warehouse"),
        schema=get_credential("snowflake_schema"),
    )

    # --------------------
    # #Do we need to validate user-generated context?
    # user_generated_context = True
    # input_validator = Validator(api_key=openai_api_key)
    # input_validator.set_validator_instructions()
    # users = input_validator.validate_dataframe(dataframe=users, target_column="forum_content")
    # users = filter_validated_users(users)
    # --------------------

    brand = "singeo"
    identifier_column = "email"

    segment_info = """Student who haven't practiced for a few days"""

    # sample inputs
    CTA = """The goal is to tell them to practice singing"""
    # additional_instructions = """Include weeks_since _last_interaction in the message if you can create a better message to re-engage the user."""
    additional_instructions = None

    recsys_contents = ["workout", "course", "quick_tips"]

    # number_of_samples = users.shape[0]
    number_of_samples = 10

    # number of messages to generate
    number_of_messages = 3

    # Example messages for the first message (content lines are part of the string)
    ex1 = """
Header: Sing your heart out
Message: Your next lesson is waiting. Get back to singing today!
Header: Here Comes The Sun
Message: A quick practice session will light up your day. Let’s get right back at it.
Header: Ain’t No Mountain High Enough
Message: Daily practice makes you unstoppable. Time to build your skills!
"""
    sample_example = ex1

    ex2 = """
Header: Get Back On Track!
Message: Join thousands of singers in reaching their goals. Take a lesson today!
Header: It’s Been A While
Message: We haven’t seen you in a bit. Slide back into your practice sessions now.
Header: It Only Takes 10 Minutes
Message: Build your momentum and get you back into the groove!
Header: Let’s Sing!
Message: Haven’t practiced yet? This will get you going.
"""

    ex3 = """
Header: We Miss Your Singing!
Message: You haven’t practiced for 25 days. It’s time to dive back in!
Header: Lost In Your Singing Journey?
Message: These lessons are curated just for you. Start singing today!
"""

    # Examples keyed by message number (2 and 3); passed as subsequent_examples below
    subsequent_examples = {
        2: ex2,
        3: ex3,
    }

    involve_recsys_result = True
    involve_last_interaction = False
    # messaging_mode = "recommend_playlist"
    platform = "push"
    selected_source_features = None
    selected_input_features = None
    segment_name = "no_recent_activity"

    session = Session.builder.configs(conn).create()
    try:
        snowflake = SnowFlakeConn(session=session, brand=brand)
        # users = snowflake.get_users_in_campaign(brand=brand)

        permes = Permes()

        # o3-mini o1-mini o4-mini o1
        users_message = permes.create_personalize_messages(session=session,
                                                           model="gpt-4.1-mini",
                                                           users=users,
                                                           brand=brand,
                                                           config_file=config_file,
                                                           openai_api_key=openai_api_key,
                                                           segment_info=segment_info,
                                                           number_of_samples=number_of_samples,
                                                           number_of_messages=number_of_messages,
                                                           subsequent_examples=subsequent_examples,
                                                           platform=platform,
                                                           involve_recsys_result=involve_recsys_result,
                                                           identifier_column=identifier_column,
                                                           recsys_contents=recsys_contents,
                                                           sample_example=sample_example,
                                                           segment_name=segment_name,
                                                           personalization=True)

        users_message.to_csv("Singeo_camp.csv", encoding='utf-8-sig', index=False)
    finally:
        # Always release the Snowflake session, even when message generation fails
        session.close()