File size: 7,381 Bytes
514a1ba
 
 
 
 
 
 
 
 
0c9d257
 
514a1ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c9d257
 
514a1ba
 
 
0c9d257
514a1ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c9d257
514a1ba
 
 
 
 
 
 
 
 
 
 
5700222
0c9d257
 
 
 
514a1ba
093632f
514a1ba
 
 
093632f
514a1ba
5700222
 
514a1ba
 
 
 
282b49d
514a1ba
 
690a763
b942b6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c91236
 
b942b6b
 
514a1ba
 
 
 
 
 
 
 
 
093632f
514a1ba
 
 
 
 
32585a6
 
514a1ba
0c9d257
514a1ba
 
 
 
 
 
 
4c91236
514a1ba
 
 
 
 
b942b6b
 
514a1ba
5700222
514a1ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import html
import json
import os

import pandas as pd
from snowflake.snowpark import Session
from bs4 import BeautifulSoup
from Messaging_system.Permes import Permes
import streamlit as st

from Messaging_system.SnowFlakeConnection import SnowFlakeConn
from Messaging_system.context_validator import Validator


# --------------------------------------------------------------
def load_config_(file_path):
    """
    Loads a configuration JSON file from the local space. (mostly for loading the Snowflake connection parameters)

    The file is always decoded as UTF-8 (the standard JSON encoding) instead of the platform default encoding,
    so the same config file is parsed identically on every operating system / locale.

    :param file_path: local path to the JSON file
    :return: the parsed JSON content (whatever json.load produces for the file, typically a dict)
    :raises FileNotFoundError: if file_path does not exist
    :raises json.JSONDecodeError: if the file content is not valid JSON
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# --------------------------------------------------------------
def clean_html_tags(users_df):
    """
    Run the HTML cleaning step over every column of the given Pandas Dataframe.
    Each cell of each column is passed through clean_text and the column is written back into the
    same dataframe, so the input object itself is modified.
    :param users_df: Pandas Dataframe whose cells may contain HTML tags / entities
    :return: the same users_df object, with cleaned columns
    """
    column_names = users_df.columns

    for column_name in column_names:
        # Clean the column cell by cell, then overwrite the original column with the result
        cleaned_column = users_df[column_name].apply(clean_text)
        users_df[column_name] = cleaned_column

    return users_df


# --------------------------------------------------------------
def clean_text(text):
    """
    Remove HTML markup from a single value.
    :param text: any cell value; only str values are processed
    :return: the plain text extracted from the string, or the input unchanged when it is not a str
    """
    # Anything that is not a string is handed back untouched
    if not isinstance(text, str):
        return text

    # Decode HTML entities first, then let the parser drop the tags
    unescaped = html.unescape(text)
    parsed = BeautifulSoup(unescaped, "html.parser")
    return parsed.get_text()

# =============================================================
def get_credential(key):
    """
    Look up a credential by name, preferring Streamlit secrets over environment variables.
    :param key: name of the secret / environment variable
    :return: the Streamlit secret when it is present and truthy, otherwise os.getenv(key) (None when unset)
    """
    # NOTE(review): accessing st.secrets may raise when no secrets file is configured — confirm
    secret_value = st.secrets.get(key)
    if secret_value:
        return secret_value

    # Missing or empty secret: fall back to the process environment
    return os.getenv(key)

# --------------------------------------------------------------
def filter_validated_users(users):
    """
    Filters the input DataFrame by keeping only the rows whose 'valid' flag is true.

    The 'valid' column may hold the strings 'True' / 'False' (any casing, surrounding whitespace ignored)
    or real booleans (e.g. when pandas already parsed the column as bool). Any other value, including
    missing values (NaN / None), is treated as not valid instead of raising while masking.

    Note: as before, the 'valid' column of the passed DataFrame is converted to booleans in place.

    Parameters:
    users (DataFrame): A pandas DataFrame with a 'valid' column.

    Returns:
    DataFrame: A filtered DataFrame containing only rows where 'valid' is True, with a fresh 0..n-1 index.

    Raises:
    KeyError: if the DataFrame has no 'valid' column.
    """
    def _is_valid(flag):
        # Text flags: accept 'True' regardless of casing / padding
        if isinstance(flag, str):
            return flag.strip().lower() == 'true'
        # Real booleans (python bool or numpy.bool_) are used as they are
        if pd.api.types.is_bool(flag):
            return bool(flag)
        # NaN, None and any other value cannot be confirmed as valid
        return False

    # Build a strict boolean mask; astype(bool) keeps the dtype boolean even for an empty column
    is_valid = users['valid'].map(_is_valid).astype(bool)

    # Keep the historical side effect: the caller's 'valid' column becomes boolean
    users['valid'] = is_valid

    # Keep only validated rows and reset the index of the filtered DataFrame
    filtered_users = users[is_valid].reset_index(drop=True)

    return filtered_users


# --------------------------------------------------------------
if __name__ == "__main__":
    # Manual driver script: loads a sample users CSV, opens a Snowflake session, asks Permes to generate
    # personalized messages for the "singeo" brand and writes the result to a CSV file.

    # path to sample data
    # path = "Data/Singeo_Camp.csv"
    path = "Data/Test_users.csv"

    # loading sample data
    users = pd.read_csv(path)
    # users = clean_html_tags(users)

    config_file_path = 'Config_files/message_system_config.json'
    config_file = load_config_(config_file_path)

    # Credentials come from Streamlit secrets or, failing that, from environment variables (see get_credential)
    openai_api_key = get_credential("OPENAI_API")

    # Snowflake connection parameters handed to Session.builder.configs below
    conn = dict(
        user=get_credential("snowflake_user"),
        password=get_credential("snowflake_password"),
        account=get_credential("snowflake_account"),
        role=get_credential("snowflake_role"),
        database=get_credential("snowflake_database"),
        warehouse=get_credential("snowflake_warehouse"),
        schema=get_credential("snowflake_schema")
    )


    # --------------------
    # #Do we need to validate user-generated context?
    # user_generated_context = True
    # input_validator = Validator(api_key=openai_api_key)
    # input_validator.set_validator_instructions()
    # users = input_validator.validate_dataframe(dataframe=users, target_column="forum_content")
    # users = filter_validated_users(users)
    # --------------------

    # NOTE(review): the session is never closed explicitly in this script — confirm whether that is intended
    session = Session.builder.configs(conn).create()

    brand = "singeo"
    identifier_column = "email"

    # NOTE(review): `snowflake` is only referenced by the commented-out line below — confirm it is still needed
    snowflake = SnowFlakeConn(session=session, brand=brand)
    # users = snowflake.get_users_in_campaign(brand=brand)

    segment_info = """Student who haven't practiced for a few days"""

    # sample inputs

    # NOTE(review): CTA and additional_instructions are defined but not passed to create_personalize_messages below
    CTA = """The goal is to tell them to practice singing"""

    # additional_instructions = """Include weeks_since _last_interaction in the message if you can create a better message to re-engage the user."""
    additional_instructions = None

    recsys_contents = ["workout", "course", "quick_tips"]

    # number_of_samples = users.shape[0]
    number_of_samples = 10

    # number of messages to generate
    number_of_messages = 3

    # Example messages passed as sample_example
    ex1 = """
Header: Sing your heart out 
Message: Your next lesson is waiting. Get back to singing today!


Header: Here Comes The Sun 
Message: A quick practice session will light up your day. Let’s get right back at it. 

Header: Ain’t No Mountain High Enough
Message: Daily practice makes you unstoppable. Time to build your skills! 
    """

    sample_example = ex1

    # Example messages stored under key 2 of subsequent_examples
    ex2 = """
Header: Get Back On Track!
Message: Join thousands of singers in reaching their goals. Take a lesson today!


Header: It’s Been A While
Message: We haven’t seen you in a bit. Slide back into your practice sessions now.


Header: It Only Takes 10 Minutes
Message: Build your momentum and get you back into the groove!


Header: Let’s Sing!
Message: Haven’t practiced yet? This will get you going. 
            
        """

    # Example messages stored under key 3 of subsequent_examples
    ex3 = """
Header: We Miss Your Singing!
Message: You haven’t practiced for 25 days. It’s time to dive back in!


Header: Lost In Your Singing Journey?
Message: These lessons are curated just for you. Start singing today!

            """

    # presumably keyed by message number (2nd, 3rd message) — verify against Permes
    subsequent_examples = {
        2: ex2,
        3: ex3
    }

    involve_recsys_result = True
    # NOTE(review): involve_last_interaction is defined but not passed to the call below
    involve_last_interaction = False

    # messaging_mode = "recommend_playlist"

    platform = "push"

    # NOTE(review): the two selected_*_features variables are defined but not passed to the call below
    selected_source_features = None
    selected_input_features = None

    segment_name = "no_recent_activity"
    permes = Permes()

    # o3-mini o1-mini o4-mini o1

    # Generate the messages; the result is written to CSV below
    users_message = permes.create_personalize_messages(session=session,
                                                model="gpt-4.1-mini",
                                                users=users,
                                                brand=brand,
                                                config_file=config_file,
                                                openai_api_key=openai_api_key,
                                                segment_info=segment_info,
                                                number_of_samples=number_of_samples,
                                                number_of_messages=number_of_messages,
                                                subsequent_examples = subsequent_examples,
                                                platform=platform,
                                                involve_recsys_result=involve_recsys_result,
                                                identifier_column=identifier_column,
                                                recsys_contents=recsys_contents,
                                                sample_example=sample_example,
                                                segment_name=segment_name,
                                                personalization=True)

    # Write the result next to the script; 'utf-8-sig' adds a BOM to the output file
    users_message.to_csv(f"Singeo_camp.csv", encoding='utf-8-sig', index=False)