Danialebrat commited on
Commit
d0e3307
·
1 Parent(s): 6b1529a

Adding files and directories

Browse files

- System works with CSV; we still need the Customer.io (CIO) and Snowflake integrations

.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore the .streamlit directory and its contents
2
+ Config_files/mysql_credentials.json
3
+ Config_files/secrets.json
4
+ Config_files/snowflake_credentials_Danial.json
5
+ .streamlit/secrets.toml
6
+
7
+ # Ignore the .env file
8
+ Analysis/.env
9
+ .env
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore the .streamlit directory and its contents
2
+ Config_files/mysql_credentials.json
3
+ Config_files/secrets.json
4
+ Config_files/snowflake_credentials_Danial.json
5
+ .streamlit/secrets.toml
6
+
7
+ # Ignore the .env file
8
+ Analysis/.env
9
+ .env
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="153">
8
+ <item index="0" class="java.lang.String" itemvalue="urllib3" />
9
+ <item index="1" class="java.lang.String" itemvalue="tokenizers" />
10
+ <item index="2" class="java.lang.String" itemvalue="transformers" />
11
+ <item index="3" class="java.lang.String" itemvalue="huggingface-hub" />
12
+ <item index="4" class="java.lang.String" itemvalue="safetensors" />
13
+ <item index="5" class="java.lang.String" itemvalue="pandas" />
14
+ <item index="6" class="java.lang.String" itemvalue="protobuf" />
15
+ <item index="7" class="java.lang.String" itemvalue="httpx" />
16
+ <item index="8" class="java.lang.String" itemvalue="openai" />
17
+ <item index="9" class="java.lang.String" itemvalue="anyio" />
18
+ <item index="10" class="java.lang.String" itemvalue="h11" />
19
+ <item index="11" class="java.lang.String" itemvalue="httpcore" />
20
+ <item index="12" class="java.lang.String" itemvalue="tangled-up-in-unicode" />
21
+ <item index="13" class="java.lang.String" itemvalue="numba" />
22
+ <item index="14" class="java.lang.String" itemvalue="Babel" />
23
+ <item index="15" class="java.lang.String" itemvalue="PyYAML" />
24
+ <item index="16" class="java.lang.String" itemvalue="pickleshare" />
25
+ <item index="17" class="java.lang.String" itemvalue="defusedxml" />
26
+ <item index="18" class="java.lang.String" itemvalue="executing" />
27
+ <item index="19" class="java.lang.String" itemvalue="pycparser" />
28
+ <item index="20" class="java.lang.String" itemvalue="torchvision" />
29
+ <item index="21" class="java.lang.String" itemvalue="patsy" />
30
+ <item index="22" class="java.lang.String" itemvalue="ipython-genutils" />
31
+ <item index="23" class="java.lang.String" itemvalue="Pygments" />
32
+ <item index="24" class="java.lang.String" itemvalue="bleach" />
33
+ <item index="25" class="java.lang.String" itemvalue="jupyter_server_terminals" />
34
+ <item index="26" class="java.lang.String" itemvalue="soupsieve" />
35
+ <item index="27" class="java.lang.String" itemvalue="torchaudio" />
36
+ <item index="28" class="java.lang.String" itemvalue="jsonschema" />
37
+ <item index="29" class="java.lang.String" itemvalue="pywin32" />
38
+ <item index="30" class="java.lang.String" itemvalue="qtconsole" />
39
+ <item index="31" class="java.lang.String" itemvalue="terminado" />
40
+ <item index="32" class="java.lang.String" itemvalue="comm" />
41
+ <item index="33" class="java.lang.String" itemvalue="pydantic" />
42
+ <item index="34" class="java.lang.String" itemvalue="wordcloud" />
43
+ <item index="35" class="java.lang.String" itemvalue="jupyterlab-pygments" />
44
+ <item index="36" class="java.lang.String" itemvalue="ipykernel" />
45
+ <item index="37" class="java.lang.String" itemvalue="nbconvert" />
46
+ <item index="38" class="java.lang.String" itemvalue="phik" />
47
+ <item index="39" class="java.lang.String" itemvalue="attrs" />
48
+ <item index="40" class="java.lang.String" itemvalue="contourpy" />
49
+ <item index="41" class="java.lang.String" itemvalue="psutil" />
50
+ <item index="42" class="java.lang.String" itemvalue="jedi" />
51
+ <item index="43" class="java.lang.String" itemvalue="jupyter_server" />
52
+ <item index="44" class="java.lang.String" itemvalue="pure-eval" />
53
+ <item index="45" class="java.lang.String" itemvalue="regex" />
54
+ <item index="46" class="java.lang.String" itemvalue="asttokens" />
55
+ <item index="47" class="java.lang.String" itemvalue="platformdirs" />
56
+ <item index="48" class="java.lang.String" itemvalue="matplotlib" />
57
+ <item index="49" class="java.lang.String" itemvalue="idna" />
58
+ <item index="50" class="java.lang.String" itemvalue="referencing" />
59
+ <item index="51" class="java.lang.String" itemvalue="decorator" />
60
+ <item index="52" class="java.lang.String" itemvalue="networkx" />
61
+ <item index="53" class="java.lang.String" itemvalue="pandas-profiling" />
62
+ <item index="54" class="java.lang.String" itemvalue="json5" />
63
+ <item index="55" class="java.lang.String" itemvalue="cffi" />
64
+ <item index="56" class="java.lang.String" itemvalue="pandocfilters" />
65
+ <item index="57" class="java.lang.String" itemvalue="numpy" />
66
+ <item index="58" class="java.lang.String" itemvalue="jupyter-events" />
67
+ <item index="59" class="java.lang.String" itemvalue="sniffio" />
68
+ <item index="60" class="java.lang.String" itemvalue="websocket-client" />
69
+ <item index="61" class="java.lang.String" itemvalue="exceptiongroup" />
70
+ <item index="62" class="java.lang.String" itemvalue="jupyter" />
71
+ <item index="63" class="java.lang.String" itemvalue="seaborn" />
72
+ <item index="64" class="java.lang.String" itemvalue="stack-data" />
73
+ <item index="65" class="java.lang.String" itemvalue="multimethod" />
74
+ <item index="66" class="java.lang.String" itemvalue="PyWavelets" />
75
+ <item index="67" class="java.lang.String" itemvalue="zipp" />
76
+ <item index="68" class="java.lang.String" itemvalue="nest-asyncio" />
77
+ <item index="69" class="java.lang.String" itemvalue="prompt-toolkit" />
78
+ <item index="70" class="java.lang.String" itemvalue="visions" />
79
+ <item index="71" class="java.lang.String" itemvalue="ipywidgets" />
80
+ <item index="72" class="java.lang.String" itemvalue="scipy" />
81
+ <item index="73" class="java.lang.String" itemvalue="tornado" />
82
+ <item index="74" class="java.lang.String" itemvalue="ydata-profiling" />
83
+ <item index="75" class="java.lang.String" itemvalue="jsonpointer" />
84
+ <item index="76" class="java.lang.String" itemvalue="Send2Trash" />
85
+ <item index="77" class="java.lang.String" itemvalue="torch" />
86
+ <item index="78" class="java.lang.String" itemvalue="overrides" />
87
+ <item index="79" class="java.lang.String" itemvalue="mistune" />
88
+ <item index="80" class="java.lang.String" itemvalue="importlib-resources" />
89
+ <item index="81" class="java.lang.String" itemvalue="mpmath" />
90
+ <item index="82" class="java.lang.String" itemvalue="jupyter-console" />
91
+ <item index="83" class="java.lang.String" itemvalue="typing_extensions" />
92
+ <item index="84" class="java.lang.String" itemvalue="debugpy" />
93
+ <item index="85" class="java.lang.String" itemvalue="statsmodels" />
94
+ <item index="86" class="java.lang.String" itemvalue="argon2-cffi" />
95
+ <item index="87" class="java.lang.String" itemvalue="pytz" />
96
+ <item index="88" class="java.lang.String" itemvalue="dacite" />
97
+ <item index="89" class="java.lang.String" itemvalue="webencodings" />
98
+ <item index="90" class="java.lang.String" itemvalue="Pillow" />
99
+ <item index="91" class="java.lang.String" itemvalue="notebook_shim" />
100
+ <item index="92" class="java.lang.String" itemvalue="tiktoken" />
101
+ <item index="93" class="java.lang.String" itemvalue="traitlets" />
102
+ <item index="94" class="java.lang.String" itemvalue="pywinpty" />
103
+ <item index="95" class="java.lang.String" itemvalue="rfc3339-validator" />
104
+ <item index="96" class="java.lang.String" itemvalue="joblib" />
105
+ <item index="97" class="java.lang.String" itemvalue="arrow" />
106
+ <item index="98" class="java.lang.String" itemvalue="python-dateutil" />
107
+ <item index="99" class="java.lang.String" itemvalue="nbclient" />
108
+ <item index="100" class="java.lang.String" itemvalue="QtPy" />
109
+ <item index="101" class="java.lang.String" itemvalue="cycler" />
110
+ <item index="102" class="java.lang.String" itemvalue="MarkupSafe" />
111
+ <item index="103" class="java.lang.String" itemvalue="tinycss2" />
112
+ <item index="104" class="java.lang.String" itemvalue="mkl" />
113
+ <item index="105" class="java.lang.String" itemvalue="fsspec" />
114
+ <item index="106" class="java.lang.String" itemvalue="python-json-logger" />
115
+ <item index="107" class="java.lang.String" itemvalue="filelock" />
116
+ <item index="108" class="java.lang.String" itemvalue="jupyterlab-widgets" />
117
+ <item index="109" class="java.lang.String" itemvalue="pyzmq" />
118
+ <item index="110" class="java.lang.String" itemvalue="certifi" />
119
+ <item index="111" class="java.lang.String" itemvalue="pyparsing" />
120
+ <item index="112" class="java.lang.String" itemvalue="sympy" />
121
+ <item index="113" class="java.lang.String" itemvalue="notebook" />
122
+ <item index="114" class="java.lang.String" itemvalue="isoduration" />
123
+ <item index="115" class="java.lang.String" itemvalue="jupyter-lsp" />
124
+ <item index="116" class="java.lang.String" itemvalue="fqdn" />
125
+ <item index="117" class="java.lang.String" itemvalue="jupyter_client" />
126
+ <item index="118" class="java.lang.String" itemvalue="kiwisolver" />
127
+ <item index="119" class="java.lang.String" itemvalue="jupyterlab_server" />
128
+ <item index="120" class="java.lang.String" itemvalue="fonttools" />
129
+ <item index="121" class="java.lang.String" itemvalue="backcall" />
130
+ <item index="122" class="java.lang.String" itemvalue="tbb" />
131
+ <item index="123" class="java.lang.String" itemvalue="widgetsnbextension" />
132
+ <item index="124" class="java.lang.String" itemvalue="argon2-cffi-bindings" />
133
+ <item index="125" class="java.lang.String" itemvalue="distro" />
134
+ <item index="126" class="java.lang.String" itemvalue="matplotlib-inline" />
135
+ <item index="127" class="java.lang.String" itemvalue="webcolors" />
136
+ <item index="128" class="java.lang.String" itemvalue="more-itertools" />
137
+ <item index="129" class="java.lang.String" itemvalue="wcwidth" />
138
+ <item index="130" class="java.lang.String" itemvalue="llvmlite" />
139
+ <item index="131" class="java.lang.String" itemvalue="jupyter_core" />
140
+ <item index="132" class="java.lang.String" itemvalue="importlib-metadata" />
141
+ <item index="133" class="java.lang.String" itemvalue="Jinja2" />
142
+ <item index="134" class="java.lang.String" itemvalue="rfc3986-validator" />
143
+ <item index="135" class="java.lang.String" itemvalue="typeguard" />
144
+ <item index="136" class="java.lang.String" itemvalue="jsonschema-specifications" />
145
+ <item index="137" class="java.lang.String" itemvalue="rpds-py" />
146
+ <item index="138" class="java.lang.String" itemvalue="uri-template" />
147
+ <item index="139" class="java.lang.String" itemvalue="tomli" />
148
+ <item index="140" class="java.lang.String" itemvalue="jupyterlab" />
149
+ <item index="141" class="java.lang.String" itemvalue="parso" />
150
+ <item index="142" class="java.lang.String" itemvalue="intel-openmp" />
151
+ <item index="143" class="java.lang.String" itemvalue="nbformat" />
152
+ <item index="144" class="java.lang.String" itemvalue="tzdata" />
153
+ <item index="145" class="java.lang.String" itemvalue="ipython" />
154
+ <item index="146" class="java.lang.String" itemvalue="packaging" />
155
+ <item index="147" class="java.lang.String" itemvalue="fastjsonschema" />
156
+ <item index="148" class="java.lang.String" itemvalue="prometheus-client" />
157
+ <item index="149" class="java.lang.String" itemvalue="tqdm" />
158
+ <item index="150" class="java.lang.String" itemvalue="colorama" />
159
+ <item index="151" class="java.lang.String" itemvalue="async-lru" />
160
+ <item index="152" class="java.lang.String" itemvalue="ImageHash" />
161
+ </list>
162
+ </value>
163
+ </option>
164
+ </inspection_tool>
165
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
166
+ <option name="ignoredErrors">
167
+ <list>
168
+ <option value="N803" />
169
+ <option value="N806" />
170
+ </list>
171
+ </option>
172
+ </inspection_tool>
173
+ <inspection_tool class="PyStubPackagesAdvertiser" enabled="true" level="WARNING" enabled_by_default="true">
174
+ <option name="ignoredPackages">
175
+ <list>
176
+ <option value="pyspark-stubs==3.0.0.post3" />
177
+ </list>
178
+ </option>
179
+ </inspection_tool>
180
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
181
+ <option name="ignoredIdentifiers">
182
+ <list>
183
+ <option value="str.__or__" />
184
+ </list>
185
+ </option>
186
+ </inspection_tool>
187
+ </profile>
188
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.9 (AI_Message_Generator)" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/AI_Message_Generator.iml" filepath="$PROJECT_DIR$/.idea/AI_Message_Generator.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
CIO/CIO_integration_Python.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import http.client
2
+ import json
3
+ import pandas as pd
4
+ import logging
5
+ import base64
6
+ import requests
7
+ from customerio import CustomerIO, Regions
8
+
9
+
10
class CustomerIOIntegration:
    """Thin wrapper around Customer.io for profile updates and manual segments.

    Profile (attribute) updates go through the ``customerio`` Track client;
    segment management goes through raw ``requests`` calls against the App API.

    NOTE(review): the same site_id/api_key pair is used both for the Track
    client and for the App API Basic-auth header. The App API normally expects
    a Bearer App API key, not Track credentials -- confirm against the caller.
    """

    def __init__(self, site_id, api_key):
        """
        :param site_id: Customer.io workspace site id.
        :param api_key: API key used for both the Track client and Basic auth.
        """
        self.cio = CustomerIO(site_id=site_id, api_key=api_key)
        logging.basicConfig(level=logging.INFO)

        # Authentication
        self.site_id = site_id
        self.api_key = api_key
        # Base URL for Customer.io App API endpoints (used for segments management)
        self.base_url = "https://api.customer.io/v1"

        # Create Basic Auth header
        auth_b64 = base64.b64encode(f"{self.site_id}:{self.api_key}".encode('utf-8')).decode('utf-8')
        self.headers = {
            "Authorization": f"Basic {auth_b64}",
            "Content-Type": "application/json"
        }

    def add_attributes(self, dataframe):
        """Push the ``ai_generated_message`` attribute onto each user profile.

        Rows missing either ``ai_generated_message`` or ``email`` are skipped;
        failures are logged per user so one bad row does not stop the batch.

        :param dataframe: pandas DataFrame with 'email' and
                          'ai_generated_message' columns.
        """
        # Filter out rows without messages or email
        filtered_df = dataframe.dropna(subset=['ai_generated_message', 'email'])

        for index, row in filtered_df.iterrows():
            try:
                self.cio.identify(id=row['email'], ai_generated_message=row['ai_generated_message'])
                logging.info(f"Successfully updated user {row['email']} with message")
            except Exception as e:
                logging.error(f"Failed to update user {row['email']}: {e}")

    def get_segment(self, segment_name):
        """Look up a segment by name.

        :param segment_name: name of the segment to find.
        :return: the segment id, or None if no segment has that name.
        :raises Exception: if the segments listing request fails.
        """
        resp = requests.get(f"{self.base_url}/segments", headers=self.headers)
        if resp.status_code != 200:
            raise Exception(f"Error fetching segments: {resp.text}")

        payload = resp.json()
        # The App API wraps the list under a "segments" key; tolerate a bare
        # list too (the original code assumed a bare list, which would iterate
        # dict keys and fail on .get()).
        segments = payload.get("segments", []) if isinstance(payload, dict) else payload
        for seg in segments:
            if seg.get("name") == segment_name:
                return seg.get("id")
        return None

    def update_segment_from_dataframe(self, df: pd.DataFrame,
                                      segment_name: str,
                                      segment_description: str) -> str:
        """
        Given a pandas DataFrame, create (if needed) and update a Customer.io manual segment.

        The DataFrame must contain an "email" column (used as the unique identifier) plus other columns
        that become customer attributes.

        Parameters:
            df: DataFrame containing customer data.
            segment_name: The name of the segment to create or update.
            segment_description: A description for the segment (used when creating it).

        Returns:
            The segment ID (as returned by the API).

        Raises:
            Exception: if creating the segment or adding customers fails.
        """
        segment_id = self.get_segment(segment_name)

        # If segment does not exist, create it
        if segment_id is None:
            payload = {
                "name": segment_name,
                "description": segment_description,
                "type": "manual"  # manual segments require that you add customers explicitly
            }
            resp = requests.post(f"{self.base_url}/segments", headers=self.headers, data=json.dumps(payload))
            if resp.status_code not in (200, 201):
                raise Exception(f"Error creating segment: {resp.text}")
            segment = resp.json()
            # The create response wraps the object under a "segment" key;
            # tolerate a bare object too.
            if isinstance(segment, dict) and "segment" in segment:
                segment = segment["segment"]
            segment_id = segment.get("id")
            print(f"Segment '{segment_name}' created with ID: {segment_id}")
        else:
            print(f"Segment '{segment_name}' already exists with ID: {segment_id}")

        # Step 2: For each row in the DataFrame, update the customer profile.
        # We use the "email" column as the id.
        for index, row in df.iterrows():
            email = row["email"]
            # Prepare a dictionary of attributes (all columns except email)
            attrs = row.drop("email").to_dict()
            # Use the customer.io client to create or update the profile.
            # Note: any keyword argument you pass becomes a custom attribute.
            self.cio.identify(id=email, **attrs)

        # Step 3: Add all customers (emails) from the DataFrame to the segment.
        # NOTE(review): the documented endpoint for manual segments is
        # POST /segments/{id}/add_customers -- confirm this PUT works as intended.
        customer_ids = df["email"].tolist()
        payload = {
            "ids": customer_ids,
            "id_type": "email"  # since we use emails as the identifier
        }
        resp = requests.put(f"{self.base_url}/segments/{segment_id}", headers=self.headers, data=json.dumps(payload))
        if resp.status_code != 200:
            raise Exception(f"Error adding customers to segment: {resp.text}")

        print(f"Successfully updated segment '{segment_name}' with {len(customer_ids)} customers.")
        return segment_id
113
+
114
+
115
def load_config_(file_path):
    """Load a JSON configuration file from disk.

    Mostly used for loading Snowflake/Customer.io connection parameters.

    :param file_path: local path to the JSON file
    :return: the parsed JSON content
    """
    with open(file_path, 'r') as config_fh:
        return json.load(config_fh)
123
+
124
+
125
# Example usage
if __name__ == "__main__":
    df = pd.DataFrame({
        'email': ['danial@musora.com'],
        'message': ['This is the second test message'],
        'json_att': [{"message": "test", "url": "test"}],
    })

    secrets_file = 'Config_files/secrets.json'
    secrets = load_config_(secrets_file)

    track_api_key = secrets["MUSORA_CUSTOMER_IO_TRACK_API_KEY"]
    site_id = secrets["MUSORA_CUSTOMER_IO_SITE_ID"]
    api_key = secrets["MUSORA_CUSTOMER_IO_APP_API_KEY"]
    workspace_id = secrets["MUSORA_CUSTOMER_IO_WORKSPACE_ID"]

    # NOTE(review): the Track API key is passed here although the class also
    # performs App API calls; `api_key` (the App API key) is loaded but never
    # used -- confirm which credential the App API endpoints need.
    cio_integration = CustomerIOIntegration(api_key=track_api_key, site_id=site_id)

    # Update (or create) the segment
    segment_id = cio_integration.update_segment_from_dataframe(
        df,
        segment_name="Danial_ Manual Segment _ AI",
        segment_description="Customers imported from DataFrame")
    print(f"Segment ID: {segment_id}")
Config_files/message_system_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "user_info_features": [
3
+ "first_name",
4
+ "country",
5
+ "instrument",
6
+ "biography",
7
+ "birthday_reminder",
8
+ "topics",
9
+ "genres",
10
+ "last_completed_content"
11
+ ],
12
+ "interaction_features": ["last_content_info"],
13
+ "check_feasibility": [
14
+ "first_name",
15
+ "biography",
16
+ "birthday",
17
+ "topics",
18
+ "genres"
19
+ ],
20
+ "AI_Jargon": ["elevate", "enhance", "reignite", "passion", "boost", "fuel", "thrill", "revive", "spark", "performing", "fresh", "tone"],
21
+ "header_limit": 30,
22
+ "message_limit": 110
23
+ }
24
+
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Start from a lightweight Python image
FROM python:3.9

# Set environment variables for better behavior in containers
# (fix: the original ENV ended with a dangling "\" continuation followed by a
#  blank line and a comment, which breaks the instruction)
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Create and set the working directory
WORKDIR /app

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Expose the port that the application will listen on
EXPOSE 7860

# Run the Streamlit app
# Streamlit will read PORT from the environment and bind to 0.0.0.0
CMD streamlit run app.py --server.port=$PORT --server.headless true --server.address 0.0.0.0
Messaging_system/CoreConfig.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ the flow of the Program starts from create_personalized_message function
3
+ """
4
+
5
+ import time
6
+ from Messaging_system.SnowFlakeConnection import SnowFlakeConn
7
+
8
+
9
class CoreConfig:
    """
    Shared configuration/state for the LLM-based personalized message
    generator. Holds the Snowflake session, the target users dataframe, the
    LLM settings, and every knob the caller sets through the setter methods.
    """

    def __init__(self, session, users_df, brand, platform, config_file):
        """
        :param session: active Snowflake session (handed to SnowFlakeConn)
        :param users_df: dataframe of target users
        :param brand: one of [drumeo, pianote, guitareo, singeo]
        :param platform: valid values: [push, app]
        :param config_file: parsed message-system configuration
        """
        self.session = session
        self.users_df = users_df
        self.config_file = config_file
        self.platform = platform  # valid values: [push, app]
        self.brand = brand

        # LLM configs
        self.api_key = None  # will be set by user
        self.model = "gpt-4o-mini"  # will be set by user

        # will be set by user
        self.CTA = None
        self.message_style = None
        self.sample_example = None
        # NOTE(review): CTA is still None at this point, so template_message is
        # always initialized to None and set_CTA() never refreshes it -- confirm
        # whether template_message is meant to track CTA.
        self.template_message = self.CTA
        self.segment_info = None
        self.subsequence_messages = 1

        self.number_of_samples = 0
        self.list_of_features = None
        self.consider_last_interaction = True
        self.additional_instructions = None

        # to trace the number of tokens and estimate the cost if needed
        self.temp_token_counter = 0
        self.total_tokens = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
        }

        # Recsys_result
        self.recsys_result = None
        self.recsys_contents = ["song", "workout", "course", "quick_tips"]
        self.content_info = None
        self.involve_recsys_result = False
        self.popular_contents_df = None

        # Additional_info
        self.additional_info_columns = None
        self.messaging_mode = "message"
        self.target_content = None

        self.start_time = time.time()
        self.remaining_tokens = None
        self.wait_time = None

        # Instantiate the connection to Snowflake
        self.SF = SnowFlakeConn(session=self.session, brand=self.brand)

        # segment name
        self.segment_name = None

    # --------------------------------------------------------------
    def set_message_style(self, message_style):
        """
        Set the message style.
        :param message_style: a string with placeholders
        """
        self.message_style = message_style

    # --------------------------------------------------------------
    def set_involve_recsys_result(self, involve_recsys_result):
        """Toggle whether recsys results are included in the message."""
        self.involve_recsys_result = involve_recsys_result

    # --------------------------------------------------------------
    def set_recsys_contents(self, recsys_contents):
        """
        Set recsys contents -> content types to include in recommendations.
        The default value contains all types.
        :param recsys_contents: list of content-type names
        """
        self.recsys_contents = recsys_contents

    # --------------------------------------------------------------
    def set_messaging_mode(self, messaging_mode):
        """
        Set the messaging mode -> [recsys_result, message(default),
        recommend_playlist, recommend_content]. Invalid modes are rejected
        with a printed warning and the current mode is kept.
        :param messaging_mode: one of the valid mode strings
        """
        valid_modes = ["recsys_result", "message", "recommend_playlist", "recommend_content"]
        if messaging_mode in valid_modes:
            self.messaging_mode = messaging_mode
        else:
            print(f"{messaging_mode} is not a valid messaging mode. available modes are: \n {valid_modes}")

    # --------------------------------------------------------------
    def set_openai_api(self, openai_key):
        """
        Set the OpenAI API key.
        :param openai_key: API key string
        """
        self.api_key = openai_key

    # --------------------------------------------------------------
    def set_number_of_samples(self, number_of_samples):
        """
        Set number_of_samples used when generating messages.
        :param number_of_samples: int (or anything int() accepts)
        """
        self.number_of_samples = int(number_of_samples)

    # --------------------------------------------------------------
    def set_sample_example(self, sample_example):
        """
        Set sample_example for one-shot prompting.
        :param sample_example: a string with placeholders
        """
        self.sample_example = sample_example

    # --------------------------------------------------------------
    def set_CTA(self, CTA):
        """
        Set CTA (call to action): the main goal for sending the message.
        :param CTA: a string with placeholders
        """
        self.CTA = CTA

    # --------------------------------------------------------------
    def set_segment_info(self, segment_info):
        """
        Set segment_info: information common to all users in the segment.
        :param segment_info: a string with placeholders
        """
        self.segment_info = segment_info

    # --------------------------------------------------------------
    def set_additional_instructions(self, additional_instructions):
        """
        Set free-form additional instructions for the LLM.
        :param additional_instructions: instruction string
        """
        self.additional_instructions = additional_instructions

    # --------------------------------------------------------------
    def set_features_to_use(self, list_of_features):
        """
        Set list_of_features to use in the message.
        :param list_of_features: list of feature names
        """
        self.list_of_features = list_of_features

    # --------------------------------------------------------------
    def set_target_feature(self, target_content):
        """
        Set the target feature when recommending a content from the input
        data (it should be a content_id).
        :param target_content: target content id
        """
        self.target_content = target_content

    # --------------------------------------------------------------
    def set_number_of_messages(self, number_of_messages=1, instructionset=None):
        """
        If the number of messages is more than 1, self.subsequence_messages is
        set to a dictionary keyed 1..number_of_messages whose values are the
        corresponding instructions in instructionset.
        :param number_of_messages: int
        :param instructionset: dict of per-message instructions (required
                               when number_of_messages > 1)
        :raises ValueError: if more than one message is requested without an
                            instructionset
        """
        if number_of_messages == 1:
            self.subsequence_messages = {1: None}
        else:
            if instructionset is not None:
                # NOTE(review): the instructionset is stored as-is; its keys are
                # assumed to be 1..number_of_messages -- not validated here.
                self.subsequence_messages = instructionset
            else:
                raise ValueError("Instructionset must have instructions for each subsequence message")

    # --------------------------------------------------------------
    def get_instrument(self):
        """
        Get the instrument name based on the brand.
        :return: instrument (str)
        :raises KeyError: if self.brand is not a known brand
        """
        switch_dict = {
            "drumeo": "Drum",
            "pianote": "Piano",
            "guitareo": "Guitar",
            "singeo": "Vocal"
        }
        try:
            return switch_dict[self.brand]
        except KeyError:
            # fix: surface a helpful message instead of a bare KeyError
            raise KeyError(f"Unknown brand '{self.brand}'; expected one of {list(switch_dict)}")

    # --------------------------------------------------------------
    def respect_request_ratio(self):
        """
        Sleep for a while if needed to respect the request/token rate limit.
        Resets the token counter either after sleeping or once a minute has
        elapsed since the last reset.
        """
        current_time = time.time()
        delta = current_time - self.start_time

        # Check token limits
        if self.temp_token_counter > 3997000 and delta <= 60:  # Using a safe margin
            print("Sleeping for few seconds to respect the token limit...")
            # reset the token counter
            self.temp_token_counter = 0
            self.start_time = time.time()
            time.sleep(10)  # Sleep for a while before making new requests

        if delta > 60:
            # reset the token counter
            self.temp_token_counter = 0
            self.start_time = time.time()

    # --------------------------------------------------------------
    def checkpoint(self):
        """
        Save the current progress (selected users_df columns) to a CSV file.
        """
        save_data = self.users_df[
            ["user_id", "email", "first_name", "message", "additional_info", "recommendation_info"]]
        # fix: dropped a stray f-prefix on a placeholder-free filename string
        save_data.to_csv("drumeo_not_active_segment.csv", encoding='utf-8-sig', index=False)

    # --------------------------------------------------------------
    def set_segment_name(self, segment_name):
        """
        Set the segment name. (fix: previous docstring was a copy-paste of
        checkpoint's "saving the current process")
        :param segment_name: segment name string
        """
        self.segment_name = segment_name
266
+
267
+
268
+
269
+
270
+
Messaging_system/DataCollector.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ setting instructions and inputs required to generate personalized messages
3
+ """
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+
8
class DataCollector:
    """
    Gathers and prepares all per-user data needed to generate personalized
    messages: normalizes user identifiers, pulls warehouse data through the
    Core's SF connector, and derives the columns the downstream prompt
    builders rely on.
    """

    def __init__(self, CoreConfig):
        # Shared configuration/state object (holds users_df, SF connector,
        # brand, segment_name, number_of_samples, ...).
        self.Core = CoreConfig

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def gather_data(self):
        """
        Main entry point: runs the full data-gathering flow.

        :return: updated CoreConfig whose users_df carries the extracted info
        """

        # extract user_ids and other data
        self.extract_musora_id()

        # selecting a sample of users
        self.select_sample()
        self.fetch_data()

        # calculate the remaining days to their birthday
        self.remaining_days_to_birthday()
        self.create_columns()

        # creating additional info if applicable
        if self.Core.additional_info_columns:
            self.create_additional_information()

        return self.Core

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def extract_musora_id(self):
        """
        Extract the musora user id and preserve additional columns.

        Normalizes whichever identifier column is present to 'user_id' and
        records the remaining columns as additional-info columns.
        :raises Exception: when no recognized identifier column exists
        """
        self.Core.users_df.columns = self.Core.users_df.columns.str.lower()

        # Define valid identifier columns in order of preference.
        valid_columns = ['user_id', 'musora_user_id', 'id', 'email']
        # Find the first valid column present in the DataFrame.
        id_col = next((col for col in valid_columns if col in self.Core.users_df.columns), None)

        if id_col is None:
            raise Exception("Input data must contain user_id, musora_user_id, id, or email column.")

        # Normalize the identification column to 'user_id'
        if id_col in ['musora_user_id', 'id']:
            self.Core.users_df.rename(columns={id_col: 'user_id'}, inplace=True)
        elif id_col == 'email':
            # Bug fix: the lookup helper is defined on this class, not on
            # Core — the previous self.Core._lookup_user_ids_from_email()
            # would raise AttributeError.
            self._lookup_user_ids_from_email()

        # Identify additional columns: exclude identification columns
        identification_columns = {'user_id', 'email'} if 'email' in self.Core.users_df.columns else {'user_id'}
        additional_columns = [col for col in self.Core.users_df.columns if col not in identification_columns]
        self.Core.additional_info_columns = [col.lower() for col in additional_columns]

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def _lookup_user_ids_from_email(self):
        """
        Look up user IDs for the unique email addresses and merge the result
        into Core.users_df. Assumes users_df contains an 'email' column.
        """
        unique_emails = self.Core.users_df["email"].unique()
        data = self.Core.SF.extract_id_from_email(emails=unique_emails)
        # Left merge keeps every input row, even when no id was found.
        self.Core.users_df = pd.merge(self.Core.users_df, data, on='email', how='left')

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def remaining_days_to_birthday(self):
        """
        Rewrite the 'birthday_reminder' column as human-readable text when
        the user's birthday is within a week, otherwise clear it.

        :return: None (updates users_df in place)
        """

        # Iterate through each row in the DataFrame
        for idx, row in self.Core.users_df.iterrows():
            # NOTE(review): the guard checks 'birthday' but the value used is
            # 'birthday_reminder' — presumably both are populated together by
            # the warehouse query; confirm, otherwise int() below may fail.
            if pd.notna(row.get("birthday")):
                if int(row["birthday_reminder"]) <= 7:
                    remaining_days = int(row["birthday_reminder"])
                    self.Core.users_df.at[idx, "birthday_reminder"] = f"{remaining_days} days until student's birthday"
                else:
                    self.Core.users_df.at[idx, "birthday_reminder"] = None

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def fetch_data(self):
        """
        Fetch all warehouse datasets for the sampled users and merge them
        into users_df; also stores content tables on Core.

        :return: None (updates Core in place)
        """

        # Fetch datasets
        user_ids = self.Core.users_df["user_id"].unique()

        users_data = self.Core.SF.get_data("users", user_ids)
        interactions_data = self.Core.SF.get_data("interactions", user_ids)
        recsys_data = self.Core.SF.get_data("recsys", user_ids)
        contents_data = self.Core.SF.get_data("contents")
        popular_contents_data = self.Core.SF.get_data("popular_contents")

        # Align key dtypes before merging.
        self.Core.users_df["user_id"] = self.Core.users_df["user_id"].astype(int)
        interactions_data["user_id"] = interactions_data["user_id"].astype(int)

        # Merge additional user details into the base dataframe; suffixes keep
        # the input file's columns authoritative on name collisions.
        self.Core.users_df = self.Core.users_df.merge(users_data, on="user_id", how="left", suffixes=("", "_users"))
        self.Core.users_df = self.Core.users_df.merge(interactions_data, on="user_id", how="left",
                                                      suffixes=("", "_interactions"))
        self.Core.users_df = self.Core.users_df.merge(recsys_data, on="user_id", how="left", suffixes=("", "_recsys"))

        for col in self.Core.users_df.columns:
            # Replace additional empty representations with np.nan
            self.Core.users_df[col] = self.Core.users_df[col].replace(['', 'None', 'nan'], np.nan)

        # Drop users we are not allowed to contact (missing permission).
        self.Core.users_df.dropna(subset=["permission"], inplace=True)

        self.Core.content_info = contents_data
        self.Core.popular_contents_df = popular_contents_data

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def create_columns(self):
        """
        Add the output columns that message generation will fill in, plus the
        static brand/platform/segment metadata.

        :return: None (updates users_df in place)
        """

        # adding new columns, initially with None values
        self.Core.users_df["message"] = None                              # will contain the final message
        self.Core.users_df["source"] = None                               # [AI-generated]
        self.Core.users_df["prompt"] = None                               # will contain the final prompt
        self.Core.users_df["instrument"] = self.Core.get_instrument()
        self.Core.users_df["platform"] = self.Core.platform
        self.Core.users_df["segment_name"] = self.Core.segment_name

    # -------------------------------------------------------------
    # -------------------------------------------------------------
    def create_additional_information(self):
        """
        Build the 'additional_info' column from whatever extra columns the
        input file provided (one "name: value" line per non-empty feature).

        :return: None (updates users_df in place)
        """
        self.Core.users_df["additional_info"] = None

        # Iterate through each row in the DataFrame
        for idx, row in self.Core.users_df.iterrows():
            additional_info = []

            # populating additional_info, skipping NaN/empty values
            for feature in self.Core.additional_info_columns:
                value = row.get(feature)
                if pd.notna(value) and value not in [None, [], {}] and (
                        not isinstance(value, str) or value.strip()):
                    additional_info.append(f"{feature}: {str(value)}")

            self.Core.users_df.at[idx, "additional_info"] = "\n".join(additional_info)

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def select_sample(self, sample_size=None):
        """
        Downsample users_df to at most ``sample_size`` users (without
        replacement).

        :param sample_size: number of users to keep; defaults to
            Core.number_of_samples, or 20 when that is unset
        :return: None (updates users_df in place)
        """

        # Use Core.number_of_samples if sample_size is None, otherwise default to 20
        if sample_size is None:
            sample_size = self.Core.number_of_samples if self.Core.number_of_samples is not None else 20

        # Never ask for more rows than exist.
        total_users = self.Core.users_df.shape[0]
        sample_size = min(total_users, sample_size)
        self.Core.users_df = self.Core.users_df.sample(n=sample_size, replace=False)
Messaging_system/LLMR.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This class is a LLM based recommender that can choose the perfect content for the user given user profile and our goal
3
+
4
+ """
5
+ import json
6
+ import os
7
+ import pandas as pd
8
+ import openai
9
+ from openai import OpenAI
10
+ from dotenv import load_dotenv
11
+ import time
12
+ import streamlit as st
13
+ from tqdm import tqdm
14
+
15
+ load_dotenv()
16
+
17
+
18
+ # -----------------------------------------------------------------------
19
class LLMR:
    """
    LLM-based recommender: given a user's profile and the recommender
    system's candidate contents, asks the LLM to choose the single best
    content to recommend.
    """

    def __init__(self, CoreConfig):
        self.Core = CoreConfig
        self.user = None                 # row of the user currently being processed
        self.selected_content_ids = []   # candidate ids offered to the LLM for the current user

    def get_recommendations(self, progress_callback):
        """
        Select the recommended content for each user in users_df.

        :param progress_callback: callable(progress, total) for UI updates, or None
        :return: updated CoreConfig
        """

        self.Core.users_df["recommendation"] = None
        self.Core.users_df["recommendation_info"] = None
        total_users = len(self.Core.users_df)

        st.write("Choosing the best content to recommend ... ")

        self.Core.start_time = time.time()
        for progress, (idx, row) in enumerate(
                tqdm(self.Core.users_df.iterrows(), desc="Selecting the best content to recommend ...")):
            # Update progress if callback is provided
            if progress_callback is not None:
                progress_callback(progress, total_users)

            self.user = row
            content_id, content_info, recsys_json, token = self._get_recommendation()

            if content_id is None:  # error in selecting a content to recommend
                continue

            # updating tokens.  Bug fix: accumulate with '+=' (was '='), so
            # usage from earlier users in the same rate window is not
            # discarded — consistent with MessageGenerator.
            self.Core.total_tokens['prompt_tokens'] += int(token['prompt_tokens'])
            self.Core.total_tokens['completion_tokens'] += int(token['completion_tokens'])
            self.Core.temp_token_counter += int(token['prompt_tokens']) + int(token['completion_tokens'])
            self.Core.users_df.at[idx, "recommendation"] = content_id
            self.Core.users_df.at[idx, "recommendation_info"] = content_info
            self.Core.users_df.at[idx, "recsys_result"] = recsys_json
            self.Core.respect_request_ratio()

        return self.Core

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _get_recommendation(self):
        """
        Select and return the recommendation from the available contents.

        :return: (content_id, content_info, recsys_json, tokens);
                 (None, None, None, None) when no recommendation was obtained
        """

        prompt, recsys_json = self._generate_prompt()
        if prompt is None:
            return None, None, None, None

        content_id, tokens = self.get_llm_response(prompt)
        if content_id == 0:
            # was not able to receive a valid recommendation from the LLM
            return None, None, None, None

        content_info = self._get_content_info(content_id)
        return content_id, content_info, recsys_json, tokens

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _generate_prompt(self):
        """
        Build the content-selection prompt for the current user.

        :return: (prompt, recsys_json), or (None, None) when there is
                 nothing to recommend
        """
        available_contents, recsys_json = self._get_available_contents()
        if available_contents.strip() == "":  # no item to recommend
            # Bug fix: the caller unpacks two values, so return a 2-tuple
            # (a bare ``return None`` raised TypeError at the call site).
            return None, None

        # Assemble the different parts of the prompt
        input_context = self._input_context()
        user_info = self._get_user_profile()
        task = self._task_instructions()
        output_instruction = self._output_instruction()

        prompt = f"""
        ### Context:
        {input_context}

        ### User Information:
        {user_info}

        ### Available Contents:
        {available_contents}

        ### Main Task:
        {task}

        ### Output Instructions:
        {output_instruction}
        """

        return prompt, recsys_json

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _input_context(self):
        """
        :return: the context section of the prompt as a string
        """

        context = f"""
        You are a helpful assistant at Musora, an online music education platform that helps users learn music. Your goal is to choose a perfect content to recommend to the user given the information that we have from the user and available contents to recommend.
        """

        return context

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _system_instructions(self):
        """
        High-level system message for chat-style LLM endpoints
        (sent with role='system').
        """
        return (
            "You are a helpful recommendation assistant at Musora, an online music education platform. "
            "Use the provided user information and content details to choose the best content to recommend. "
            "Make sure to follow the instructions precisely and only return the chosen content_id as JSON."
        )

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _task_instructions(self):
        """
        :return: the main-task section of the prompt as a string
        """

        task = """
        - You must select exactly ONE content from the 'Available Contents' to recommend.
        - Base your decision on the User information and focus on providing the most relevant recommendation.
        - Do not recommended content where the topic is focused on a specific Gear (e.g. YAMAHA)
        - Provide the content_id of the recommended content in the output based on Output instructions.
        """

        return task

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _get_user_profile(self):
        """
        Build the user-information section of the prompt from the user's
        profile and last completed content.

        :return: formatted user information (str)
        """

        last_completed_content = self._get_user_data(attribute="last_completed_content")
        user_info = self._get_user_data(attribute="user_info")

        recommendation_info = f"""
        **User information and preferences:**

        {user_info}

        **Previous completed content:**
        {last_completed_content}
        """

        return recommendation_info

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _get_user_data(self, attribute):
        """
        Read one attribute from the current user row, falling back to
        "Not Available" for NaN/empty values.

        :param attribute: column name to read
        :return: the attribute value, or "Not Available"
        """

        value = self.user[attribute]
        if pd.notna(value) and value not in [None, [], {}] and (
                not isinstance(value, str) or value.strip()):
            return value
        return "Not Available"

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _get_user_recommendation(self):
        """
        Return the recsys JSON for the current user, falling back to the
        most popular content when the recsys output is missing or unusable
        (cold-start scenario).
        """

        recsys_json = self.user["recsys_result"]

        try:
            recsys_data = json.loads(recsys_json)
            # Sections to process
            sections = self.Core.recsys_contents

            # None of the sections present --> cold-start scenario
            if not any(section in recsys_data for section in sections):
                return self.Core.popular_contents_df.iloc[0]["popular_content"]
            return recsys_json
        except Exception:
            # Bug fix: was a bare ``except:`` (also caught SystemExit /
            # KeyboardInterrupt). Unparseable/absent recsys result: fall
            # back to the popular content.
            return self.Core.popular_contents_df.iloc[0]["popular_content"]

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _get_available_contents(self):
        """
        Collect the top-3 candidates per recsys section and render them as
        the "Available Contents" prompt section.

        :return: (formatted candidates text, recsys_json used)
        """

        recsys_json = self._get_user_recommendation()
        recsys_data = json.loads(recsys_json)

        # Sections to process
        sections = self.Core.recsys_contents

        # Collect selected content_ids
        selected_content_ids = []

        for section in sections:
            if section in recsys_data:
                # Get the list of recommendations in this section
                recs = recsys_data[section]
                # Sort by recommendation_rank (ascending order)
                recs_sorted = sorted(recs, key=lambda x: x['recommendation_rank'])
                # Select top 3 recommendations and keep their content_ids
                selected_content_ids.extend(rec['content_id'] for rec in recs_sorted[:3])

        # Fetch content info for the selected content_ids
        content_info_rows = self.Core.content_info[self.Core.content_info['content_id'].isin(selected_content_ids)]

        # Create a mapping from content_id to content_info
        content_info_map = dict(zip(content_info_rows['content_id'], content_info_rows['content_info']))

        # Assemble the text in a structured way using a list
        lines = []
        for content_id in selected_content_ids:
            # Retrieve the content_info (which may include multi-line text)
            content_info = content_info_map.get(content_id, "No content info found")

            lines.append(f"**content_id**: {content_id}")
            lines.append("**content_info**:")
            lines.append(content_info)  # may already contain internal newlines
            lines.append("")            # blank line for separation

        # Join all lines into a single text string with newline characters
        text = "\n".join(lines)

        # Remember the candidates so the LLM's answer can be validated.
        self.selected_content_ids = selected_content_ids

        return text, recsys_json

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _get_content_info(self, content_id):
        """
        Return the content_info text for the recommended content.

        :param content_id: id of the recommended content
        :return: content_info (str)
        """

        content_info_row = self.Core.content_info[self.Core.content_info['content_id'] == content_id]
        content_info = content_info_row['content_info'].iloc[0]

        return content_info

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def is_valid_content_id(self, content_id):
        """
        Check whether the LLM's answer is one of the offered candidates.

        :param content_id: id returned by the LLM
        :return: True when the id was among selected_content_ids
        """

        return content_id in self.selected_content_ids

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def _output_instruction(self):
        """
        :return: the output-format section of the prompt as a string
        """

        instructions = f"""
        Return the content_id of the final recommendation in **JSON** format with the following structure:

        {{
            "content_id": "content_id of the recommended content from Available Contents, as an integer",
        }}

        Do not include any additional keys or text outside the JSON.
        """

        return instructions

    def get_llm_response(self, prompt, max_retries=4):
        """
        Send the prompt to the LLM and return the validated content_id.

        :param prompt: user prompt to send
        :param max_retries: attempts before giving up
        :return: (content_id, tokens) on success; (0, 0) after max_retries
        """

        openai.api_key = self.Core.api_key
        instructions = self._system_instructions()
        client = OpenAI(api_key=self.Core.api_key)

        for attempt in range(max_retries):
            try:
                response = client.chat.completions.create(
                    model=self.Core.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=20,  # only a tiny JSON object is expected
                    n=1,
                    temperature=0.7
                )

                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    content = response.choices[0].message.content
                    output = json.loads(content)

                    # Accept only ids that were actually offered as candidates.
                    if 'content_id' in output and self.is_valid_content_id(int(output['content_id'])):
                        return int(output['content_id']), tokens

                    print(f"'content_id' missing or invalid in response on attempt {attempt + 1}. Retrying...")
                    continue  # Continue to next attempt

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # an underlying Exception, likely raised within httpx.
            except openai.RateLimitError as e:
                print("A 429 status code was received; we should back off a bit.")
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning empty response.")
        return 0, 0
Messaging_system/Message_generator.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ THis class will generate message or messages based on the number of requested.
3
+ """
4
+ import json
5
+ import time
6
+ from openai import OpenAI
7
+ from tqdm import tqdm
8
+ import streamlit as st
9
+
10
+ from Messaging_system.MultiMessage import MultiMessage
11
+ from Messaging_system.protection_layer import ProtectionLayer
12
+ import openai
13
+
14
+
15
class MessageGenerator:
    """
    Generates the final personalized message for each user by prompting the
    LLM, passing the draft through the protection layer, and wrapping the
    result in the {"messages_sequence": [...]} envelope the downstream
    delivery system expects.
    """

    def __init__(self, CoreConfig):
        # Shared configuration/state object (users_df, api key, limits, ...)
        self.Core = CoreConfig

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def generate_messages(self, progress_callback):
        """
        Generate a message for every user based on their prepared prompt.

        :param progress_callback: callable(progress, total) for UI updates, or None
        :return: updated CoreConfig (users_df["message"] populated)
        """

        total_users = len(self.Core.users_df)
        st.write("Generating messages ... ")

        self.Core.start_time = time.time()
        for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages")):
            # Update progress if callback is provided
            if progress_callback is not None:
                progress_callback(progress, total_users)

            if row["prompt"] is not None:
                first_message = self.get_llm_response(row["prompt"])

                if first_message is not None:
                    # run the draft through the protection / criticism layer
                    protect = ProtectionLayer(config_file=self.Core.config_file,
                                              messaging_mode=self.Core.messaging_mode)
                    message, total_tokens = protect.criticize(message=first_message, user=row)

                    # updating tokens
                    self.Core.total_tokens['prompt_tokens'] += total_tokens['prompt_tokens']
                    self.Core.total_tokens['completion_tokens'] += total_tokens['completion_tokens']
                    self.Core.temp_token_counter += total_tokens['prompt_tokens'] + total_tokens['completion_tokens']

                    # double check output structure before accepting the message
                    if isinstance(message, dict) and "message" in message and isinstance(message["message"], str):
                        # parsing output result
                        message = self.parsing_output_message(message, row)
                        self.Core.users_df.at[idx, "message"] = message
                        row["message"] = message
                    else:
                        self.Core.users_df.at[idx, "message"] = None
                    self.Core.checkpoint()
                    self.Core.respect_request_ratio()
                else:
                    self.Core.users_df.at[idx, "message"] = None
            else:
                self.Core.users_df.at[idx, "message"] = None

            # generating subsequent messages if needed:
            if isinstance(self.Core.subsequence_messages, dict) and len(self.Core.subsequence_messages.keys()) > 1 and \
                    self.Core.users_df.at[idx, "message"] is not None and row["message"] is not None:
                MM = MultiMessage(self.Core)
                message = MM.generate_multi_messages(row)
                self.Core.users_df.at[idx, "message"] = message

            else:
                # ---------------------------------------------------------
                # SINGLE-MESSAGE path: wrap the lone message in the same
                # {"messages_sequence": [...]} envelope as multi-message
                # ---------------------------------------------------------
                single_msg = row["message"] or self.Core.users_df.at[idx, "message"]
                if single_msg is not None:
                    # If the single message is still a JSON string, turn it into a dict first
                    if isinstance(single_msg, str):
                        try:
                            single_msg = json.loads(single_msg)
                        except json.JSONDecodeError:
                            # leave it as-is if it's not valid JSON
                            pass

                    msg_wrapper = {"messages_sequence": [single_msg]}
                    # Store a proper JSON string
                    self.Core.users_df.at[idx, "message"] = json.dumps(msg_wrapper,
                                                                       ensure_ascii=False)
                else:
                    self.Core.users_df.at[idx, "message"] = None

        return self.Core

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def parsing_output_message(self, message, user):
        """
        Parses the output JSON from the LLM and enriches it with additional
        content information if needed.

        :param message: output dict from the LLM (expects at least "message")
        :param user: the user row
        :return: JSON string of the enriched message, or None on failure
        """
        if self.Core.involve_recsys_result:
            output_message = self.fetch_recommendation_data(user, message)
            # Bug fix: previously a failed lookup produced json.dumps(None)
            # == the string "null"; propagate the failure instead.
            if output_message is None:
                return None
        elif self.Core.messaging_mode == "recommend_playlist":
            # adding playlist url to the message
            if "playlist_id" in message and "message" in message:
                playlist_id = str(message["playlist_id"])
                web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
                # Add these to the message dict
                output_message = {
                    "header": message["header"],
                    "message": message["message"],
                    "playlist_id": int(message["playlist_id"]),
                    "web_url_path": web_url_path,
                }
            else:
                # Bug fix: previously fell through with output_message unbound,
                # raising UnboundLocalError on malformed LLM output.
                print("LLM output is missing 'playlist_id' or 'message'.")
                return None
        else:
            # Only "message"/"header" are expected when involve_recsys_result
            # is False and we are not recommending any other content.
            if "message" not in message or "header" not in message:
                print("LLM output is missing 'message'.")
                return None
            output_message = {"header": message["header"], "message": message["message"]}

        return json.dumps(output_message, ensure_ascii=False)

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def fetch_recommendation_data(self, user, message):
        """
        Enrich the LLM message with the recommended content's url, title and
        thumbnail, looked up from the user's recsys result.

        :param user: user row (needs user_id, recommendation, recsys_result)
        :param message: dict with "header"/"message" from the LLM
        :return: enriched message dict, or None when the content is not found
        """

        user_id = user["user_id"]
        content_id = int(user["recommendation"])
        recsys_json_str = user["recsys_result"]
        recsys_data = json.loads(recsys_json_str)

        # Search through all categories in the recsys data for the chosen id.
        found_item = None
        for category, items in recsys_data.items():
            for item in items:
                if item.get("content_id") == content_id:
                    found_item = item
                    break  # Exit inner loop if item is found
            if found_item:
                break  # Exit outer loop if item is found

        if not found_item:
            print(f"content_id {content_id} not found in recsys_data for user_id {user_id}.")
            return None

        # Extract required fields from found_item
        web_url_path = found_item.get("web_url_path")
        title = found_item.get("title")
        thumbnail_url = found_item.get("thumbnail_url")

        # Bug fix: str.replace returns a new string — the previous call
        # discarded its result, so the cleanup never happened.
        message["message"] = message["message"].replace('\\', '').replace('"', '')

        # Add the content details to the message dict
        output_message = {
            "header": message.get("header"),
            "message": message.get("message"),
            "content_id": content_id,
            "web_url_path": web_url_path,
            "title": title,
            "thumbnail_url": thumbnail_url
        }
        return output_message

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def get_llm_response(self, prompt, max_retries=4):
        """
        Send the prompt to the LLM and return the parsed message dict.

        :param prompt: user prompt to send
        :param max_retries: attempts before giving up
        :return: dict with "header" and "message" keys, or None on failure
        """

        openai.api_key = self.Core.api_key
        instructions = self.llm_instructions()
        client = OpenAI(api_key=self.Core.api_key)

        for attempt in range(max_retries):
            try:
                response = client.chat.completions.create(
                    model=self.Core.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=500,
                    n=1,
                    temperature=0.6
                )

                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    content = response.choices[0].message.content
                    output = json.loads(content)

                    if 'message' not in output or 'header' not in output:
                        print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
                        continue  # Continue to next attempt

                    # enforce the configured length limits before accepting
                    if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
                            output["message"].strip()) > self.Core.config_file["message_limit"]:
                        print(
                            f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
                        continue

                    # valid JSON within limits: account tokens and return
                    self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                    self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
                    self.Core.temp_token_counter += tokens['total_tokens']
                    return output

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # an underlying Exception, likely raised within httpx.
            except openai.RateLimitError as e:
                print("A 429 status code was received; we should back off a bit.")
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning empty response.")
        return None

    # --------------------------------------------------------------
    # --------------------------------------------------------------
    def llm_instructions(self):
        """
        System-role instructions for the message-generation LLM.

        :return: instructions as a string
        """

        # set LLM initial instruction
        instructions = """You are an AI assistant that receives information of a music student and generate personalized
        motivation message. """

        return instructions
Messaging_system/MultiMessage.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ from openai import OpenAI
4
+ from Messaging_system.protection_layer import ProtectionLayer
5
+ import openai
6
+
7
class MultiMessage:
    """
    Generates a sequence of messages (multi-step push notifications) for each
    user. The first message is produced elsewhere (stored in user["message"]);
    every subsequent message is generated by the LLM with all previously
    generated messages as context, so the sequence reads as a progression.
    """

    def __init__(self, CoreConfig):
        """
        :param CoreConfig: shared pipeline configuration/state object (API key,
                           model name, character limits in `config_file`, token
                           counters, per-step tone in `subsequence_messages`).
        """
        self.Core = CoreConfig

    # --------------------------------------------------------------
    def generate_multi_messages(self, user):
        """
        Generate multiple messages for one user and bundle them into a single
        JSON structure. The first message must already exist in user["message"];
        the remaining ones are generated step by step, each referencing all
        previously generated messages.

        :param user: a row (dict-like) containing user data and the first message
        :return: JSON string {"messages_sequence": [...]}, or None on failure
        """
        first_message_str = user.get("message", None)
        if not first_message_str:
            print("No initial message found; cannot build a multi-message sequence.")
            return None

        # The first message is stored as a JSON string; parse it up front.
        try:
            first_message_dict = json.loads(first_message_str)
        except (json.JSONDecodeError, TypeError):
            print("Could not parse the first message as JSON. Returning None.")
            return None

        message_sequence = [first_message_dict]

        # One ProtectionLayer instance is reused for every follow-up message.
        protect = ProtectionLayer(
            config_file=self.Core.config_file,
            messaging_mode=self.Core.messaging_mode
        )

        # subsequence_messages maps step number -> tone instruction for that
        # step; its size is the *total* number of messages requested.
        total_to_generate = len(self.Core.subsequence_messages.keys())

        # We already have message 1, so generate steps 2..n.
        for step in range(2, total_to_generate + 1):
            next_msg_raw = self.generate_next_messages(message_sequence, step)
            if not next_msg_raw:
                print(f"Could not generate the message for step {step}. Stopping.")
                break

            # Run the candidate through the protection layer (content critique).
            criticized_msg, tokens_used = protect.criticize(
                message=next_msg_raw,
                user=user
            )

            # Track the cost of the protection-layer call as well.
            self.Core.total_tokens['prompt_tokens'] += tokens_used['prompt_tokens']
            self.Core.total_tokens['completion_tokens'] += tokens_used['completion_tokens']
            self.Core.temp_token_counter += tokens_used['prompt_tokens'] + tokens_used['completion_tokens']

            # Validate/enrich the message exactly like the single-message pipeline.
            parsed_output_str = self.parsing_output_message(criticized_msg, user)
            if not parsed_output_str:
                print(f"Parsing output failed for step {step}. Stopping.")
                break

            try:
                parsed_output_dict = json.loads(parsed_output_str)
            except json.JSONDecodeError:
                print(f"Could not parse the new message as JSON for step {step}. Stopping.")
                break

            message_sequence.append(parsed_output_dict)

        final_structure = {"messages_sequence": message_sequence}
        return json.dumps(final_structure, ensure_ascii=False)

    # --------------------------------------------------------------
    def generate_next_messages(self, previous_messages, step):
        """
        Produce the next message in the sequence from all previous ones.

        :param previous_messages: list of dicts with at least "header"/"message"
        :param step: 1-based step number (keys into Core.subsequence_messages)
        :return: dict from the LLM with 'header'/'message', or None on failure
        """
        prompt = self.generate_prompt(previous_messages, step)
        return self.get_llm_response(prompt)

    # --------------------------------------------------------------
    def generate_prompt(self, previous_messages, step):
        """
        Create the LLM prompt for one step, embedding every message sent so far.

        :param previous_messages: list of dicts with 'header' and 'message'
        :param step: step number whose tone instruction should be applied
        :return: prompt string
        """
        # Summarize all previous notifications for the model.
        previous_text = []
        for i, m in enumerate(previous_messages, start=1):
            header = m.get("header", "").strip()
            body = m.get("message", "").strip()
            previous_text.append(f"Message {i}: (Header) {header}\n (Body) {body}")
        previous_text_str = "\n\n".join(previous_text)

        # Character-limit constraints for the push notification.
        header_limit = self.Core.config_file.get("header_limit", 50)
        message_limit = self.Core.config_file.get("message_limit", 200)

        prompt = f"""
        We have previously sent these push notifications to the user:
        {previous_text_str}

        The user has still not re-engaged. Generate the *next* push notification to motivate the user
        to return and continue their music learning.

        Constraints:
        - "header" must be fewer than {header_limit} characters.
        - "message" must be fewer than {message_limit} characters.
        - Output must be valid JSON with exactly two keys: "header" and "message".
        - Do NOT repeat the exact same wording as prior messages; keep the same overall style.
        - The user is a music student who hasn't been active recently.

        Tune:
        - {self.Core.subsequence_messages[step]}

        Return only JSON of the form:
        {{
          "header": "...",
          "message": "..."
        }}
        """.strip()

        return prompt

    # --------------------------------------------------------------
    def parsing_output_message(self, message, user):
        """
        Validate the LLM output dict and (depending on the messaging mode)
        enrich it with recommendation/playlist data, mirroring the
        single-message pipeline.

        :param message: output dict from the LLM (at least "header"/"message")
        :param user: the user row dict
        :return: valid JSON string, or None if the structure is invalid
        """
        if self.Core.involve_recsys_result:
            output_message = self.fetch_recommendation_data(user, message)
            # BUGFIX: fetch_recommendation_data may return None; previously the
            # None was serialized to the JSON string "null" (which is truthy)
            # and poisoned the message sequence downstream.
            if output_message is None:
                return None
        elif self.Core.messaging_mode == "recommend_playlist":
            # Playlist mode requires a playlist_id to build the deep link.
            if "playlist_id" in message and "message" in message:
                playlist_id = str(message["playlist_id"])
                web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
                output_message = {
                    "header": message.get("header", ""),
                    "message": message.get("message", ""),
                    "playlist_id": int(message["playlist_id"]),
                    "web_url_path": web_url_path,
                }
            else:
                print("LLM output is missing either 'playlist_id' or 'message'.")
                return None
        else:
            # Basic scenario: only 'header' and 'message' are expected.
            if "message" not in message or "header" not in message:
                print("LLM output is missing 'header' or 'message'.")
                return None
            output_message = {
                "header": message["header"],
                "message": message["message"]
            }

        return json.dumps(output_message, ensure_ascii=False)

    # --------------------------------------------------------------
    def fetch_recommendation_data(self, user, message):
        """
        Look up the recommended content in the user's recsys_result and merge
        its metadata into the message dict.

        :param user: user row with 'user_id', 'recommendation', 'recsys_result'
        :param message: dict with at least "header" and "message"
        :return: enriched dict (header, message, content_id, web_url_path,
                 title, thumbnail_url), or None if the content cannot be found
        """
        user_id = user["user_id"]
        content_id = int(user["recommendation"])
        recsys_data = json.loads(user["recsys_result"])

        # recsys_result is {category: [item, ...]}; find the recommended item.
        found_item = None
        for category, items in recsys_data.items():
            for item in items:
                if item.get("content_id") == content_id:
                    found_item = item
                    break
            if found_item:
                break

        if not found_item:
            print(f"content_id {content_id} not found in recsys_data for user_id {user_id}.")
            return None

        return {
            "header": message.get("header"),
            # Strip backslashes/quotes that would break downstream templating.
            "message": message.get("message", "").replace('\\', '').replace('"', ''),
            "content_id": content_id,
            "web_url_path": found_item.get("web_url_path"),
            "title": found_item.get("title"),
            "thumbnail_url": found_item.get("thumbnail_url")
        }

    # --------------------------------------------------------------
    def get_llm_response(self, prompt, max_retries=4):
        """
        Call the LLM with `prompt`, retrying on transient failures, and return
        a validated dict with 'header' and 'message'.

        :param prompt: text prompt for the LLM
        :param max_retries: number of attempts before giving up
        :return: dict with 'header'/'message', or None if unsuccessful
        """
        instructions = self.llm_instructions()
        # The key is passed to the client explicitly; no need to mutate the
        # openai module-level global.
        client = OpenAI(api_key=self.Core.api_key)

        for attempt in range(max_retries):
            try:
                response = client.chat.completions.create(
                    model=self.Core.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=500,
                    n=1,
                    temperature=0.6
                )

                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    output = json.loads(response.choices[0].message.content)

                    # Validate required keys.
                    if 'message' not in output or 'header' not in output:
                        print(f"'message' or 'header' missing in response (attempt {attempt + 1}). Retrying...")
                        continue

                    # Enforce character-length constraints.
                    if (len(output["header"].strip()) > self.Core.config_file["header_limit"] or
                            len(output["message"].strip()) > self.Core.config_file["message_limit"]):
                        print(f"Header or message exceeded character limits (attempt {attempt + 1}). Retrying...")
                        continue

                    # Only count tokens for responses we actually accept.
                    self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                    self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
                    self.Core.temp_token_counter += tokens['total_tokens']
                    return output

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM (attempt {attempt + 1}). Retrying...")

            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # underlying exception, likely raised within httpx
            except openai.RateLimitError:
                print("Received a 429 status code; backing off before retrying.")
                # BUGFIX: actually back off instead of retrying immediately.
                time.sleep(2 ** attempt)
            except openai.APIStatusError as e:
                print("A non-200 status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning None.")
        return None

    # --------------------------------------------------------------
    def llm_instructions(self):
        """
        System instructions for the LLM: generate motivational push messages
        for a lapsed music student, building on previously sent messages.

        :return: instruction string
        """
        instructions = """
        You are an AI assistant helping to create push notification messages for a music student
        who has not been active recently. Each new message should build on previously sent
        messages. Provide short, motivational text that encourages the user to come back.
        Ensure the final output is valid JSON with keys "header" and "message."
        """.strip()
        return instructions
Messaging_system/Permes.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ the flow of the Program starts from create_personalized_message function
3
+ """
4
+
5
+
6
+ import time
7
+ from tqdm import tqdm
8
+ from Messaging_system.DataCollector import DataCollector
9
+ from Messaging_system.CoreConfig import CoreConfig
10
+ from Messaging_system.LLMR import LLMR
11
+ import streamlit as st
12
+ from Messaging_system.Message_generator import MessageGenerator
13
+ from Messaging_system.PromptGenerator import PromptGenerator
14
+ from Messaging_system.SnowFlakeConnection import SnowFlakeConn
15
+
16
+
17
+
18
class Permes:
    """
    LLM-based personalized message generator — the public entry point of the
    messaging pipeline (data collection -> prompts -> messages).
    """

    def create_personalize_messages(self, session, users, brand, config_file, openai_api_key, CTA, segment_info,
                                    platform="push", number_of_messages=1, instructionset=None,
                                    message_style=None, selected_input_features=None, selected_source_features=None
                                    , recsys_contents=None,
                                    additional_instructions=None, identifier_column="user_id",
                                    sample_example=None, number_of_samples=None, involve_recsys_result=False,
                                    messaging_mode="message", target_column=None, ongoing_df=None,
                                    progress_callback=None, segment_name="no_recent_activity"):
        """
        Create personalized messages for the input users, for both the app and
        push platforms.

        :param session: snowflake connection object
        :param users: users dataframe
        :param brand: brand identifier (used in URLs and table names)
        :param config_file: dict of pipeline configuration values
        :param openai_api_key: API key for the LLM calls
        :param CTA: call to action for the messages
        :param segment_info: common information about the users
        :param platform: "push" or "app"
        :param number_of_messages: total messages to generate per user
        :param instructionset: per-step instruction set for multi-message runs
        :param message_style: style of the message (tone)
        :param selected_input_features: input columns to keep (identifier added)
        :param selected_source_features: source columns exposed to the prompt
        :param recsys_contents: pre-computed recommendation contents
        :param additional_instructions: extra free-form prompt instructions
        :param identifier_column: column identifying users ("user_id" or "email")
        :param sample_example: a sample for one-shot prompting
        :param number_of_samples: number of few-shot samples
        :param involve_recsys_result: include recsys recommendations in messages
        :param messaging_mode: "message", "recommend_playlist", etc.
        :param target_column: column holding a target content_id to recommend
        :param ongoing_df: unused here; reserved for resuming a previous run
        :param progress_callback: optional callable for UI progress updates
        :param segment_name: name of the user segment being processed
        :return: users dataframe enriched with prompts and generated messages
        """
        # Normalize the identifier column first (renames it to USER_ID unless
        # the users are identified by email).
        users = self.identify_users(users_df=users, identifier_column=identifier_column)

        # BUGFIX: work on a copy of the caller's feature list so the caller's
        # list is not mutated as a side effect of this call.
        if selected_input_features is None:
            selected_input_features = [identifier_column]
        else:
            selected_input_features = list(selected_input_features)
            if identifier_column not in selected_input_features:
                # identify_users upper-cases the identifier column on rename.
                selected_input_features.append(identifier_column.upper())
            users = users[selected_input_features]

        personalize_message = CoreConfig(session=session,
                                         users_df=users,
                                         brand=brand,
                                         platform=platform,
                                         config_file=config_file)

        # Mandatory configuration.
        personalize_message.set_CTA(CTA)
        personalize_message.set_segment_info(segment_info)
        personalize_message.set_openai_api(openai_api_key)
        personalize_message.set_segment_name(segment_name=segment_name)
        personalize_message.set_number_of_messages(number_of_messages=number_of_messages, instructionset=instructionset)

        # Optional configuration — only applied when provided.
        if message_style:
            personalize_message.set_message_style(message_style)
        if sample_example:
            personalize_message.set_sample_example(sample_example)
        if additional_instructions:
            personalize_message.set_additional_instructions(additional_instructions)
        if number_of_samples:
            personalize_message.set_number_of_samples(number_of_samples)
        if selected_source_features:
            personalize_message.set_features_to_use(selected_source_features)
        if involve_recsys_result:
            personalize_message.set_messaging_mode("recsys_result")
            personalize_message.set_involve_recsys_result(involve_recsys_result)
        if target_column:
            personalize_message.set_target_feature(target_column)
        if messaging_mode != "message":
            personalize_message.set_messaging_mode(messaging_mode)
        if recsys_contents:
            personalize_message.set_recsys_contents(recsys_contents)

        users_df = self._create_personalized_message(core_config=personalize_message,
                                                     progress_callback=progress_callback)

        # Cost estimation (gpt-4o-mini pricing: $0.15/1M prompt, $0.60/1M completion).
        total_prompt_tokens = personalize_message.total_tokens["prompt_tokens"]
        total_completion_tokens = personalize_message.total_tokens["completion_tokens"]
        total_cost = ((total_prompt_tokens / 1000000) * 0.15) + (
            (total_completion_tokens / 1000000) * 0.6)
        print(f"Estimated Cost (USD): {total_cost:.5f}")

        # TODO: persist results to Snowflake once evaluation steps are in place.
        # snowflake_conn = SnowFlakeConn(session=session, brand=brand)
        # query = snowflake_conn.generate_write_sql_query(table_name="AI_generated_messages", dataframe=users_df)
        # snowflake_conn.run_write_query(query=query, table_name="AI_generated_messages", dataframe=users_df)
        # snowflake_conn.close_connection()

        return users_df

    # -----------------------------------------------------
    def identify_users(self, users_df, identifier_column):
        """
        Normalize the user-identifier column: email identifiers are kept as-is,
        any other identifier column is renamed to "USER_ID" (in place).

        :param users_df: users dataframe
        :param identifier_column: name of the identifying column
        :return: updated users dataframe
        """
        if identifier_column.upper() == "EMAIL":
            return users_df
        users_df.rename(columns={identifier_column: "USER_ID"}, inplace=True)
        return users_df

    # ------------------------------------------------------------------
    def _create_personalized_message(self, core_config, progress_callback):
        """
        Run the pipeline: data collection -> (optional) recommendations ->
        prompt generation -> message generation -> filtering invalid rows.

        :param core_config: fully configured CoreConfig instance
        :param progress_callback: optional callable for UI progress updates
        :return: users_df with extracted information and personalized messages
        """
        # Collect all the data needed to personalize messages.
        datacollect = DataCollector(core_config)
        core_config = datacollect.gather_data()

        # Generate recommendations when they should be embedded in messages.
        if core_config.involve_recsys_result:
            recommender = LLMR(core_config)
            core_config = recommender.get_recommendations(progress_callback)

        # Build one prompt per user.
        prompt = PromptGenerator(core_config)
        core_config = prompt.generate_prompts()

        # Generate the messages.
        message_generator = MessageGenerator(core_config)
        core_config = message_generator.generate_messages(progress_callback)

        # Drop rows without a valid message (null, empty, or whitespace-only).
        # BUGFIX: fillna("") first — NaN.astype(bool) evaluates to True, so
        # null messages previously survived this filter.
        messages = core_config.users_df["message"].fillna("")
        core_config.users_df = core_config.users_df[messages.str.strip().astype(bool)]
        core_config.checkpoint()

        # Snowflake session closing is handled by the caller for now.
        # core_config.session.close()

        return core_config.users_df
158
+
159
+
160
+
Messaging_system/PromptGenerator.py ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ THis class generate proper prompts for the messaging system
3
+ """
4
+ import pandas as pd
5
+ from tqdm import tqdm
6
+
7
+
8
+ class PromptGenerator:
9
+
10
+ def __init__(self, Core):
11
+ self.Core = Core
12
+
13
+ # --------------------------------------------------------------
14
+ # --------------------------------------------------------------
15
def generate_prompts(self):
    """
    Build one personalized prompt per user row and write it, together with a
    'source' marker, back into the shared users dataframe.

    :return: the Core object with its users_df updated in place
    """
    users = self.Core.users_df
    for row_idx, user_row in tqdm(users.iterrows(), desc="generating prompts"):
        # One prompt per user; the LLM call happens later in the pipeline.
        users.at[row_idx, "prompt"] = self.generate_personalized_prompt(user=user_row)
        users.at[row_idx, "source"] = "AI-generated"
    return self.Core
30
+
31
+ # --------------------------------------------------------------
32
def safe_get(self, value):
    """Return *value* as a string, or "Not available" for missing/NaN values."""
    if pd.notna(value):
        return str(value)
    return "Not available"
34
+
35
+ # ==============================================================
36
def get_user_profile(self, user):
    """
    Assemble the 'User Information' section of the prompt for one user,
    combining segment-level info with the user's individual fields.

    :param user: user row (dict-like)
    :return: formatted profile string
    """
    extra = self.user_additional_info(user)
    sg = self.safe_get  # shorthand for the missing-value-safe formatter

    return f"""
    ### **User Information:**

    Here is the information about the user:
    {sg(self.Core.segment_info)}

    **User profile:**
    first name: {sg(user.get("first_name"))}
    {sg(user.get("user_info"))}
    last completed content: {sg(user.get("last_completed_content"))}
    {sg(extra)}
    Weeks since Last interaction:{sg(user.get("weeks_since_last_interaction"))}
    """
55
+
56
+ # --------------------------------------------------------------
57
def generate_personalized_prompt(self, user):
    """
    Build the full personalized prompt for a single user by combining the
    context, CTA, personalization, recommendation, task and output sections.

    :param user: user row (dict-like)
    :return: personalized prompt (string)
    """
    context = self.input_context()
    cta = self.CTA_instructions()

    # BUGFIX: previously this variable was only assigned inside the `if`
    # branch, raising NameError whenever neither recsys results nor a target
    # content were involved. Default to an empty section.
    recommendations_instructions = ""
    if self.Core.involve_recsys_result or self.Core.target_content is not None:
        if user["recommendation"] is not None or user["recommendation_info"] is not None:
            recommendations_instructions = self.recommendations_instructions(user=user) + "\n"

    user_info = self.get_user_profile(user=user)
    personalize_message_instructions = self.personalize_message_instructions(user)
    output_instructions = self.output_instruction()
    task_instructions = self.task_instructions()

    prompt = f"""
    {context}
    {cta}

    {personalize_message_instructions}
    {recommendations_instructions}
    {task_instructions}

    {user_info}
    {output_instructions}
    """

    return prompt
92
+
93
+ # --------------------------------------------------------------
94
+ # --------------------------------------------------------------
95
def input_context(self):
    """Return the opening context paragraph shared by every prompt."""
    # Static text; no per-user substitution happens here.
    return """ You are a helpful assistant at Musora, an online music education platform that helps users
    learn music. Your goal is to generate a fully personalized message specifically tailored to the user, to increase
    their engagement with the message.

    """
107
+
108
+ # --------------------------------------------------------------
109
+ # --------------------------------------------------------------
110
def CTA_instructions(self):
    """
    Build the call-to-action section of the prompt from Core.CTA.

    :return: CTA instructions (str)
    """
    cta = self.Core.CTA
    return f"""
    Create a clear header, and a message considering the call to action we want the user to hear from us:

    **Call to Action:**
    - **{cta}** \n
    """
124
+
125
+ # --------------------------------------------------------------
126
+ # --------------------------------------------------------------
127
def user_additional_info(self, user):
    """
    Return the user's free-form 'additional_info' field, or "" when it is
    missing, NaN, or empty (including whitespace-only strings).

    :param user: user row (dict-like) with an 'additional_info' key
    :return: the additional info value, or ""
    """
    info = user["additional_info"]
    # Keep only present, non-empty values; empty containers and blank strings
    # are treated as "no additional info".
    if pd.notna(info) and info not in [None, [], {}] and (not isinstance(info, str) or info.strip()):
        return info
    return ""
141
+
142
+ # --------------------------------------------------------------
143
+ # --------------------------------------------------------------
144
+ def recommendations_instructions(self, user):
145
+ """
146
+ instructions about target recommendation for the user
147
+ :param user:
148
+ :return:
149
+ """
150
+
151
+ instructions_for_recsys = f"""
152
+ ### ** Recommendations instructions **:
153
+ Below is the content that we want to recommend to the user:
154
+
155
+ Recommended content: {user["recommendation_info"]}
156
+
157
+ - Use the **CONTENT_TITLE** naturally in the message if capable, but do not use the exact title verbatim or put it in quotes.
158
+ - Naturally mention the **CONTENT_TYPE** for course, workout, and quicktips if capable.
159
+ - If the recommended content has an **Artist** with a known full name, use the ** FULL NAME ** naturally in the message if capable. If only the first name of the Artist is available, ** DO NOT ** use it at all.
160
+ """
161
+
162
+ # need to adjust
163
+ instructions_for_target_content = """
164
+ - Considering the information about the user, and the content that we want to recommend, include the **TITLE** inside single quotes, or use the title naturally without the exact title name and quotes if capable.
165
+ Naturally mention the **CONTENT_TYPE** for course, workout, quicktips if capable and shortly provide a reasoning why the content is helpful for them.
166
+
167
+ **Target recommended Content**:
168
+ """
169
+
170
+ instructions = ""
171
+
172
+ if self.Core.involve_recsys_result:
173
+ instructions += f"""
174
+ {instructions_for_recsys}
175
+ """
176
+
177
+ elif self.Core.target_content is not None:
178
+ # fetching the information related to the target content from content_table
179
+ target_info = self.get_target_content_info(user)
180
+ instructions += f"""
181
+ {instructions_for_target_content}
182
+ {target_info}
183
+ """
184
+
185
+ return instructions
186
+
187
+ # --------------------------------------------------------------
188
+ # --------------------------------------------------------------
189
def get_target_content_info(self, user):
    """
    Fetch the metadata of the target content we want to recommend to the user
    from Core.content_info.

    :param user: target user row; user[Core.target_content] holds a content_id
    :return: formatted text with content_id and content_info, or None when the
             content cannot be found in the content table
    """
    target_id = int(user[self.Core.target_content])

    try:
        # Locate the row for the target content in the content table.
        matches = self.Core.content_info.loc[self.Core.content_info['content_id'] == target_id]
        if matches.empty:
            raise KeyError(target_id)
        # BUGFIX: formatting the whole filtered frame/Series embedded pandas
        # index/dtype noise (and stray quote characters) into the prompt; use
        # the first matching row's scalar values instead.
        content_row = matches.iloc[0]
        return f"""
        **content_id** : {content_row["content_id"]}
        **content_info** : \n {content_row["content_info"]} \n\n
        """
    # BUGFIX: narrow the previous bare `except` to lookup-related failures so
    # unrelated programming errors are not silently swallowed.
    except (KeyError, IndexError, TypeError, ValueError):
        print(f"Target content cannot be found in the content database: content_id = {target_id}")
        return None
211
+
212
+ # --------------------------------------------------------------
213
+ # --------------------------------------------------------------
214
def personalize_message_instructions(self, user):
    """
    Build the personalization section of the prompt: general platform
    instructions plus per-user rules (name usage, birthday reminder,
    caller-supplied extra instructions and the firewall rules).

    :param user: user row (dict-like)
    :return: personalized message instructions (string)
    """
    general_instructions = self.message_type_instructions()

    instructions = """
    ### ** Personalized Message Specifications **

    Based on the available information about the user, create a personalized message for the user:
    \n
    """

    # Name: only address the user by name when the field is enabled, present
    # and a non-empty value.
    if "first_name" in self.Core.list_of_features and pd.notna(user["first_name"]) and user["first_name"] not in [
        None, [], {}] and (not isinstance(user["first_name"], str) or user["first_name"].strip()):
        instructions += """
        - Address the user by their first name (only first letter capital) to make the message more personal. \n
        """
    else:
        instructions += """
        - If the user's name is not available or invalid (e.g. email), proceed without addressing them by name. \n
        """

    # Birthday reminder: same presence/non-empty check as above.
    if "birthday_reminder" in self.Core.list_of_features and pd.notna(user["birthday_reminder"]) and user[
        "birthday_reminder"] not in [None, [], {}] and (
            not isinstance(user["birthday_reminder"], str) or user["birthday_reminder"].strip()):
        instructions += """
        - **Include a short message to remind them that their birthday is coming up.** \n

        """

    # BUGFIX: the original used `or`, which is always true because
    # str(None).strip() == "None" != '' — so the literal text "None" was
    # appended to the prompt whenever no additional instructions were given.
    if self.Core.additional_instructions is not None and str(self.Core.additional_instructions).strip() != '':
        instructions += str(self.Core.additional_instructions)

    instructions += self.fire_wall() + "\n"

    final_instructions = f"""
    {general_instructions}

    {instructions}

    """

    return final_instructions
265
+
266
+ # --------------------------------------------------------------
267
+ # --------------------------------------------------------------
268
+
269
def message_type_instructions(self):
    """
    Return the platform-specific message-format instructions ("push" or
    "app"), with the configured message style appended.

    :return: instructions string ("" for unknown platforms)
    """
    message_style = self.message_style_instructions()
    platform = self.Core.platform

    if platform == "push":
        return f"""
        ### ** General Specifications: **

        - The message is a **mobile push notification**.
        - Make all parts of the message highly **personalized**, **eye-catching**, and **bring curiosity**
        - ** Keep the First sentence as "header": short and less than 30 character **.
        - ** For the "header", Use a space following with a proper emoji at the end (e.g. Great work John 😍) **
        - Use drum emoji or general music emojis (e.g. 🥁, 🎶, 🎵), and Other emojis that relate to motivation, progress, inspiration, and create curiosity can also be used (like 🔥, 🚀, 💪, 🎉, 👀)
        - **Keep the "message" concise and under 100 characters**.
        - Every word should contribute to maximizing impact and engagement, so start directly with the message content without greetings or closing phrases.
        - Avoid using same or similar words so close together in "message" and "header", and make sure there is no grammar problem.
        - ****.
        {message_style}

        """

    if platform == "app":
        return f"""
        Message Specifications:
        - The message is an **in app notification**.
        - ** Keep the First sentence as "header" that should be a short personalized eye catching sentence less than 40 character **.
        - ** For the "header", don't use exclamation mark at the end, instead, use a space following with a proper emoji at the end of the "header" (e.g. Great work John 😍) **
        - **Keep the message concise and straightforward**.
        - **Start directly with the message content**; do not include greetings (e.g., "Hello") or closing phrases.
        - Make the message highly **personalized** and **eye-catching**.
        - "Personalized" means the user should feel the message is specifically crafted for them and not generic.
        - **Every word should contribute to maximizing impact and engagement**.
        - {message_style}
        """

    # Unknown platform: no format-specific guidance.
    return ""
311
+
312
+ # --------------------------------------------------------------
313
+ # --------------------------------------------------------------
314
+ def message_style_instructions(self):
315
+ """
316
+ defines the style of the message: e.g. friendly, kind, tone, etc.
317
+ :return: style_instructions(str)
318
+ """
319
+
320
+ if self.Core.message_style is None and self.Core.sample_example is None:
321
+ message_style = f"""
322
+ - Keep the tone **kind**, **friendly causal**, and **encouraging**.
323
+ """
324
+
325
+ else:
326
+ message_style = f"""
327
+ - {self.Core.message_style}.
328
+ """
329
+
330
+ return message_style
331
+
332
+ # --------------------------------------------------------------
333
+ # --------------------------------------------------------------
334
+ def fire_wall(self):
335
+ """
336
+ Provide explicit instructions to ensure that sensitive information is not included in the generated message.
337
+ :return: string
338
+ """
339
+ fire_wall = f"""
340
+ ### Restrictions:
341
+
342
+ - **Do not include** any personal sensitive or confidential information.
343
+ - **Avoid AI Jargon:** Skip overused phrases like: {self.Core.config_file["AI_Jargon"]}.
344
+ """
345
+ return fire_wall
346
+
347
+ # --------------------------------------------------------------
348
+ # --------------------------------------------------------------
349
+ def output_instruction(self):
350
+ """
351
+ :return: output instructions as a string
352
+ """
353
+
354
+ example_output = self.example_output()
355
+ general_instructions = """
356
+ - The "header" must be less than 30 character.
357
+ - The "message" must be less than 100 character.
358
+ - Do not include any links in the message.
359
+ - Preserve special characters and emojis in the message.
360
+ - Ensure that the output is a valid JSON.
361
+ - Do not include any text outside the JSON code block.
362
+ """
363
+
364
+ instructions = f"""
365
+ Your response should be in JSON format with the following structure:
366
+
367
+ {{
368
+ "header": "Generated title",
369
+ "message": "Generated message",
370
+ }}
371
+
372
+ {general_instructions}
373
+ """
374
+
375
+ output_instructions = f"""
376
+ ### **Output instructions**:
377
+
378
+ {example_output}
379
+ {instructions}
380
+ """
381
+
382
+ return output_instructions
383
+
384
+ # --------------------------------------------------------------
385
+ # --------------------------------------------------------------
386
+ def example_output(self):
387
+ """
388
+ returns an example output (1-shot) to guide the LLM
389
+ :return: example output
390
+ """
391
+
392
+ if self.Core.sample_example is None:
393
+
394
+ return ""
395
+
396
+ else:
397
+ # one shot prompting
398
+ example = f"""
399
+ Based on the examples below, create a header and message that follows the same style, tone, characteristic, and creativity.
400
+
401
+ ### **Examples:**
402
+ {self.Core.sample_example}
403
+ """
404
+
405
+ return example
406
+
407
+ # --------------------------------------------------------------
408
+ # --------------------------------------------------------------
409
+
410
+ def task_instructions(self):
411
+ """
412
+ creating instructions for specifying the tasks
413
+ :return:
414
+ """
415
+
416
+ if self.Core.involve_recsys_result:
417
+ recsys_task = """
418
+ - Create a perfect message and the header following the instructions, using the user's information and the content that we want to recommend.
419
+ - Use the instructions to include the recommended content in the message.
420
+ - Follow the instructions to create the messages.
421
+ """
422
+ else:
423
+ recsys_task = ""
424
+
425
+ message_task = """
426
+ - Create a perfect personalized message considering the information and instructions mentioned. Your output format should be based on **Output instructions**."""
427
+
428
+ instructions = f"""
429
+ ### Tasks:
430
+ {recsys_task}
431
+ {message_task}
432
+ """
433
+
434
+ return instructions
Messaging_system/SnowFlakeConnection.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This class create a connection to Snowflake, run queries (read and write)
3
+ """
4
+ import json
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from snowflake.snowpark import Session
9
+ from sympy.strategies.branch import condition
10
+
11
+
12
class SnowFlakeConn:
    """
    Helper around a Snowflake Snowpark session for the messaging system.

    Builds the brand-scoped queries (users, contents, interactions, recsys
    results, popular contents), runs read/write statements, and normalizes
    result dataframes before they are written back.
    """

    def __init__(self, session, brand):
        self.session = session  # an open snowflake.snowpark Session
        self.brand = brand      # brand name used to scope every query

        # Canonical column set/order expected in the final output table.
        self.final_columns = ['user_id', "email", "user_info", "permission",
                              "expiration_date", "recsys_result", "message",
                              "brand", "recommendation", "segment_name",
                              "timestamp"]

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def run_read_query(self, query, data):
        """
        Execute a read query on Snowflake.

        :param query: SQL text to execute
        :param data: short dataset name, used only for logging
        :return: pandas DataFrame with lower-cased column names, or None when
                 the query fails (kept best-effort for backward compatibility;
                 callers must handle a None result)
        """
        try:
            dataframe = self.session.sql(query).to_pandas()
            dataframe.columns = dataframe.columns.str.lower()
            print(f"reading {data} table successfully")
            return dataframe
        except Exception as e:
            print(f"Error in creating/updating table: {e}")

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def is_json_parsed_to_collection(self, s):
        """
        Return True when *s* is a JSON document that parses to a dict or list.

        Fix: the original bare ``except`` swallowed *every* exception
        (including KeyboardInterrupt/SystemExit); only the errors that
        ``json.loads`` can actually raise are caught now.
        """
        try:
            parsed = json.loads(s)
        except (TypeError, ValueError):
            # TypeError: s is not str/bytes; ValueError covers JSONDecodeError.
            return False
        return isinstance(parsed, (dict, list))

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def store_df_to_snowflake(self, table_name, dataframe, database="ONLINE_RECSYS", schema="GENERATED_DATA"):
        """
        Write a dataframe to Snowflake, auto-creating and overwriting the table.

        :param table_name: target table (upper-cased before writing)
        :param dataframe: pandas DataFrame to persist
        :param database: target database
        :param schema: target schema
        :return: None; errors are logged, not raised (best-effort)
        """
        try:
            self.session.use_database(database)
            self.session.use_schema(schema)

            dataframe = dataframe.reset_index(drop=True)
            # Snowflake convention: upper-case column names on write.
            dataframe.columns = dataframe.columns.str.upper()

            self.session.write_pandas(df=dataframe,
                                      table_name=table_name.strip().upper(),
                                      auto_create_table=True,
                                      overwrite=True,
                                      use_logical_type=True)
            print(f"Data inserted into {table_name} successfully.")

        except Exception as e:
            print(f"Error in creating/updating/inserting table: {e}")

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def get_data(self, data, list_of_ids=None):
        """
        Fetch one of the supported datasets.

        :param data: one of {'users', 'contents', 'interactions', 'recsys',
                     'popular_contents'}
        :param list_of_ids: optional list of user ids to filter on
        :return: pandas DataFrame (or None when the read fails)
        :raises ValueError: on an unsupported dataset name
        :raises NotImplementedError: when the matching query builder is missing
        """
        valid_data = {'users', 'contents', 'interactions', 'recsys', 'popular_contents'}

        if data not in valid_data:
            raise ValueError(f"Invalid data type: {data}")

        # Dispatch to the matching private query builder.
        method_name = f"_get_{data}"
        method = getattr(self, method_name, None)
        if method is None:
            raise NotImplementedError(f"The method {method_name} is not implemented.")

        query = method(list_of_ids)
        # Fresh name instead of shadowing the 'data' parameter (the original
        # reused 'data' for both the dataset name and the result frame).
        result = self.run_read_query(query, data)
        return result

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def _id_condition(self, list_of_ids, keyword="AND"):
        """
        Build the optional USER_ID filter clause shared by the query builders.

        :param list_of_ids: ids to filter on, or None for no filter
        :param keyword: 'AND' or 'WHERE' depending on the surrounding query
        :return: '' when no ids are given, otherwise e.g. "AND USER_ID in (1, 2)"
        """
        if list_of_ids is None:
            return ""
        ids_str = "(" + ", ".join(map(str, list_of_ids)) + ")"
        return f"{keyword} USER_ID in {ids_str}"

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def _get_contents(self, list_of_ids=None):
        """Query for the brand's vectorized contents (ids are not used here)."""
        # NOTE(review): brand is interpolated directly into the SQL; it comes
        # from our own config, but parameter binding would be safer.
        query = f"""
        select CONTENT_ID, CONTENT_TYPE, CONTENT_PROFILE as content_info, CONTENT_PROFILE_VECTOR
        from ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT
        where BRAND = '{self.brand}'
        """
        return query

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def _get_users(self, list_of_ids=None):
        """Query for the brand's users, including a computed birthday countdown."""
        condition = self._id_condition(list_of_ids)

        query = f"""
        select USER_ID, BRAND, FIRST_NAME, BIRTHDAY, TIMEZONE, EMAIL, CURRENT_TIMESTAMP() AS TIMESTAMP, DIFFICULTY, SELF_REPORT_DIFFICULTY, USER_PROFILE as user_info, PERMISSION, EXPIRATION_DATE,
        DATEDIFF(
            day,
            CURRENT_DATE(),
            CASE
                WHEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY)) < CURRENT_DATE()
                    THEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()) + 1, EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
                ELSE DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
            END) AS birthday_reminder
        from ONLINE_RECSYS.PREPROCESSED.USERS
        where BRAND = '{self.brand}' {condition}
        """
        return query

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def _get_interactions(self, list_of_ids=None):
        """Query for each user's single most recent video interaction."""
        condition = self._id_condition(list_of_ids)

        query = f"""
        WITH latest_interactions AS(
            SELECT
                USER_ID, CONTENT_ID, CONTENT_TYPE, EVENT_TEXT, TIMESTAMP,
                ROW_NUMBER() OVER(PARTITION BY USER_ID ORDER BY TIMESTAMP DESC) AS rn
            FROM ONLINE_RECSYS.PREPROCESSED.RECSYS_INTEACTIONS
            WHERE BRAND = '{self.brand}' AND EVENT_TEXT IN('Video Completed', 'Video Playing') {condition})

        SELECT i.USER_ID, i.CONTENT_ID, i.CONTENT_TYPE, c.content_profile as last_completed_content, i.EVENT_TEXT, i.TIMESTAMP, DATEDIFF('week', i.TIMESTAMP, CURRENT_TIMESTAMP) AS weeks_since_last_interaction
        FROM latest_interactions i
        LEFT JOIN
            ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT c ON c.CONTENT_ID = i.CONTENT_ID
        WHERE rn = 1;
        """
        return query

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def _get_recsys(self, list_of_ids=None):
        """Query for per-user recsys results from the brand-specific column."""
        condition = self._id_condition(list_of_ids, keyword="WHERE")

        recsys_col = f"{self.brand}_recsys_v2"
        query = f"""
        select USER_ID, {recsys_col} as recsys_result
        from RECSYS_V2.RECSYS_V2_CIO.RECSYS_V2_CUSTOMER_IO
        {condition}
        """
        return query

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def _get_popular_contents(self, list_of_ids=None):
        """Query for the brand's popular contents (ids are not used here)."""
        query = f"""
        select POPULAR_CONTENT
        from RECSYS_V2.RECSYS_V2_CIO.POPULAR_CONTENT_CUSTOMER_IO
        where brand = '{self.brand.lower()}'
        """
        return query

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def extract_id_from_email(self, emails):
        """
        Resolve user ids for a collection of email addresses.

        :param emails: iterable of email strings
        :return: DataFrame with columns user_id/email (or None when the read
                 fails)
        """
        # NOTE(review): emails are interpolated into the SQL string; consider
        # parameter binding if these values can come from untrusted input.
        email_list_str = ', '.join(f"'{email}'" for email in emails)
        query = f"""
        SELECT id as USER_ID, email as EMAIL
        FROM STITCH.MUSORA_ECOM_DB.USORA_USERS
        WHERE email IN ({email_list_str})
        """

        user_ids_df = self.run_read_query(query, data="User_ids")
        return user_ids_df

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def adjust_dataframe(self, dataframe):
        """
        Normalize a dataframe to exactly ``self.final_columns``.

        Keeps only the expected columns, adds any missing ones filled with
        None, reorders to the canonical order, and restores the canonical
        (case-sensitive) column names.  The input dataframe is not modified.

        :param dataframe: pandas DataFrame to normalize
        :return: new DataFrame with exactly the expected columns/order
        """
        final_df = dataframe.copy()

        # Match case-insensitively against the expected columns.
        final_df.columns = final_df.columns.str.lower()
        expected_cols = [col.lower() for col in self.final_columns]

        # Keep only expected columns that are actually present.
        available = [col for col in final_df.columns if col in expected_cols]
        final_df = final_df[available]

        # Fill in any missing expected columns.
        for col in expected_cols:
            if col not in final_df.columns:
                final_df[col] = None

        # Canonical order, then canonical (case-sensitive) names.
        final_df = final_df[expected_cols]
        rename_mapping = {col.lower(): col for col in self.final_columns}
        final_df.rename(columns=rename_mapping, inplace=True)

        return final_df

    # ---------------------------------------------------------------
    # ---------------------------------------------------------------
    def close_connection(self):
        """Close the underlying Snowpark session."""
        self.session.close()
Messaging_system/StoreLayer.py ADDED
File without changes
Messaging_system/context_validator.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ import openai
4
+ from openai import OpenAI
5
+ from tqdm import tqdm
6
+
7
+
8
+
9
class Validator:
    """
    LLM-based moderator that decides whether user-generated text (comments,
    forum posts) is valid for downstream use.
    """

    def __init__(self, api_key):

        # Instructions are configured later via set_validator_instructions().
        self.validator_instructions = None
        self.api_key = api_key
        self.model = "gpt-4o-mini"

        # Token bookkeeping, used to respect rate limits / estimate cost.
        self.temp_token_counter = 0
        self.total_tokens = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
        }

    # -------------------------------------------------------------------
    def set_openai_api(self, openai_key):
        """
        Set (or replace) the OpenAI API key used for requests.

        Fix: the original docstring was a copy-paste about "template
        placeholders" unrelated to this method.

        :param openai_key: API key string
        """
        self.api_key = openai_key

    # -------------------------------------------------------------------
    def context_prompt(self):
        """System prompt framing the model as a text moderator."""
        instructions = """
        You are a text moderator and you should parse the input text. based on below instructions. you should decide if
        the input text is a valid input or not.
        """
        return instructions

    # -------------------------------------------------------------------
    def initial_prompt(self):
        """Opening prompt section describing Musora and the validation task."""
        instructions = """You are a helpful assistant at Musora, an online music education platform that helps users
        learn music. Our students will provide user-generated-context such as comments and forums on engaging musical
        contents like songs, lessons, workouts or other type of musical and educational content. Your task is
        to determine if the input text provided by our student is a valid text or not.

        """
        return instructions

    # -------------------------------------------------------------------
    def set_validator_instructions(self, valid_instructions="", invalid_instructions=""):
        """
        Compose and store the validation criteria used by every prompt.

        :param valid_instructions: extra criteria that make a text valid
        :param invalid_instructions: extra criteria that make a text invalid
        """
        instructions = f"""
        ** The text is INValid if it falls into any of the below criteria **:

        {invalid_instructions}
        {self.fire_wall()}
        --------------------------

        Please ensure that the text meets the following criteria to be considered **valid**:

        {valid_instructions}
        {self.default_valid_text()}
        """

        self.validator_instructions = instructions

    # -------------------------------------------------------------------
    def output_instruction(self):
        """
        :return: output instructions (JSON contract with a "valid" key) as a
                 string
        """
        output_instructions = """
        ** Task: **
        - **Based on the input text, the music educational nature of our contents, and instructions about validating the student's input, check if the text is a valid input or not.**
        - **Your output should be strictly "True" if it is a Valid text, or "False" if it not a valid text.**
        - **You should provide the output in JSON format where the key is "valid"** - **Do not include any text outside the JSON code block**.

        Your response should be in JSON format with the following structure:

        example of a VALID text:

        {
            "valid": "True",
        }

        Example of an INVALID text:

        {
            "valid": "False",
        }
        """
        return output_instructions

    # -------------------------------------------------------------------
    def get_llm_response(self, prompt, max_retries=3):
        """
        Send the prompt to the LLM and return the parsed JSON verdict.

        Retries on malformed responses.  Fix: on total failure the original
        returned a ``([], {})`` tuple, which crashed every caller doing
        ``response["valid"]``; a dict with the expected key is returned now.

        :param prompt: full prompt text
        :param max_retries: attempts before giving up
        :return: dict with a "valid" key ("True", "False", or None on failure)
        """
        openai.api_key = self.api_key
        instructions = self.context_prompt()
        client = OpenAI(api_key=self.api_key)

        for attempt in range(max_retries):
            try:
                response = client.chat.completions.create(
                    model=self.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=500,
                    n=1,
                    temperature=0.7
                )

                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    content = response.choices[0].message.content
                    output = json.loads(content)

                    if 'valid' not in output:
                        print(f"'valid' key is missing in response on attempt {attempt + 1}. Retrying...")
                        continue

                    if output["valid"] not in ["True", "False"]:
                        print(f"True or False value missing in response on attempt {attempt + 1}. Retrying...")
                        continue

                    # Only account tokens for accepted responses.
                    self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                    self.total_tokens['completion_tokens'] += tokens['completion_tokens']
                    self.temp_token_counter += tokens['prompt_tokens'] + tokens['completion_tokens']
                    return output

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # underlying exception, likely from httpx
            except openai.RateLimitError as e:
                print("A 429 status code was received; we should back off a bit.")
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning empty response.")
        return {"valid": None}

    # -------------------------------------------------------------------
    def create_validation_prompt(self, input_text):
        """
        Wrap the student's text with the validation instructions.

        :param input_text: user-generated text to validate
        :return: full prompt string
        """
        prompt = f"""
        {self.initial_prompt()}

        **Input text provided by the Student:**
        {input_text}

        {self.validator_instructions}
        {self.output_instruction()}
        """

        return prompt

    # -------------------------------------------------------------------
    def validate_dataframe(self, dataframe, target_column, progress_callback=None):
        """
        Validate every row's text and store the verdict in a "valid" column.

        :param dataframe: pandas DataFrame to validate (modified in place)
        :param target_column: column holding the text to validate
        :param progress_callback: optional callable(progress, total)
        :return: the same dataframe with the "valid" column filled
        """
        dataframe["valid"] = None
        start_time = time.time()
        total_users = len(dataframe)

        for progress, (idx, row) in enumerate(tqdm(dataframe.iterrows(), desc="generating prompts")):

            if progress_callback is not None:
                progress_callback(progress, total_users)
            input_text = row[target_column]
            prompt = self.create_validation_prompt(input_text)
            response = self.get_llm_response(prompt)
            dataframe.at[idx, "valid"] = response["valid"]

            current_time = time.time()
            delta = current_time - start_time

            # Respect the per-minute token budget (safe margin of 195k): if we
            # burned through it in under a minute, pause until the window
            # resets.  Fix: the original tested `delta >= 60`, which never
            # slept while still inside the active one-minute window.
            if self.temp_token_counter > 195000 and delta < 60:
                print("Sleeping for 60 seconds to respect the token limit...")
                self.temp_token_counter = 0
                time.sleep(60)
                start_time = time.time()

        return dataframe

    # -------------------------------------------------------------------
    def validate_text(self, text):
        """
        Validate a single piece of text.

        :param text: user-generated text to validate
        :return: "True", "False", or None when the LLM call failed
        """
        prompt = self.create_validation_prompt(text)
        response = self.get_llm_response(prompt)
        return response["valid"]

    # -------------------------------------------------------------------
    def fire_wall(self):
        """
        Provide explicit instructions to ensure that sensitive or inappropriate information is identified in the text.
        :return: string
        """
        fire_wall = """
        As a content moderator, please review the text and ensure it does not contain any of the following:

        **Disallowed Content Categories:**

        1. **Sensitive Personal Information**: personal data such as phone numbers, email addresses, or other identifying information.

        2. **Offensive or Discriminatory Language**: Hate speech, harassment, bullying, or any derogatory remarks targeting individuals or groups based on race, ethnicity, nationality, religion, gender, sexual orientation, age, disability, or any other characteristic.

        3. **Sensitive Topics**: Content that discusses or promotes extremist views, political propaganda, or divisive religious beliefs in a manner that could incite hostility.

        4. **Removed or Restricted Content**: Mentions of songs, media, or features that have been removed or are restricted on our platform.

        5. **Technical Issues or Bugs**: Any references to glitches, errors, crashes, or other technical problems experienced on the platform.

        6. ** Language that is excessively angry, aggressive, or includes profanity or vulgar expressions. **

        7. **Privacy Violations**: Sharing of confidential information or content that infringes on someone's privacy rights.

        8. **Intellectual Property Violations**: Unauthorized use or distribution of copyrighted material.

        9. **Defamation**: False statements presented as facts that harm the reputation of an individual or organization.

        **Examples of Invalid Content:**

        - "This app is useless and the developers are idiots!"
        - "They removed my favorite song; it sucks"
        - "People who follow [specific religion] are all wrong and should be banned."

        If the text contains any of the above issues, please flag it as invalid.

        """
        return fire_wall

    # -------------------------------------------------------------------
    def default_valid_text(self):
        """
        Provide explicit instructions to ensure that the text is appropriate and meets the content guidelines.
        :return: string
        """
        valid_text = """

        **Allowed Content Criteria:**

        1. **Positive Sentiment**: The text should be encouraging, uplifting, or convey a positive emotion.

        2. **Constructive and Helpful**: Provides valuable insights, advice, or shares personal experiences that could
        benefit others. This can be sharing struggling in practices, challenges or other type of difficulties that might need our attention.

        3. **Respectful Language**: Uses polite and appropriate language, fostering a friendly and inclusive community environment.

        **Examples of Valid Content:**

        - "I love how this app helps me discover new music every day!"
        - "Here's a tip: creating themed playlists can really enhance your listening experience."
        - "I had a great time using this feature during my commute today."
        - "This session is so challenging for me and I'm feeling so much pain in my foot, might go over the workout couple more"
        """
        return valid_text
+ return valid_text
299
+
300
+
301
+
302
+
Messaging_system/protection_layer.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ protection layer on top of the messaging system to make sure the messages are as expected.
3
+ """
4
+
5
+ import json
6
+ import os
7
+ import openai
8
+ from openai import OpenAI
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+
12
+
13
+ # -----------------------------------------------------------------------
14
+
15
+ class ProtectionLayer:
16
+ """
17
+ Protection layer to double check the generated message:
18
+ """
19
+
20
+ def __init__(self, config_file, messaging_mode):
21
+
22
+ self.config_file = config_file
23
+ self.messaging_mode = messaging_mode
24
+
25
+ # LLM configs
26
+ self.api_key = os.environ.get("OPENAI_API") # will be set by user
27
+ self.model = "gpt-4o-mini" # will be set by user
28
+
29
+ # to trace the number of tokens and estimate the cost if needed
30
+ self.total_tokens = {
31
+ 'prompt_tokens': 0,
32
+ 'completion_tokens': 0,
33
+ }
34
+
35
+ # --------------------------------------------------------------
36
+ def llm_instructions(self):
37
+ """
38
+ Setting instructions for the LLM for the second pass.
39
+ """
40
+ instructions = (
41
+ "You are a protection layer AI. Your task is to check the given JSON message "
42
+ "against specific rules. If it violates any rule, fix only those errors. If "
43
+ "it does not violate any rule, return it exactly as is. You must respond ONLY "
44
+ "with valid JSON in the specified structure, and no additional text."
45
+ )
46
+ return instructions
47
+
48
+ # --------------------------------------------------------------
49
+ def get_general_rules(self):
50
+ """
51
+ Core rules to apply when checking or modifying the message.
52
+ """
53
+ return f"""
54
+ 1. No two consecutive sentences should end with exclamation points, change one of them to dot.
55
+ 2. ONLY Capitalize the first word of the 'header' as well as names or any proper nouns. Other words in the 'header' must be lowercase. (e.g. Jump back in, David! 🥁)
56
+ 3. If there is any grammar error in the message, you must fix it.
57
+ 4. Always use "the" before proper nouns, including any titles of the recommended content, if the title was in the message.
58
+ 5. Do not include any words that explicitly or implicitly reference a time-related concept (e.g., “new,” “recent,” “latest,” “upcoming,” etc.).
59
+ 6. If the **Artist** name from the recommended content is referenced in the message, it MUST be the **FULL NAME**. If only the first name is available, ** DO NOT ** use the artist name at all.
60
+ 7. If the message contains any AI_Jargon words (from below list) you MUST replace it with a more user-friendly synonym that makes sense.
61
+ AI_Jargon words are: {self.config_file["AI_Jargon"]}
62
+
63
+ 8. Preserve the original JSON structure: {{"header": "...", "message": "..."}}
64
+ 9. If no rule is violated, return the exact same JSON unchanged.
65
+ 10. The output must be strictly valid JSON with no extra commentary or text.
66
+ """
67
+
68
+ # --------------------------------------------------------------
69
+ def output_instruction(self):
70
+ """
71
+ :return: output instructions as a string
72
+ """
73
+ instructions = f"""
74
+ **You must output only valid JSON in the form:**
75
+
76
+ {{
77
+ "header": "Original header or modified version",
78
+ "message": "Original header or modified version"
79
+ }}
80
+
81
+
82
+ **Constraints:**
83
+ - The "header" must be less than 30 character.
84
+ - The "message" must be less than 100 character.
85
+ - No text is allowed outside this JSON structure.\n"
86
+ """
87
+
88
+ return instructions
89
+
90
+ # --------------------------------------------------------------
91
    def get_llm_response(self, prompt, max_retries=3):
        """
        Send the prompt to the OpenAI chat API and return the validated JSON reply.

        Retries up to max_retries times when the reply is missing keys, exceeds
        the configured length limits, or is not valid JSON.

        :param prompt: full user prompt to send
        :param max_retries: number of attempts before giving up
        :return: dict with 'header' and 'message' on success.
                 NOTE(review): on failure this returns ([], {}) — a different
                 type than the success path; confirm callers handle both.
        """

        openai.api_key = self.api_key
        instructions = self.llm_instructions()
        client = OpenAI(api_key=self.api_key)

        for attempt in range(max_retries):
            try:
                # response_format forces the model to emit a JSON object.
                response = client.chat.completions.create(
                    model=self.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=500,
                    n=1,
                    temperature=0.5
                )

                # Per-call usage; only added to the running totals once the
                # reply passes all validation below.
                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    content = response.choices[0].message.content
                    # Extract JSON code block

                    output = json.loads(content)
                    # output = json.loads(response.choices[0].message.content)

                    # Both keys are required by the downstream consumers.
                    if 'message' not in output or 'header' not in output:
                        print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
                        continue  # Continue to next attempt

                    else:
                        # Enforce the configured length limits; retry if exceeded.
                        if len(output["header"].strip()) > self.config_file["header_limit"] or len(output["message"].strip()) > self.config_file["message_limit"]:
                            print(f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
                            continue

                        # validating the JSON
                        self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                        self.total_tokens['completion_tokens'] += tokens['completion_tokens']
                        return output

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

            # API-level failures fall through to the next loop iteration (retry).
            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # an underlying Exception, likely raised within httpx.
            except openai.RateLimitError as e:
                print("A 429 status code was received; we should back off a bit.")
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning empty response.")
        return [], {}
156
+
157
+ # --------------------------------------------------------------
158
+ def get_context(self):
159
+ """
160
+ context for the LLM
161
+ :return: the context string
162
+ """
163
+ context = (
164
+ "We created a personalized message for a user "
165
+ "considering the provided information. Your task is to double-check "
166
+ "the message and correct or improve the output, according to instructions."
167
+ )
168
+ return context
169
+
170
+ # --------------------------------------------------------------
171
    def generate_prompt(self, message, user):
        """
        Build the critic prompt that asks the LLM to validate/correct a message.

        :param message: the JSON message ({"header", "message"}) to be checked
        :param user: user record; must provide 'recommendation_info' when
                     messaging_mode == "recsys_result" — TODO confirm schema
        :return: the assembled prompt string
        """
        # Recommendation details are only injected in recommendation mode.
        recommended_content = ""
        if self.messaging_mode == "recsys_result":
            recommended_content = f"""
            ### ** Recommended Content **
            {user['recommendation_info']}
            """

        prompt = f"""

        ### System Instruction:
        {self.llm_instructions()}

        ### Context:
        We created a personalized message for a user based on available information.
        Your job is to check the message and correct only if it violates rules. Otherwise, leave it unchanged.

        ### Original JSON Message:
        {message}

        {recommended_content}

        ### Rules:
        {self.get_general_rules()}

        ### Output Requirements:
        {self.output_instruction()}
        """
        return prompt
206
+
207
+ # --------------------------------------------------------------
208
+ def criticize(self, message, user):
209
+ """
210
+ criticize the llm response by using additional layer of query
211
+ :return: updated users_df with extracted information and personalize messages.
212
+ """
213
+
214
+ prompt = self.generate_prompt(message, user)
215
+ response = self.get_llm_response(prompt)
216
+
217
+ return response, self.total_tokens
218
+
219
+
220
+
Messaging_system/sending_time.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ calculating sending time for each individual user
3
+ """
4
+
5
+ import numpy as np
6
+ from snowflake.snowpark import Session
7
+ import json
8
+ import pandas as pd
9
+ import os
10
+ from dotenv import load_dotenv
11
+ load_dotenv()
12
+
13
class PersonalizedTime:
    """
    Calculate the best time to send a message for each individual user.
    """

    def calculate_sending_time(self):
        """
        Fetch per-user activity data from Snowflake.

        NOTE(review): the actual sending-time computation is not implemented
        yet; this currently only pulls the raw activity data.

        :return: DataFrame of user activity, or None if the fetch failed
        """
        session = self.snowflake_connection()
        # BUG FIX: the fetched data was previously discarded; return it so the
        # caller can use it once the computation is implemented.
        users_activity = self.fetch_users_time(session)
        return users_activity

    def fetch_users_time(self, session):
        """
        Fetch the user's activity data via the given Snowpark session.

        :param session: an open snowflake.snowpark.Session
        :return: pandas DataFrame with the query result, or None on error
        """
        query = self.get_query()

        try:
            rows = session.sql(query).collect()
            dataframe = pd.DataFrame(rows)
            print("reading content table successfully")
            return dataframe
        except Exception as e:
            print(f"Error in reading table: {e}")
            return None  # explicit: callers must handle a failed fetch

    def get_query(self):
        """
        :return: the SQL query used to fetch user activity (still to be written)
        """
        query = """

        """
        # BUG FIX: the original assigned the query but never returned it,
        # so callers always received None.
        return query

    def snowflake_connection(self):
        """
        Build a Snowpark session from the snowflake_* environment variables.

        :return: an open snowflake.snowpark.Session
        """
        conn = {
            "user": os.getenv('snowflake_user'),
            "password": os.getenv('snowflake_password'),
            "account": os.getenv('snowflake_account'),
            "role": os.getenv('snowflake_role'),
            "database": os.getenv('snowflake_database'),
            "warehouse": os.getenv('snowflake_warehouse'),
            "schema": os.getenv('snowflake_schema'),
        }

        session = Session.builder.configs(conn).create()
        return session
README.md CHANGED
@@ -12,9 +12,5 @@ short_description: 'UI for AI Messaging system '
12
  license: apache-2.0
13
  ---
14
 
15
- # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
-
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
12
  license: apache-2.0
13
  ---
14
 
15
+ AI messaging system UI
16
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json, os
2
+ from io import StringIO
3
+ import pandas as pd
4
+ import streamlit as st
5
+ from snowflake.snowpark import Session
6
+ from bs4 import BeautifulSoup
7
+
8
+ from Messaging_system.Permes import Permes
9
+ from Messaging_system.context_validator import Validator
10
+ from dotenv import load_dotenv
11
+ load_dotenv()
12
+
13
+ # ──────────────────────────────────────────────────────────────────────────────
14
+ # Helpers
15
+ # ──────────────────────────────────────────────────────────────────────────────
16
@st.cache_data
def load_data(buf) -> pd.DataFrame:
    """Read an uploaded CSV buffer into a DataFrame (result cached by Streamlit)."""
    return pd.read_csv(buf)
19
+
20
def load_config_(file_path: str) -> dict:
    """
    Load a JSON configuration file.

    :param file_path: path to the JSON file
    :return: parsed configuration dict
    """
    # Explicit encoding so the config parses identically on every OS,
    # regardless of the platform's default locale encoding.
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
23
+
24
def get_credential(key):
    """Look up *key* in Streamlit secrets first, then fall back to the environment.

    NOTE(review): a falsy secret value ('' / 0) also triggers the env fallback.
    """
    return st.secrets.get(key) or os.getenv(key)
26
+
27
+
28
def init_state() -> None:
    """Seed st.session_state with a default for every widget/flag the app uses.

    Uses setdefault so values already written by widgets survive Streamlit reruns.
    """
    defaults = dict(
        involve_recsys_result=False,
        involve_last_interaction=False,
        valid_instructions="",
        invalid_instructions="",
        messaging_type="push",
        generated=False,
        include_recommendation=False,
        data=None, brand=None, recsys_contents=[], csv_output=None,
        users_message=None, messaging_mode=None, target_column=None,
        ugc_column=None, identifier_column=None, input_validator=None,
        selected_input_features=None, selected_features=None,
        additional_instructions=None, segment_info="", message_style="",
        sample_example="", CTA="", all_features=None, number_of_messages=1,
        instructionset={}, segment_name="", number_of_samples=20,
        selected_source_features=[], platform=None, generate_clicked=False,
    )
    for k, v in defaults.items():
        st.session_state.setdefault(k, v)
48
+
49
+ # ──────────────────────────────────────────────────────────────────────────────
50
+ # PAGE CONFIG + THEME
51
+ # ──────────────────────────────────────────────────────────────────────────────
52
# Page chrome: wide layout with the sidebar open by default.
st.set_page_config(
    page_title="Personalized Message Generator",
    page_icon="📬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom dark/gold theme injected as raw CSS (requires unsafe_allow_html).
st.markdown(
    """
    <style>
    html, body, [class*="css"] {
        background-color:#0d0d0d;
        color:#ffd700;
    }
    .stButton>button, .stDownloadButton>button {
        border-radius:8px;
        background:#ffd700;
        color:#0d0d0d;
        font-weight:600;
    }
    .stTabs [data-baseweb="tab"] {
        font-weight:600;
    }
    .stTabs [aria-selected="true"] {
        color:#ffd700;
    }
    h1, h2, h3 {color:#ffd700;}
    .small {font-size:0.85rem; opacity:0.7;}
    </style>
    """,
    unsafe_allow_html=True
)
84
+
85
+ # ──────────────────────────────────────────────────────────────────────────────
86
+ # SIDEBAR – the “control panel”
87
+ # ──────────────────────────────────────────────────────────────────────────────
88
init_state()
with st.sidebar:
    # ─ CSV upload; the parsed DataFrame is cached in session state.
    st.header("📂 Upload your CSV")
    uploaded_file = st.file_uploader("Choose file", type="csv")
    if uploaded_file:
        st.session_state.data = load_data(uploaded_file)
        st.success("File loaded!")

    st.markdown("---")

    # All configuration widgets appear only once a CSV has been loaded.
    # Each widget writes directly into st.session_state via its key.
    if st.session_state.data is not None:
        # ─ Identifier
        # NOTE(review): id_col is unused here — the widget also stores the
        # choice in st.session_state["identifier_column"], which is what the
        # rest of the app reads.
        id_col = st.selectbox(
            "Identifier column",
            st.session_state.data.columns,
            key="identifier_column"
        )

        # ─ Brand
        st.selectbox(
            "Brand",
            ["drumeo", "pianote", "guitareo", "singeo"],
            key="brand"
        )

        # ─ Personalisation (Segment info and CTA are validated as mandatory
        #   in the Results tab before generation runs)
        st.text_area("Segment info *", key="segment_info")
        st.text_area("CTA *", key="CTA")
        with st.expander("🔧 Optional tone & examples"):
            st.text_area("Message style", key="message_style",
                         placeholder="Be kind and friendly…")
            st.text_area("Additional instructions", key="additional_instructions",
                         placeholder="e.g. Mention the number weeks since their last practice")
            st.text_area("Sample example", key="sample_example",
                         placeholder="Hello! We have crafted…")
            st.number_input("Number of samples", 1, 100, 20,
                            key="number_of_samples")

        # ─ Sequential messages: one extra instruction box per message
        st.number_input("Sequential messages / user", 1, 10, 1,
                        key="number_of_messages")
        st.text_input("Segment name", key="segment_name",
                      placeholder="no_recent_activity")
        if st.session_state.number_of_messages > 1:
            st.caption("Additional per-message instructions")
            for i in range(1, st.session_state.number_of_messages + 1):
                st.text_input(f"Message {i} instruction",
                              key=f"instr_{i}")

        # ─ Source feature selection
        st.multiselect(
            "Source features",
            ["instrument", "weeks_since_last_interaction",
             "birthday_reminder"],
            default=["instrument"],
            key="selected_source_features"
        )

        # ─ Rec-sys: content types only shown when recommendations are enabled
        st.checkbox("Include content recommendation", key="include_recommendation")
        if st.session_state.include_recommendation:
            st.multiselect(
                "Recommendation types",
                ["song", "workout", "quick_tips", "course"],
                key="recsys_contents"
            )

    st.markdown("---")
    # The button value is mirrored into session state so the Results tab can
    # react to the click on the same rerun.
    generate = st.button("🚀 Generate messages")
    st.session_state["generate_clicked"] = generate
158
+
159
+
160
+ # ──────────────────────────────────────────────────────────────────────────────
161
+ # MAIN AREA – three tabs
162
+ # ──────────────────────────────────────────────────────────────────────────────
163
tab0, tab1, tab2 = st.tabs(
    ["📊 Data preview", "🛠️ Configure", "📨 Results"])

# ------------------------------------------------------------------ TAB 0 ---#
with tab0:
    st.header("📊 Data preview")
    if st.session_state.data is not None:
        # Preview is capped at 100 rows to keep rendering fast.
        st.dataframe(st.session_state.data.head(100))
    else:
        st.info("Upload a CSV to preview it here.")

# ------------------------------------------------------------------ TAB 1 ---#
with tab1:
    st.header("🛠️ Configure & launch")
    if st.session_state.data is None:
        st.warning("Upload a CSV first ⬅")
    elif not generate:
        st.info("Adjust settings in the sidebar, then hit *Generate*.")
    else:
        st.success("Parameters captured – see **Results** tab.")

# ------------------------------------------------------------------ TAB 2 ---#
with tab2:
    st.header("📨 Generated messages")
    # Run generation only once per click
    if st.session_state.generate_clicked and not st.session_state.generated:

        # ─ simple validation: both fields are mandatory
        if not st.session_state.CTA.strip() or not st.session_state.segment_info.strip():
            st.error("CTA and Segment info are mandatory 🚫")
            st.stop()

        # ─ build Snowflake session from secrets / environment credentials
        conn = dict(
            user=get_credential("snowflake_user"),
            password=get_credential("snowflake_password"),
            account=get_credential("snowflake_account"),
            role=get_credential("snowflake_role"),
            database=get_credential("snowflake_database"),
            warehouse=get_credential("snowflake_warehouse"),
            schema=get_credential("snowflake_schema")
        )
        config = load_config_("Config_files/message_system_config.json")
        session = Session.builder.configs(conn).create()

        # ─ prepare parameters
        st.session_state.messaging_mode = (
            "recsys_result" if st.session_state.include_recommendation
            else "message"
        )
        st.session_state.involve_recsys_result = st.session_state.include_recommendation
        # Keep only the per-message instructions that were actually filled in.
        st.session_state.instructionset = {
            i: st.session_state.get(f"instr_{i}")
            for i in range(1, st.session_state.number_of_messages + 1)
            if st.session_state.get(f"instr_{i}", "").strip()
        }

        # ─ progress callback wired into the generation loop
        prog = st.progress(0)
        status = st.empty()

        def cb(done, total):
            # Called by Permes after each user; updates the progress bar.
            pct = int(done / total * 100)
            prog.progress(pct)
            status.write(f"{pct}%")

        # NOTE(review): selected_features is never set by any sidebar widget,
        # so selected_input_features is always None here — confirm intended.
        permes = Permes()
        df_msg = permes.create_personalize_messages(
            session=session,
            users=st.session_state.data,
            brand=st.session_state.brand,
            config_file=config,
            openai_api_key=get_credential("OPENAI_API"),
            CTA=st.session_state.CTA,
            segment_info=st.session_state.segment_info,
            number_of_samples=st.session_state.number_of_samples,
            message_style=st.session_state.message_style,
            sample_example=st.session_state.sample_example,
            selected_input_features=st.session_state.selected_features,
            selected_source_features=st.session_state.selected_source_features,
            additional_instructions=st.session_state.additional_instructions,
            platform=st.session_state.messaging_type,
            involve_recsys_result=st.session_state.involve_recsys_result,
            messaging_mode=st.session_state.messaging_mode,
            identifier_column=st.session_state.identifier_column,
            target_column=st.session_state.target_column,
            recsys_contents=st.session_state.recsys_contents,
            progress_callback=cb,
            number_of_messages=st.session_state.number_of_messages,
            instructionset=st.session_state.instructionset,
            segment_name=st.session_state.segment_name
        )

        # ─ cache output so reruns show results without regenerating
        st.session_state.users_message = df_msg
        st.session_state.csv_output = df_msg.to_csv(
            index=False, encoding="utf-8-sig")
        st.session_state.generated = True
        prog.empty(); status.empty()
        st.balloons()

    # -------- show results (if any)
    if st.session_state.generated:
        df = st.session_state.users_message
        id_col = st.session_state.identifier_column

        # expandable per-user cards; first card open by default
        # NOTE(review): assumes the output DataFrame's columns are lowercase
        # versions of the selected identifier column — verify against Permes.
        for i, (_, row) in enumerate(df.iterrows(), 1):
            with st.expander(f"{i}. User ID: {row[id_col.lower()]}", expanded=(i == 1)):
                st.write("##### 👤 Features")
                feats = st.session_state.selected_source_features
                cols = st.columns(3)
                for idx, f in enumerate(feats):
                    cols[idx % 3].markdown(f"**{f}**: {row.get(f, '—')}")

                st.markdown("---")
                st.write("##### ✉️ Messages")
                try:
                    # "message" may hold either a bare list of messages or a
                    # dict wrapping them under "messages_sequence".
                    blob = json.loads(row["message"])
                    seq = (blob.get("messages_sequence", blob)
                           if isinstance(blob, dict) else blob)

                    for j, msg in enumerate(seq, 1):
                        st.markdown(f"**{j}. {msg.get('header', '(no header)')}**")
                        thumb = (msg.get("thumbnail_url")  # per-message
                                 or row.get("thumbnail_url"))  # per-user fallback
                        if thumb:
                            st.image(thumb, width=150)
                        # ---------------------------------------------------------

                        st.markdown(msg.get("message", ""))
                        st.markdown(f"[Read more]({msg.get('web_url_path', '#')})")
                        st.markdown("---")

                except Exception as e:
                    st.error(f"Failed to parse JSON: {e}")
300
+
local_llm/LocalLM.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+
4
+ import torch
5
+ import ollama
6
+
7
class LocalLM:
    """Thin wrapper around a local Ollama model that returns validated JSON."""

    def __init__(self, model):
        # Initialize the Ollama client
        self.client = ollama.Client()
        self.model = model

    def preprocess_and_parse_json(self, response):
        """
        Parse the model's text output into a JSON object.

        Handles both raw JSON and JSON wrapped in a ```json ... ``` fence.

        :param response: raw text returned by the model
        :return: parsed object, or None if the text is not valid JSON
        """
        # BUG FIX: the original only assigned cleaned_response inside the
        # fence branch, so any reply NOT wrapped in ```json ... ``` raised
        # UnboundLocalError instead of being parsed.
        cleaned_response = response.strip()
        if cleaned_response.startswith('```json') and cleaned_response.endswith('```'):
            cleaned_response = cleaned_response[len('```json'):-len('```')].strip()

        try:
            return json.loads(cleaned_response)
        except json.JSONDecodeError as e:
            print(f"Failed to parse JSON: {e}")
            return None

    def get_llm_response(self, prompt, mode, max_retries=10):
        """
        Send the prompt to the LLM and get back the response.

        Includes handling for GPU memory issues by clearing cache and waiting
        before retry.

        :param prompt: prompt text to send to the model
        :param mode: validation mode; only "rating" is supported
        :param max_retries: attempts before giving up
        :return: (dict, tokens) on success; (None, tokens) on invalid mode;
                 ([], {}) after max_retries (kept for backward compatibility)
        """
        for attempt in range(max_retries):
            try:
                # Try generating the response
                response = self.client.generate(model=self.model, prompt=prompt)
            except Exception as e:
                # This catches errors like the connection being forcibly closed
                print(f"Error on attempt {attempt + 1}: {e}.")
                try:
                    # Clear GPU cache (PyTorch) to help free up memory
                    torch.cuda.empty_cache()
                    print("Cleared GPU cache.")
                except Exception as cache_err:
                    print("Failed to clear GPU cache:", cache_err)
                # Wait a bit before retrying to allow memory to recover
                time.sleep(2)
                continue

            # Ollama's client does not report token usage, so counts stay zero.
            tokens = {
                'prompt_tokens': 0,
                'completion_tokens': 0,
                'total_tokens': 0
            }

            try:
                output = self.preprocess_and_parse_json(response.response)
                if output is None:
                    continue  # invalid JSON — retry

                if mode == "rating":
                    # Accept only a flat mapping whose keys and values are all
                    # coercible to int.
                    all_int = True
                    for k, v in output.items():
                        try:
                            int(k)
                            int(v)
                        except (ValueError, TypeError):
                            all_int = False
                            break
                    if all_int:
                        return output, tokens
                    print(f"Keys and values are not integers on attempt {attempt + 1}. Retrying...")
                    continue  # Continue to next attempt
                else:
                    print(f"Invalid mode: {mode}")
                    return None, tokens

            except Exception as parse_error:
                print("Error processing output:", parse_error)

        print("Max retries exceeded. Returning empty response.")
        return [], {}
requirements.txt ADDED
Binary file (6.39 kB). View file