Commit ·
d0e3307
1
Parent(s): 6b1529a
Adding files and directories
Browse files- System works with CSV, we still need CIO and snowflake integration
- .dockerignore +9 -0
- .gitignore +9 -0
- .idea/.gitignore +8 -0
- .idea/inspectionProfiles/Project_Default.xml +188 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- CIO/CIO_integration_Python.py +146 -0
- Config_files/message_system_config.json +24 -0
- Dockerfile +23 -0
- Messaging_system/CoreConfig.py +270 -0
- Messaging_system/DataCollector.py +182 -0
- Messaging_system/LLMR.py +386 -0
- Messaging_system/Message_generator.py +258 -0
- Messaging_system/MultiMessage.py +324 -0
- Messaging_system/Permes.py +160 -0
- Messaging_system/PromptGenerator.py +434 -0
- Messaging_system/SnowFlakeConnection.py +237 -0
- Messaging_system/StoreLayer.py +0 -0
- Messaging_system/context_validator.py +302 -0
- Messaging_system/protection_layer.py +220 -0
- Messaging_system/sending_time.py +69 -0
- README.md +1 -5
- app.py +300 -0
- local_llm/LocalLM.py +92 -0
- requirements.txt +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore the .streamlit directory and its contents
|
| 2 |
+
Config_files/mysql_credentials.json
|
| 3 |
+
Config_files/secrets.json
|
| 4 |
+
Config_files/snowflake_credentials_Danial.json
|
| 5 |
+
.streamlit/secrets.toml
|
| 6 |
+
|
| 7 |
+
# Ignore the .env file
|
| 8 |
+
Analysis/.env
|
| 9 |
+
.env
|
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore the .streamlit directory and its contents
|
| 2 |
+
Config_files/mysql_credentials.json
|
| 3 |
+
Config_files/secrets.json
|
| 4 |
+
Config_files/snowflake_credentials_Danial.json
|
| 5 |
+
.streamlit/secrets.toml
|
| 6 |
+
|
| 7 |
+
# Ignore the .env file
|
| 8 |
+
Analysis/.env
|
| 9 |
+
.env
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
|
| 5 |
+
<option name="ignoredPackages">
|
| 6 |
+
<value>
|
| 7 |
+
<list size="153">
|
| 8 |
+
<item index="0" class="java.lang.String" itemvalue="urllib3" />
|
| 9 |
+
<item index="1" class="java.lang.String" itemvalue="tokenizers" />
|
| 10 |
+
<item index="2" class="java.lang.String" itemvalue="transformers" />
|
| 11 |
+
<item index="3" class="java.lang.String" itemvalue="huggingface-hub" />
|
| 12 |
+
<item index="4" class="java.lang.String" itemvalue="safetensors" />
|
| 13 |
+
<item index="5" class="java.lang.String" itemvalue="pandas" />
|
| 14 |
+
<item index="6" class="java.lang.String" itemvalue="protobuf" />
|
| 15 |
+
<item index="7" class="java.lang.String" itemvalue="httpx" />
|
| 16 |
+
<item index="8" class="java.lang.String" itemvalue="openai" />
|
| 17 |
+
<item index="9" class="java.lang.String" itemvalue="anyio" />
|
| 18 |
+
<item index="10" class="java.lang.String" itemvalue="h11" />
|
| 19 |
+
<item index="11" class="java.lang.String" itemvalue="httpcore" />
|
| 20 |
+
<item index="12" class="java.lang.String" itemvalue="tangled-up-in-unicode" />
|
| 21 |
+
<item index="13" class="java.lang.String" itemvalue="numba" />
|
| 22 |
+
<item index="14" class="java.lang.String" itemvalue="Babel" />
|
| 23 |
+
<item index="15" class="java.lang.String" itemvalue="PyYAML" />
|
| 24 |
+
<item index="16" class="java.lang.String" itemvalue="pickleshare" />
|
| 25 |
+
<item index="17" class="java.lang.String" itemvalue="defusedxml" />
|
| 26 |
+
<item index="18" class="java.lang.String" itemvalue="executing" />
|
| 27 |
+
<item index="19" class="java.lang.String" itemvalue="pycparser" />
|
| 28 |
+
<item index="20" class="java.lang.String" itemvalue="torchvision" />
|
| 29 |
+
<item index="21" class="java.lang.String" itemvalue="patsy" />
|
| 30 |
+
<item index="22" class="java.lang.String" itemvalue="ipython-genutils" />
|
| 31 |
+
<item index="23" class="java.lang.String" itemvalue="Pygments" />
|
| 32 |
+
<item index="24" class="java.lang.String" itemvalue="bleach" />
|
| 33 |
+
<item index="25" class="java.lang.String" itemvalue="jupyter_server_terminals" />
|
| 34 |
+
<item index="26" class="java.lang.String" itemvalue="soupsieve" />
|
| 35 |
+
<item index="27" class="java.lang.String" itemvalue="torchaudio" />
|
| 36 |
+
<item index="28" class="java.lang.String" itemvalue="jsonschema" />
|
| 37 |
+
<item index="29" class="java.lang.String" itemvalue="pywin32" />
|
| 38 |
+
<item index="30" class="java.lang.String" itemvalue="qtconsole" />
|
| 39 |
+
<item index="31" class="java.lang.String" itemvalue="terminado" />
|
| 40 |
+
<item index="32" class="java.lang.String" itemvalue="comm" />
|
| 41 |
+
<item index="33" class="java.lang.String" itemvalue="pydantic" />
|
| 42 |
+
<item index="34" class="java.lang.String" itemvalue="wordcloud" />
|
| 43 |
+
<item index="35" class="java.lang.String" itemvalue="jupyterlab-pygments" />
|
| 44 |
+
<item index="36" class="java.lang.String" itemvalue="ipykernel" />
|
| 45 |
+
<item index="37" class="java.lang.String" itemvalue="nbconvert" />
|
| 46 |
+
<item index="38" class="java.lang.String" itemvalue="phik" />
|
| 47 |
+
<item index="39" class="java.lang.String" itemvalue="attrs" />
|
| 48 |
+
<item index="40" class="java.lang.String" itemvalue="contourpy" />
|
| 49 |
+
<item index="41" class="java.lang.String" itemvalue="psutil" />
|
| 50 |
+
<item index="42" class="java.lang.String" itemvalue="jedi" />
|
| 51 |
+
<item index="43" class="java.lang.String" itemvalue="jupyter_server" />
|
| 52 |
+
<item index="44" class="java.lang.String" itemvalue="pure-eval" />
|
| 53 |
+
<item index="45" class="java.lang.String" itemvalue="regex" />
|
| 54 |
+
<item index="46" class="java.lang.String" itemvalue="asttokens" />
|
| 55 |
+
<item index="47" class="java.lang.String" itemvalue="platformdirs" />
|
| 56 |
+
<item index="48" class="java.lang.String" itemvalue="matplotlib" />
|
| 57 |
+
<item index="49" class="java.lang.String" itemvalue="idna" />
|
| 58 |
+
<item index="50" class="java.lang.String" itemvalue="referencing" />
|
| 59 |
+
<item index="51" class="java.lang.String" itemvalue="decorator" />
|
| 60 |
+
<item index="52" class="java.lang.String" itemvalue="networkx" />
|
| 61 |
+
<item index="53" class="java.lang.String" itemvalue="pandas-profiling" />
|
| 62 |
+
<item index="54" class="java.lang.String" itemvalue="json5" />
|
| 63 |
+
<item index="55" class="java.lang.String" itemvalue="cffi" />
|
| 64 |
+
<item index="56" class="java.lang.String" itemvalue="pandocfilters" />
|
| 65 |
+
<item index="57" class="java.lang.String" itemvalue="numpy" />
|
| 66 |
+
<item index="58" class="java.lang.String" itemvalue="jupyter-events" />
|
| 67 |
+
<item index="59" class="java.lang.String" itemvalue="sniffio" />
|
| 68 |
+
<item index="60" class="java.lang.String" itemvalue="websocket-client" />
|
| 69 |
+
<item index="61" class="java.lang.String" itemvalue="exceptiongroup" />
|
| 70 |
+
<item index="62" class="java.lang.String" itemvalue="jupyter" />
|
| 71 |
+
<item index="63" class="java.lang.String" itemvalue="seaborn" />
|
| 72 |
+
<item index="64" class="java.lang.String" itemvalue="stack-data" />
|
| 73 |
+
<item index="65" class="java.lang.String" itemvalue="multimethod" />
|
| 74 |
+
<item index="66" class="java.lang.String" itemvalue="PyWavelets" />
|
| 75 |
+
<item index="67" class="java.lang.String" itemvalue="zipp" />
|
| 76 |
+
<item index="68" class="java.lang.String" itemvalue="nest-asyncio" />
|
| 77 |
+
<item index="69" class="java.lang.String" itemvalue="prompt-toolkit" />
|
| 78 |
+
<item index="70" class="java.lang.String" itemvalue="visions" />
|
| 79 |
+
<item index="71" class="java.lang.String" itemvalue="ipywidgets" />
|
| 80 |
+
<item index="72" class="java.lang.String" itemvalue="scipy" />
|
| 81 |
+
<item index="73" class="java.lang.String" itemvalue="tornado" />
|
| 82 |
+
<item index="74" class="java.lang.String" itemvalue="ydata-profiling" />
|
| 83 |
+
<item index="75" class="java.lang.String" itemvalue="jsonpointer" />
|
| 84 |
+
<item index="76" class="java.lang.String" itemvalue="Send2Trash" />
|
| 85 |
+
<item index="77" class="java.lang.String" itemvalue="torch" />
|
| 86 |
+
<item index="78" class="java.lang.String" itemvalue="overrides" />
|
| 87 |
+
<item index="79" class="java.lang.String" itemvalue="mistune" />
|
| 88 |
+
<item index="80" class="java.lang.String" itemvalue="importlib-resources" />
|
| 89 |
+
<item index="81" class="java.lang.String" itemvalue="mpmath" />
|
| 90 |
+
<item index="82" class="java.lang.String" itemvalue="jupyter-console" />
|
| 91 |
+
<item index="83" class="java.lang.String" itemvalue="typing_extensions" />
|
| 92 |
+
<item index="84" class="java.lang.String" itemvalue="debugpy" />
|
| 93 |
+
<item index="85" class="java.lang.String" itemvalue="statsmodels" />
|
| 94 |
+
<item index="86" class="java.lang.String" itemvalue="argon2-cffi" />
|
| 95 |
+
<item index="87" class="java.lang.String" itemvalue="pytz" />
|
| 96 |
+
<item index="88" class="java.lang.String" itemvalue="dacite" />
|
| 97 |
+
<item index="89" class="java.lang.String" itemvalue="webencodings" />
|
| 98 |
+
<item index="90" class="java.lang.String" itemvalue="Pillow" />
|
| 99 |
+
<item index="91" class="java.lang.String" itemvalue="notebook_shim" />
|
| 100 |
+
<item index="92" class="java.lang.String" itemvalue="tiktoken" />
|
| 101 |
+
<item index="93" class="java.lang.String" itemvalue="traitlets" />
|
| 102 |
+
<item index="94" class="java.lang.String" itemvalue="pywinpty" />
|
| 103 |
+
<item index="95" class="java.lang.String" itemvalue="rfc3339-validator" />
|
| 104 |
+
<item index="96" class="java.lang.String" itemvalue="joblib" />
|
| 105 |
+
<item index="97" class="java.lang.String" itemvalue="arrow" />
|
| 106 |
+
<item index="98" class="java.lang.String" itemvalue="python-dateutil" />
|
| 107 |
+
<item index="99" class="java.lang.String" itemvalue="nbclient" />
|
| 108 |
+
<item index="100" class="java.lang.String" itemvalue="QtPy" />
|
| 109 |
+
<item index="101" class="java.lang.String" itemvalue="cycler" />
|
| 110 |
+
<item index="102" class="java.lang.String" itemvalue="MarkupSafe" />
|
| 111 |
+
<item index="103" class="java.lang.String" itemvalue="tinycss2" />
|
| 112 |
+
<item index="104" class="java.lang.String" itemvalue="mkl" />
|
| 113 |
+
<item index="105" class="java.lang.String" itemvalue="fsspec" />
|
| 114 |
+
<item index="106" class="java.lang.String" itemvalue="python-json-logger" />
|
| 115 |
+
<item index="107" class="java.lang.String" itemvalue="filelock" />
|
| 116 |
+
<item index="108" class="java.lang.String" itemvalue="jupyterlab-widgets" />
|
| 117 |
+
<item index="109" class="java.lang.String" itemvalue="pyzmq" />
|
| 118 |
+
<item index="110" class="java.lang.String" itemvalue="certifi" />
|
| 119 |
+
<item index="111" class="java.lang.String" itemvalue="pyparsing" />
|
| 120 |
+
<item index="112" class="java.lang.String" itemvalue="sympy" />
|
| 121 |
+
<item index="113" class="java.lang.String" itemvalue="notebook" />
|
| 122 |
+
<item index="114" class="java.lang.String" itemvalue="isoduration" />
|
| 123 |
+
<item index="115" class="java.lang.String" itemvalue="jupyter-lsp" />
|
| 124 |
+
<item index="116" class="java.lang.String" itemvalue="fqdn" />
|
| 125 |
+
<item index="117" class="java.lang.String" itemvalue="jupyter_client" />
|
| 126 |
+
<item index="118" class="java.lang.String" itemvalue="kiwisolver" />
|
| 127 |
+
<item index="119" class="java.lang.String" itemvalue="jupyterlab_server" />
|
| 128 |
+
<item index="120" class="java.lang.String" itemvalue="fonttools" />
|
| 129 |
+
<item index="121" class="java.lang.String" itemvalue="backcall" />
|
| 130 |
+
<item index="122" class="java.lang.String" itemvalue="tbb" />
|
| 131 |
+
<item index="123" class="java.lang.String" itemvalue="widgetsnbextension" />
|
| 132 |
+
<item index="124" class="java.lang.String" itemvalue="argon2-cffi-bindings" />
|
| 133 |
+
<item index="125" class="java.lang.String" itemvalue="distro" />
|
| 134 |
+
<item index="126" class="java.lang.String" itemvalue="matplotlib-inline" />
|
| 135 |
+
<item index="127" class="java.lang.String" itemvalue="webcolors" />
|
| 136 |
+
<item index="128" class="java.lang.String" itemvalue="more-itertools" />
|
| 137 |
+
<item index="129" class="java.lang.String" itemvalue="wcwidth" />
|
| 138 |
+
<item index="130" class="java.lang.String" itemvalue="llvmlite" />
|
| 139 |
+
<item index="131" class="java.lang.String" itemvalue="jupyter_core" />
|
| 140 |
+
<item index="132" class="java.lang.String" itemvalue="importlib-metadata" />
|
| 141 |
+
<item index="133" class="java.lang.String" itemvalue="Jinja2" />
|
| 142 |
+
<item index="134" class="java.lang.String" itemvalue="rfc3986-validator" />
|
| 143 |
+
<item index="135" class="java.lang.String" itemvalue="typeguard" />
|
| 144 |
+
<item index="136" class="java.lang.String" itemvalue="jsonschema-specifications" />
|
| 145 |
+
<item index="137" class="java.lang.String" itemvalue="rpds-py" />
|
| 146 |
+
<item index="138" class="java.lang.String" itemvalue="uri-template" />
|
| 147 |
+
<item index="139" class="java.lang.String" itemvalue="tomli" />
|
| 148 |
+
<item index="140" class="java.lang.String" itemvalue="jupyterlab" />
|
| 149 |
+
<item index="141" class="java.lang.String" itemvalue="parso" />
|
| 150 |
+
<item index="142" class="java.lang.String" itemvalue="intel-openmp" />
|
| 151 |
+
<item index="143" class="java.lang.String" itemvalue="nbformat" />
|
| 152 |
+
<item index="144" class="java.lang.String" itemvalue="tzdata" />
|
| 153 |
+
<item index="145" class="java.lang.String" itemvalue="ipython" />
|
| 154 |
+
<item index="146" class="java.lang.String" itemvalue="packaging" />
|
| 155 |
+
<item index="147" class="java.lang.String" itemvalue="fastjsonschema" />
|
| 156 |
+
<item index="148" class="java.lang.String" itemvalue="prometheus-client" />
|
| 157 |
+
<item index="149" class="java.lang.String" itemvalue="tqdm" />
|
| 158 |
+
<item index="150" class="java.lang.String" itemvalue="colorama" />
|
| 159 |
+
<item index="151" class="java.lang.String" itemvalue="async-lru" />
|
| 160 |
+
<item index="152" class="java.lang.String" itemvalue="ImageHash" />
|
| 161 |
+
</list>
|
| 162 |
+
</value>
|
| 163 |
+
</option>
|
| 164 |
+
</inspection_tool>
|
| 165 |
+
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 166 |
+
<option name="ignoredErrors">
|
| 167 |
+
<list>
|
| 168 |
+
<option value="N803" />
|
| 169 |
+
<option value="N806" />
|
| 170 |
+
</list>
|
| 171 |
+
</option>
|
| 172 |
+
</inspection_tool>
|
| 173 |
+
<inspection_tool class="PyStubPackagesAdvertiser" enabled="true" level="WARNING" enabled_by_default="true">
|
| 174 |
+
<option name="ignoredPackages">
|
| 175 |
+
<list>
|
| 176 |
+
<option value="pyspark-stubs==3.0.0.post3" />
|
| 177 |
+
</list>
|
| 178 |
+
</option>
|
| 179 |
+
</inspection_tool>
|
| 180 |
+
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
| 181 |
+
<option name="ignoredIdentifiers">
|
| 182 |
+
<list>
|
| 183 |
+
<option value="str.__or__" />
|
| 184 |
+
</list>
|
| 185 |
+
</option>
|
| 186 |
+
</inspection_tool>
|
| 187 |
+
</profile>
|
| 188 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="Black">
|
| 4 |
+
<option name="sdkName" value="Python 3.9 (AI_Message_Generator)" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
| 7 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/AI_Message_Generator.iml" filepath="$PROJECT_DIR$/.idea/AI_Message_Generator.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
CIO/CIO_integration_Python.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import http.client
|
| 2 |
+
import json
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import logging
|
| 5 |
+
import base64
|
| 6 |
+
import requests
|
| 7 |
+
from customerio import CustomerIO, Regions
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class CustomerIOIntegration:
|
| 11 |
+
def __init__(self, site_id, api_key):
|
| 12 |
+
|
| 13 |
+
self.cio = CustomerIO(site_id=site_id, api_key=api_key)
|
| 14 |
+
logging.basicConfig(level=logging.INFO)
|
| 15 |
+
|
| 16 |
+
# Authentication
|
| 17 |
+
self.site_id = site_id
|
| 18 |
+
self.api_key = api_key
|
| 19 |
+
# Base URL for Customer.io App API endpoints (used for segments management)
|
| 20 |
+
self.base_url = "https://api.customer.io/v1"
|
| 21 |
+
|
| 22 |
+
# Create Basic Auth header
|
| 23 |
+
auth_b64 = base64.b64encode(f"{self.site_id}:{self.api_key}".encode('utf-8')).decode('utf-8')
|
| 24 |
+
self.headers = {
|
| 25 |
+
"Authorization": f"Basic {auth_b64}",
|
| 26 |
+
"Content-Type": "application/json"
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
def add_attributes(self, dataframe):
|
| 30 |
+
|
| 31 |
+
# Filter out rows without messages or cio_id
|
| 32 |
+
filtered_df = dataframe.dropna(subset=['ai_generated_message', 'email'])
|
| 33 |
+
|
| 34 |
+
for index, row in filtered_df.iterrows():
|
| 35 |
+
try:
|
| 36 |
+
self.cio.identify(id=row['email'], ai_generated_message=row['ai_generated_message'])
|
| 37 |
+
logging.info(f"Successfully updated user {row['email']} with message")
|
| 38 |
+
except Exception as e:
|
| 39 |
+
logging.error(f"Failed to update user {row['email']}: {e}")
|
| 40 |
+
|
| 41 |
+
def get_segment(self, segment_name):
|
| 42 |
+
|
| 43 |
+
# Step 1: Check if the segment exists
|
| 44 |
+
resp = requests.get(f"{self.base_url}/segments", headers=self.headers)
|
| 45 |
+
if resp.status_code != 200:
|
| 46 |
+
raise Exception(f"Error fetching segments: {resp.text}")
|
| 47 |
+
|
| 48 |
+
segments = resp.json() # assuming a list of segments is returned
|
| 49 |
+
segment_id = None
|
| 50 |
+
for seg in segments:
|
| 51 |
+
if seg.get("name") == segment_name:
|
| 52 |
+
segment_id = seg.get("id")
|
| 53 |
+
break
|
| 54 |
+
return segment_id
|
| 55 |
+
|
| 56 |
+
def update_segment_from_dataframe(self, df: pd.DataFrame,
|
| 57 |
+
segment_name: str,
|
| 58 |
+
segment_description: str) -> str:
|
| 59 |
+
"""
|
| 60 |
+
Given a pandas DataFrame, create (if needed) and update a Customer.io manual segment.
|
| 61 |
+
|
| 62 |
+
The DataFrame must contain an "email" column (used as the unique identifier) plus other columns
|
| 63 |
+
that become customer attributes.
|
| 64 |
+
|
| 65 |
+
Parameters:
|
| 66 |
+
df: DataFrame containing customer data.
|
| 67 |
+
segment_name: The name of the segment to create or update.
|
| 68 |
+
segment_description: A description for the segment (used when creating it).
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
The segment ID (as returned by the API).
|
| 72 |
+
"""
|
| 73 |
+
segment_id = self.get_segment(segment_name)
|
| 74 |
+
|
| 75 |
+
# If segment does not exist, create it
|
| 76 |
+
if segment_id is None:
|
| 77 |
+
payload = {
|
| 78 |
+
"name": segment_name,
|
| 79 |
+
"description": segment_description,
|
| 80 |
+
"type": "manual" # manual segments require that you add customers explicitly
|
| 81 |
+
}
|
| 82 |
+
resp = requests.post(f"{self.base_url}/segments", headers=self.headers, data=json.dumps(payload))
|
| 83 |
+
if resp.status_code not in (200, 201):
|
| 84 |
+
raise Exception(f"Error creating segment: {resp.text}")
|
| 85 |
+
segment = resp.json()
|
| 86 |
+
segment_id = segment.get("id")
|
| 87 |
+
print(f"Segment '{segment_name}' created with ID: {segment_id}")
|
| 88 |
+
else:
|
| 89 |
+
print(f"Segment '{segment_name}' already exists with ID: {segment_id}")
|
| 90 |
+
|
| 91 |
+
# Step 2: For each row in the DataFrame, update the customer profile.
|
| 92 |
+
# We use the "email" column as the id.
|
| 93 |
+
for index, row in df.iterrows():
|
| 94 |
+
email = row["email"]
|
| 95 |
+
# Prepare a dictionary of attributes (all columns except email)
|
| 96 |
+
attrs = row.drop("email").to_dict()
|
| 97 |
+
# Use the customer.io client to create or update the profile.
|
| 98 |
+
# Note: any keyword argument you pass becomes a custom attribute.
|
| 99 |
+
self.cio.identify(id=email, **attrs)
|
| 100 |
+
|
| 101 |
+
# Step 3: Add all customers (emails) from the DataFrame to the segment.
|
| 102 |
+
customer_ids = df["email"].tolist()
|
| 103 |
+
payload = {
|
| 104 |
+
"ids": customer_ids,
|
| 105 |
+
"id_type": "email" # since we use emails as the identifier
|
| 106 |
+
}
|
| 107 |
+
resp = requests.put(f"{self.base_url}/segments/{segment_id}", headers=self.headers, data=json.dumps(payload))
|
| 108 |
+
if resp.status_code != 200:
|
| 109 |
+
raise Exception(f"Error adding customers to segment: {resp.text}")
|
| 110 |
+
|
| 111 |
+
print(f"Successfully updated segment '{segment_name}' with {len(customer_ids)} customers.")
|
| 112 |
+
return segment_id
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def load_config_(file_path):
|
| 116 |
+
"""
|
| 117 |
+
Loads configuration JSON files from the local space. (mostly for loading the Snowflake connection parameters)
|
| 118 |
+
:param file_path: local path to the JSON file
|
| 119 |
+
:return: JSON file
|
| 120 |
+
"""
|
| 121 |
+
with open(file_path, 'r') as file:
|
| 122 |
+
return json.load(file)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# Example usage
|
| 126 |
+
if __name__ == "__main__":
|
| 127 |
+
data = pd.DataFrame({'email': ['danial@musora.com'],
|
| 128 |
+
'message': ['This is the second test message'],
|
| 129 |
+
'json_att': [{"message": "test", "url": "test"}]})
|
| 130 |
+
df = pd.DataFrame(data)
|
| 131 |
+
|
| 132 |
+
secrets_file = 'Config_files/secrets.json'
|
| 133 |
+
secrets = load_config_(secrets_file)
|
| 134 |
+
|
| 135 |
+
track_api_key = secrets["MUSORA_CUSTOMER_IO_TRACK_API_KEY"]
|
| 136 |
+
site_id = secrets["MUSORA_CUSTOMER_IO_SITE_ID"]
|
| 137 |
+
api_key = secrets["MUSORA_CUSTOMER_IO_APP_API_KEY"]
|
| 138 |
+
workspace_id = secrets["MUSORA_CUSTOMER_IO_WORKSPACE_ID"]
|
| 139 |
+
|
| 140 |
+
cio_integration = CustomerIOIntegration(api_key=track_api_key, site_id=site_id)
|
| 141 |
+
|
| 142 |
+
# Update (or create) the segment
|
| 143 |
+
segment_id = cio_integration.update_segment_from_dataframe(df,
|
| 144 |
+
segment_name="Danial_ Manual Segment _ AI",
|
| 145 |
+
segment_description="Customers imported from DataFrame")
|
| 146 |
+
print(f"Segment ID: {segment_id}")
|
Config_files/message_system_config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"user_info_features": [
|
| 3 |
+
"first_name",
|
| 4 |
+
"country",
|
| 5 |
+
"instrument",
|
| 6 |
+
"biography",
|
| 7 |
+
"birthday_reminder",
|
| 8 |
+
"topics",
|
| 9 |
+
"genres",
|
| 10 |
+
"last_completed_content"
|
| 11 |
+
],
|
| 12 |
+
"interaction_features": ["last_content_info"],
|
| 13 |
+
"check_feasibility": [
|
| 14 |
+
"first_name",
|
| 15 |
+
"biography",
|
| 16 |
+
"birthday",
|
| 17 |
+
"topics",
|
| 18 |
+
"genres"
|
| 19 |
+
],
|
| 20 |
+
"AI_Jargon": ["elevate", "enhance", "reignite", "passion", "boost", "fuel", "thrill", "revive", "spark", "performing", "fresh", "tone"],
|
| 21 |
+
"header_limit": 30,
|
| 22 |
+
"message_limit": 110
|
| 23 |
+
}
|
| 24 |
+
|
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Start from a lightweight Python image
|
| 2 |
+
FROM python:3.9
|
| 3 |
+
|
| 4 |
+
# Set environment variables for better behavior in containers
|
| 5 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 6 |
+
PIP_NO_CACHE_DIR=1 \
|
| 7 |
+
|
| 8 |
+
# Create and set the working directory
|
| 9 |
+
WORKDIR /app
|
| 10 |
+
|
| 11 |
+
# Copy requirements and install Python dependencies
|
| 12 |
+
COPY requirements.txt .
|
| 13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
+
|
| 15 |
+
# Copy the rest of the application code
|
| 16 |
+
COPY . .
|
| 17 |
+
|
| 18 |
+
# Expose the port that the application will listen on
|
| 19 |
+
EXPOSE 7860
|
| 20 |
+
|
| 21 |
+
# Run the Streamlit app
|
| 22 |
+
# Streamlit will read PORT from the environment and bind to 0.0.0.0
|
| 23 |
+
CMD streamlit run app.py --server.port=$PORT --server.headless true --server.address 0.0.0.0
|
Messaging_system/CoreConfig.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
the flow of the Program starts from create_personalized_message function
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import time
|
| 6 |
+
from Messaging_system.SnowFlakeConnection import SnowFlakeConn
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class CoreConfig:
|
| 10 |
+
"""
|
| 11 |
+
LLM-based personalized message generator:
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
def __init__(self, session, users_df, brand, platform, config_file):
|
| 15 |
+
|
| 16 |
+
self.session = session
|
| 17 |
+
self.users_df = users_df
|
| 18 |
+
self.config_file = config_file
|
| 19 |
+
self.platform = platform # valid values: [push, app]
|
| 20 |
+
self.brand = brand
|
| 21 |
+
|
| 22 |
+
# LLM configs
|
| 23 |
+
self.api_key = None # will be set by user
|
| 24 |
+
self.model = "gpt-4o-mini" # will be set by user
|
| 25 |
+
|
| 26 |
+
# will be set by user
|
| 27 |
+
self.CTA = None
|
| 28 |
+
self.message_style = None
|
| 29 |
+
self.sample_example = None
|
| 30 |
+
self.template_message = self.CTA
|
| 31 |
+
self.segment_info = None
|
| 32 |
+
self.subsequence_messages = 1
|
| 33 |
+
|
| 34 |
+
self.number_of_samples = 0
|
| 35 |
+
self.list_of_features = None
|
| 36 |
+
self.consider_last_interaction = True
|
| 37 |
+
self.additional_instructions = None
|
| 38 |
+
|
| 39 |
+
# to trace the number of tokens and estimate the cost if needed
|
| 40 |
+
self.temp_token_counter = 0
|
| 41 |
+
self.total_tokens = {
|
| 42 |
+
'prompt_tokens': 0,
|
| 43 |
+
'completion_tokens': 0,
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
# Recsys_result
|
| 47 |
+
self.recsys_result = None
|
| 48 |
+
self.recsys_contents = ["song", "workout", "course", "quick_tips"]
|
| 49 |
+
self.content_info = None
|
| 50 |
+
self.involve_recsys_result = False
|
| 51 |
+
self.popular_contents_df = None
|
| 52 |
+
|
| 53 |
+
# Additional_info
|
| 54 |
+
self.additional_info_columns = None
|
| 55 |
+
self.messaging_mode = "message"
|
| 56 |
+
self.target_content = None
|
| 57 |
+
|
| 58 |
+
self.start_time = time.time()
|
| 59 |
+
self.remaining_tokens = None
|
| 60 |
+
self.wait_time = None
|
| 61 |
+
|
| 62 |
+
# Instantiate the connection to Snowflake
|
| 63 |
+
self.SF = SnowFlakeConn(session=self.session, brand=self.brand)
|
| 64 |
+
|
| 65 |
+
# segment name
|
| 66 |
+
self.segment_name = None
|
| 67 |
+
|
| 68 |
+
# --------------------------------------------------------------
|
| 69 |
+
# --------------------------------------------------------------
|
| 70 |
+
def set_message_style(self, message_style):
    """
    Store the style template that generated messages should follow.

    :param message_style: style description / template string (may contain placeholders)
    """
    self.message_style = message_style
|
| 77 |
+
|
| 78 |
+
# --------------------------------------------------------------
|
| 79 |
+
# --------------------------------------------------------------
|
| 80 |
+
def set_involve_recsys_result(self, involve_recsys_result):
    """
    Toggle whether recsys recommendations are included in message generation.

    :param involve_recsys_result: boolean flag
    """
    self.involve_recsys_result = involve_recsys_result
|
| 82 |
+
|
| 83 |
+
# --------------------------------------------------------------
|
| 84 |
+
# --------------------------------------------------------------
|
| 85 |
+
def set_recsys_contents(self, recsys_contents):
    """
    Set the content types to include in recommendations.

    The default (set in __init__) contains all supported types.
    :param recsys_contents: list of content-type names (e.g. ["song", "workout"])
    """
    self.recsys_contents = recsys_contents
|
| 92 |
+
|
| 93 |
+
# --------------------------------------------------------------
|
| 94 |
+
# --------------------------------------------------------------
|
| 95 |
+
def set_messaging_mode(self, messaging_mode):
    """
    Set the messaging mode.

    Valid modes: [recsys_result, message(default), recommend_playlist, recommend_content].
    An invalid mode is reported on stdout and the current mode is left unchanged.
    :param messaging_mode: one of the valid mode names
    """
    valid_modes = ["recsys_result", "message", "recommend_playlist", "recommend_content"]
    if messaging_mode not in valid_modes:
        # Report and keep the previously configured mode.
        print(f"{messaging_mode} is not a valid messaging mode. available modes are: \n {valid_modes}")
        return
    self.messaging_mode = messaging_mode
|
| 106 |
+
|
| 107 |
+
# --------------------------------------------------------------
|
| 108 |
+
# --------------------------------------------------------------
|
| 109 |
+
def set_openai_api(self, openai_key):
    """
    Store the OpenAI API key used for LLM calls.

    :param openai_key: API key string
    """
    self.api_key = openai_key
|
| 116 |
+
|
| 117 |
+
# --------------------------------------------------------------
|
| 118 |
+
# --------------------------------------------------------------
|
| 119 |
+
def set_number_of_samples(self, number_of_samples):
    """
    Set how many users to sample when generating messages.

    :param number_of_samples: sample count; coerced to int
    """
    sample_count = int(number_of_samples)
    self.number_of_samples = sample_count
|
| 125 |
+
|
| 126 |
+
# --------------------------------------------------------------
|
| 127 |
+
# --------------------------------------------------------------
|
| 128 |
+
def set_sample_example(self, sample_example):
    """
    Store the example message used for one-shot prompting.

    :param sample_example: example string (may contain placeholders)
    """
    self.sample_example = sample_example
|
| 135 |
+
|
| 136 |
+
# --------------------------------------------------------------
|
| 137 |
+
# --------------------------------------------------------------
|
| 138 |
+
def set_CTA(self, CTA):
    """
    Store the call-to-action: the main goal of the message being sent.

    :param CTA: CTA string (may contain placeholders)
    """
    self.CTA = CTA
|
| 145 |
+
|
| 146 |
+
# --------------------------------------------------------------
|
| 147 |
+
# --------------------------------------------------------------
|
| 148 |
+
def set_segment_info(self, segment_info):
    """
    Store information shared by all users of the segment.

    :param segment_info: description string (may contain placeholders)
    """
    self.segment_info = segment_info
|
| 155 |
+
|
| 156 |
+
# --------------------------------------------------------------
|
| 157 |
+
# --------------------------------------------------------------
|
| 158 |
+
def set_additional_instructions(self, additional_instructions):
    """
    Store extra free-form instructions appended to the prompt.

    :param additional_instructions: instructions string (or None)
    """
    self.additional_instructions = additional_instructions
|
| 165 |
+
|
| 166 |
+
# --------------------------------------------------------------
|
| 167 |
+
# --------------------------------------------------------------
|
| 168 |
+
def set_features_to_use(self, list_of_features):
    """
    Store the list of user features to weave into the message.

    :param list_of_features: list of feature/column names
    """
    self.list_of_features = list_of_features
|
| 175 |
+
|
| 176 |
+
# --------------------------------------------------------------
|
| 177 |
+
# --------------------------------------------------------------
|
| 178 |
+
def set_target_feature(self, target_content):
    """
    Store the target content to recommend from the input data.

    :param target_content: a content_id to recommend
    """
    self.target_content = target_content
|
| 184 |
+
|
| 185 |
+
# --------------------------------------------------------------
|
| 186 |
+
# --------------------------------------------------------------
|
| 187 |
+
def set_number_of_messages(self, number_of_messages=1, instructionset=None):
    """
    Configure subsequent-message instructions.

    For a single message, subsequence_messages becomes {1: None}. For more
    than one, the provided instructionset (mapping message index -> instruction)
    is stored as-is; omitting it is an error.
    :param number_of_messages: int, how many messages to generate per user
    :param instructionset: per-message instructions, required when > 1
    :raises ValueError: if number_of_messages > 1 and no instructionset given
    """
    if number_of_messages == 1:
        self.subsequence_messages = {1: None}
        return
    if instructionset is None:
        raise ValueError("Instructionset must have instructions for each subsequence message")
    self.subsequence_messages = instructionset
|
| 202 |
+
|
| 203 |
+
# --------------------------------------------------------------
|
| 204 |
+
# --------------------------------------------------------------
|
| 205 |
+
|
| 206 |
+
def get_instrument(self):
    """
    Map the configured brand to the instrument it teaches.

    :return: instrument name (str)
    :raises KeyError: for an unknown brand
    """
    # brand -> instrument lookup
    return {
        "drumeo": "Drum",
        "pianote": "Piano",
        "guitareo": "Guitar",
        "singeo": "Vocal",
    }[self.brand]
|
| 220 |
+
|
| 221 |
+
# --------------------------------------------------------------
|
| 222 |
+
# --------------------------------------------------------------
|
| 223 |
+
def respect_request_ratio(self):
    """
    Throttle requests so the per-minute token quota is respected.

    Inside the current one-minute window, if the token counter is near the
    cap, reset the window and pause briefly. Once a full window has elapsed,
    simply start a fresh counting window. (The two conditions are mutually
    exclusive, so they are expressed as if/elif.)
    :return:
    """
    elapsed = time.time() - self.start_time

    if self.temp_token_counter > 3997000 and elapsed <= 60:  # Using a safe margin
        print("Sleeping for few seconds to respect the token limit...")
        # reset the token counter and window start
        self.temp_token_counter = 0
        self.start_time = time.time()
        time.sleep(10)  # Sleep for a while before making new requests
    elif elapsed > 60:
        # Window elapsed: reset the token counter for the new window.
        self.temp_token_counter = 0
        self.start_time = time.time()
|
| 243 |
+
|
| 244 |
+
# --------------------------------------------------------------
|
| 245 |
+
# --------------------------------------------------------------
|
| 246 |
+
|
| 247 |
+
def checkpoint(self):
    """
    Save the current progress to disk as CSV.

    NOTE(review): the output filename is hard-coded — presumably it should be
    derived from brand/segment_name; confirm before using outside the Drumeo
    "not active" segment.
    :return:
    """
    export_columns = ["user_id", "email", "first_name", "message", "additional_info", "recommendation_info"]
    snapshot = self.users_df[export_columns]
    snapshot.to_csv("drumeo_not_active_segment.csv", encoding='utf-8-sig', index=False)
|
| 256 |
+
|
| 257 |
+
# --------------------------------------------------------------
|
| 258 |
+
# --------------------------------------------------------------
|
| 259 |
+
def set_segment_name(self, segment_name):
    """
    Set the segment name used to label generated messages.

    (Docstring fixed: the original was a copy-paste of checkpoint's
    "saving the current process".)
    :param segment_name: name of the user segment (str)
    :return:
    """

    self.segment_name = segment_name
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
|
Messaging_system/DataCollector.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
setting instructions and inputs required to generate personalized messages
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class DataCollector:
    """
    Gathers and prepares the per-user data needed to generate personalized
    messages: resolves Musora user ids, samples the input users, pulls the
    supporting datasets from Snowflake, and derives helper columns on
    Core.users_df.
    """

    def __init__(self, CoreConfig):
        # Shared configuration/state object (users_df, Snowflake connection, settings).
        self.Core = CoreConfig

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def gather_data(self):
        """
        main function of the class to flow the work for gathering all the data that we need.
        :return: the Core object, whose users_df now carries the extracted information
        """

        # extract user_ids and other data
        self.extract_musora_id()

        # selecting a sample of users
        self.select_sample()
        self.fetch_data()

        # calculate the remaining days to their birthday
        self.remaining_days_to_birthday()
        self.create_columns()

        # creating additional info if applicable
        if len(self.Core.additional_info_columns) != 0:
            self.create_additional_information()

        return self.Core

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def extract_musora_id(self):
        """
        Extracts the musora user id and preserves additional columns.

        Normalizes the identification column to 'user_id' (resolving ids from
        email when that is the only identifier) and records the remaining
        columns in Core.additional_info_columns.
        :raises Exception: when no identification column is present
        """
        self.Core.users_df.columns = self.Core.users_df.columns.str.lower()

        # Define valid columns in order of preference.
        valid_columns = ['user_id', 'musora_user_id', 'id', 'email']
        # Find the first valid column present in the DataFrame.
        id_col = next((col for col in valid_columns if col in self.Core.users_df.columns), None)

        if id_col is None:
            raise Exception("Input data must contain user_id, musora_user_id, id, or email column.")

        # Normalize the identification column to 'user_id'
        if id_col in ['musora_user_id', 'id']:
            self.Core.users_df.rename(columns={id_col: 'user_id'}, inplace=True)
        elif id_col == 'email':
            # BUG FIX: _lookup_user_ids_from_email is defined on this class, not
            # on Core — calling self.Core._lookup_user_ids_from_email() raised
            # AttributeError on the email-only path.
            self._lookup_user_ids_from_email()

        # Identify additional columns: exclude identification columns
        identification_columns = {'user_id', 'email'} if 'email' in self.Core.users_df.columns else {'user_id'}
        additional_columns = [col for col in self.Core.users_df.columns if col not in identification_columns]
        self.Core.additional_info_columns = [col.lower() for col in additional_columns]

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def _lookup_user_ids_from_email(self):
        """
        Looks up user IDs based on unique email addresses and merges the results
        into Core.users_df. Assumes Core.users_df contains an 'email' column.
        """
        unique_emails = self.Core.users_df["email"].unique()
        data = self.Core.SF.extract_id_from_email(emails=unique_emails)
        self.Core.users_df = pd.merge(self.Core.users_df, data, on='email', how='left')

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def remaining_days_to_birthday(self):
        """
        calculating the remaining days to the user's birthday
        :return: updates users_df; birthday_reminder becomes a human-readable
            string when the birthday is within 7 days, otherwise None
        """

        # Iterate through each row in the DataFrame
        for idx, row in self.Core.users_df.iterrows():
            # ROBUSTNESS: also guard birthday_reminder — int(NaN) raised when the
            # birthday was set but the reminder value was missing.
            if pd.notna(row.get("birthday")) and pd.notna(row.get("birthday_reminder")):
                if int(row["birthday_reminder"]) <= 7:
                    remaining_days = int(row["birthday_reminder"])
                    self.Core.users_df.at[idx, "birthday_reminder"] = f"{remaining_days} days until student's birthday"
                else:
                    self.Core.users_df.at[idx, "birthday_reminder"] = None

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def fetch_data(self):
        """
        Fetch the supporting datasets from Snowflake and merge them into
        Core.users_df; rows without messaging permission are dropped.
        """

        # Fetch datasets
        user_ids = self.Core.users_df["user_id"].unique()

        users_data = self.Core.SF.get_data("users", user_ids)
        interactions_data = self.Core.SF.get_data("interactions", user_ids)
        recsys_data = self.Core.SF.get_data("recsys", user_ids)
        contents_data = self.Core.SF.get_data("contents")
        popular_contents_data = self.Core.SF.get_data("popular_contents")

        # Align key dtypes before merging.
        self.Core.users_df["user_id"] = self.Core.users_df["user_id"].astype(int)
        interactions_data["user_id"] = interactions_data["user_id"].astype(int)

        # Merge additional user details into the base dataframe (users_df).
        self.Core.users_df = self.Core.users_df.merge(users_data, on="user_id", how="left", suffixes=("", "_users"))
        self.Core.users_df = self.Core.users_df.merge(interactions_data, on="user_id", how="left",
                                                      suffixes=("", "_interactions"))
        self.Core.users_df = self.Core.users_df.merge(recsys_data, on="user_id", how="left", suffixes=("", "_recsys"))

        for col in self.Core.users_df.columns:
            # Replace additional empty representations with np.nan
            self.Core.users_df[col] = self.Core.users_df[col].replace(['', 'None', 'nan'], np.nan)

        # Now drop rows where 'permission' is missing
        self.Core.users_df.dropna(subset=["permission"], inplace=True)

        self.Core.content_info = contents_data
        self.Core.popular_contents_df = popular_contents_data

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def create_columns(self):
        """
        Creating user profile based on available information, and adding additional columns for messages
        :return: updates users_df
        """

        # adding new columns, initially with none values
        self.Core.users_df["message"] = None  # will contain the final message
        self.Core.users_df["source"] = None  # [AI-generated]
        self.Core.users_df["prompt"] = None  # will contain final prompt
        self.Core.users_df["instrument"] = self.Core.get_instrument()
        self.Core.users_df["platform"] = self.Core.platform
        self.Core.users_df["segment_name"] = self.Core.segment_name

    # -------------------------------------------------------------
    # -------------------------------------------------------------
    def create_additional_information(self):
        """
        providing additional input and instructions based on available columns in the input file
        :return: updates users_df["additional_info"] with "name: value" lines
        """
        self.Core.users_df["additional_info"] = None

        # Iterate through each row in the DataFrame
        for idx, row in self.Core.users_df.iterrows():
            additional_info = []

            # populating additional_info; skip None/NaN/empty values
            for feature in self.Core.additional_info_columns:
                value = row.get(feature)
                if pd.notna(value) and value not in [None, [], {}] and (
                        not isinstance(value, str) or value.strip()):
                    additional_info.append(f"{feature}: {str(value)}")

            self.Core.users_df.at[idx, "additional_info"] = "\n".join(additional_info)

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def select_sample(self, sample_size=None):
        """
        Select a sample of the input users (without replacement).
        :param sample_size: Number of users to select (defaults to
            Core.number_of_samples, or 20 when that is unset); capped at the
            number of available users.
        :return: users_df is replaced by the sampled subset.
        """

        # Use Core.number_of_samples if sample_size is None, otherwise default to 20
        if sample_size is None:
            sample_size = self.Core.number_of_samples if self.Core.number_of_samples is not None else 20

        total_users = self.Core.users_df.shape[0]
        sample_size = min(total_users, sample_size)
        self.Core.users_df = self.Core.users_df.sample(n=sample_size, replace=False)
|
Messaging_system/LLMR.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This class is a LLM based recommender that can choose the perfect content for the user given user profile and our goal
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import openai
|
| 9 |
+
from openai import OpenAI
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
import time
|
| 12 |
+
import streamlit as st
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
load_dotenv()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# -----------------------------------------------------------------------
|
| 19 |
+
class LLMR:
|
| 20 |
+
|
| 21 |
+
def __init__(self, CoreConfig):
    """
    :param CoreConfig: shared configuration/state object for the messaging run
    """
    # Shared config (users_df, token counters, recsys settings, ...)
    self.Core = CoreConfig
    # Row of users_df currently being processed
    self.user = None
    # Content ids shortlisted for the current user
    self.selected_content_ids = []  # will be populated for each user
|
| 26 |
+
|
| 27 |
+
def get_recommendations(self, progress_callback):
    """
    selecting the recommended content for each user

    Iterates Core.users_df, asks the LLM to pick one content per user, and
    writes the pick (id, info, raw recsys JSON) back onto the row. Rows where
    no content could be selected are skipped.
    :param progress_callback: callable(progress, total) for UI updates, or None
    :return: the updated Core object
    """

    self.Core.users_df["recommendation"] = None
    self.Core.users_df["recommendation_info"] = None
    total_users = len(self.Core.users_df)

    st.write("Choosing the best content to recommend ... ")

    # Start a fresh rate-limit window for this batch of LLM calls.
    self.Core.start_time = time.time()
    for progress, (idx, row) in enumerate(
            tqdm(self.Core.users_df.iterrows(), desc="Selecting the best content to recommend ...")):
        # if we have a prompt to generate a personalized message
        # Update progress if callback is provided
        if progress_callback is not None:
            progress_callback(progress, total_users)

        self.user = row
        content_id, content_info, recsys_json, token = self._get_recommendation()

        if content_id is None:  # error in selecting a content to recommend
            continue

        else:
            # updating tokens
            self.Core.total_tokens['prompt_tokens'] += int(token['prompt_tokens'])
            self.Core.total_tokens['completion_tokens'] += int(token['completion_tokens'])
            # NOTE(review): '=' overwrites the per-minute window counter instead of
            # accumulating (+=) — this looks like it defeats respect_request_ratio's
            # throttling; confirm intent.
            self.Core.temp_token_counter = int(token['prompt_tokens']) + int(token['completion_tokens'])
            self.Core.users_df.at[idx, "recommendation"] = content_id
            self.Core.users_df.at[idx, "recommendation_info"] = content_info
            self.Core.users_df.at[idx, "recsys_result"] = recsys_json
            self.Core.respect_request_ratio()

    return self.Core
|
| 64 |
+
|
| 65 |
+
# --------------------------------------------------------------
|
| 66 |
+
# --------------------------------------------------------------
|
| 67 |
+
def _get_recommendation(self):
    """
    select and return the recommendation from the available list of contents
    :return: (content_id, content_info, recsys_json, tokens);
        (None, None, None, None) when no prompt could be built or the LLM
        returned no usable content id
    """
    prompt, recsys_json = self._generate_prompt()
    if prompt is None:
        return None, None, None, None

    content_id, tokens = self.get_llm_response(prompt)
    if content_id == 0:
        # was not able to receive a recommendation
        return None, None, None, None

    content_info = self._get_content_info(content_id)
    return content_id, content_info, recsys_json, tokens
|
| 85 |
+
|
| 86 |
+
# --------------------------------------------------------------
|
| 87 |
+
# --------------------------------------------------------------
|
| 88 |
+
|
| 89 |
+
def _generate_prompt(self):
    """
    Generates the prompt for the current user in order to choose the
    recommendation from the available list.

    :return: (prompt, recsys_json); (None, None) when there is nothing to recommend
    """
    available_contents, recsys_json = self._get_available_contents()
    if available_contents.strip() == "":  # no item to recommend
        # BUG FIX: the caller unpacks two values (prompt, recsys_json); returning a
        # bare None raised "cannot unpack non-iterable NoneType" here.
        return None, None

    # Getting different part of the prompts
    input_context = self._input_context()
    user_info = self._get_user_profile()
    task = self._task_instructions()
    output_instruction = self._output_instruction()

    prompt = f"""
    ### Context:
    {input_context}

    ### User Information:
    {user_info}

    ### Available Contents:
    {available_contents}

    ### Main Task:
    {task}

    ### Output Instructions:
    {output_instruction}
    """

    return prompt, recsys_json
|
| 123 |
+
|
| 124 |
+
# --------------------------------------------------------------
|
| 125 |
+
# --------------------------------------------------------------
|
| 126 |
+
def _input_context(self):
    """
    Build the high-level context paragraph of the recommendation prompt.

    :return: input instructions as a string (the literal's leading/trailing
        whitespace is part of the returned value)
    """

    context = f"""
    You are a helpful assistant at Musora, an online music education platform that helps users learn music. Your goal is to choose a perfect content to recommend to the user given the information that we have from the user and available contents to recommend.
    """

    return context
|
| 136 |
+
|
| 137 |
+
# --------------------------------------------------------------
|
| 138 |
+
# --------------------------------------------------------------
|
| 139 |
+
def _system_instructions(self):
    """
    (Optional) High-level system-role context for LLM endpoints that accept
    messages with role='system'.
    :return: the system message string
    """
    system_message = (
        "You are a helpful recommendation assistant at Musora, an online music education platform. "
        "Use the provided user information and content details to choose the best content to recommend. "
        "Make sure to follow the instructions precisely and only return the chosen content_id as JSON."
    )
    return system_message
|
| 149 |
+
|
| 150 |
+
# --------------------------------------------------------------
|
| 151 |
+
# --------------------------------------------------------------
|
| 152 |
+
def _task_instructions(self):
    """
    creating the instructions about the task

    Returns the main-task section of the prompt: pick exactly one content,
    justify it from user info, avoid gear-specific topics, and emit the
    content_id per the output instructions.
    :return: task (str)
    """

    task = """
    - You must select exactly ONE content from the 'Available Contents' to recommend.
    - Base your decision on the User information and focus on providing the most relevant recommendation.
    - Do not recommended content where the topic is focused on a specific Gear (e.g. YAMAHA)
    - Provide the content_id of the recommended content in the output based on Output instructions.
    """

    return task
|
| 166 |
+
|
| 167 |
+
# --------------------------------------------------------------
|
| 168 |
+
# --------------------------------------------------------------
|
| 169 |
+
def _get_user_profile(self):
    """
    getting user's goal and user's last completed content to use for choosing the recommended content

    Both fields fall back to "Not Available" (via _get_user_data) when missing.
    :return: formatted string block with user info and last completed content
    """

    last_completed_content = self._get_user_data(attribute="last_completed_content")
    user_info = self._get_user_data(attribute="user_info")

    recommendation_info = f"""
    **User information and preferences:**

    {user_info}

    **Previous completed content:**
    {last_completed_content}
    """

    return recommendation_info
|
| 188 |
+
|
| 189 |
+
# --------------------------------------------------------------
|
| 190 |
+
# --------------------------------------------------------------
|
| 191 |
+
def _get_user_data(self, attribute):
    """
    Fetch one attribute of the current user row.

    Treats NaN, None, empty containers and whitespace-only strings as missing.
    :param attribute: column name to read from self.user
    :return: the value, or "Not Available" when missing/empty
    """
    value = self.user[attribute]

    # Present and non-empty: NaN-check, empty-container check, blank-string check.
    if pd.notna(value) and value not in [None, [], {}] and (
            not isinstance(value, str) or value.strip()):
        return value

    return "Not Available"
|
| 206 |
+
|
| 207 |
+
# --------------------------------------------------------------
|
| 208 |
+
# --------------------------------------------------------------
|
| 209 |
+
|
| 210 |
+
def _get_user_recommendation(self):
    """
    Return the raw recsys payload for the current user, or fall back to the
    top popular content when the payload is missing/unparsable or contains
    none of the configured sections (cold-start scenario).
    :return: recsys JSON string, or the popular-content fallback value
    """

    recsys_json = self.user["recsys_result"]

    try:
        recsys_data = json.loads(recsys_json)
        # Sections to process
        sections = self.Core.recsys_contents

        # Check if none of the sections are present in recsys_data --> cold start scenario
        if not any(section in recsys_data for section in sections):
            popular_content = self.Core.popular_contents_df.iloc[0]["popular_content"]
            return popular_content
        else:
            return recsys_json
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
        # SystemExit; any payload problem falls back to the popular content.
        popular_content = self.Core.popular_contents_df.iloc[0]["popular_content"]
        return popular_content
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# --------------------------------------------------------------
|
| 231 |
+
# --------------------------------------------------------------
|
| 232 |
+
def _get_available_contents(self):
|
| 233 |
+
|
| 234 |
+
# Get the user ID
|
| 235 |
+
recsys_json = self._get_user_recommendation()
|
| 236 |
+
recsys_data = json.loads(recsys_json)
|
| 237 |
+
|
| 238 |
+
# Sections to process
|
| 239 |
+
sections = self.Core.recsys_contents
|
| 240 |
+
|
| 241 |
+
# Collect selected content_ids
|
| 242 |
+
selected_content_ids = []
|
| 243 |
+
|
| 244 |
+
for section in sections:
|
| 245 |
+
if section in recsys_data:
|
| 246 |
+
# Get the list of recommendations in this section
|
| 247 |
+
recs = recsys_data[section]
|
| 248 |
+
# Sort by recommendation_rank (ascending order)
|
| 249 |
+
recs_sorted = sorted(recs, key=lambda x: x['recommendation_rank'])
|
| 250 |
+
# Select top 3 recommendations
|
| 251 |
+
top_recs = recs_sorted[:3]
|
| 252 |
+
# Get the content_ids
|
| 253 |
+
content_ids = [rec['content_id'] for rec in top_recs]
|
| 254 |
+
# Append to the list
|
| 255 |
+
selected_content_ids.extend(content_ids)
|
| 256 |
+
# Fetch content info for the selected content_ids
|
| 257 |
+
content_info_rows = self.Core.content_info[self.Core.content_info['content_id'].isin(selected_content_ids)]
|
| 258 |
+
|
| 259 |
+
# Create a mapping from CONTENT_ID to CONTENT_INFO
|
| 260 |
+
content_info_map = dict(zip(content_info_rows['content_id'], content_info_rows['content_info']))
|
| 261 |
+
|
| 262 |
+
# Assemble the text in a structured way using a list
|
| 263 |
+
lines = []
|
| 264 |
+
for content_id in selected_content_ids:
|
| 265 |
+
# Retrieve the content_info (which may include multi-line text)
|
| 266 |
+
content_info = content_info_map.get(content_id, "No content info found")
|
| 267 |
+
|
| 268 |
+
# Append the structured lines without extra spaces
|
| 269 |
+
lines.append(f"**content_id**: {content_id}")
|
| 270 |
+
lines.append("**content_info**:")
|
| 271 |
+
lines.append(content_info) # this line may already contain internal newlines
|
| 272 |
+
lines.append("") # blank line for separation
|
| 273 |
+
|
| 274 |
+
# Join all lines into a single text string with newline characters
|
| 275 |
+
text = "\n".join(lines)
|
| 276 |
+
|
| 277 |
+
self.selected_content_ids = selected_content_ids
|
| 278 |
+
|
| 279 |
+
return text, recsys_json
|
| 280 |
+
|
| 281 |
+
# --------------------------------------------------------------
|
| 282 |
+
# --------------------------------------------------------------
|
| 283 |
+
|
| 284 |
+
def _get_content_info(self, content_id):
|
| 285 |
+
"""
|
| 286 |
+
getting content_info for the recommended content
|
| 287 |
+
:param content_id:
|
| 288 |
+
:return:
|
| 289 |
+
"""
|
| 290 |
+
|
| 291 |
+
content_info_row = self.Core.content_info[self.Core.content_info['content_id'] == content_id]
|
| 292 |
+
content_info = content_info_row['content_info'].iloc[0]
|
| 293 |
+
|
| 294 |
+
return content_info
|
| 295 |
+
|
| 296 |
+
# --------------------------------------------------------------
|
| 297 |
+
# --------------------------------------------------------------
|
| 298 |
+
def is_valid_content_id(self, content_id):
    """
    Check whether *content_id* is one of the ids previously offered to
    the LLM (guards against hallucinated ids in the response).

    :param content_id: candidate id returned by the LLM
    :return: True when the id belongs to ``self.selected_content_ids``
    """
    return content_id in self.selected_content_ids
|
| 309 |
+
|
| 310 |
+
# --------------------------------------------------------------
|
| 311 |
+
# --------------------------------------------------------------
|
| 312 |
+
def _output_instruction(self):
|
| 313 |
+
"""
|
| 314 |
+
:return: output instructions as a string
|
| 315 |
+
"""
|
| 316 |
+
|
| 317 |
+
instructions = f"""
|
| 318 |
+
Return the content_id of the final recommendation in **JSON** format with the following structure:
|
| 319 |
+
|
| 320 |
+
{{
|
| 321 |
+
"content_id": "content_id of the recommended content from Available Contents, as an integer",
|
| 322 |
+
}}
|
| 323 |
+
|
| 324 |
+
Do not include any additional keys or text outside the JSON.
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
return instructions
|
| 328 |
+
|
| 329 |
+
def get_llm_response(self, prompt, max_retries=4):
    """
    Send *prompt* to the LLM and return the chosen content id.

    :param prompt: user prompt describing the recommendation task
    :param max_retries: attempts before giving up on bad output / API errors
    :return: tuple ``(content_id, tokens)`` where ``tokens`` is the usage
             dict of the successful call; ``(0, 0)`` when every attempt
             failed (callers must handle this sentinel).
    """
    import time  # local import: only needed for the retry back-off

    openai.api_key = self.Core.api_key
    instructions = self._system_instructions()
    client = OpenAI(api_key=self.Core.api_key)

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=self.Core.model,
                response_format={"type": "json_object"},
                messages=[
                    {"role": "system", "content": instructions},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=20,
                n=1,
                temperature=0.7
            )

            tokens = {
                'prompt_tokens': response.usage.prompt_tokens,
                'completion_tokens': response.usage.completion_tokens,
                'total_tokens': response.usage.total_tokens
            }

            try:
                content = response.choices[0].message.content
                output = json.loads(content)

                # int(...) may raise ValueError/TypeError on a garbage id;
                # previously only JSONDecodeError was caught, so a
                # non-numeric content_id crashed the whole run.
                if 'content_id' in output and self.is_valid_content_id(int(output['content_id'])):
                    return int(output['content_id']), tokens

                print(f"'content_id' missing or invalid in response on attempt {attempt + 1}. Retrying...")
                continue  # Continue to next attempt

            except (json.JSONDecodeError, ValueError, TypeError):
                print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

        except openai.APIConnectionError as e:
            print("The server could not be reached")
            print(e.__cause__)  # an underlying Exception, likely raised within httpx.
        except openai.RateLimitError:
            print("A 429 status code was received; we should back off a bit.")
            time.sleep(2 ** attempt)  # fix: actually back off instead of retrying immediately
        except openai.APIStatusError as e:
            print("Another non-200-range status code was received")
            print(e.status_code)
            print(e.response)

    print("Max retries exceeded. Returning empty response.")
    return 0, 0
|
Messaging_system/Message_generator.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
THis class will generate message or messages based on the number of requested.
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import time
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
from Messaging_system.MultiMessage import MultiMessage
|
| 11 |
+
from Messaging_system.protection_layer import ProtectionLayer
|
| 12 |
+
import openai
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class MessageGenerator:
|
| 16 |
+
|
| 17 |
+
def __init__(self, CoreConfig):
    """
    :param CoreConfig: shared CoreConfig instance; this class reads its
        ``users_df``, API credentials, token counters, protection-layer
        config and messaging settings.
    """
    self.Core = CoreConfig
|
| 19 |
+
|
| 20 |
+
# --------------------------------------------------------------
|
| 21 |
+
# --------------------------------------------------------------
|
| 22 |
+
def generate_messages(self, progress_callback):
    """
    Generate a personalized message for every user in ``self.Core.users_df``.

    For each row: ask the LLM for a first message, run it through the
    ProtectionLayer critic, store the parsed result in the "message"
    column, then either expand it into a multi-message sequence (when
    more than one subsequence message is configured) or wrap the single
    message into a ``{"messages_sequence": [...]}`` JSON envelope.

    :param progress_callback: optional callable ``(progress, total)`` used
        to report progress to the UI; skipped when None.
    :return: the (mutated) CoreConfig instance
    """

    total_users = len(self.Core.users_df)
    st.write("Generating messages ... ")

    # Wall-clock start used elsewhere for rate limiting / reporting.
    self.Core.start_time = time.time()
    for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages")):
        # if we have a prompt to generate a personalized message
        # Update progress if callback is provided
        if progress_callback is not None:
            progress_callback(progress, total_users)

        if row["prompt"] is not None:
            first_message = self.get_llm_response(row["prompt"])

            if first_message is not None:
                # adding protection layer (critic pass over the raw draft)
                protect = ProtectionLayer(config_file=self.Core.config_file,
                                          messaging_mode=self.Core.messaging_mode)
                message, total_tokens = protect.criticize(message=first_message, user=row)

                # updating tokens (critic's usage, on top of the draft's)
                self.Core.total_tokens['prompt_tokens'] += total_tokens['prompt_tokens']
                self.Core.total_tokens['completion_tokens'] += total_tokens['completion_tokens']
                self.Core.temp_token_counter += total_tokens['prompt_tokens'] + total_tokens['completion_tokens']

                # double check output structure
                if isinstance(message, dict) and "message" in message and isinstance(message["message"], str):
                    # parsing output result (enriches with recsys/playlist data)
                    message = self.parsing_output_message(message, row)
                    self.Core.users_df.at[idx, "message"] = message
                    # row is a copy; mirrored so the checks below see it too
                    row["message"] = message
                else:
                    self.Core.users_df.at[idx, "message"] = None
                self.Core.checkpoint()
                self.Core.respect_request_ratio()
            else:
                self.Core.users_df.at[idx, "message"] = None

        # generating subsequence messages if needed:
        # NOTE(review): when row["prompt"] is None, row["message"] was never
        # assigned — presumably the column pre-exists in users_df; confirm.
        if isinstance(self.Core.subsequence_messages, dict) and len(self.Core.subsequence_messages.keys()) > 1 and \
                self.Core.users_df.at[idx, "message"] is not None and row["message"] is not None:
            MM = MultiMessage(self.Core)
            message = MM.generate_multi_messages(row)
            self.Core.users_df.at[idx, "message"] = message

        else:
            # ---------------------------------------------------------
            # SINGLE-MESSAGE path: wrap the lone message in the same
            # {"messages_sequence": [...]} envelope as multi-message mode
            # ---------------------------------------------------------
            single_msg = row["message"] or self.Core.users_df.at[idx, "message"]
            if single_msg is not None:
                # If the single message is still a JSON string, turn it into a dict first
                if isinstance(single_msg, str):
                    try:
                        single_msg = json.loads(single_msg)
                    except json.JSONDecodeError:
                        # leave it as-is if it’s not valid JSON
                        pass

                msg_wrapper = {"messages_sequence": [single_msg]}
                # Again, store a proper JSON string
                self.Core.users_df.at[idx, "message"] = json.dumps(msg_wrapper,
                                                                   ensure_ascii=False)

            else:
                self.Core.users_df.at[idx, "message"] = None

    return self.Core
|
| 95 |
+
|
| 96 |
+
# --------------------------------------------------------------
|
| 97 |
+
# --------------------------------------------------------------
|
| 98 |
+
def parsing_output_message(self, message, user):
    """
    Parse the LLM output dict and enrich it with additional content
    information depending on the messaging mode.

    :param message: output dict from the LLM (expected to contain at least
        "header" and "message"; also "playlist_id" in playlist mode)
    :param user: the user row (provides recsys data when needed)
    :return: JSON string of the enriched message, or None when required
        keys are missing or enrichment fails
    """
    if self.Core.involve_recsys_result:
        output_message = self.fetch_recommendation_data(user, message)
        # fetch_recommendation_data returns None when the recommended
        # content cannot be located; fix: don't serialize that to "null".
        if output_message is None:
            return None
    elif self.Core.messaging_mode == "recommend_playlist":
        # adding playlist url to the message
        # fix: also require "header" (it was read unchecked below) and
        # handle the missing-key case — previously output_message stayed
        # unbound and the return line raised UnboundLocalError.
        if "playlist_id" in message and "message" in message and "header" in message:
            playlist_id = str(message["playlist_id"])
            web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
            # Add these to the message dict
            output_message = {
                "header": message["header"],
                "message": message["message"],
                "playlist_id": int(message["playlist_id"]),
                "web_url_path": web_url_path,
            }
        else:
            print("LLM output is missing 'playlist_id', 'header' or 'message'.")
            return None
    else:
        # Only "message" is expected when involve_recsys_result is False and we are not recommending any other content from input
        if "message" not in message or "header" not in message:
            print("LLM output is missing 'message'.")
            return None
        output_message = {"header": message["header"], "message": message["message"]}

    return json.dumps(output_message, ensure_ascii=False)
|
| 129 |
+
|
| 130 |
+
# --------------------------------------------------------------
|
| 131 |
+
# --------------------------------------------------------------
|
| 132 |
+
def fetch_recommendation_data(self, user, message):
    """
    Locate the recommended content inside the user's recsys payload and
    merge its metadata (url, title, thumbnail) into the message dict.

    :param user: user row with "user_id", "recommendation", "recsys_result"
    :param message: dict with at least "header" and "message"
    :return: enriched dict, or None when the content id is not found
    """
    user_id = user["user_id"]
    content_id = int(user["recommendation"])
    recsys_json_str = user["recsys_result"]
    recsys_data = json.loads(recsys_json_str)

    # Search through all categories in the recsys data
    found_item = None
    for category, items in recsys_data.items():
        for item in items:
            if item.get("content_id") == content_id:
                found_item = item
                break  # Exit inner loop if item is found
        if found_item:
            break  # Exit outer loop if item is found

    if not found_item:
        print(f"content_id {content_id} not found in recsys_data for user_id {user_id}.")
        return None

    # Extract required fields from found_item
    web_url_path = found_item.get("web_url_path")
    title = found_item.get("title")
    thumbnail_url = found_item.get("thumbnail_url")

    # Strip escape characters from the body. Fix: the old code called
    # str.replace without using the result — strings are immutable, so it
    # was a no-op; MultiMessage's sibling method already applies it.
    clean_message = message.get("message", "").replace('\\', '').replace('"', '')

    # Add these to the message dict
    return {
        "header": message.get("header"),
        "message": clean_message,
        "content_id": content_id,
        "web_url_path": web_url_path,
        "title": title,
        "thumbnail_url": thumbnail_url
    }
|
| 172 |
+
|
| 173 |
+
# --------------------------------------------------------------
|
| 174 |
+
# --------------------------------------------------------------
|
| 175 |
+
|
| 176 |
+
def get_llm_response(self, prompt, max_retries=4):
    """
    Send *prompt* to the LLM and return the parsed JSON message.

    Retries up to *max_retries* times when the response is not valid JSON,
    misses "header"/"message", or exceeds the configured character limits.

    :param prompt: user prompt string
    :param max_retries: maximum number of attempts
    :return: dict with "header" and "message", or None on failure
    """
    openai.api_key = self.Core.api_key
    instructions = self.llm_instructions()
    client = OpenAI(api_key=self.Core.api_key)

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=self.Core.model,
                response_format={"type": "json_object"},
                messages=[
                    {"role": "system", "content": instructions},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=500,
                n=1,
                temperature=0.6
            )

            tokens = {
                'prompt_tokens': response.usage.prompt_tokens,
                'completion_tokens': response.usage.completion_tokens,
                'total_tokens': response.usage.total_tokens
            }

            try:
                content = response.choices[0].message.content
                output = json.loads(content)

                if 'message' not in output or 'header' not in output:
                    print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
                    continue  # Continue to next attempt

                if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
                        output["message"].strip()) > self.Core.config_file["message_limit"]:
                    print(
                        f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
                    continue

                # Valid output: account the tokens of the successful call only
                self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
                self.Core.temp_token_counter += tokens['total_tokens']
                return output

            except json.JSONDecodeError:
                print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

        except openai.APIConnectionError as e:
            print("The server could not be reached")
            print(e.__cause__)  # an underlying Exception, likely raised within httpx.
        except openai.RateLimitError:
            print("A 429 status code was received; we should back off a bit.")
            time.sleep(2 ** attempt)  # fix: actually back off (``time`` is imported at module level)
        except openai.APIStatusError as e:
            print("Another non-200-range status code was received")
            print(e.status_code)
            print(e.response)

    print("Max retries exceeded. Returning empty response.")
    return None
|
| 245 |
+
|
| 246 |
+
# --------------------------------------------------------------
|
| 247 |
+
# --------------------------------------------------------------
|
| 248 |
+
def llm_instructions(self):
    """
    Build the system instruction given to the LLM.

    :return: instruction string
    """
    # Base system prompt shared by every generation request.
    return """You are an AI assistant that receives information of a music student and generate personalized
    motivation message. """
|
Messaging_system/MultiMessage.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import time
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from Messaging_system.protection_layer import ProtectionLayer
|
| 5 |
+
import openai
|
| 6 |
+
|
| 7 |
+
class MultiMessage:
|
| 8 |
+
def __init__(self, CoreConfig):
    """
    Class that generates a sequence of messages (multi-step push notifications)
    for each user, building on previously generated messages.

    :param CoreConfig: shared CoreConfig instance; this class reads its
        ``config_file`` limits, ``subsequence_messages`` plan, messaging
        mode, API credentials and token counters.
    """
    self.Core = CoreConfig
|
| 14 |
+
|
| 15 |
+
# --------------------------------------------------------------
|
| 16 |
+
def generate_multi_messages(self, user):
    """
    Generates multiple messages per user, storing them in a single JSON structure.
    The first message is assumed to already exist in user["message"].
    Subsequent messages are generated by referencing all previously generated ones.

    :param user: A row (dictionary-like) containing user data and the first message.
    :return: JSON string containing the entire sequence of messages
             (or None if something goes wrong).
    """
    # 1) Get the first message if it exists
    first_message_str = user.get("message", None)
    if not first_message_str:
        print("No initial message found; cannot build a multi-message sequence.")
        return None

    # Parse the first message as JSON
    try:
        first_message_dict = json.loads(first_message_str)
    except (json.JSONDecodeError, TypeError):
        print("Could not parse the first message as JSON. Returning None.")
        return None

    # Start our sequence with the first message
    message_sequence = [first_message_dict]

    # We'll reuse the same ProtectionLayer (critic) for every step
    protect = ProtectionLayer(
        config_file=self.Core.config_file,
        messaging_mode=self.Core.messaging_mode
    )

    # If user requested multiple messages, generate the rest
    # number_of_messages is the *total* number of messages requested
    total_to_generate = len(self.Core.subsequence_messages.keys())

    # Already have the first message, so generate the next (n-1) messages
    for step in range(2, total_to_generate + 1):
        # 2) Generate the next message referencing all so-far messages
        next_msg_raw = self.generate_next_messages(message_sequence, step)
        if not next_msg_raw:
            print(f"Could not generate the message for step {step}. Stopping.")
            break

        # 3) Pass it through the protection layer
        criticized_msg, tokens_used = protect.criticize(
            message=next_msg_raw,
            user=user
        )

        # Update token usage stats
        self.Core.total_tokens['prompt_tokens'] += tokens_used['prompt_tokens']
        self.Core.total_tokens['completion_tokens'] += tokens_used['completion_tokens']
        self.Core.temp_token_counter += tokens_used['prompt_tokens'] + tokens_used['completion_tokens']

        # 4) Parse & validate the next message (we do the same as the single-message pipeline)
        parsed_output_str = self.parsing_output_message(criticized_msg, user)
        if not parsed_output_str:
            print(f"Parsing output failed for step {step}. Stopping.")
            break

        try:
            parsed_output_dict = json.loads(parsed_output_str)
        except json.JSONDecodeError:
            print(f"Could not parse the new message as JSON for step {step}. Stopping.")
            break

        # Add this next message to our sequence
        message_sequence.append(parsed_output_dict)

    # 5) Return the entire sequence so it can be stored back in the DataFrame or elsewhere
    # NOTE(review): on an early ``break`` the partial sequence is still
    # returned — presumably intended (best effort); confirm with callers.
    final_structure = {"messages_sequence": message_sequence}
    return json.dumps(final_structure, ensure_ascii=False)
|
| 89 |
+
|
| 90 |
+
# --------------------------------------------------------------
|
| 91 |
+
def generate_next_messages(self, previous_messages, step):
    """
    Produce the next message of the sequence from all prior messages.

    :param previous_messages: list of dicts with at least "header" and "message"
    :param step: 1-based index of the message being generated
    :return: raw LLM dict (with 'header' and 'message'), or None on failure
    """
    # Build the prompt from the history, then ask the LLM for the next one
    # (same LLM routine as in MessageGenerator).
    return self.get_llm_response(self.generate_prompt(previous_messages, step))
|
| 104 |
+
|
| 105 |
+
# --------------------------------------------------------------
|
| 106 |
+
def generate_prompt(self, previous_messages, step):
    """
    Build the LLM prompt for the next push notification, embedding all
    previously generated messages plus the length constraints and the
    per-step tone from ``self.Core.subsequence_messages``.

    :param previous_messages: list of dicts with 'header' and 'message'
    :param step: sequence index used to pick the per-step tone
    :return: prompt string for the LLM
    """
    # Summarize the history as "Message i: (Header) ...\n (Body) ..."
    previous_text_str = "\n\n".join(
        f"Message {i}: (Header) {m.get('header', '').strip()}\n (Body) {m.get('message', '').strip()}"
        for i, m in enumerate(previous_messages, start=1)
    )

    # Character budgets for the next notification (config with fallbacks).
    header_limit = self.Core.config_file.get("header_limit", 50)
    message_limit = self.Core.config_file.get("message_limit", 200)

    return f"""
    We have previously sent these push notifications to the user:
    {previous_text_str}

    The user has still not re-engaged. Generate the *next* push notification to motivate the user
    to return and continue their music learning.

    Constraints:
    - "header" must be fewer than {header_limit} characters.
    - "message" must be fewer than {message_limit} characters.
    - Output must be valid JSON with exactly two keys: "header" and "message".
    - Do NOT repeat the exact same wording as prior messages; keep the same overall style.
    - The user is a music student who hasn't been active recently.

    Tune:
    - {self.Core.subsequence_messages[step]}

    Return only JSON of the form:
    {{
        "header": "...",
        "message": "..."
    }}
    """.strip()
|
| 153 |
+
|
| 154 |
+
# --------------------------------------------------------------
|
| 155 |
+
def parsing_output_message(self, message, user):
    """
    Parses the output JSON from the LLM and enriches it with additional content
    information if needed (e.g., from recsys). Re-uses the logic from the
    single-message pipeline to keep the results consistent.

    :param message: Output JSON *dictionary* from the LLM (with at least "message" and "header").
    :param user: The user row dictionary.
    :return: A valid JSON string or None if the structure is invalid.
    """
    if self.Core.involve_recsys_result:
        # If recsys is used, fetch recommendation data
        output_message = self.fetch_recommendation_data(user, message)
        # Fix: fetch_recommendation_data returns None when the content id
        # is absent; previously that was serialized to the string "null"
        # instead of signalling failure to the caller.
        if output_message is None:
            return None
    elif self.Core.messaging_mode == "recommend_playlist":
        # If recommending a playlist, add the relevant fields
        if "playlist_id" in message and "message" in message:
            playlist_id = str(message["playlist_id"])
            web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
            output_message = {
                "header": message.get("header", ""),
                "message": message.get("message", ""),
                "playlist_id": int(message["playlist_id"]),
                "web_url_path": web_url_path,
            }
        else:
            print("LLM output is missing either 'playlist_id' or 'message'.")
            return None
    else:
        # Basic scenario: Only 'header' and 'message' expected
        if "message" not in message or "header" not in message:
            print("LLM output is missing 'header' or 'message'.")
            return None
        output_message = {
            "header": message["header"],
            "message": message["message"]
        }

    return json.dumps(output_message, ensure_ascii=False)
|
| 193 |
+
|
| 194 |
+
# --------------------------------------------------------------
|
| 195 |
+
def fetch_recommendation_data(self, user, message):
    """
    Merge metadata of the recommended content (url, title, thumbnail)
    from the user's recsys payload into the message dict. Identical to
    the single-message pipeline's enrichment step.

    :param user: The user row (with 'recsys_result', 'recommendation', etc.).
    :param message: Dictionary with at least "header" and "message".
    :return: Enriched dict (header, message, content_id, web_url_path,
             title, thumbnail_url), or None when the id is not found.
    """
    target_id = int(user["recommendation"])
    payload = json.loads(user["recsys_result"])

    # First item across all categories whose content_id matches.
    match = next(
        (item for items in payload.values() for item in items
         if item.get("content_id") == target_id),
        None,
    )

    if match is None:
        print(f"content_id {target_id} not found in recsys_data for user_id {user['user_id']}.")
        return None

    # Construct the final dictionary; strip escape characters from the body.
    return {
        "header": message.get("header"),
        "message": message.get("message", "").replace('\\', '').replace('"', ''),
        "content_id": target_id,
        "web_url_path": match.get("web_url_path"),
        "title": match.get("title"),
        "thumbnail_url": match.get("thumbnail_url"),
    }
|
| 237 |
+
|
| 238 |
+
# --------------------------------------------------------------
|
| 239 |
+
def get_llm_response(self, prompt, max_retries=4):
|
| 240 |
+
"""
|
| 241 |
+
Calls the LLM (similar to MessageGenerator) with the prompt, returning a dict
|
| 242 |
+
with keys like 'header' and 'message' if successful, or None otherwise.
|
| 243 |
+
|
| 244 |
+
:param prompt: The text prompt for the LLM.
|
| 245 |
+
:param max_retries: Number of retries for potential LLM/connection failures.
|
| 246 |
+
:return: Dictionary with 'header' and 'message', or None if unsuccessful.
|
| 247 |
+
"""
|
| 248 |
+
openai.api_key = self.Core.api_key
|
| 249 |
+
instructions = self.llm_instructions()
|
| 250 |
+
client = OpenAI(api_key=self.Core.api_key)
|
| 251 |
+
|
| 252 |
+
for attempt in range(max_retries):
|
| 253 |
+
try:
|
| 254 |
+
response = client.chat.completions.create(
|
| 255 |
+
model=self.Core.model,
|
| 256 |
+
response_format={"type": "json_object"},
|
| 257 |
+
messages=[
|
| 258 |
+
{"role": "system", "content": instructions},
|
| 259 |
+
{"role": "user", "content": prompt}
|
| 260 |
+
],
|
| 261 |
+
max_tokens=500,
|
| 262 |
+
n=1,
|
| 263 |
+
temperature=0.6
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
tokens = {
|
| 267 |
+
'prompt_tokens': response.usage.prompt_tokens,
|
| 268 |
+
'completion_tokens': response.usage.completion_tokens,
|
| 269 |
+
'total_tokens': response.usage.total_tokens
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
try:
|
| 273 |
+
content = response.choices[0].message.content
|
| 274 |
+
output = json.loads(content)
|
| 275 |
+
|
| 276 |
+
# Validate output keys
|
| 277 |
+
if 'message' not in output or 'header' not in output:
|
| 278 |
+
print(f"'message' or 'header' missing in response (attempt {attempt+1}). Retrying...")
|
| 279 |
+
continue
|
| 280 |
+
|
| 281 |
+
# Check character length constraints
|
| 282 |
+
if (len(output["header"].strip()) > self.Core.config_file["header_limit"] or
|
| 283 |
+
len(output["message"].strip()) > self.Core.config_file["message_limit"]):
|
| 284 |
+
print(f"Header or message exceeded character limits (attempt {attempt+1}). Retrying...")
|
| 285 |
+
continue
|
| 286 |
+
|
| 287 |
+
# If we're good here, update token usage
|
| 288 |
+
self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
|
| 289 |
+
self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
|
| 290 |
+
self.Core.temp_token_counter += tokens['total_tokens']
|
| 291 |
+
|
| 292 |
+
return output
|
| 293 |
+
|
| 294 |
+
except json.JSONDecodeError:
|
| 295 |
+
print(f"Invalid JSON from LLM (attempt {attempt+1}). Retrying...")
|
| 296 |
+
|
| 297 |
+
except openai.APIConnectionError as e:
|
| 298 |
+
print("The server could not be reached")
|
| 299 |
+
print(e.__cause__)
|
| 300 |
+
except openai.RateLimitError as e:
|
| 301 |
+
print("Received a 429 status code; backing off might be needed.")
|
| 302 |
+
except openai.APIStatusError as e:
|
| 303 |
+
print("A non-200 status code was received")
|
| 304 |
+
print(e.status_code)
|
| 305 |
+
print(e.response)
|
| 306 |
+
|
| 307 |
+
print("Max retries exceeded. Returning None.")
|
| 308 |
+
return None
|
| 309 |
+
|
| 310 |
+
# --------------------------------------------------------------
|
| 311 |
+
def llm_instructions(self):
|
| 312 |
+
"""
|
| 313 |
+
System instructions for the LLM, focusing on generating motivational messages
|
| 314 |
+
for a returning music student. Extended or adapted as needed.
|
| 315 |
+
|
| 316 |
+
:return: A string with top-level instructions for the model.
|
| 317 |
+
"""
|
| 318 |
+
instructions = """
|
| 319 |
+
You are an AI assistant helping to create push notification messages for a music student
|
| 320 |
+
who has not been active recently. Each new message should build on previously sent
|
| 321 |
+
messages. Provide short, motivational text that encourages the user to come back.
|
| 322 |
+
Ensure the final output is valid JSON with keys "header" and "message."
|
| 323 |
+
""".strip()
|
| 324 |
+
return instructions
|
Messaging_system/Permes.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
The flow of the program starts from the create_personalize_messages function.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
import time
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
from Messaging_system.DataCollector import DataCollector
|
| 9 |
+
from Messaging_system.CoreConfig import CoreConfig
|
| 10 |
+
from Messaging_system.LLMR import LLMR
|
| 11 |
+
import streamlit as st
|
| 12 |
+
from Messaging_system.Message_generator import MessageGenerator
|
| 13 |
+
from Messaging_system.PromptGenerator import PromptGenerator
|
| 14 |
+
from Messaging_system.SnowFlakeConnection import SnowFlakeConn
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Permes:
    """
    LLM-based personalized message generator.

    Entry point is create_personalize_messages(), which configures a CoreConfig
    object from the caller's options and then runs the pipeline
    (data collection -> optional recommendations -> prompt generation ->
    message generation) via _create_personalized_message().
    """

    def create_personalize_messages(self, session, users, brand, config_file, openai_api_key, CTA, segment_info,
                                    platform="push", number_of_messages=1, instructionset=None,
                                    message_style=None, selected_input_features=None, selected_source_features=None
                                    , recsys_contents=None,
                                    additional_instructions=None, identifier_column="user_id",
                                    sample_example=None, number_of_samples=None, involve_recsys_result=False,
                                    messaging_mode="message", target_column=None, ongoing_df=None,
                                    progress_callback=None, segment_name="no_recent_activity"):
        """
        Create personalized messages for the input users for the app or push platform.

        :param session: snowflake connection object
        :param users: users dataframe
        :param brand: brand identifier passed through to CoreConfig
        :param config_file: parsed messaging-system configuration
        :param openai_api_key: OpenAI API key
        :param CTA: call to action for the messages
        :param segment_info: common information about the users
        :param platform: "push" or "app"
        :param number_of_messages: how many messages to generate per user
        :param instructionset: extra instruction set forwarded to CoreConfig
        :param message_style: style of the message
        :param selected_input_features: input columns to keep from `users`
        :param selected_source_features: source features forwarded to CoreConfig
        :param recsys_contents: recommended-content catalog, if any
        :param additional_instructions: extra free-form prompt instructions
        :param identifier_column: column that identifies a user ("user_id" or "EMAIL")
        :param sample_example: a sample for one shot prompting
        :param number_of_samples: number of sample messages
        :param involve_recsys_result: whether to include recommendations in messages
        :param messaging_mode: messaging mode; anything other than "message" is forwarded
        :param target_column: column naming a target content to recommend
        :param ongoing_df: NOTE(review): accepted but never used in this method — confirm intent
        :param progress_callback: optional UI progress callback
        :param segment_name: name of the user segment
        :return: users dataframe enriched with generated messages
        """

        # primary processing
        # NOTE(review): identify_users renames identifier_column -> "USER_ID" in place
        # (mutates the caller's DataFrame) unless the identifier is EMAIL.
        users = self.identify_users(users_df=users, identifier_column=identifier_column)

        if selected_input_features is None:
            selected_input_features = []
            selected_input_features.append(identifier_column)
        else:
            # NOTE(review): membership is tested with the original casing but the
            # UPPER-cased name is appended — looks inconsistent; confirm intended.
            # Also mutates the caller's list in place.
            if identifier_column not in selected_input_features and selected_input_features is not None:
                selected_input_features.append(identifier_column.upper())
            # NOTE(review): after the rename above, selecting by the original
            # identifier_column would KeyError unless it is already "USER_ID" /
            # upper-cased in the frame — TODO confirm against callers.
            users = users[selected_input_features]

        # Central configuration object shared by every pipeline stage.
        personalize_message = CoreConfig(session=session,
                                         users_df=users,
                                         brand=brand,
                                         platform=platform,
                                         config_file=config_file)

        personalize_message.set_CTA(CTA)
        personalize_message.set_segment_info(segment_info)
        personalize_message.set_openai_api(openai_api_key)
        personalize_message.set_segment_name(segment_name=segment_name)
        personalize_message.set_number_of_messages(number_of_messages=number_of_messages, instructionset=instructionset)

        if message_style:  # Check if message_style is not empty
            personalize_message.set_message_style(message_style)

        if sample_example:  # Check if sample_example is not empty
            personalize_message.set_sample_example(sample_example)

        if additional_instructions:
            personalize_message.set_additional_instructions(additional_instructions)

        if number_of_samples:
            personalize_message.set_number_of_samples(number_of_samples)

        if selected_source_features:
            personalize_message.set_features_to_use(selected_source_features)

        if involve_recsys_result:
            personalize_message.set_messaging_mode("recsys_result")
            personalize_message.set_involve_recsys_result(involve_recsys_result)

        if target_column:
            personalize_message.set_target_feature(target_column)

        # NOTE(review): when involve_recsys_result is True this can overwrite the
        # "recsys_result" mode set above — confirm precedence is intended.
        if messaging_mode != "message":
            personalize_message.set_messaging_mode(messaging_mode)

        if recsys_contents:
            personalize_message.set_recsys_contents(recsys_contents)

        users_df = self._create_personalized_message(CoreConfig=personalize_message, progress_callback=progress_callback)

        total_prompt_tokens = personalize_message.total_tokens["prompt_tokens"]
        total_completion_tokens = personalize_message.total_tokens["completion_tokens"]

        # Cost estimate using per-million-token pricing (0.15 prompt / 0.6 completion).
        total_cost = ((total_prompt_tokens / 1000000) * 0.15) + (
                (total_completion_tokens / 1000000) * 0.6)  # Cost calculation estimation
        print(f"Estimated Cost (USD): {total_cost:.5f}")

        # Storing process can also happen after some evaluation steps
        # snowflake_conn = SnowFlakeConn(session=session, brand=brand)
        # query = snowflake_conn.generate_write_sql_query(table_name="AI_generated_messages", dataframe=users_df)
        # snowflake_conn.run_write_query(query=query, table_name="AI_generated_messages", dataframe=users_df)
        # snowflake_conn.close_connection()

        return users_df

    # -----------------------------------------------------
    def identify_users(self, users_df, identifier_column):
        """
        Normalize the user-identifier column name.

        EMAIL identifiers are left untouched; any other identifier column is
        renamed to "USER_ID" *in place* (mutates the caller's DataFrame).

        :param users_df: users dataframe
        :param identifier_column: name of the identifying column
        :return: updated users dataframe
        """

        if identifier_column.upper() == "EMAIL":
            return users_df
        else:
            # NOTE(review): inplace rename mutates the caller's DataFrame.
            users_df.rename(columns={identifier_column: "USER_ID"}, inplace=True)
            return users_df

    # ------------------------------------------------------------------
    def _create_personalized_message(self, CoreConfig, progress_callback):
        """
        Main pipeline: data collection -> (optional) recommendations ->
        prompt generation -> message generation -> filtering.

        :param CoreConfig: configured CoreConfig instance
        :param progress_callback: optional UI progress callback
        :return: updated users_df with extracted information and personalized messages.
        """
        # Collecting all the data that we need to personalize messages
        datacollect = DataCollector(CoreConfig)
        CoreConfig = datacollect.gather_data()

        # generating recommendations for users, if we want to include recommendations in the message
        if CoreConfig.involve_recsys_result:
            Recommender = LLMR(CoreConfig)
            CoreConfig = Recommender.get_recommendations(progress_callback)

        # generating proper prompt for each user
        prompt = PromptGenerator(CoreConfig)
        CoreConfig = prompt.generate_prompts()

        # generating messages for each user
        message_generator = MessageGenerator(CoreConfig)
        CoreConfig = message_generator.generate_messages(progress_callback)

        # Eliminating rows where we don't have a valid message (empty or whitespace only).
        # NOTE(review): .str.strip() yields NaN for null messages and bool(NaN) is True,
        # so truly-null messages may survive this filter — confirm upstream fills them.
        CoreConfig.users_df = CoreConfig.users_df[CoreConfig.users_df["message"].str.strip().astype(bool)]
        CoreConfig.checkpoint()

        # closing snowflake connection
        # CoreConfig.session.close()

        return CoreConfig.users_df
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
|
Messaging_system/PromptGenerator.py
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This class generates the proper prompts for the messaging system.
|
| 3 |
+
"""
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class PromptGenerator:
|
| 9 |
+
|
| 10 |
+
def __init__(self, Core):
|
| 11 |
+
self.Core = Core
|
| 12 |
+
|
| 13 |
+
# --------------------------------------------------------------
|
| 14 |
+
# --------------------------------------------------------------
|
| 15 |
+
def generate_prompts(self):
|
| 16 |
+
"""
|
| 17 |
+
generates a personalized message for each student
|
| 18 |
+
:return:
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
# if we have personalized information about them, we generate a personalized prompt
|
| 22 |
+
for idx, row in tqdm(self.Core.users_df.iterrows(), desc="generating prompts"):
|
| 23 |
+
# check if we have enough information to generate a personalized message
|
| 24 |
+
prompt = self.generate_personalized_prompt(user=row)
|
| 25 |
+
# message = self.call_llm(prompt)
|
| 26 |
+
self.Core.users_df.at[idx, "prompt"] = prompt
|
| 27 |
+
self.Core.users_df.at[idx, "source"] = "AI-generated"
|
| 28 |
+
|
| 29 |
+
return self.Core
|
| 30 |
+
|
| 31 |
+
# --------------------------------------------------------------
|
| 32 |
+
def safe_get(self, value):
|
| 33 |
+
return str(value) if pd.notna(value) else "Not available"
|
| 34 |
+
|
| 35 |
+
# ==============================================================
|
| 36 |
+
def get_user_profile(self, user):
|
| 37 |
+
|
| 38 |
+
additional_info = self.user_additional_info(user)
|
| 39 |
+
|
| 40 |
+
user_info = f"""
|
| 41 |
+
### **User Information:**
|
| 42 |
+
|
| 43 |
+
Here is the information about the user:
|
| 44 |
+
{self.safe_get(self.Core.segment_info)}
|
| 45 |
+
|
| 46 |
+
**User profile:**
|
| 47 |
+
first name: {self.safe_get(user.get("first_name"))}
|
| 48 |
+
{self.safe_get(user.get("user_info"))}
|
| 49 |
+
last completed content: {self.safe_get(user.get("last_completed_content"))}
|
| 50 |
+
{self.safe_get(additional_info)}
|
| 51 |
+
Weeks since Last interaction:{self.safe_get(user.get("weeks_since_last_interaction"))}
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
return user_info
|
| 55 |
+
|
| 56 |
+
# --------------------------------------------------------------
|
| 57 |
+
def generate_personalized_prompt(self, user):
|
| 58 |
+
"""
|
| 59 |
+
generate a personalized prompt by putting the information from the user into a template prompt
|
| 60 |
+
:return: Personalized prompt (string)
|
| 61 |
+
"""
|
| 62 |
+
context = self.input_context()
|
| 63 |
+
cta = self.CTA_instructions()
|
| 64 |
+
|
| 65 |
+
if self.Core.involve_recsys_result or self.Core.target_content is not None:
|
| 66 |
+
if user["recommendation"] is not None or user["recommendation_info"] is not None:
|
| 67 |
+
recommendations_instructions = self.recommendations_instructions(user=user) + "\n"
|
| 68 |
+
else:
|
| 69 |
+
recommendations_instructions = ""
|
| 70 |
+
|
| 71 |
+
user_info = self.get_user_profile(user=user)
|
| 72 |
+
|
| 73 |
+
personalize_message_instructions = self.personalize_message_instructions(user)
|
| 74 |
+
|
| 75 |
+
output_instructions = self.output_instruction()
|
| 76 |
+
|
| 77 |
+
task_instructions = self.task_instructions()
|
| 78 |
+
|
| 79 |
+
prompt = f"""
|
| 80 |
+
{context}
|
| 81 |
+
{cta}
|
| 82 |
+
|
| 83 |
+
{personalize_message_instructions}
|
| 84 |
+
{recommendations_instructions}
|
| 85 |
+
{task_instructions}
|
| 86 |
+
|
| 87 |
+
{user_info}
|
| 88 |
+
{output_instructions}
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
return prompt
|
| 92 |
+
|
| 93 |
+
# --------------------------------------------------------------
|
| 94 |
+
# --------------------------------------------------------------
|
| 95 |
+
def input_context(self):
|
| 96 |
+
"""
|
| 97 |
+
:return: input instructions as a string
|
| 98 |
+
"""
|
| 99 |
+
|
| 100 |
+
context = f""" You are a helpful assistant at Musora, an online music education platform that helps users
|
| 101 |
+
learn music. Your goal is to generate a fully personalized message specifically tailored to the user, to increase
|
| 102 |
+
their engagement with the message.
|
| 103 |
+
|
| 104 |
+
"""
|
| 105 |
+
|
| 106 |
+
return context
|
| 107 |
+
|
| 108 |
+
# --------------------------------------------------------------
|
| 109 |
+
# --------------------------------------------------------------
|
| 110 |
+
def CTA_instructions(self):
|
| 111 |
+
"""
|
| 112 |
+
define CTA instructions
|
| 113 |
+
:return: CTA instructions (str)
|
| 114 |
+
"""
|
| 115 |
+
|
| 116 |
+
instructions = f"""
|
| 117 |
+
Create a clear header, and a message considering the call to action we want the user to hear from us:
|
| 118 |
+
|
| 119 |
+
**Call to Action:**
|
| 120 |
+
- **{self.Core.CTA}** \n
|
| 121 |
+
"""
|
| 122 |
+
|
| 123 |
+
return instructions
|
| 124 |
+
|
| 125 |
+
# --------------------------------------------------------------
|
| 126 |
+
# --------------------------------------------------------------
|
| 127 |
+
def user_additional_info(self, user):
|
| 128 |
+
"""
|
| 129 |
+
providing additional information given in the input data
|
| 130 |
+
:param user:
|
| 131 |
+
:return:
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
if pd.notna(user["additional_info"]) and user["additional_info"] not in [None, [], {}] and (
|
| 135 |
+
not isinstance(user["additional_info"], str) or user["additional_info"].strip()):
|
| 136 |
+
additional_info = user["additional_info"]
|
| 137 |
+
else:
|
| 138 |
+
additional_info = ""
|
| 139 |
+
|
| 140 |
+
return additional_info
|
| 141 |
+
|
| 142 |
+
# --------------------------------------------------------------
|
| 143 |
+
# --------------------------------------------------------------
|
| 144 |
+
def recommendations_instructions(self, user):
|
| 145 |
+
"""
|
| 146 |
+
instructions about target recommendation for the user
|
| 147 |
+
:param user:
|
| 148 |
+
:return:
|
| 149 |
+
"""
|
| 150 |
+
|
| 151 |
+
instructions_for_recsys = f"""
|
| 152 |
+
### ** Recommendations instructions **:
|
| 153 |
+
Below is the content that we want to recommend to the user:
|
| 154 |
+
|
| 155 |
+
Recommended content: {user["recommendation_info"]}
|
| 156 |
+
|
| 157 |
+
- Use the **CONTENT_TITLE** naturally in the message if capable, but do not use the exact title verbatim or put it in quotes.
|
| 158 |
+
- Naturally mention the **CONTENT_TYPE** for course, workout, and quicktips if capable.
|
| 159 |
+
- If the recommended content has an **Artist** with a known full name, use the ** FULL NAME ** naturally in the message if capable. If only the first name of the Artist is available, ** DO NOT ** use it at all.
|
| 160 |
+
"""
|
| 161 |
+
|
| 162 |
+
# need to adjust
|
| 163 |
+
instructions_for_target_content = """
|
| 164 |
+
- Considering the information about the user, and the content that we want to recommend, include the **TITLE** inside single quotes, or use the title naturally without the exact title name and quotes if capable.
|
| 165 |
+
Naturally mention the **CONTENT_TYPE** for course, workout, quicktips if capable and shortly provide a reasoning why the content is helpful for them.
|
| 166 |
+
|
| 167 |
+
**Target recommended Content**:
|
| 168 |
+
"""
|
| 169 |
+
|
| 170 |
+
instructions = ""
|
| 171 |
+
|
| 172 |
+
if self.Core.involve_recsys_result:
|
| 173 |
+
instructions += f"""
|
| 174 |
+
{instructions_for_recsys}
|
| 175 |
+
"""
|
| 176 |
+
|
| 177 |
+
elif self.Core.target_content is not None:
|
| 178 |
+
# fetching the information related to the target content from content_table
|
| 179 |
+
target_info = self.get_target_content_info(user)
|
| 180 |
+
instructions += f"""
|
| 181 |
+
{instructions_for_target_content}
|
| 182 |
+
{target_info}
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
+
return instructions
|
| 186 |
+
|
| 187 |
+
# --------------------------------------------------------------
|
| 188 |
+
# --------------------------------------------------------------
|
| 189 |
+
def get_target_content_info(self, user):
|
| 190 |
+
"""
|
| 191 |
+
fetching information about the target content that we want to recommend to the user
|
| 192 |
+
:param user: target user
|
| 193 |
+
:return:
|
| 194 |
+
"""
|
| 195 |
+
|
| 196 |
+
# checking that user[self.target_content] contains a content_id:
|
| 197 |
+
target_id = int(user[self.Core.target_content])
|
| 198 |
+
|
| 199 |
+
try:
|
| 200 |
+
|
| 201 |
+
# fetching the data for target content (self.target_content column in user)
|
| 202 |
+
content_info_row = self.Core.content_info.loc[self.Core.content_info['content_id'] == target_id]
|
| 203 |
+
|
| 204 |
+
text = f"""
|
| 205 |
+
**content_id** : {str(content_info_row["content_id"])}"
|
| 206 |
+
**content_info** : \n {content_info_row["content_info"]} \n\n"
|
| 207 |
+
"""
|
| 208 |
+
return text
|
| 209 |
+
except:
|
| 210 |
+
print(f"Target content cannot be found in the content database: content_id = {target_id}")
|
| 211 |
+
|
| 212 |
+
# --------------------------------------------------------------
|
| 213 |
+
# --------------------------------------------------------------
|
| 214 |
+
def personalize_message_instructions(self, user):
|
| 215 |
+
"""
|
| 216 |
+
:return: personalized message instructions as a string
|
| 217 |
+
"""
|
| 218 |
+
|
| 219 |
+
general_instructions = self.message_type_instructions()
|
| 220 |
+
|
| 221 |
+
instructions = """
|
| 222 |
+
### ** Personalized Message Specifications **
|
| 223 |
+
|
| 224 |
+
Based on the available information about the user, create a personalized message for the user:
|
| 225 |
+
\n
|
| 226 |
+
"""
|
| 227 |
+
|
| 228 |
+
# Name
|
| 229 |
+
if "first_name" in self.Core.list_of_features and pd.notna(user["first_name"]) and user["first_name"] not in [
|
| 230 |
+
None,
|
| 231 |
+
[],
|
| 232 |
+
{}] and (
|
| 233 |
+
not isinstance(user["first_name"], str) or user["first_name"].strip()):
|
| 234 |
+
instructions += f"""
|
| 235 |
+
- Address the user by their first name (only first letter capital) to make the message more personal. \n
|
| 236 |
+
"""
|
| 237 |
+
else:
|
| 238 |
+
instructions += """
|
| 239 |
+
- If the user's name is not available or invalid (e.g. email), proceed without addressing them by name. \n
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
# Birthday reminder
|
| 243 |
+
if "birthday_reminder" in self.Core.list_of_features and pd.notna(user["birthday_reminder"]) and user[
|
| 244 |
+
"birthday_reminder"] not in [None, [], {}] and (
|
| 245 |
+
not isinstance(user["birthday_reminder"], str) or user["birthday_reminder"].strip()):
|
| 246 |
+
instructions += """
|
| 247 |
+
- **Include a short message to remind them that their birthday is coming up.** \n
|
| 248 |
+
|
| 249 |
+
"""
|
| 250 |
+
|
| 251 |
+
# Additional instructions for input columns
|
| 252 |
+
if self.Core.additional_instructions is not None or str(self.Core.additional_instructions).strip() != '':
|
| 253 |
+
instructions += str(self.Core.additional_instructions)
|
| 254 |
+
|
| 255 |
+
instructions += self.fire_wall() + "\n"
|
| 256 |
+
|
| 257 |
+
final_instructions = f"""
|
| 258 |
+
{general_instructions}
|
| 259 |
+
|
| 260 |
+
{instructions}
|
| 261 |
+
|
| 262 |
+
"""
|
| 263 |
+
|
| 264 |
+
return final_instructions
|
| 265 |
+
|
| 266 |
+
# --------------------------------------------------------------
|
| 267 |
+
# --------------------------------------------------------------
|
| 268 |
+
|
| 269 |
+
def message_type_instructions(self):
|
| 270 |
+
"""
|
| 271 |
+
create a proper instruction for the message type, regarding the input platform
|
| 272 |
+
:return: message instructions as a string
|
| 273 |
+
"""
|
| 274 |
+
|
| 275 |
+
instructions = ""
|
| 276 |
+
message_style = self.message_style_instructions()
|
| 277 |
+
|
| 278 |
+
if self.Core.platform == "push":
|
| 279 |
+
instructions = f"""
|
| 280 |
+
### ** General Specifications: **
|
| 281 |
+
|
| 282 |
+
- The message is a **mobile push notification**.
|
| 283 |
+
- Make all parts of the message highly **personalized**, **eye-catching**, and **bring curiosity**
|
| 284 |
+
- ** Keep the First sentence as "header": short and less than 30 character **.
|
| 285 |
+
- ** For the "header", Use a space following with a proper emoji at the end (e.g. Great work John 😍) **
|
| 286 |
+
- Use drum emoji or general music emojis (e.g. 🥁, 🎶, 🎵), and Other emojis that relate to motivation, progress, inspiration, and create curiosity can also be used (like 🔥, 🚀, 💪, 🎉, 👀)
|
| 287 |
+
- **Keep the "message" concise and under 100 characters**.
|
| 288 |
+
- Every word should contribute to maximizing impact and engagement, so start directly with the message content without greetings or closing phrases.
|
| 289 |
+
- Avoid using same or similar words so close together in "message" and "header", and make sure there is no grammar problem.
|
| 290 |
+
- ****.
|
| 291 |
+
{message_style}
|
| 292 |
+
|
| 293 |
+
"""
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
elif self.Core.platform == "app":
|
| 297 |
+
instructions = f"""
|
| 298 |
+
Message Specifications:
|
| 299 |
+
- The message is an **in app notification**.
|
| 300 |
+
- ** Keep the First sentence as "header" that should be a short personalized eye catching sentence less than 40 character **.
|
| 301 |
+
- ** For the "header", don't use exclamation mark at the end, instead, use a space following with a proper emoji at the end of the "header" (e.g. Great work John 😍) **
|
| 302 |
+
- **Keep the message concise and straightforward**.
|
| 303 |
+
- **Start directly with the message content**; do not include greetings (e.g., "Hello") or closing phrases.
|
| 304 |
+
- Make the message highly **personalized** and **eye-catching**.
|
| 305 |
+
- "Personalized" means the user should feel the message is specifically crafted for them and not generic.
|
| 306 |
+
- **Every word should contribute to maximizing impact and engagement**.
|
| 307 |
+
- {message_style}
|
| 308 |
+
"""
|
| 309 |
+
|
| 310 |
+
return instructions
|
| 311 |
+
|
| 312 |
+
# --------------------------------------------------------------
|
| 313 |
+
# --------------------------------------------------------------
|
| 314 |
+
def message_style_instructions(self):
|
| 315 |
+
"""
|
| 316 |
+
defines the style of the message: e.g. friendly, kind, tone, etc.
|
| 317 |
+
:return: style_instructions(str)
|
| 318 |
+
"""
|
| 319 |
+
|
| 320 |
+
if self.Core.message_style is None and self.Core.sample_example is None:
|
| 321 |
+
message_style = f"""
|
| 322 |
+
- Keep the tone **kind**, **friendly causal**, and **encouraging**.
|
| 323 |
+
"""
|
| 324 |
+
|
| 325 |
+
else:
|
| 326 |
+
message_style = f"""
|
| 327 |
+
- {self.Core.message_style}.
|
| 328 |
+
"""
|
| 329 |
+
|
| 330 |
+
return message_style
|
| 331 |
+
|
| 332 |
+
# --------------------------------------------------------------
|
| 333 |
+
# --------------------------------------------------------------
|
| 334 |
+
def fire_wall(self):
|
| 335 |
+
"""
|
| 336 |
+
Provide explicit instructions to ensure that sensitive information is not included in the generated message.
|
| 337 |
+
:return: string
|
| 338 |
+
"""
|
| 339 |
+
fire_wall = f"""
|
| 340 |
+
### Restrictions:
|
| 341 |
+
|
| 342 |
+
- **Do not include** any personal sensitive or confidential information.
|
| 343 |
+
- **Avoid AI Jargon:** Skip overused phrases like: {self.Core.config_file["AI_Jargon"]}.
|
| 344 |
+
"""
|
| 345 |
+
return fire_wall
|
| 346 |
+
|
| 347 |
+
# --------------------------------------------------------------
|
| 348 |
+
# --------------------------------------------------------------
|
| 349 |
+
def output_instruction(self):
|
| 350 |
+
"""
|
| 351 |
+
:return: output instructions as a string
|
| 352 |
+
"""
|
| 353 |
+
|
| 354 |
+
example_output = self.example_output()
|
| 355 |
+
general_instructions = """
|
| 356 |
+
- The "header" must be less than 30 character.
|
| 357 |
+
- The "message" must be less than 100 character.
|
| 358 |
+
- Do not include any links in the message.
|
| 359 |
+
- Preserve special characters and emojis in the message.
|
| 360 |
+
- Ensure that the output is a valid JSON.
|
| 361 |
+
- Do not include any text outside the JSON code block.
|
| 362 |
+
"""
|
| 363 |
+
|
| 364 |
+
instructions = f"""
|
| 365 |
+
Your response should be in JSON format with the following structure:
|
| 366 |
+
|
| 367 |
+
{{
|
| 368 |
+
"header": "Generated title",
|
| 369 |
+
"message": "Generated message",
|
| 370 |
+
}}
|
| 371 |
+
|
| 372 |
+
{general_instructions}
|
| 373 |
+
"""
|
| 374 |
+
|
| 375 |
+
output_instructions = f"""
|
| 376 |
+
### **Output instructions**:
|
| 377 |
+
|
| 378 |
+
{example_output}
|
| 379 |
+
{instructions}
|
| 380 |
+
"""
|
| 381 |
+
|
| 382 |
+
return output_instructions
|
| 383 |
+
|
| 384 |
+
# --------------------------------------------------------------
|
| 385 |
+
# --------------------------------------------------------------
|
| 386 |
+
def example_output(self):
    """
    Return a one-shot example section to steer the LLM's style.

    :return: example section string, or "" when no sample example is
        configured on the core object
    """
    sample = self.Core.sample_example
    if sample is None:
        return ""

    # one-shot prompting: show the model what a good answer looks like
    return f"""
    Based on the examples below, create a header and message that follows the same style, tone, characteristic, and creativity.

    ### **Examples:**
    {sample}
    """
|
| 406 |
+
|
| 407 |
+
# --------------------------------------------------------------
|
| 408 |
+
# --------------------------------------------------------------
|
| 409 |
+
|
| 410 |
+
def task_instructions(self):
    """
    Build the "Tasks" section of the prompt.

    When recommender-system output is involved, extra instructions about
    weaving the recommended content into the message are included.

    :return: task instructions as a string
    """
    recsys_task = ""
    if self.Core.involve_recsys_result:
        recsys_task = """
    - Create a perfect message and the header following the instructions, using the user's information and the content that we want to recommend.
    - Use the instructions to include the recommended content in the message.
    - Follow the instructions to create the messages.
    """

    message_task = """
    - Create a perfect personalized message considering the information and instructions mentioned. Your output format should be based on **Output instructions**."""

    return f"""
    ### Tasks:
    {recsys_task}
    {message_task}
    """
|
Messaging_system/SnowFlakeConnection.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This class create a connection to Snowflake, run queries (read and write)
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from snowflake.snowpark import Session
|
| 9 |
+
from sympy.strategies.branch import condition
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SnowFlakeConn:
    """
    Thin wrapper around a Snowflake Snowpark session.

    Builds and runs the read queries the messaging system needs (users,
    contents, interactions, recsys results, popular contents) and writes
    result dataframes back to Snowflake.
    """

    def __init__(self, session, brand):
        # Snowpark session created by the caller; this class never opens one.
        self.session = session
        self.brand = brand

        # Canonical column set (and order) for dataframes written back to Snowflake.
        self.final_columns = ['user_id', "email", "user_info", "permission", "expiration_date", "recsys_result", "message", "brand", "recommendation", "segment_name", "timestamp"]

    # ---------------------------------------------------------------
    def run_read_query(self, query, data):
        """
        Execute a read query on Snowflake and return the result.

        :param query: SQL string to execute
        :param data: short label used only for logging
        :return: pandas DataFrame with lower-cased column names, or None
            when the query fails (the error is printed, not raised)
        """
        try:
            dataframe = self.session.sql(query).to_pandas()
            dataframe.columns = dataframe.columns.str.lower()
            print(f"reading {data} table successfully")
            return dataframe
        except Exception as e:
            # Deliberately swallowed so one bad query does not kill a batch
            # run; callers must handle a None result.
            print(f"Error in creating/updating table: {e}")
            return None

    # ---------------------------------------------------------------
    def is_json_parsed_to_collection(self, s):
        """Return True when ``s`` is JSON text that parses to a dict or list."""
        try:
            return isinstance(json.loads(s), (dict, list))
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers
            # non-string input. (Was a bare except.)
            return False

    # ---------------------------------------------------------------
    def store_df_to_snowflake(self, table_name, dataframe, database="ONLINE_RECSYS", schema="GENERATED_DATA"):
        """
        Overwrite (or auto-create) a Snowflake table with a dataframe.

        :param table_name: destination table name (upper-cased before writing)
        :param dataframe: pandas DataFrame to store
        :param database: target Snowflake database
        :param schema: target Snowflake schema
        :return: None (errors are printed, not raised)
        """
        try:
            self.session.use_database(database)
            self.session.use_schema(schema)

            dataframe = dataframe.reset_index(drop=True)
            # Snowflake identifiers are conventionally upper-case.
            dataframe.columns = dataframe.columns.str.upper()

            self.session.write_pandas(df=dataframe,
                                      table_name=table_name.strip().upper(),
                                      auto_create_table=True,
                                      overwrite=True,
                                      use_logical_type=True)
            print(f"Data inserted into {table_name} successfully.")

        except Exception as e:
            print(f"Error in creating/updating/inserting table: {e}")

    # ---------------------------------------------------------------
    def get_data(self, data, list_of_ids=None):
        """
        Fetch one of the supported datasets.

        :param data: one of {'users', 'contents', 'interactions', 'recsys',
            'popular_contents'}
        :param list_of_ids: optional list of user ids to filter on
        :return: pandas DataFrame (or None if the read failed)
        :raises ValueError: when ``data`` is not a supported dataset name
        :raises NotImplementedError: when no query builder exists for ``data``
        """
        valid_data = {'users', 'contents', 'interactions', 'recsys', 'popular_contents'}

        if data not in valid_data:
            raise ValueError(f"Invalid data type: {data}")

        # Dispatch to the matching private query builder (e.g. _get_users).
        method_name = f"_get_{data}"
        method = getattr(self, method_name, None)
        if method is None:
            raise NotImplementedError(f"The method {method_name} is not implemented.")

        query = method(list_of_ids)
        return self.run_read_query(query, data)

    # ---------------------------------------------------------------
    @staticmethod
    def _ids_condition(list_of_ids, keyword="AND"):
        """
        Build a ``USER_ID in (...)`` SQL fragment, or "" when no ids given.

        :param list_of_ids: ids to filter on, or None for no filter
        :param keyword: "AND" or "WHERE", depending on the surrounding query
        """
        if list_of_ids is None:
            return ""
        ids_str = "(" + ", ".join(map(str, list_of_ids)) + ")"
        return f"{keyword} USER_ID in {ids_str}"

    # ---------------------------------------------------------------
    def _get_contents(self, list_of_ids=None):
        """Query for all vectorized contents of this brand."""
        query = f"""
            select CONTENT_ID, CONTENT_TYPE, CONTENT_PROFILE as content_info, CONTENT_PROFILE_VECTOR
            from ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT
            where BRAND = '{self.brand}'
        """
        return query

    # ---------------------------------------------------------------
    def _get_users(self, list_of_ids=None):
        """Query for user profiles plus a days-until-birthday reminder column."""
        condition = self._ids_condition(list_of_ids)

        query = f"""
            select USER_ID, BRAND, FIRST_NAME, BIRTHDAY, TIMEZONE, EMAIL, CURRENT_TIMESTAMP() AS TIMESTAMP, DIFFICULTY, SELF_REPORT_DIFFICULTY, USER_PROFILE as user_info, PERMISSION, EXPIRATION_DATE,
            DATEDIFF(
                day,
                CURRENT_DATE(),
                CASE
                    WHEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY)) < CURRENT_DATE()
                    THEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()) + 1, EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
                    ELSE DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
                END) AS birthday_reminder
            from ONLINE_RECSYS.PREPROCESSED.USERS
            where BRAND = '{self.brand}' {condition}
        """
        return query

    # ---------------------------------------------------------------
    def _get_interactions(self, list_of_ids=None):
        """Query for each user's most recent video interaction and its content profile."""
        condition = self._ids_condition(list_of_ids)

        query = f"""
            WITH latest_interactions AS(
                SELECT
                    USER_ID, CONTENT_ID, CONTENT_TYPE, EVENT_TEXT, TIMESTAMP,
                    ROW_NUMBER() OVER(PARTITION BY USER_ID ORDER BY TIMESTAMP DESC) AS rn
                FROM ONLINE_RECSYS.PREPROCESSED.RECSYS_INTEACTIONS
                WHERE BRAND = '{self.brand}' AND EVENT_TEXT IN('Video Completed', 'Video Playing') {condition})

            SELECT i.USER_ID, i.CONTENT_ID, i.CONTENT_TYPE, c.content_profile as last_completed_content, i.EVENT_TEXT, i.TIMESTAMP, DATEDIFF('week', i.TIMESTAMP, CURRENT_TIMESTAMP) AS weeks_since_last_interaction
            FROM latest_interactions i
            LEFT JOIN
                ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT c ON c.CONTENT_ID = i.CONTENT_ID
            WHERE rn = 1;
        """
        return query

    # ---------------------------------------------------------------
    def _get_recsys(self, list_of_ids=None):
        """Query for recsys-v2 recommendations for this brand's users."""
        # This query has no other WHERE clause, so the id filter starts one.
        condition = self._ids_condition(list_of_ids, keyword="WHERE")

        recsys_col = f"{self.brand}_recsys_v2"
        query = f"""
            select USER_ID, {recsys_col} as recsys_result
            from RECSYS_V2.RECSYS_V2_CIO.RECSYS_V2_CUSTOMER_IO
            {condition}
        """
        return query

    # ---------------------------------------------------------------
    def _get_popular_contents(self, list_of_ids=None):
        """Query for the brand's popular contents (id filter not applicable)."""
        query = f"""
            select POPULAR_CONTENT
            from RECSYS_V2.RECSYS_V2_CIO.POPULAR_CONTENT_CUSTOMER_IO
            where brand = '{self.brand.lower()}'
        """
        return query

    # ---------------------------------------------------------------
    def extract_id_from_email(self, emails):
        """
        Look up user ids for a collection of email addresses.

        :param emails: iterable of email strings
        :return: DataFrame with USER_ID and EMAIL columns (or None on failure)
        """
        # Escape single quotes so an address cannot break (or inject into)
        # the IN (...) list; there is no client-side bind here.
        email_list_str = ', '.join("'" + str(email).replace("'", "''") + "'" for email in emails)
        # NOTE(review): table name USORA_USERS looks like a possible typo for
        # MUSORA_USERS — confirm against the warehouse before changing.
        query = f"""
            SELECT id as USER_ID, email as EMAIL
            FROM STITCH.MUSORA_ECOM_DB.USORA_USERS
            WHERE email IN ({email_list_str})
        """

        user_ids_df = self.run_read_query(query, data="User_ids")
        return user_ids_df

    # ---------------------------------------------------------------
    def adjust_dataframe(self, dataframe):
        """
        Conform a dataframe to ``self.final_columns``.

        Keeps only the expected columns, adds any missing ones as None, and
        returns them in the canonical order with canonical names.

        :param dataframe: input DataFrame (not modified)
        :return: new DataFrame with exactly ``self.final_columns``
        """
        # Work with a copy so that we don't modify the original input.
        final_df = dataframe.copy()

        # Normalize column names to lower-case for matching.
        final_df.columns = final_df.columns.str.lower()
        expected_cols = [col.lower() for col in self.final_columns]

        # Keep only those columns in the expected list.
        available = [col for col in final_df.columns if col in expected_cols]
        final_df = final_df[available]

        # Add missing columns with None values.
        for col in expected_cols:
            if col not in final_df.columns:
                final_df[col] = None

        # Reorder the columns to the desired order.
        final_df = final_df[expected_cols]

        # Rename back to the exact (case-sensitive) names in final_columns.
        rename_mapping = {col.lower(): col for col in self.final_columns}
        final_df.rename(columns=rename_mapping, inplace=True)

        return final_df

    # ---------------------------------------------------------------
    def close_connection(self):
        """Close the underlying Snowpark session."""
        self.session.close()
|
Messaging_system/StoreLayer.py
ADDED
|
File without changes
|
Messaging_system/context_validator.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import time
|
| 3 |
+
import openai
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Validator:
    """
    LLM-based moderator that decides whether user-generated text is valid.

    Sends each text to OpenAI together with moderation instructions and
    expects a JSON answer of the form {"valid": "True"|"False"}.
    """

    def __init__(self, api_key):
        """
        :param api_key: OpenAI API key
        """
        # Assembled later via set_validator_instructions().
        self.validator_instructions = None
        self.api_key = api_key
        self.model = "gpt-4o-mini"

        # Token bookkeeping for rate limiting / cost estimation.
        self.temp_token_counter = 0
        self.total_tokens = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
        }

    # -------------------------------------------------------------------
    def set_openai_api(self, openai_key):
        """
        Set (or replace) the OpenAI API key.

        :param openai_key: OpenAI API key string
        """
        # (Docstring fixed: the original was copied from an unrelated
        # template setter.)
        self.api_key = openai_key

    # -------------------------------------------------------------------
    def context_prompt(self):
        """System-role instructions describing the moderation task."""
        instructions = """
        You are a text moderator and you should parse the input text. based on below instructions. you should decide if
        the input text is a valid input or not.
        """
        return instructions

    # -------------------------------------------------------------------
    def initial_prompt(self):
        """Opening paragraph that frames the platform and the moderation task."""
        instructions = """You are a helpful assistant at Musora, an online music education platform that helps users
        learn music. Our students will provide user-generated-context such as comments and forums on engaging musical
        contents like songs, lessons, workouts or other type of musical and educational content. Your task is
        to determine if the input text provided by our student is a valid text or not.

        """
        return instructions

    # -------------------------------------------------------------------
    def set_validator_instructions(self, valid_instructions="", invalid_instructions=""):
        """
        Assemble and store the validation rules on self.validator_instructions.

        :param valid_instructions: caller-supplied criteria for valid text
            (the built-in defaults are appended)
        :param invalid_instructions: caller-supplied criteria for invalid
            text (the built-in firewall rules are appended)
        """
        instructions = f"""
        ** The text is INValid if it falls into any of the below criteria **:

        {invalid_instructions}
        {self.fire_wall()}
        --------------------------

        Please ensure that the text meets the following criteria to be considered **valid**:

        {valid_instructions}
        {self.default_valid_text()}
        """

        self.validator_instructions = instructions

    # -------------------------------------------------------------------
    def output_instruction(self):
        """
        :return: output instructions as a string
        """
        output_instructions = """
        ** Task: **
        - **Based on the input text, the music educational nature of our contents, and instructions about validating the student's input, check if the text is a valid input or not.**
        - **Your output should be strictly "True" if it is a Valid text, or "False" if it not a valid text.**
        - **You should provide the output in JSON format where the key is "valid"** - **Do not include any text outside the JSON code block**.

        Your response should be in JSON format with the following structure:

        example of a VALID text:

        {
            "valid": "True",
        }

        Example of an INVALID text:

        {
            "valid": "False",
        }
        """
        return output_instructions

    # -------------------------------------------------------------------
    def get_llm_response(self, prompt, max_retries=3):
        """
        Send the prompt to the LLM and return the parsed JSON response.

        Retries up to ``max_retries`` times on invalid JSON, missing/bad
        "valid" keys, or API errors.

        :param prompt: full user prompt to send
        :param max_retries: maximum number of attempts
        :return: dict containing a "valid" key. On total failure returns
            {"valid": "False"} so that callers indexing ["valid"] never
            crash. (BUG FIX: the original returned a tuple ``[], {}`` here,
            which broke validate_text and validate_dataframe.)
        """
        # Kept for any legacy code still using the module-level client.
        openai.api_key = self.api_key
        instructions = self.context_prompt()
        client = OpenAI(api_key=self.api_key)

        for attempt in range(max_retries):
            try:
                response = client.chat.completions.create(
                    model=self.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=500,
                    n=1,
                    temperature=0.7
                )

                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    content = response.choices[0].message.content
                    # Parse the JSON code block.
                    output = json.loads(content)

                    if 'valid' not in output:
                        print(f"'valid' key is missing in response on attempt {attempt + 1}. Retrying...")
                        continue  # Continue to next attempt

                    if output["valid"] not in ["True", "False"]:
                        print(f"True or False value missing in response on attempt {attempt + 1}. Retrying...")
                        continue

                    # Valid answer: account for the tokens and return.
                    self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                    self.total_tokens['completion_tokens'] += tokens['completion_tokens']
                    self.temp_token_counter += tokens['prompt_tokens'] + tokens['completion_tokens']
                    return output

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # an underlying Exception, likely raised within httpx.
            except openai.RateLimitError:
                print("A 429 status code was received; we should back off a bit.")
                # Actually back off before retrying (the original only printed).
                time.sleep(2 ** attempt)
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning failure response.")
        # BUG FIX: was `return [], {}` — a tuple, while every caller does
        # response["valid"]; treat an exhausted retry budget as invalid text.
        return {"valid": "False"}

    # -------------------------------------------------------------------
    def create_validation_prompt(self, input_text):
        """
        Wrap the input text with the framing, rules and output instructions.

        :param input_text: the student-provided text to validate
        :return: complete prompt string
        """
        prompt = f"""
        {self.initial_prompt()}

        **Input text provided by the Student:**
        {input_text}

        {self.validator_instructions}
        {self.output_instruction()}
        """

        return prompt

    # -------------------------------------------------------------------
    def validate_dataframe(self, dataframe, target_column, progress_callback=None):
        """
        Validate every row of ``dataframe[target_column]`` with the LLM.

        Writes the result ("True"/"False") into a new "valid" column and
        respects a ~200k tokens/minute budget by sleeping when needed.

        :param dataframe: input DataFrame (modified in place and returned)
        :param target_column: column holding the text to validate
        :param progress_callback: optional fn(progress, total) for UI updates
        :return: the updated dataframe
        """
        dataframe["valid"] = None
        start_time = time.time()
        total_users = len(dataframe)

        for progress, (idx, row) in enumerate(tqdm(dataframe.iterrows(), desc="generating prompts")):

            if progress_callback is not None:
                progress_callback(progress, total_users)
            input_text = row[target_column]
            prompt = self.create_validation_prompt(input_text)
            response = self.get_llm_response(prompt)
            dataframe.at[idx, "valid"] = response["valid"]

            delta = time.time() - start_time

            # BUG FIX: the original tested `delta >= 60`, which never sleeps
            # while still inside the one-minute window; we must pause exactly
            # when the token budget was burned in under a minute.
            if self.temp_token_counter > 195000 and delta < 60:  # safe margin under 200k/min
                print("Sleeping for 60 seconds to respect the token limit...")
                self.temp_token_counter = 0
                time.sleep(60)  # Sleep for a minute before making new requests
                start_time = time.time()

        return dataframe

    # -------------------------------------------------------------------
    def validate_text(self, text):
        """
        Validate a single text with the LLM.

        :param text: the text to validate
        :return: "True" or "False"
        """
        prompt = self.create_validation_prompt(text)
        response = self.get_llm_response(prompt)
        return response["valid"]

    # -------------------------------------------------------------------
    def fire_wall(self):
        """
        Provide explicit instructions to ensure that sensitive or inappropriate information is identified in the text.
        :return: string
        """
        fire_wall = """
        As a content moderator, please review the text and ensure it does not contain any of the following:

        **Disallowed Content Categories:**

        1. **Sensitive Personal Information**: personal data such as phone numbers, email addresses, or other identifying information.

        2. **Offensive or Discriminatory Language**: Hate speech, harassment, bullying, or any derogatory remarks targeting individuals or groups based on race, ethnicity, nationality, religion, gender, sexual orientation, age, disability, or any other characteristic.

        3. **Sensitive Topics**: Content that discusses or promotes extremist views, political propaganda, or divisive religious beliefs in a manner that could incite hostility.

        4. **Removed or Restricted Content**: Mentions of songs, media, or features that have been removed or are restricted on our platform.

        5. **Technical Issues or Bugs**: Any references to glitches, errors, crashes, or other technical problems experienced on the platform.

        6. ** Language that is excessively angry, aggressive, or includes profanity or vulgar expressions. **

        7. **Privacy Violations**: Sharing of confidential information or content that infringes on someone's privacy rights.

        8. **Intellectual Property Violations**: Unauthorized use or distribution of copyrighted material.

        9. **Defamation**: False statements presented as facts that harm the reputation of an individual or organization.

        **Examples of Invalid Content:**

        - "This app is useless and the developers are idiots!"
        - "They removed my favorite song; it sucks"
        - "People who follow [specific religion] are all wrong and should be banned."

        If the text contains any of the above issues, please flag it as invalid.

        """
        return fire_wall

    # -------------------------------------------------------------------
    def default_valid_text(self):
        """
        Provide explicit instructions to ensure that the text is appropriate and meets the content guidelines.
        :return: string
        """
        valid_text = """

        **Allowed Content Criteria:**

        1. **Positive Sentiment**: The text should be encouraging, uplifting, or convey a positive emotion.

        2. **Constructive and Helpful**: Provides valuable insights, advice, or shares personal experiences that could
        benefit others. This can be sharing struggling in practices, challenges or other type of difficulties that might need our attention.

        3. **Respectful Language**: Uses polite and appropriate language, fostering a friendly and inclusive community environment.

        **Examples of Valid Content:**

        - "I love how this app helps me discover new music every day!"
        - "Here's a tip: creating themed playlists can really enhance your listening experience."
        - "I had a great time using this feature during my commute today."
        - "This session is so challenging for me and I'm feeling so much pain in my foot, might go over the workout couple more"
        """
        return valid_text
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
|
Messaging_system/protection_layer.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
protection layer on top of the messaging system to make sure the messages are as expected.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import openai
|
| 8 |
+
from openai import OpenAI
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# -----------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
class ProtectionLayer:
|
| 16 |
+
"""
|
| 17 |
+
Protection layer to double check the generated message:
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, config_file, messaging_mode):
    """
    Initialize the protection layer.

    :param config_file: parsed messaging-system config; get_general_rules
        reads its "AI_Jargon" entry
    :param messaging_mode: messaging mode label stored for later use
        (its consumers are not visible in this part of the file)
    """
    self.config_file = config_file
    self.messaging_mode = messaging_mode

    # LLM configs
    # api_key falls back to the OPENAI_API environment variable (loaded via
    # dotenv at module import); may be None if unset.
    self.api_key = os.environ.get("OPENAI_API")  # will be set by user
    self.model = "gpt-4o-mini"  # will be set by user

    # to trace the number of tokens and estimate the cost if needed
    self.total_tokens = {
        'prompt_tokens': 0,
        'completion_tokens': 0,
    }
|
| 34 |
+
|
| 35 |
+
# --------------------------------------------------------------
|
| 36 |
+
def llm_instructions(self):
|
| 37 |
+
"""
|
| 38 |
+
Setting instructions for the LLM for the second pass.
|
| 39 |
+
"""
|
| 40 |
+
instructions = (
|
| 41 |
+
"You are a protection layer AI. Your task is to check the given JSON message "
|
| 42 |
+
"against specific rules. If it violates any rule, fix only those errors. If "
|
| 43 |
+
"it does not violate any rule, return it exactly as is. You must respond ONLY "
|
| 44 |
+
"with valid JSON in the specified structure, and no additional text."
|
| 45 |
+
)
|
| 46 |
+
return instructions
|
| 47 |
+
|
| 48 |
+
# --------------------------------------------------------------
|
| 49 |
+
def get_general_rules(self):
|
| 50 |
+
"""
|
| 51 |
+
Core rules to apply when checking or modifying the message.
|
| 52 |
+
"""
|
| 53 |
+
return f"""
|
| 54 |
+
1. No two consecutive sentences should end with exclamation points, change one of them to dot.
|
| 55 |
+
2. ONLY Capitalize the first word of the 'header' as well as names or any proper nouns. Other words in the 'header' must be lowercase. (e.g. Jump back in, David! 🥁)
|
| 56 |
+
3. If there is any grammar error in the message, you must fix it.
|
| 57 |
+
4. Always use "the" before proper nouns, including any titles of the recommended content, if the title was in the message.
|
| 58 |
+
5. Do not include any words that explicitly or implicitly reference a time-related concept (e.g., “new,” “recent,” “latest,” “upcoming,” etc.).
|
| 59 |
+
6. If the **Artist** name from the recommended content is referenced in the message, it MUST be the **FULL NAME**. If only the first name is available, ** DO NOT ** use the artist name at all.
|
| 60 |
+
7. If the message contains any AI_Jargon words (from below list) you MUST replace it with a more user-friendly synonym that makes sense.
|
| 61 |
+
AI_Jargon words are: {self.config_file["AI_Jargon"]}
|
| 62 |
+
|
| 63 |
+
8. Preserve the original JSON structure: {{"header": "...", "message": "..."}}
|
| 64 |
+
9. If no rule is violated, return the exact same JSON unchanged.
|
| 65 |
+
10. The output must be strictly valid JSON with no extra commentary or text.
|
| 66 |
+
"""
|
| 67 |
+
|
| 68 |
+
# --------------------------------------------------------------
|
| 69 |
+
def output_instruction(self):
|
| 70 |
+
"""
|
| 71 |
+
:return: output instructions as a string
|
| 72 |
+
"""
|
| 73 |
+
instructions = f"""
|
| 74 |
+
**You must output only valid JSON in the form:**
|
| 75 |
+
|
| 76 |
+
{{
|
| 77 |
+
"header": "Original header or modified version",
|
| 78 |
+
"message": "Original header or modified version"
|
| 79 |
+
}}
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
**Constraints:**
|
| 83 |
+
- The "header" must be less than 30 character.
|
| 84 |
+
- The "message" must be less than 100 character.
|
| 85 |
+
- No text is allowed outside this JSON structure.\n"
|
| 86 |
+
"""
|
| 87 |
+
|
| 88 |
+
return instructions
|
| 89 |
+
|
| 90 |
+
# --------------------------------------------------------------
|
| 91 |
+
def get_llm_response(self, prompt, max_retries=3):
|
| 92 |
+
"""
|
| 93 |
+
sending the prompt to the LLM and get back the response
|
| 94 |
+
"""
|
| 95 |
+
|
| 96 |
+
openai.api_key = self.api_key
|
| 97 |
+
instructions = self.llm_instructions()
|
| 98 |
+
client = OpenAI(api_key=self.api_key)
|
| 99 |
+
|
| 100 |
+
for attempt in range(max_retries):
|
| 101 |
+
try:
|
| 102 |
+
response = client.chat.completions.create(
|
| 103 |
+
model=self.model,
|
| 104 |
+
response_format={"type": "json_object"},
|
| 105 |
+
messages=[
|
| 106 |
+
{"role": "system", "content": instructions},
|
| 107 |
+
{"role": "user", "content": prompt}
|
| 108 |
+
],
|
| 109 |
+
max_tokens=500,
|
| 110 |
+
n=1,
|
| 111 |
+
temperature=0.5
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
tokens = {
|
| 115 |
+
'prompt_tokens': response.usage.prompt_tokens,
|
| 116 |
+
'completion_tokens': response.usage.completion_tokens,
|
| 117 |
+
'total_tokens': response.usage.total_tokens
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
try:
|
| 121 |
+
content = response.choices[0].message.content
|
| 122 |
+
# Extract JSON code block
|
| 123 |
+
|
| 124 |
+
output = json.loads(content)
|
| 125 |
+
# output = json.loads(response.choices[0].message.content)
|
| 126 |
+
|
| 127 |
+
if 'message' not in output or 'header' not in output:
|
| 128 |
+
print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
|
| 129 |
+
continue # Continue to next attempt
|
| 130 |
+
|
| 131 |
+
else:
|
| 132 |
+
if len(output["header"].strip()) > self.config_file["header_limit"] or len(output["message"].strip()) > self.config_file["message_limit"]:
|
| 133 |
+
print(f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
|
| 134 |
+
continue
|
| 135 |
+
|
| 136 |
+
# validating the JSON
|
| 137 |
+
self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
|
| 138 |
+
self.total_tokens['completion_tokens'] += tokens['completion_tokens']
|
| 139 |
+
return output
|
| 140 |
+
|
| 141 |
+
except json.JSONDecodeError:
|
| 142 |
+
print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
|
| 143 |
+
|
| 144 |
+
except openai.APIConnectionError as e:
|
| 145 |
+
print("The server could not be reached")
|
| 146 |
+
print(e.__cause__) # an underlying Exception, likely raised within httpx.
|
| 147 |
+
except openai.RateLimitError as e:
|
| 148 |
+
print("A 429 status code was received; we should back off a bit.")
|
| 149 |
+
except openai.APIStatusError as e:
|
| 150 |
+
print("Another non-200-range status code was received")
|
| 151 |
+
print(e.status_code)
|
| 152 |
+
print(e.response)
|
| 153 |
+
|
| 154 |
+
print("Max retries exceeded. Returning empty response.")
|
| 155 |
+
return [], {}
|
| 156 |
+
|
| 157 |
+
# --------------------------------------------------------------
|
| 158 |
+
def get_context(self):
|
| 159 |
+
"""
|
| 160 |
+
context for the LLM
|
| 161 |
+
:return: the context string
|
| 162 |
+
"""
|
| 163 |
+
context = (
|
| 164 |
+
"We created a personalized message for a user "
|
| 165 |
+
"considering the provided information. Your task is to double-check "
|
| 166 |
+
"the message and correct or improve the output, according to instructions."
|
| 167 |
+
)
|
| 168 |
+
return context
|
| 169 |
+
|
| 170 |
+
# --------------------------------------------------------------
|
| 171 |
+
def generate_prompt(self, message, user):
|
| 172 |
+
"""
|
| 173 |
+
generating the prompt for criticizing
|
| 174 |
+
:param query: input query
|
| 175 |
+
:param message: llm response
|
| 176 |
+
:return: new prompt
|
| 177 |
+
"""
|
| 178 |
+
recommended_content = ""
|
| 179 |
+
if self.messaging_mode == "recsys_result":
|
| 180 |
+
recommended_content = f"""
|
| 181 |
+
### ** Recommended Content **
|
| 182 |
+
{user['recommendation_info']}
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
+
prompt = f"""
|
| 186 |
+
|
| 187 |
+
### System Instruction:
|
| 188 |
+
{self.llm_instructions()}
|
| 189 |
+
|
| 190 |
+
### Context:
|
| 191 |
+
We created a personalized message for a user based on available information.
|
| 192 |
+
Your job is to check the message and correct only if it violates rules. Otherwise, leave it unchanged.
|
| 193 |
+
|
| 194 |
+
### Original JSON Message:
|
| 195 |
+
{message}
|
| 196 |
+
|
| 197 |
+
{recommended_content}
|
| 198 |
+
|
| 199 |
+
### Rules:
|
| 200 |
+
{self.get_general_rules()}
|
| 201 |
+
|
| 202 |
+
### Output Requirements:
|
| 203 |
+
{self.output_instruction()}
|
| 204 |
+
"""
|
| 205 |
+
return prompt
|
| 206 |
+
|
| 207 |
+
# --------------------------------------------------------------
|
| 208 |
+
def criticize(self, message, user):
|
| 209 |
+
"""
|
| 210 |
+
criticize the llm response by using additional layer of query
|
| 211 |
+
:return: updated users_df with extracted information and personalize messages.
|
| 212 |
+
"""
|
| 213 |
+
|
| 214 |
+
prompt = self.generate_prompt(message, user)
|
| 215 |
+
response = self.get_llm_response(prompt)
|
| 216 |
+
|
| 217 |
+
return response, self.total_tokens
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|
Messaging_system/sending_time.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
calculating sending time for each individual user
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
from snowflake.snowpark import Session
|
| 7 |
+
import json
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import os
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
class PersonalizedTime:
    """
    Calculate the best time to send a message for each individual user.
    """

    def calculate_sending_time(self):
        """
        Fetch each user's activity data from Snowflake.

        NOTE(review): the per-user time computation itself is not implemented
        yet — this currently only fetches the raw activity data.

        :return: pandas DataFrame of user activity, or None when the read fails.
        """
        session = self.snowflake_connection()
        users_activity = self.fetch_users_time(session)
        # Return the fetched data so callers can use it (previously discarded).
        return users_activity

    def fetch_users_time(self, session):
        """
        Fetch the user's activity data via the given Snowpark session.

        :param session: an open snowflake.snowpark.Session.
        :return: pandas DataFrame with the query result, or None on failure.
        """
        query = self.get_query()

        # Connect to Snowflake and run the query
        try:
            rows = session.sql(query).collect()
            dataframe = pd.DataFrame(rows)
            print("reading content table successfully")
            return dataframe
        except Exception as e:
            print(f"Error in reading table: {e}")

    def get_query(self):
        """
        Build the SQL query used to fetch user activity.

        NOTE(review): the query body is still an empty placeholder — fill it
        in before running against Snowflake.

        :return: the SQL query string.
        """
        query = """

        """
        # BUG FIX: the query string was never returned, so fetch_users_time()
        # received None and session.sql(None) would fail.
        return query

    def snowflake_connection(self):
        """
        Create a Snowpark session from environment credentials.

        :return: an open snowflake.snowpark.Session.
        """
        conn = {
            "user": os.getenv('snowflake_user'),
            "password": os.getenv('snowflake_password'),
            "account": os.getenv('snowflake_account'),
            "role": os.getenv('snowflake_role'),
            "database": os.getenv('snowflake_database'),
            "warehouse": os.getenv('snowflake_warehouse'),
            "schema": os.getenv('snowflake_schema'),
        }

        session = Session.builder.configs(conn).create()
        return session
README.md
CHANGED
|
@@ -12,9 +12,5 @@ short_description: 'UI for AI Messaging system '
|
|
| 12 |
license: apache-2.0
|
| 13 |
---
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 18 |
-
|
| 19 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 20 |
-
forums](https://discuss.streamlit.io).
|
|
|
|
| 12 |
license: apache-2.0
|
| 13 |
---
|
| 14 |
|
| 15 |
+
AI messaging system UI
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json, os
|
| 2 |
+
from io import StringIO
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from snowflake.snowpark import Session
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
|
| 8 |
+
from Messaging_system.Permes import Permes
|
| 9 |
+
from Messaging_system.context_validator import Validator
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 14 |
+
# Helpers
|
| 15 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 16 |
+
@st.cache_data
def load_data(buf) -> pd.DataFrame:
    """Parse the uploaded CSV buffer into a DataFrame (cached by Streamlit)."""
    frame = pd.read_csv(buf)
    return frame
|
| 19 |
+
|
| 20 |
+
def load_config_(file_path: str) -> dict:
    """Load a JSON config file.

    :param file_path: path to the JSON configuration file.
    :return: parsed configuration dict.
    """
    # Explicit encoding so parsing does not depend on the platform default.
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
|
| 23 |
+
|
| 24 |
+
def get_credential(key):
    """Look up a credential in Streamlit secrets first, then the environment."""
    secret_value = st.secrets.get(key)
    if secret_value:
        return secret_value
    return os.getenv(key)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def init_state() -> None:
    """Seed st.session_state with default values (only for keys not yet set)."""
    defaults = {
        "involve_recsys_result": False,
        "involve_last_interaction": False,
        "valid_instructions": "",
        "invalid_instructions": "",
        "messaging_type": "push",
        "generated": False,
        "include_recommendation": False,
        "data": None, "brand": None, "recsys_contents": [], "csv_output": None,
        "users_message": None, "messaging_mode": None, "target_column": None,
        "ugc_column": None, "identifier_column": None, "input_validator": None,
        "selected_input_features": None, "selected_features": None,
        "additional_instructions": None, "segment_info": "", "message_style": "",
        "sample_example": "", "CTA": "", "all_features": None, "number_of_messages": 1,
        "instructionset": {}, "segment_name": "", "number_of_samples": 20,
        "selected_source_features": [], "platform": None, "generate_clicked": False,
    }
    for key, value in defaults.items():
        # setdefault leaves any value the user already produced untouched
        st.session_state.setdefault(key, value)
|
| 48 |
+
|
| 49 |
+
# ──────────────────────────────────────────────────────────────────────────────
# PAGE CONFIG + THEME
# ──────────────────────────────────────────────────────────────────────────────
# Global page settings — Streamlit requires this to be the first st.* call.
st.set_page_config(
    page_title="Personalized Message Generator",
    page_icon="📬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Gold-on-black theme injected as raw CSS; unsafe_allow_html is required so
# Streamlit renders the <style> tag instead of escaping it.
st.markdown(
    """
    <style>
    html, body, [class*="css"] {
        background-color:#0d0d0d;
        color:#ffd700;
    }
    .stButton>button, .stDownloadButton>button {
        border-radius:8px;
        background:#ffd700;
        color:#0d0d0d;
        font-weight:600;
    }
    .stTabs [data-baseweb="tab"] {
        font-weight:600;
    }
    .stTabs [aria-selected="true"] {
        color:#ffd700;
    }
    h1, h2, h3 {color:#ffd700;}
    .small {font-size:0.85rem; opacity:0.7;}
    </style>
    """,
    unsafe_allow_html=True
)
|
| 84 |
+
|
| 85 |
+
# ──────────────────────────────────────────────────────────────────────────────
# SIDEBAR – the “control panel”
# ──────────────────────────────────────────────────────────────────────────────
# All generation parameters are collected here; widgets with a key= argument
# write their value directly into st.session_state.
init_state()
with st.sidebar:
    st.header("📂 Upload your CSV")
    uploaded_file = st.file_uploader("Choose file", type="csv")
    if uploaded_file:
        # load_data is cached, so re-runs with the same file are cheap
        st.session_state.data = load_data(uploaded_file)
        st.success("File loaded!")

    st.markdown("---")

    # Parameter widgets only appear once a CSV has been loaded
    if st.session_state.data is not None:
        # ─ Identifier
        id_col = st.selectbox(
            "Identifier column",
            st.session_state.data.columns,
            key="identifier_column"
        )

        # ─ Brand
        st.selectbox(
            "Brand",
            ["drumeo", "pianote", "guitareo", "singeo"],
            key="brand"
        )

        # ─ Personalisation (fields marked * are validated before generation)
        st.text_area("Segment info *", key="segment_info")
        st.text_area("CTA *", key="CTA")
        with st.expander("🔧 Optional tone & examples"):
            st.text_area("Message style", key="message_style",
                         placeholder="Be kind and friendly…")
            st.text_area("Additional instructions", key="additional_instructions",
                         placeholder="e.g. Mention the number weeks since their last practice")
            st.text_area("Sample example", key="sample_example",
                         placeholder="Hello! We have crafted…")
            st.number_input("Number of samples", 1, 100, 20,
                            key="number_of_samples")

        # ─ Sequential messages
        st.number_input("Sequential messages / user", 1, 10, 1,
                        key="number_of_messages")
        st.text_input("Segment name", key="segment_name",
                      placeholder="no_recent_activity")
        if st.session_state.number_of_messages > 1:
            # One free-text instruction per message in the sequence
            st.caption("Additional per-message instructions")
            for i in range(1, st.session_state.number_of_messages + 1):
                st.text_input(f"Message {i} instruction",
                              key=f"instr_{i}")

        # ─ Source feature selection
        st.multiselect(
            "Source features",
            ["instrument", "weeks_since_last_interaction",
             "birthday_reminder"],
            default=["instrument"],
            key="selected_source_features"
        )

        # ─ Rec-sys
        st.checkbox("Include content recommendation", key="include_recommendation")
        if st.session_state.include_recommendation:
            st.multiselect(
                "Recommendation types",
                ["song", "workout", "quick_tips", "course"],
                key="recsys_contents"
            )

    st.markdown("---")
    # The button value is True only on the run triggered by the click
    generate = st.button("🚀 Generate messages")
    st.session_state["generate_clicked"] = generate
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ──────────────────────────────────────────────────────────────────────────────
# MAIN AREA – three tabs
# ──────────────────────────────────────────────────────────────────────────────
tab0, tab1, tab2 = st.tabs(
    ["📊 Data preview", "🛠️ Configure", "📨 Results"])

# ------------------------------------------------------------------ TAB 0 ---#
with tab0:
    st.header("📊 Data preview")
    if st.session_state.data is not None:
        # Cap the preview so huge CSVs don't stall the UI
        st.dataframe(st.session_state.data.head(100))
    else:
        st.info("Upload a CSV to preview it here.")

# ------------------------------------------------------------------ TAB 1 ---#
with tab1:
    st.header("🛠️ Configure & launch")
    if st.session_state.data is None:
        st.warning("Upload a CSV first ⬅")
    elif not generate:
        st.info("Adjust settings in the sidebar, then hit *Generate*.")
    else:
        st.success("Parameters captured – see **Results** tab.")

# ------------------------------------------------------------------ TAB 2 ---#
with tab2:
    st.header("📨 Generated messages")
    # Run generation only once per click ("generated" latches until reset)
    if st.session_state.generate_clicked and not st.session_state.generated:

        # ─ simple validation of the mandatory fields
        if not st.session_state.CTA.strip() or not st.session_state.segment_info.strip():
            st.error("CTA and Segment info are mandatory 🚫")
            st.stop()

        # ─ build Snowflake session from secrets/env credentials
        conn = dict(
            user=get_credential("snowflake_user"),
            password=get_credential("snowflake_password"),
            account=get_credential("snowflake_account"),
            role=get_credential("snowflake_role"),
            database=get_credential("snowflake_database"),
            warehouse=get_credential("snowflake_warehouse"),
            schema=get_credential("snowflake_schema")
        )
        config = load_config_("Config_files/message_system_config.json")
        session = Session.builder.configs(conn).create()

        # ─ prepare parameters
        st.session_state.messaging_mode = (
            "recsys_result" if st.session_state.include_recommendation
            else "message"
        )
        st.session_state.involve_recsys_result = st.session_state.include_recommendation
        # Collect only the non-empty per-message instructions
        st.session_state.instructionset = {
            i: st.session_state.get(f"instr_{i}")
            for i in range(1, st.session_state.number_of_messages + 1)
            if st.session_state.get(f"instr_{i}", "").strip()
        }

        # ─ progress callback wired into the generator
        prog = st.progress(0)
        status = st.empty()

        def cb(done, total):
            # Called by Permes as users are processed; updates the progress bar.
            pct = int(done / total * 100)
            prog.progress(pct)
            status.write(f"{pct}%")

        permes = Permes()
        df_msg = permes.create_personalize_messages(
            session=session,
            users=st.session_state.data,
            brand=st.session_state.brand,
            config_file=config,
            openai_api_key=get_credential("OPENAI_API"),
            CTA=st.session_state.CTA,
            segment_info=st.session_state.segment_info,
            number_of_samples=st.session_state.number_of_samples,
            message_style=st.session_state.message_style,
            sample_example=st.session_state.sample_example,
            selected_input_features=st.session_state.selected_features,
            selected_source_features=st.session_state.selected_source_features,
            additional_instructions=st.session_state.additional_instructions,
            platform=st.session_state.messaging_type,
            involve_recsys_result=st.session_state.involve_recsys_result,
            messaging_mode=st.session_state.messaging_mode,
            identifier_column=st.session_state.identifier_column,
            target_column=st.session_state.target_column,
            recsys_contents=st.session_state.recsys_contents,
            progress_callback=cb,
            number_of_messages=st.session_state.number_of_messages,
            instructionset=st.session_state.instructionset,
            segment_name=st.session_state.segment_name
        )

        # ─ cache output so later reruns can show results without regenerating
        st.session_state.users_message = df_msg
        st.session_state.csv_output = df_msg.to_csv(
            index=False, encoding="utf-8-sig")
        st.session_state.generated = True
        prog.empty(); status.empty()
        st.balloons()

    # -------- show results (if any)
    if st.session_state.generated:
        df = st.session_state.users_message
        id_col = st.session_state.identifier_column

        # expandable per-user cards
        # NOTE(review): row[id_col.lower()] assumes the result DataFrame has
        # lowercased column names — confirm against Permes' output schema.
        for i, (_, row) in enumerate(df.iterrows(), 1):
            with st.expander(f"{i}. User ID: {row[id_col.lower()]}", expanded=(i == 1)):
                st.write("##### 👤 Features")
                feats = st.session_state.selected_source_features
                cols = st.columns(3)
                for idx, f in enumerate(feats):
                    cols[idx % 3].markdown(f"**{f}**: {row.get(f, '—')}")

                st.markdown("---")
                st.write("##### ✉️ Messages")
                try:
                    # "message" holds JSON: either a list of messages or a dict
                    # with a "messages_sequence" list.
                    blob = json.loads(row["message"])
                    seq = (blob.get("messages_sequence", blob)
                           if isinstance(blob, dict) else blob)

                    for j, msg in enumerate(seq, 1):
                        st.markdown(f"**{j}. {msg.get('header', '(no header)')}**")
                        thumb = (msg.get("thumbnail_url")  # per-message
                                 or row.get("thumbnail_url"))  # per-user fallback
                        if thumb:
                            st.image(thumb, width=150)
                        # ---------------------------------------------------------

                        st.markdown(msg.get("message", ""))
                        st.markdown(f"[Read more]({msg.get('web_url_path', '#')})")
                        st.markdown("---")

                except Exception as e:
                    st.error(f"Failed to parse JSON: {e}")
|
| 300 |
+
|
local_llm/LocalLM.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import time
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import ollama
|
| 6 |
+
|
| 7 |
+
class LocalLM:
    """
    Thin wrapper around a local Ollama model that returns validated JSON.
    """

    def __init__(self, model):
        """
        :param model: name of the Ollama model to query.
        """
        # Initialize the Ollama client
        self.client = ollama.Client()
        self.model = model

    def preprocess_and_parse_json(self, response):
        """
        Strip an optional ```json fenced block and parse the result.

        :param response: raw text returned by the model.
        :return: parsed JSON object, or None when parsing fails.
        """
        cleaned_response = response.strip()
        # BUG FIX: the original only assigned cleaned_response inside the
        # fenced-block branch, so a plain (unfenced) JSON reply raised
        # NameError instead of being parsed.
        if cleaned_response.startswith('```json') and cleaned_response.endswith('```'):
            cleaned_response = cleaned_response[len('```json'):-len('```')].strip()

        # Parse the cleaned response into a JSON object
        try:
            return json.loads(cleaned_response)
        except json.JSONDecodeError as e:
            print(f"Failed to parse JSON: {e}")
            return None

    def get_llm_response(self, prompt, mode, max_retries=10):
        """
        Send the prompt to the LLM and get back the response.
        Includes handling for GPU memory issues by clearing cache and waiting before retry.

        :param prompt: prompt string for the model.
        :param mode: validation mode; only "rating" is supported (the parsed
                     dict must map integer-like keys to integer-like values).
        :param max_retries: maximum number of attempts before giving up.
        :return: (parsed output, token-count dict); ([], {}) after exhausting
                 retries, (None, tokens) for an invalid mode.
        """
        for attempt in range(max_retries):
            try:
                # Try generating the response
                response = self.client.generate(model=self.model, prompt=prompt)
            except Exception as e:
                # This catches errors like the connection being forcibly closed
                print(f"Error on attempt {attempt + 1}: {e}.")
                try:
                    # Clear GPU cache if you're using PyTorch; this may help free up memory
                    torch.cuda.empty_cache()
                    print("Cleared GPU cache.")
                except Exception as cache_err:
                    print("Failed to clear GPU cache:", cache_err)
                # Wait a bit before retrying to allow memory to recover
                time.sleep(2)
                continue

            try:
                # Ollama does not report token usage here; keep the shape the
                # callers expect with zeroed counts.
                tokens = {
                    'prompt_tokens': 0,
                    'completion_tokens': 0,
                    'total_tokens': 0
                }

                try:
                    output = self.preprocess_and_parse_json(response.response)
                    if output is None:
                        continue

                    if mode == "rating":
                        # Check if all keys and values are integers (or convertible to integers)
                        all_int = True
                        for k, v in output.items():
                            try:
                                int(k)
                                int(v)
                            except ValueError:
                                all_int = False
                                break
                        if all_int:
                            return output, tokens
                        else:
                            print(f"Keys and values are not integers on attempt {attempt + 1}. Retrying...")
                            continue  # Continue to next attempt
                    else:
                        print(f"Invalid mode: {mode}")
                        return None, tokens

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
            except Exception as parse_error:
                print("Error processing output:", parse_error)

        print("Max retries exceeded. Returning empty response.")
        return [], {}
|
requirements.txt
ADDED
|
Binary file (6.39 kB). View file
|
|
|