Commit ·
d0e3307
1
Parent(s): 6b1529a
Adding files and directories
Browse files- System works with CSV, we still need CIO and snowflake integration
- .dockerignore +9 -0
- .gitignore +9 -0
- .idea/.gitignore +8 -0
- .idea/inspectionProfiles/Project_Default.xml +188 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- CIO/CIO_integration_Python.py +146 -0
- Config_files/message_system_config.json +24 -0
- Dockerfile +23 -0
- Messaging_system/CoreConfig.py +270 -0
- Messaging_system/DataCollector.py +182 -0
- Messaging_system/LLMR.py +386 -0
- Messaging_system/Message_generator.py +258 -0
- Messaging_system/MultiMessage.py +324 -0
- Messaging_system/Permes.py +160 -0
- Messaging_system/PromptGenerator.py +434 -0
- Messaging_system/SnowFlakeConnection.py +237 -0
- Messaging_system/StoreLayer.py +0 -0
- Messaging_system/context_validator.py +302 -0
- Messaging_system/protection_layer.py +220 -0
- Messaging_system/sending_time.py +69 -0
- README.md +1 -5
- app.py +300 -0
- local_llm/LocalLM.py +92 -0
- requirements.txt +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore the .streamlit directory and its contents
|
| 2 |
+
Config_files/mysql_credentials.json
|
| 3 |
+
Config_files/secrets.json
|
| 4 |
+
Config_files/snowflake_credentials_Danial.json
|
| 5 |
+
.streamlit/secrets.toml
|
| 6 |
+
|
| 7 |
+
# Ignore the .env file
|
| 8 |
+
Analysis/.env
|
| 9 |
+
.env
|
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore the .streamlit directory and its contents
|
| 2 |
+
Config_files/mysql_credentials.json
|
| 3 |
+
Config_files/secrets.json
|
| 4 |
+
Config_files/snowflake_credentials_Danial.json
|
| 5 |
+
.streamlit/secrets.toml
|
| 6 |
+
|
| 7 |
+
# Ignore the .env file
|
| 8 |
+
Analysis/.env
|
| 9 |
+
.env
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
|
| 5 |
+
<option name="ignoredPackages">
|
| 6 |
+
<value>
|
| 7 |
+
<list size="153">
|
| 8 |
+
<item index="0" class="java.lang.String" itemvalue="urllib3" />
|
| 9 |
+
<item index="1" class="java.lang.String" itemvalue="tokenizers" />
|
| 10 |
+
<item index="2" class="java.lang.String" itemvalue="transformers" />
|
| 11 |
+
<item index="3" class="java.lang.String" itemvalue="huggingface-hub" />
|
| 12 |
+
<item index="4" class="java.lang.String" itemvalue="safetensors" />
|
| 13 |
+
<item index="5" class="java.lang.String" itemvalue="pandas" />
|
| 14 |
+
<item index="6" class="java.lang.String" itemvalue="protobuf" />
|
| 15 |
+
<item index="7" class="java.lang.String" itemvalue="httpx" />
|
| 16 |
+
<item index="8" class="java.lang.String" itemvalue="openai" />
|
| 17 |
+
<item index="9" class="java.lang.String" itemvalue="anyio" />
|
| 18 |
+
<item index="10" class="java.lang.String" itemvalue="h11" />
|
| 19 |
+
<item index="11" class="java.lang.String" itemvalue="httpcore" />
|
| 20 |
+
<item index="12" class="java.lang.String" itemvalue="tangled-up-in-unicode" />
|
| 21 |
+
<item index="13" class="java.lang.String" itemvalue="numba" />
|
| 22 |
+
<item index="14" class="java.lang.String" itemvalue="Babel" />
|
| 23 |
+
<item index="15" class="java.lang.String" itemvalue="PyYAML" />
|
| 24 |
+
<item index="16" class="java.lang.String" itemvalue="pickleshare" />
|
| 25 |
+
<item index="17" class="java.lang.String" itemvalue="defusedxml" />
|
| 26 |
+
<item index="18" class="java.lang.String" itemvalue="executing" />
|
| 27 |
+
<item index="19" class="java.lang.String" itemvalue="pycparser" />
|
| 28 |
+
<item index="20" class="java.lang.String" itemvalue="torchvision" />
|
| 29 |
+
<item index="21" class="java.lang.String" itemvalue="patsy" />
|
| 30 |
+
<item index="22" class="java.lang.String" itemvalue="ipython-genutils" />
|
| 31 |
+
<item index="23" class="java.lang.String" itemvalue="Pygments" />
|
| 32 |
+
<item index="24" class="java.lang.String" itemvalue="bleach" />
|
| 33 |
+
<item index="25" class="java.lang.String" itemvalue="jupyter_server_terminals" />
|
| 34 |
+
<item index="26" class="java.lang.String" itemvalue="soupsieve" />
|
| 35 |
+
<item index="27" class="java.lang.String" itemvalue="torchaudio" />
|
| 36 |
+
<item index="28" class="java.lang.String" itemvalue="jsonschema" />
|
| 37 |
+
<item index="29" class="java.lang.String" itemvalue="pywin32" />
|
| 38 |
+
<item index="30" class="java.lang.String" itemvalue="qtconsole" />
|
| 39 |
+
<item index="31" class="java.lang.String" itemvalue="terminado" />
|
| 40 |
+
<item index="32" class="java.lang.String" itemvalue="comm" />
|
| 41 |
+
<item index="33" class="java.lang.String" itemvalue="pydantic" />
|
| 42 |
+
<item index="34" class="java.lang.String" itemvalue="wordcloud" />
|
| 43 |
+
<item index="35" class="java.lang.String" itemvalue="jupyterlab-pygments" />
|
| 44 |
+
<item index="36" class="java.lang.String" itemvalue="ipykernel" />
|
| 45 |
+
<item index="37" class="java.lang.String" itemvalue="nbconvert" />
|
| 46 |
+
<item index="38" class="java.lang.String" itemvalue="phik" />
|
| 47 |
+
<item index="39" class="java.lang.String" itemvalue="attrs" />
|
| 48 |
+
<item index="40" class="java.lang.String" itemvalue="contourpy" />
|
| 49 |
+
<item index="41" class="java.lang.String" itemvalue="psutil" />
|
| 50 |
+
<item index="42" class="java.lang.String" itemvalue="jedi" />
|
| 51 |
+
<item index="43" class="java.lang.String" itemvalue="jupyter_server" />
|
| 52 |
+
<item index="44" class="java.lang.String" itemvalue="pure-eval" />
|
| 53 |
+
<item index="45" class="java.lang.String" itemvalue="regex" />
|
| 54 |
+
<item index="46" class="java.lang.String" itemvalue="asttokens" />
|
| 55 |
+
<item index="47" class="java.lang.String" itemvalue="platformdirs" />
|
| 56 |
+
<item index="48" class="java.lang.String" itemvalue="matplotlib" />
|
| 57 |
+
<item index="49" class="java.lang.String" itemvalue="idna" />
|
| 58 |
+
<item index="50" class="java.lang.String" itemvalue="referencing" />
|
| 59 |
+
<item index="51" class="java.lang.String" itemvalue="decorator" />
|
| 60 |
+
<item index="52" class="java.lang.String" itemvalue="networkx" />
|
| 61 |
+
<item index="53" class="java.lang.String" itemvalue="pandas-profiling" />
|
| 62 |
+
<item index="54" class="java.lang.String" itemvalue="json5" />
|
| 63 |
+
<item index="55" class="java.lang.String" itemvalue="cffi" />
|
| 64 |
+
<item index="56" class="java.lang.String" itemvalue="pandocfilters" />
|
| 65 |
+
<item index="57" class="java.lang.String" itemvalue="numpy" />
|
| 66 |
+
<item index="58" class="java.lang.String" itemvalue="jupyter-events" />
|
| 67 |
+
<item index="59" class="java.lang.String" itemvalue="sniffio" />
|
| 68 |
+
<item index="60" class="java.lang.String" itemvalue="websocket-client" />
|
| 69 |
+
<item index="61" class="java.lang.String" itemvalue="exceptiongroup" />
|
| 70 |
+
<item index="62" class="java.lang.String" itemvalue="jupyter" />
|
| 71 |
+
<item index="63" class="java.lang.String" itemvalue="seaborn" />
|
| 72 |
+
<item index="64" class="java.lang.String" itemvalue="stack-data" />
|
| 73 |
+
<item index="65" class="java.lang.String" itemvalue="multimethod" />
|
| 74 |
+
<item index="66" class="java.lang.String" itemvalue="PyWavelets" />
|
| 75 |
+
<item index="67" class="java.lang.String" itemvalue="zipp" />
|
| 76 |
+
<item index="68" class="java.lang.String" itemvalue="nest-asyncio" />
|
| 77 |
+
<item index="69" class="java.lang.String" itemvalue="prompt-toolkit" />
|
| 78 |
+
<item index="70" class="java.lang.String" itemvalue="visions" />
|
| 79 |
+
<item index="71" class="java.lang.String" itemvalue="ipywidgets" />
|
| 80 |
+
<item index="72" class="java.lang.String" itemvalue="scipy" />
|
| 81 |
+
<item index="73" class="java.lang.String" itemvalue="tornado" />
|
| 82 |
+
<item index="74" class="java.lang.String" itemvalue="ydata-profiling" />
|
| 83 |
+
<item index="75" class="java.lang.String" itemvalue="jsonpointer" />
|
| 84 |
+
<item index="76" class="java.lang.String" itemvalue="Send2Trash" />
|
| 85 |
+
<item index="77" class="java.lang.String" itemvalue="torch" />
|
| 86 |
+
<item index="78" class="java.lang.String" itemvalue="overrides" />
|
| 87 |
+
<item index="79" class="java.lang.String" itemvalue="mistune" />
|
| 88 |
+
<item index="80" class="java.lang.String" itemvalue="importlib-resources" />
|
| 89 |
+
<item index="81" class="java.lang.String" itemvalue="mpmath" />
|
| 90 |
+
<item index="82" class="java.lang.String" itemvalue="jupyter-console" />
|
| 91 |
+
<item index="83" class="java.lang.String" itemvalue="typing_extensions" />
|
| 92 |
+
<item index="84" class="java.lang.String" itemvalue="debugpy" />
|
| 93 |
+
<item index="85" class="java.lang.String" itemvalue="statsmodels" />
|
| 94 |
+
<item index="86" class="java.lang.String" itemvalue="argon2-cffi" />
|
| 95 |
+
<item index="87" class="java.lang.String" itemvalue="pytz" />
|
| 96 |
+
<item index="88" class="java.lang.String" itemvalue="dacite" />
|
| 97 |
+
<item index="89" class="java.lang.String" itemvalue="webencodings" />
|
| 98 |
+
<item index="90" class="java.lang.String" itemvalue="Pillow" />
|
| 99 |
+
<item index="91" class="java.lang.String" itemvalue="notebook_shim" />
|
| 100 |
+
<item index="92" class="java.lang.String" itemvalue="tiktoken" />
|
| 101 |
+
<item index="93" class="java.lang.String" itemvalue="traitlets" />
|
| 102 |
+
<item index="94" class="java.lang.String" itemvalue="pywinpty" />
|
| 103 |
+
<item index="95" class="java.lang.String" itemvalue="rfc3339-validator" />
|
| 104 |
+
<item index="96" class="java.lang.String" itemvalue="joblib" />
|
| 105 |
+
<item index="97" class="java.lang.String" itemvalue="arrow" />
|
| 106 |
+
<item index="98" class="java.lang.String" itemvalue="python-dateutil" />
|
| 107 |
+
<item index="99" class="java.lang.String" itemvalue="nbclient" />
|
| 108 |
+
<item index="100" class="java.lang.String" itemvalue="QtPy" />
|
| 109 |
+
<item index="101" class="java.lang.String" itemvalue="cycler" />
|
| 110 |
+
<item index="102" class="java.lang.String" itemvalue="MarkupSafe" />
|
| 111 |
+
<item index="103" class="java.lang.String" itemvalue="tinycss2" />
|
| 112 |
+
<item index="104" class="java.lang.String" itemvalue="mkl" />
|
| 113 |
+
<item index="105" class="java.lang.String" itemvalue="fsspec" />
|
| 114 |
+
<item index="106" class="java.lang.String" itemvalue="python-json-logger" />
|
| 115 |
+
<item index="107" class="java.lang.String" itemvalue="filelock" />
|
| 116 |
+
<item index="108" class="java.lang.String" itemvalue="jupyterlab-widgets" />
|
| 117 |
+
<item index="109" class="java.lang.String" itemvalue="pyzmq" />
|
| 118 |
+
<item index="110" class="java.lang.String" itemvalue="certifi" />
|
| 119 |
+
<item index="111" class="java.lang.String" itemvalue="pyparsing" />
|
| 120 |
+
<item index="112" class="java.lang.String" itemvalue="sympy" />
|
| 121 |
+
<item index="113" class="java.lang.String" itemvalue="notebook" />
|
| 122 |
+
<item index="114" class="java.lang.String" itemvalue="isoduration" />
|
| 123 |
+
<item index="115" class="java.lang.String" itemvalue="jupyter-lsp" />
|
| 124 |
+
<item index="116" class="java.lang.String" itemvalue="fqdn" />
|
| 125 |
+
<item index="117" class="java.lang.String" itemvalue="jupyter_client" />
|
| 126 |
+
<item index="118" class="java.lang.String" itemvalue="kiwisolver" />
|
| 127 |
+
<item index="119" class="java.lang.String" itemvalue="jupyterlab_server" />
|
| 128 |
+
<item index="120" class="java.lang.String" itemvalue="fonttools" />
|
| 129 |
+
<item index="121" class="java.lang.String" itemvalue="backcall" />
|
| 130 |
+
<item index="122" class="java.lang.String" itemvalue="tbb" />
|
| 131 |
+
<item index="123" class="java.lang.String" itemvalue="widgetsnbextension" />
|
| 132 |
+
<item index="124" class="java.lang.String" itemvalue="argon2-cffi-bindings" />
|
| 133 |
+
<item index="125" class="java.lang.String" itemvalue="distro" />
|
| 134 |
+
<item index="126" class="java.lang.String" itemvalue="matplotlib-inline" />
|
| 135 |
+
<item index="127" class="java.lang.String" itemvalue="webcolors" />
|
| 136 |
+
<item index="128" class="java.lang.String" itemvalue="more-itertools" />
|
| 137 |
+
<item index="129" class="java.lang.String" itemvalue="wcwidth" />
|
| 138 |
+
<item index="130" class="java.lang.String" itemvalue="llvmlite" />
|
| 139 |
+
<item index="131" class="java.lang.String" itemvalue="jupyter_core" />
|
| 140 |
+
<item index="132" class="java.lang.String" itemvalue="importlib-metadata" />
|
| 141 |
+
<item index="133" class="java.lang.String" itemvalue="Jinja2" />
|
| 142 |
+
<item index="134" class="java.lang.String" itemvalue="rfc3986-validator" />
|
| 143 |
+
<item index="135" class="java.lang.String" itemvalue="typeguard" />
|
| 144 |
+
<item index="136" class="java.lang.String" itemvalue="jsonschema-specifications" />
|
| 145 |
+
<item index="137" class="java.lang.String" itemvalue="rpds-py" />
|
| 146 |
+
<item index="138" class="java.lang.String" itemvalue="uri-template" />
|
| 147 |
+
<item index="139" class="java.lang.String" itemvalue="tomli" />
|
| 148 |
+
<item index="140" class="java.lang.String" itemvalue="jupyterlab" />
|
| 149 |
+
<item index="141" class="java.lang.String" itemvalue="parso" />
|
| 150 |
+
<item index="142" class="java.lang.String" itemvalue="intel-openmp" />
|
| 151 |
+
<item index="143" class="java.lang.String" itemvalue="nbformat" />
|
| 152 |
+
<item index="144" class="java.lang.String" itemvalue="tzdata" />
|
| 153 |
+
<item index="145" class="java.lang.String" itemvalue="ipython" />
|
| 154 |
+
<item index="146" class="java.lang.String" itemvalue="packaging" />
|
| 155 |
+
<item index="147" class="java.lang.String" itemvalue="fastjsonschema" />
|
| 156 |
+
<item index="148" class="java.lang.String" itemvalue="prometheus-client" />
|
| 157 |
+
<item index="149" class="java.lang.String" itemvalue="tqdm" />
|
| 158 |
+
<item index="150" class="java.lang.String" itemvalue="colorama" />
|
| 159 |
+
<item index="151" class="java.lang.String" itemvalue="async-lru" />
|
| 160 |
+
<item index="152" class="java.lang.String" itemvalue="ImageHash" />
|
| 161 |
+
</list>
|
| 162 |
+
</value>
|
| 163 |
+
</option>
|
| 164 |
+
</inspection_tool>
|
| 165 |
+
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 166 |
+
<option name="ignoredErrors">
|
| 167 |
+
<list>
|
| 168 |
+
<option value="N803" />
|
| 169 |
+
<option value="N806" />
|
| 170 |
+
</list>
|
| 171 |
+
</option>
|
| 172 |
+
</inspection_tool>
|
| 173 |
+
<inspection_tool class="PyStubPackagesAdvertiser" enabled="true" level="WARNING" enabled_by_default="true">
|
| 174 |
+
<option name="ignoredPackages">
|
| 175 |
+
<list>
|
| 176 |
+
<option value="pyspark-stubs==3.0.0.post3" />
|
| 177 |
+
</list>
|
| 178 |
+
</option>
|
| 179 |
+
</inspection_tool>
|
| 180 |
+
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
| 181 |
+
<option name="ignoredIdentifiers">
|
| 182 |
+
<list>
|
| 183 |
+
<option value="str.__or__" />
|
| 184 |
+
</list>
|
| 185 |
+
</option>
|
| 186 |
+
</inspection_tool>
|
| 187 |
+
</profile>
|
| 188 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="Black">
|
| 4 |
+
<option name="sdkName" value="Python 3.9 (AI_Message_Generator)" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
| 7 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/AI_Message_Generator.iml" filepath="$PROJECT_DIR$/.idea/AI_Message_Generator.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
CIO/CIO_integration_Python.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import http.client
|
| 2 |
+
import json
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import logging
|
| 5 |
+
import base64
|
| 6 |
+
import requests
|
| 7 |
+
from customerio import CustomerIO, Regions
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class CustomerIOIntegration:
|
| 11 |
+
def __init__(self, site_id, api_key):
|
| 12 |
+
|
| 13 |
+
self.cio = CustomerIO(site_id=site_id, api_key=api_key)
|
| 14 |
+
logging.basicConfig(level=logging.INFO)
|
| 15 |
+
|
| 16 |
+
# Authentication
|
| 17 |
+
self.site_id = site_id
|
| 18 |
+
self.api_key = api_key
|
| 19 |
+
# Base URL for Customer.io App API endpoints (used for segments management)
|
| 20 |
+
self.base_url = "https://api.customer.io/v1"
|
| 21 |
+
|
| 22 |
+
# Create Basic Auth header
|
| 23 |
+
auth_b64 = base64.b64encode(f"{self.site_id}:{self.api_key}".encode('utf-8')).decode('utf-8')
|
| 24 |
+
self.headers = {
|
| 25 |
+
"Authorization": f"Basic {auth_b64}",
|
| 26 |
+
"Content-Type": "application/json"
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
def add_attributes(self, dataframe):
|
| 30 |
+
|
| 31 |
+
# Filter out rows without messages or cio_id
|
| 32 |
+
filtered_df = dataframe.dropna(subset=['ai_generated_message', 'email'])
|
| 33 |
+
|
| 34 |
+
for index, row in filtered_df.iterrows():
|
| 35 |
+
try:
|
| 36 |
+
self.cio.identify(id=row['email'], ai_generated_message=row['ai_generated_message'])
|
| 37 |
+
logging.info(f"Successfully updated user {row['email']} with message")
|
| 38 |
+
except Exception as e:
|
| 39 |
+
logging.error(f"Failed to update user {row['email']}: {e}")
|
| 40 |
+
|
| 41 |
+
def get_segment(self, segment_name):
|
| 42 |
+
|
| 43 |
+
# Step 1: Check if the segment exists
|
| 44 |
+
resp = requests.get(f"{self.base_url}/segments", headers=self.headers)
|
| 45 |
+
if resp.status_code != 200:
|
| 46 |
+
raise Exception(f"Error fetching segments: {resp.text}")
|
| 47 |
+
|
| 48 |
+
segments = resp.json() # assuming a list of segments is returned
|
| 49 |
+
segment_id = None
|
| 50 |
+
for seg in segments:
|
| 51 |
+
if seg.get("name") == segment_name:
|
| 52 |
+
segment_id = seg.get("id")
|
| 53 |
+
break
|
| 54 |
+
return segment_id
|
| 55 |
+
|
| 56 |
+
def update_segment_from_dataframe(self, df: pd.DataFrame,
|
| 57 |
+
segment_name: str,
|
| 58 |
+
segment_description: str) -> str:
|
| 59 |
+
"""
|
| 60 |
+
Given a pandas DataFrame, create (if needed) and update a Customer.io manual segment.
|
| 61 |
+
|
| 62 |
+
The DataFrame must contain an "email" column (used as the unique identifier) plus other columns
|
| 63 |
+
that become customer attributes.
|
| 64 |
+
|
| 65 |
+
Parameters:
|
| 66 |
+
df: DataFrame containing customer data.
|
| 67 |
+
segment_name: The name of the segment to create or update.
|
| 68 |
+
segment_description: A description for the segment (used when creating it).
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
The segment ID (as returned by the API).
|
| 72 |
+
"""
|
| 73 |
+
segment_id = self.get_segment(segment_name)
|
| 74 |
+
|
| 75 |
+
# If segment does not exist, create it
|
| 76 |
+
if segment_id is None:
|
| 77 |
+
payload = {
|
| 78 |
+
"name": segment_name,
|
| 79 |
+
"description": segment_description,
|
| 80 |
+
"type": "manual" # manual segments require that you add customers explicitly
|
| 81 |
+
}
|
| 82 |
+
resp = requests.post(f"{self.base_url}/segments", headers=self.headers, data=json.dumps(payload))
|
| 83 |
+
if resp.status_code not in (200, 201):
|
| 84 |
+
raise Exception(f"Error creating segment: {resp.text}")
|
| 85 |
+
segment = resp.json()
|
| 86 |
+
segment_id = segment.get("id")
|
| 87 |
+
print(f"Segment '{segment_name}' created with ID: {segment_id}")
|
| 88 |
+
else:
|
| 89 |
+
print(f"Segment '{segment_name}' already exists with ID: {segment_id}")
|
| 90 |
+
|
| 91 |
+
# Step 2: For each row in the DataFrame, update the customer profile.
|
| 92 |
+
# We use the "email" column as the id.
|
| 93 |
+
for index, row in df.iterrows():
|
| 94 |
+
email = row["email"]
|
| 95 |
+
# Prepare a dictionary of attributes (all columns except email)
|
| 96 |
+
attrs = row.drop("email").to_dict()
|
| 97 |
+
# Use the customer.io client to create or update the profile.
|
| 98 |
+
# Note: any keyword argument you pass becomes a custom attribute.
|
| 99 |
+
self.cio.identify(id=email, **attrs)
|
| 100 |
+
|
| 101 |
+
# Step 3: Add all customers (emails) from the DataFrame to the segment.
|
| 102 |
+
customer_ids = df["email"].tolist()
|
| 103 |
+
payload = {
|
| 104 |
+
"ids": customer_ids,
|
| 105 |
+
"id_type": "email" # since we use emails as the identifier
|
| 106 |
+
}
|
| 107 |
+
resp = requests.put(f"{self.base_url}/segments/{segment_id}", headers=self.headers, data=json.dumps(payload))
|
| 108 |
+
if resp.status_code != 200:
|
| 109 |
+
raise Exception(f"Error adding customers to segment: {resp.text}")
|
| 110 |
+
|
| 111 |
+
print(f"Successfully updated segment '{segment_name}' with {len(customer_ids)} customers.")
|
| 112 |
+
return segment_id
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def load_config_(file_path):
|
| 116 |
+
"""
|
| 117 |
+
Loads configuration JSON files from the local space. (mostly for loading the Snowflake connection parameters)
|
| 118 |
+
:param file_path: local path to the JSON file
|
| 119 |
+
:return: JSON file
|
| 120 |
+
"""
|
| 121 |
+
with open(file_path, 'r') as file:
|
| 122 |
+
return json.load(file)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# Example usage
|
| 126 |
+
if __name__ == "__main__":
|
| 127 |
+
data = pd.DataFrame({'email': ['danial@musora.com'],
|
| 128 |
+
'message': ['This is the second test message'],
|
| 129 |
+
'json_att': [{"message": "test", "url": "test"}]})
|
| 130 |
+
df = pd.DataFrame(data)
|
| 131 |
+
|
| 132 |
+
secrets_file = 'Config_files/secrets.json'
|
| 133 |
+
secrets = load_config_(secrets_file)
|
| 134 |
+
|
| 135 |
+
track_api_key = secrets["MUSORA_CUSTOMER_IO_TRACK_API_KEY"]
|
| 136 |
+
site_id = secrets["MUSORA_CUSTOMER_IO_SITE_ID"]
|
| 137 |
+
api_key = secrets["MUSORA_CUSTOMER_IO_APP_API_KEY"]
|
| 138 |
+
workspace_id = secrets["MUSORA_CUSTOMER_IO_WORKSPACE_ID"]
|
| 139 |
+
|
| 140 |
+
cio_integration = CustomerIOIntegration(api_key=track_api_key, site_id=site_id)
|
| 141 |
+
|
| 142 |
+
# Update (or create) the segment
|
| 143 |
+
segment_id = cio_integration.update_segment_from_dataframe(df,
|
| 144 |
+
segment_name="Danial_ Manual Segment _ AI",
|
| 145 |
+
segment_description="Customers imported from DataFrame")
|
| 146 |
+
print(f"Segment ID: {segment_id}")
|
Config_files/message_system_config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"user_info_features": [
|
| 3 |
+
"first_name",
|
| 4 |
+
"country",
|
| 5 |
+
"instrument",
|
| 6 |
+
"biography",
|
| 7 |
+
"birthday_reminder",
|
| 8 |
+
"topics",
|
| 9 |
+
"genres",
|
| 10 |
+
"last_completed_content"
|
| 11 |
+
],
|
| 12 |
+
"interaction_features": ["last_content_info"],
|
| 13 |
+
"check_feasibility": [
|
| 14 |
+
"first_name",
|
| 15 |
+
"biography",
|
| 16 |
+
"birthday",
|
| 17 |
+
"topics",
|
| 18 |
+
"genres"
|
| 19 |
+
],
|
| 20 |
+
"AI_Jargon": ["elevate", "enhance", "reignite", "passion", "boost", "fuel", "thrill", "revive", "spark", "performing", "fresh", "tone"],
|
| 21 |
+
"header_limit": 30,
|
| 22 |
+
"message_limit": 110
|
| 23 |
+
}
|
| 24 |
+
|
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Start from a lightweight Python image
|
| 2 |
+
FROM python:3.9
|
| 3 |
+
|
| 4 |
+
# Set environment variables for better behavior in containers
|
| 5 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 6 |
+
PIP_NO_CACHE_DIR=1 \
|
| 7 |
+
|
| 8 |
+
# Create and set the working directory
|
| 9 |
+
WORKDIR /app
|
| 10 |
+
|
| 11 |
+
# Copy requirements and install Python dependencies
|
| 12 |
+
COPY requirements.txt .
|
| 13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
+
|
| 15 |
+
# Copy the rest of the application code
|
| 16 |
+
COPY . .
|
| 17 |
+
|
| 18 |
+
# Expose the port that the application will listen on
|
| 19 |
+
EXPOSE 7860
|
| 20 |
+
|
| 21 |
+
# Run the Streamlit app
|
| 22 |
+
# Streamlit will read PORT from the environment and bind to 0.0.0.0
|
| 23 |
+
CMD streamlit run app.py --server.port=$PORT --server.headless true --server.address 0.0.0.0
|
Messaging_system/CoreConfig.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
the flow of the Program starts from create_personalized_message function
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import time
|
| 6 |
+
from Messaging_system.SnowFlakeConnection import SnowFlakeConn
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class CoreConfig:
|
| 10 |
+
"""
|
| 11 |
+
LLM-based personalized message generator:
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
def __init__(self, session, users_df, brand, platform, config_file):
|
| 15 |
+
|
| 16 |
+
self.session = session
|
| 17 |
+
self.users_df = users_df
|
| 18 |
+
self.config_file = config_file
|
| 19 |
+
self.platform = platform # valid values: [push, app]
|
| 20 |
+
self.brand = brand
|
| 21 |
+
|
| 22 |
+
# LLM configs
|
| 23 |
+
self.api_key = None # will be set by user
|
| 24 |
+
self.model = "gpt-4o-mini" # will be set by user
|
| 25 |
+
|
| 26 |
+
# will be set by user
|
| 27 |
+
self.CTA = None
|
| 28 |
+
self.message_style = None
|
| 29 |
+
self.sample_example = None
|
| 30 |
+
self.template_message = self.CTA
|
| 31 |
+
self.segment_info = None
|
| 32 |
+
self.subsequence_messages = 1
|
| 33 |
+
|
| 34 |
+
self.number_of_samples = 0
|
| 35 |
+
self.list_of_features = None
|
| 36 |
+
self.consider_last_interaction = True
|
| 37 |
+
self.additional_instructions = None
|
| 38 |
+
|
| 39 |
+
# to trace the number of tokens and estimate the cost if needed
|
| 40 |
+
self.temp_token_counter = 0
|
| 41 |
+
self.total_tokens = {
|
| 42 |
+
'prompt_tokens': 0,
|
| 43 |
+
'completion_tokens': 0,
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
# Recsys_result
|
| 47 |
+
self.recsys_result = None
|
| 48 |
+
self.recsys_contents = ["song", "workout", "course", "quick_tips"]
|
| 49 |
+
self.content_info = None
|
| 50 |
+
self.involve_recsys_result = False
|
| 51 |
+
self.popular_contents_df = None
|
| 52 |
+
|
| 53 |
+
# Additional_info
|
| 54 |
+
self.additional_info_columns = None
|
| 55 |
+
self.messaging_mode = "message"
|
| 56 |
+
self.target_content = None
|
| 57 |
+
|
| 58 |
+
self.start_time = time.time()
|
| 59 |
+
self.remaining_tokens = None
|
| 60 |
+
self.wait_time = None
|
| 61 |
+
|
| 62 |
+
# Instantiate the connection to Snowflake
|
| 63 |
+
self.SF = SnowFlakeConn(session=self.session, brand=self.brand)
|
| 64 |
+
|
| 65 |
+
# segment name
|
| 66 |
+
self.segment_name = None
|
| 67 |
+
|
| 68 |
+
# --------------------------------------------------------------
|
| 69 |
+
# --------------------------------------------------------------
|
| 70 |
+
def set_message_style(self, message_style):
    """
    Store the style template that generated messages should follow.

    :param message_style: style description / template string (may contain placeholders)
    """
    self.message_style = message_style
|
| 77 |
+
|
| 78 |
+
# --------------------------------------------------------------
|
| 79 |
+
# --------------------------------------------------------------
|
| 80 |
+
def set_involve_recsys_result(self, involve_recsys_result):
    """
    Toggle whether recsys recommendations are included in message generation.

    :param involve_recsys_result: boolean flag
    """
    self.involve_recsys_result = involve_recsys_result
|
| 82 |
+
|
| 83 |
+
# --------------------------------------------------------------
|
| 84 |
+
# --------------------------------------------------------------
|
| 85 |
+
def set_recsys_contents(self, recsys_contents):
    """
    Set the content types to include in recommendations.

    The default (set in __init__) contains all supported types.
    :param recsys_contents: list of content-type names (e.g. ["song", "workout"])
    """
    self.recsys_contents = recsys_contents
|
| 92 |
+
|
| 93 |
+
# --------------------------------------------------------------
|
| 94 |
+
# --------------------------------------------------------------
|
| 95 |
+
def set_messaging_mode(self, messaging_mode):
    """
    Set the messaging mode.

    Valid modes: [recsys_result, message(default), recommend_playlist, recommend_content].
    An invalid mode is reported on stdout and the current mode is left unchanged.
    :param messaging_mode: one of the valid mode names
    """
    valid_modes = ["recsys_result", "message", "recommend_playlist", "recommend_content"]
    if messaging_mode not in valid_modes:
        # Report and keep the previously configured mode.
        print(f"{messaging_mode} is not a valid messaging mode. available modes are: \n {valid_modes}")
        return
    self.messaging_mode = messaging_mode
|
| 106 |
+
|
| 107 |
+
# --------------------------------------------------------------
|
| 108 |
+
# --------------------------------------------------------------
|
| 109 |
+
def set_openai_api(self, openai_key):
    """
    Store the OpenAI API key used for LLM calls.

    :param openai_key: API key string
    """
    self.api_key = openai_key
|
| 116 |
+
|
| 117 |
+
# --------------------------------------------------------------
|
| 118 |
+
# --------------------------------------------------------------
|
| 119 |
+
def set_number_of_samples(self, number_of_samples):
    """
    Set how many users to sample when generating messages.

    :param number_of_samples: sample count; coerced to int
    """
    sample_count = int(number_of_samples)
    self.number_of_samples = sample_count
|
| 125 |
+
|
| 126 |
+
# --------------------------------------------------------------
|
| 127 |
+
# --------------------------------------------------------------
|
| 128 |
+
def set_sample_example(self, sample_example):
    """
    Store the example message used for one-shot prompting.

    :param sample_example: example string (may contain placeholders)
    """
    self.sample_example = sample_example
|
| 135 |
+
|
| 136 |
+
# --------------------------------------------------------------
|
| 137 |
+
# --------------------------------------------------------------
|
| 138 |
+
def set_CTA(self, CTA):
    """
    Store the call-to-action: the main goal of the message being sent.

    :param CTA: CTA string (may contain placeholders)
    """
    self.CTA = CTA
|
| 145 |
+
|
| 146 |
+
# --------------------------------------------------------------
|
| 147 |
+
# --------------------------------------------------------------
|
| 148 |
+
def set_segment_info(self, segment_info):
    """
    Store information shared by all users of the segment.

    :param segment_info: description string (may contain placeholders)
    """
    self.segment_info = segment_info
|
| 155 |
+
|
| 156 |
+
# --------------------------------------------------------------
|
| 157 |
+
# --------------------------------------------------------------
|
| 158 |
+
def set_additional_instructions(self, additional_instructions):
    """
    Store extra free-form instructions appended to the prompt.

    :param additional_instructions: instructions string (or None)
    """
    self.additional_instructions = additional_instructions
|
| 165 |
+
|
| 166 |
+
# --------------------------------------------------------------
|
| 167 |
+
# --------------------------------------------------------------
|
| 168 |
+
def set_features_to_use(self, list_of_features):
    """
    Store the list of user features to weave into the message.

    :param list_of_features: list of feature/column names
    """
    self.list_of_features = list_of_features
|
| 175 |
+
|
| 176 |
+
# --------------------------------------------------------------
|
| 177 |
+
# --------------------------------------------------------------
|
| 178 |
+
def set_target_feature(self, target_content):
    """
    Store the target content to recommend from the input data.

    :param target_content: a content_id to recommend
    """
    self.target_content = target_content
|
| 184 |
+
|
| 185 |
+
# --------------------------------------------------------------
|
| 186 |
+
# --------------------------------------------------------------
|
| 187 |
+
def set_number_of_messages(self, number_of_messages=1, instructionset=None):
    """
    Configure subsequent-message instructions.

    For a single message, subsequence_messages becomes {1: None}. For more
    than one, the provided instructionset (mapping message index -> instruction)
    is stored as-is; omitting it is an error.
    :param number_of_messages: int, how many messages to generate per user
    :param instructionset: per-message instructions, required when > 1
    :raises ValueError: if number_of_messages > 1 and no instructionset given
    """
    if number_of_messages == 1:
        self.subsequence_messages = {1: None}
        return
    if instructionset is None:
        raise ValueError("Instructionset must have instructions for each subsequence message")
    self.subsequence_messages = instructionset
|
| 202 |
+
|
| 203 |
+
# --------------------------------------------------------------
|
| 204 |
+
# --------------------------------------------------------------
|
| 205 |
+
|
| 206 |
+
def get_instrument(self):
    """
    Map the configured brand to the instrument it teaches.

    :return: instrument name (str)
    :raises KeyError: for an unknown brand
    """
    # brand -> instrument lookup
    return {
        "drumeo": "Drum",
        "pianote": "Piano",
        "guitareo": "Guitar",
        "singeo": "Vocal",
    }[self.brand]
|
| 220 |
+
|
| 221 |
+
# --------------------------------------------------------------
|
| 222 |
+
# --------------------------------------------------------------
|
| 223 |
+
def respect_request_ratio(self):
    """
    Throttle requests so the per-minute token quota is respected.

    Inside the current one-minute window, if the token counter is near the
    cap, reset the window and pause briefly. Once a full window has elapsed,
    simply start a fresh counting window. (The two conditions are mutually
    exclusive, so they are expressed as if/elif.)
    :return:
    """
    elapsed = time.time() - self.start_time

    if self.temp_token_counter > 3997000 and elapsed <= 60:  # Using a safe margin
        print("Sleeping for few seconds to respect the token limit...")
        # reset the token counter and window start
        self.temp_token_counter = 0
        self.start_time = time.time()
        time.sleep(10)  # Sleep for a while before making new requests
    elif elapsed > 60:
        # Window elapsed: reset the token counter for the new window.
        self.temp_token_counter = 0
        self.start_time = time.time()
|
| 243 |
+
|
| 244 |
+
# --------------------------------------------------------------
|
| 245 |
+
# --------------------------------------------------------------
|
| 246 |
+
|
| 247 |
+
def checkpoint(self):
    """
    Save the current progress to disk as CSV.

    NOTE(review): the output filename is hard-coded — presumably it should be
    derived from brand/segment_name; confirm before using outside the Drumeo
    "not active" segment.
    :return:
    """
    export_columns = ["user_id", "email", "first_name", "message", "additional_info", "recommendation_info"]
    snapshot = self.users_df[export_columns]
    snapshot.to_csv("drumeo_not_active_segment.csv", encoding='utf-8-sig', index=False)
|
| 256 |
+
|
| 257 |
+
# --------------------------------------------------------------
|
| 258 |
+
# --------------------------------------------------------------
|
| 259 |
+
def set_segment_name(self, segment_name):
    """
    Set the segment name used to label generated messages.

    (Docstring fixed: the original was a copy-paste of checkpoint's
    "saving the current process".)
    :param segment_name: name of the user segment (str)
    :return:
    """

    self.segment_name = segment_name
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
|
Messaging_system/DataCollector.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
setting instructions and inputs required to generate personalized messages
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class DataCollector:
    """
    Gathers and prepares the per-user data needed to generate personalized
    messages: resolves Musora user ids, samples the input users, pulls the
    supporting datasets from Snowflake, and derives helper columns on
    Core.users_df.
    """

    def __init__(self, CoreConfig):
        # Shared configuration/state object (users_df, Snowflake connection, settings).
        self.Core = CoreConfig

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def gather_data(self):
        """
        main function of the class to flow the work for gathering all the data that we need.
        :return: the Core object, whose users_df now carries the extracted information
        """

        # extract user_ids and other data
        self.extract_musora_id()

        # selecting a sample of users
        self.select_sample()
        self.fetch_data()

        # calculate the remaining days to their birthday
        self.remaining_days_to_birthday()
        self.create_columns()

        # creating additional info if applicable
        if len(self.Core.additional_info_columns) != 0:
            self.create_additional_information()

        return self.Core

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def extract_musora_id(self):
        """
        Extracts the musora user id and preserves additional columns.

        Normalizes the identification column to 'user_id' (resolving ids from
        email when that is the only identifier) and records the remaining
        columns in Core.additional_info_columns.
        :raises Exception: when no identification column is present
        """
        self.Core.users_df.columns = self.Core.users_df.columns.str.lower()

        # Define valid columns in order of preference.
        valid_columns = ['user_id', 'musora_user_id', 'id', 'email']
        # Find the first valid column present in the DataFrame.
        id_col = next((col for col in valid_columns if col in self.Core.users_df.columns), None)

        if id_col is None:
            raise Exception("Input data must contain user_id, musora_user_id, id, or email column.")

        # Normalize the identification column to 'user_id'
        if id_col in ['musora_user_id', 'id']:
            self.Core.users_df.rename(columns={id_col: 'user_id'}, inplace=True)
        elif id_col == 'email':
            # BUG FIX: _lookup_user_ids_from_email is defined on this class, not
            # on Core — calling self.Core._lookup_user_ids_from_email() raised
            # AttributeError on the email-only path.
            self._lookup_user_ids_from_email()

        # Identify additional columns: exclude identification columns
        identification_columns = {'user_id', 'email'} if 'email' in self.Core.users_df.columns else {'user_id'}
        additional_columns = [col for col in self.Core.users_df.columns if col not in identification_columns]
        self.Core.additional_info_columns = [col.lower() for col in additional_columns]

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def _lookup_user_ids_from_email(self):
        """
        Looks up user IDs based on unique email addresses and merges the results
        into Core.users_df. Assumes Core.users_df contains an 'email' column.
        """
        unique_emails = self.Core.users_df["email"].unique()
        data = self.Core.SF.extract_id_from_email(emails=unique_emails)
        self.Core.users_df = pd.merge(self.Core.users_df, data, on='email', how='left')

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def remaining_days_to_birthday(self):
        """
        calculating the remaining days to the user's birthday
        :return: updates users_df; birthday_reminder becomes a human-readable
            string when the birthday is within 7 days, otherwise None
        """

        # Iterate through each row in the DataFrame
        for idx, row in self.Core.users_df.iterrows():
            # ROBUSTNESS: also guard birthday_reminder — int(NaN) raised when the
            # birthday was set but the reminder value was missing.
            if pd.notna(row.get("birthday")) and pd.notna(row.get("birthday_reminder")):
                if int(row["birthday_reminder"]) <= 7:
                    remaining_days = int(row["birthday_reminder"])
                    self.Core.users_df.at[idx, "birthday_reminder"] = f"{remaining_days} days until student's birthday"
                else:
                    self.Core.users_df.at[idx, "birthday_reminder"] = None

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def fetch_data(self):
        """
        Fetch the supporting datasets from Snowflake and merge them into
        Core.users_df; rows without messaging permission are dropped.
        """

        # Fetch datasets
        user_ids = self.Core.users_df["user_id"].unique()

        users_data = self.Core.SF.get_data("users", user_ids)
        interactions_data = self.Core.SF.get_data("interactions", user_ids)
        recsys_data = self.Core.SF.get_data("recsys", user_ids)
        contents_data = self.Core.SF.get_data("contents")
        popular_contents_data = self.Core.SF.get_data("popular_contents")

        # Align key dtypes before merging.
        self.Core.users_df["user_id"] = self.Core.users_df["user_id"].astype(int)
        interactions_data["user_id"] = interactions_data["user_id"].astype(int)

        # Merge additional user details into the base dataframe (users_df).
        self.Core.users_df = self.Core.users_df.merge(users_data, on="user_id", how="left", suffixes=("", "_users"))
        self.Core.users_df = self.Core.users_df.merge(interactions_data, on="user_id", how="left",
                                                      suffixes=("", "_interactions"))
        self.Core.users_df = self.Core.users_df.merge(recsys_data, on="user_id", how="left", suffixes=("", "_recsys"))

        for col in self.Core.users_df.columns:
            # Replace additional empty representations with np.nan
            self.Core.users_df[col] = self.Core.users_df[col].replace(['', 'None', 'nan'], np.nan)

        # Now drop rows where 'permission' is missing
        self.Core.users_df.dropna(subset=["permission"], inplace=True)

        self.Core.content_info = contents_data
        self.Core.popular_contents_df = popular_contents_data

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def create_columns(self):
        """
        Creating user profile based on available information, and adding additional columns for messages
        :return: updates users_df
        """

        # adding new columns, initially with none values
        self.Core.users_df["message"] = None  # will contain the final message
        self.Core.users_df["source"] = None  # [AI-generated]
        self.Core.users_df["prompt"] = None  # will contain final prompt
        self.Core.users_df["instrument"] = self.Core.get_instrument()
        self.Core.users_df["platform"] = self.Core.platform
        self.Core.users_df["segment_name"] = self.Core.segment_name

    # -------------------------------------------------------------
    # -------------------------------------------------------------
    def create_additional_information(self):
        """
        providing additional input and instructions based on available columns in the input file
        :return: updates users_df["additional_info"] with "name: value" lines
        """
        self.Core.users_df["additional_info"] = None

        # Iterate through each row in the DataFrame
        for idx, row in self.Core.users_df.iterrows():
            additional_info = []

            # populating additional_info; skip None/NaN/empty values
            for feature in self.Core.additional_info_columns:
                value = row.get(feature)
                if pd.notna(value) and value not in [None, [], {}] and (
                        not isinstance(value, str) or value.strip()):
                    additional_info.append(f"{feature}: {str(value)}")

            self.Core.users_df.at[idx, "additional_info"] = "\n".join(additional_info)

    # -----------------------------------------------------------------
    # -----------------------------------------------------------------
    def select_sample(self, sample_size=None):
        """
        Select a sample of the input users (without replacement).
        :param sample_size: Number of users to select (defaults to
            Core.number_of_samples, or 20 when that is unset); capped at the
            number of available users.
        :return: users_df is replaced by the sampled subset.
        """

        # Use Core.number_of_samples if sample_size is None, otherwise default to 20
        if sample_size is None:
            sample_size = self.Core.number_of_samples if self.Core.number_of_samples is not None else 20

        total_users = self.Core.users_df.shape[0]
        sample_size = min(total_users, sample_size)
        self.Core.users_df = self.Core.users_df.sample(n=sample_size, replace=False)
|
Messaging_system/LLMR.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This class is a LLM based recommender that can choose the perfect content for the user given user profile and our goal
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import openai
|
| 9 |
+
from openai import OpenAI
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
import time
|
| 12 |
+
import streamlit as st
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
load_dotenv()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# -----------------------------------------------------------------------
|
| 19 |
+
class LLMR:
|
| 20 |
+
|
| 21 |
+
def __init__(self, CoreConfig):
    """
    :param CoreConfig: shared configuration/state object for the messaging run
    """
    # Shared config (users_df, token counters, recsys settings, ...)
    self.Core = CoreConfig
    # Row of users_df currently being processed
    self.user = None
    # Content ids shortlisted for the current user
    self.selected_content_ids = []  # will be populated for each user
|
| 26 |
+
|
| 27 |
+
def get_recommendations(self, progress_callback):
    """
    selecting the recommended content for each user

    Iterates Core.users_df, asks the LLM to pick one content per user, and
    writes the pick (id, info, raw recsys JSON) back onto the row. Rows where
    no content could be selected are skipped.
    :param progress_callback: callable(progress, total) for UI updates, or None
    :return: the updated Core object
    """

    self.Core.users_df["recommendation"] = None
    self.Core.users_df["recommendation_info"] = None
    total_users = len(self.Core.users_df)

    st.write("Choosing the best content to recommend ... ")

    # Start a fresh rate-limit window for this batch of LLM calls.
    self.Core.start_time = time.time()
    for progress, (idx, row) in enumerate(
            tqdm(self.Core.users_df.iterrows(), desc="Selecting the best content to recommend ...")):
        # if we have a prompt to generate a personalized message
        # Update progress if callback is provided
        if progress_callback is not None:
            progress_callback(progress, total_users)

        self.user = row
        content_id, content_info, recsys_json, token = self._get_recommendation()

        if content_id is None:  # error in selecting a content to recommend
            continue

        else:
            # updating tokens
            self.Core.total_tokens['prompt_tokens'] += int(token['prompt_tokens'])
            self.Core.total_tokens['completion_tokens'] += int(token['completion_tokens'])
            # NOTE(review): '=' overwrites the per-minute window counter instead of
            # accumulating (+=) — this looks like it defeats respect_request_ratio's
            # throttling; confirm intent.
            self.Core.temp_token_counter = int(token['prompt_tokens']) + int(token['completion_tokens'])
            self.Core.users_df.at[idx, "recommendation"] = content_id
            self.Core.users_df.at[idx, "recommendation_info"] = content_info
            self.Core.users_df.at[idx, "recsys_result"] = recsys_json
            self.Core.respect_request_ratio()

    return self.Core
|
| 64 |
+
|
| 65 |
+
# --------------------------------------------------------------
|
| 66 |
+
# --------------------------------------------------------------
|
| 67 |
+
def _get_recommendation(self):
    """
    select and return the recommendation from the available list of contents
    :return: (content_id, content_info, recsys_json, tokens);
        (None, None, None, None) when no prompt could be built or the LLM
        returned no usable content id
    """
    prompt, recsys_json = self._generate_prompt()
    if prompt is None:
        return None, None, None, None

    content_id, tokens = self.get_llm_response(prompt)
    if content_id == 0:
        # was not able to receive a recommendation
        return None, None, None, None

    content_info = self._get_content_info(content_id)
    return content_id, content_info, recsys_json, tokens
|
| 85 |
+
|
| 86 |
+
# --------------------------------------------------------------
|
| 87 |
+
# --------------------------------------------------------------
|
| 88 |
+
|
| 89 |
+
def _generate_prompt(self):
    """
    Generates the prompt for the current user in order to choose the
    recommendation from the available list.

    :return: (prompt, recsys_json); (None, None) when there is nothing to recommend
    """
    available_contents, recsys_json = self._get_available_contents()
    if available_contents.strip() == "":  # no item to recommend
        # BUG FIX: the caller unpacks two values (prompt, recsys_json); returning a
        # bare None raised "cannot unpack non-iterable NoneType" here.
        return None, None

    # Getting different part of the prompts
    input_context = self._input_context()
    user_info = self._get_user_profile()
    task = self._task_instructions()
    output_instruction = self._output_instruction()

    prompt = f"""
    ### Context:
    {input_context}

    ### User Information:
    {user_info}

    ### Available Contents:
    {available_contents}

    ### Main Task:
    {task}

    ### Output Instructions:
    {output_instruction}
    """

    return prompt, recsys_json
|
| 123 |
+
|
| 124 |
+
# --------------------------------------------------------------
|
| 125 |
+
# --------------------------------------------------------------
|
| 126 |
+
def _input_context(self):
    """
    Build the high-level context paragraph of the recommendation prompt.

    :return: input instructions as a string (the literal's leading/trailing
        whitespace is part of the returned value)
    """

    context = f"""
    You are a helpful assistant at Musora, an online music education platform that helps users learn music. Your goal is to choose a perfect content to recommend to the user given the information that we have from the user and available contents to recommend.
    """

    return context
|
| 136 |
+
|
| 137 |
+
# --------------------------------------------------------------
|
| 138 |
+
# --------------------------------------------------------------
|
| 139 |
+
def _system_instructions(self):
    """
    (Optional) High-level system-role context for LLM endpoints that accept
    messages with role='system'.
    :return: the system message string
    """
    system_message = (
        "You are a helpful recommendation assistant at Musora, an online music education platform. "
        "Use the provided user information and content details to choose the best content to recommend. "
        "Make sure to follow the instructions precisely and only return the chosen content_id as JSON."
    )
    return system_message
|
| 149 |
+
|
| 150 |
+
# --------------------------------------------------------------
|
| 151 |
+
# --------------------------------------------------------------
|
| 152 |
+
def _task_instructions(self):
    """
    creating the instructions about the task

    Returns the main-task section of the prompt: pick exactly one content,
    justify it from user info, avoid gear-specific topics, and emit the
    content_id per the output instructions.
    :return: task (str)
    """

    task = """
    - You must select exactly ONE content from the 'Available Contents' to recommend.
    - Base your decision on the User information and focus on providing the most relevant recommendation.
    - Do not recommended content where the topic is focused on a specific Gear (e.g. YAMAHA)
    - Provide the content_id of the recommended content in the output based on Output instructions.
    """

    return task
|
| 166 |
+
|
| 167 |
+
# --------------------------------------------------------------
|
| 168 |
+
# --------------------------------------------------------------
|
| 169 |
+
def _get_user_profile(self):
    """
    getting user's goal and user's last completed content to use for choosing the recommended content

    Both fields fall back to "Not Available" (via _get_user_data) when missing.
    :return: formatted string block with user info and last completed content
    """

    last_completed_content = self._get_user_data(attribute="last_completed_content")
    user_info = self._get_user_data(attribute="user_info")

    recommendation_info = f"""
    **User information and preferences:**

    {user_info}

    **Previous completed content:**
    {last_completed_content}
    """

    return recommendation_info
|
| 188 |
+
|
| 189 |
+
# --------------------------------------------------------------
|
| 190 |
+
# --------------------------------------------------------------
|
| 191 |
+
def _get_user_data(self, attribute):
    """
    Fetch one attribute of the current user row.

    Treats NaN, None, empty containers and whitespace-only strings as missing.
    :param attribute: column name to read from self.user
    :return: the value, or "Not Available" when missing/empty
    """
    value = self.user[attribute]

    # Present and non-empty: NaN-check, empty-container check, blank-string check.
    if pd.notna(value) and value not in [None, [], {}] and (
            not isinstance(value, str) or value.strip()):
        return value

    return "Not Available"
|
| 206 |
+
|
| 207 |
+
# --------------------------------------------------------------
|
| 208 |
+
# --------------------------------------------------------------
|
| 209 |
+
|
| 210 |
+
def _get_user_recommendation(self):
    """
    Return the raw recsys payload for the current user, or fall back to the
    top popular content when the payload is missing/unparsable or contains
    none of the configured sections (cold-start scenario).
    :return: recsys JSON string, or the popular-content fallback value
    """

    recsys_json = self.user["recsys_result"]

    try:
        recsys_data = json.loads(recsys_json)
        # Sections to process
        sections = self.Core.recsys_contents

        # Check if none of the sections are present in recsys_data --> cold start scenario
        if not any(section in recsys_data for section in sections):
            popular_content = self.Core.popular_contents_df.iloc[0]["popular_content"]
            return popular_content
        else:
            return recsys_json
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
        # SystemExit; any payload problem falls back to the popular content.
        popular_content = self.Core.popular_contents_df.iloc[0]["popular_content"]
        return popular_content
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# --------------------------------------------------------------
|
| 231 |
+
# --------------------------------------------------------------
|
| 232 |
+
def _get_available_contents(self):
|
| 233 |
+
|
| 234 |
+
# Get the user ID
|
| 235 |
+
recsys_json = self._get_user_recommendation()
|
| 236 |
+
recsys_data = json.loads(recsys_json)
|
| 237 |
+
|
| 238 |
+
# Sections to process
|
| 239 |
+
sections = self.Core.recsys_contents
|
| 240 |
+
|
| 241 |
+
# Collect selected content_ids
|
| 242 |
+
selected_content_ids = []
|
| 243 |
+
|
| 244 |
+
for section in sections:
|
| 245 |
+
if section in recsys_data:
|
| 246 |
+
# Get the list of recommendations in this section
|
| 247 |
+
recs = recsys_data[section]
|
| 248 |
+
# Sort by recommendation_rank (ascending order)
|
| 249 |
+
recs_sorted = sorted(recs, key=lambda x: x['recommendation_rank'])
|
| 250 |
+
# Select top 3 recommendations
|
| 251 |
+
top_recs = recs_sorted[:3]
|
| 252 |
+
# Get the content_ids
|
| 253 |
+
content_ids = [rec['content_id'] for rec in top_recs]
|
| 254 |
+
# Append to the list
|
| 255 |
+
selected_content_ids.extend(content_ids)
|
| 256 |
+
# Fetch content info for the selected content_ids
|
| 257 |
+
content_info_rows = self.Core.content_info[self.Core.content_info['content_id'].isin(selected_content_ids)]
|
| 258 |
+
|
| 259 |
+
# Create a mapping from CONTENT_ID to CONTENT_INFO
|
| 260 |
+
content_info_map = dict(zip(content_info_rows['content_id'], content_info_rows['content_info']))
|
| 261 |
+
|
| 262 |
+
# Assemble the text in a structured way using a list
|
| 263 |
+
lines = []
|
| 264 |
+
for content_id in selected_content_ids:
|
| 265 |
+
# Retrieve the content_info (which may include multi-line text)
|
| 266 |
+
content_info = content_info_map.get(content_id, "No content info found")
|
| 267 |
+
|
| 268 |
+
# Append the structured lines without extra spaces
|
| 269 |
+
lines.append(f"**content_id**: {content_id}")
|
| 270 |
+
lines.append("**content_info**:")
|
| 271 |
+
lines.append(content_info) # this line may already contain internal newlines
|
| 272 |
+
lines.append("") # blank line for separation
|
| 273 |
+
|
| 274 |
+
# Join all lines into a single text string with newline characters
|
| 275 |
+
text = "\n".join(lines)
|
| 276 |
+
|
| 277 |
+
self.selected_content_ids = selected_content_ids
|
| 278 |
+
|
| 279 |
+
return text, recsys_json
|
| 280 |
+
|
| 281 |
+
# --------------------------------------------------------------
|
| 282 |
+
# --------------------------------------------------------------
|
| 283 |
+
|
| 284 |
+
def _get_content_info(self, content_id):
|
| 285 |
+
"""
|
| 286 |
+
getting content_info for the recommended content
|
| 287 |
+
:param content_id:
|
| 288 |
+
:return:
|
| 289 |
+
"""
|
| 290 |
+
|
| 291 |
+
content_info_row = self.Core.content_info[self.Core.content_info['content_id'] == content_id]
|
| 292 |
+
content_info = content_info_row['content_info'].iloc[0]
|
| 293 |
+
|
| 294 |
+
return content_info
|
| 295 |
+
|
| 296 |
+
# --------------------------------------------------------------
|
| 297 |
+
# --------------------------------------------------------------
|
| 298 |
+
def is_valid_content_id(self, content_id):
    """
    Check whether *content_id* is one of the ids previously offered to
    the LLM (guards against hallucinated ids in the response).

    :param content_id: candidate id returned by the LLM
    :return: True when the id belongs to ``self.selected_content_ids``
    """
    return content_id in self.selected_content_ids
|
| 309 |
+
|
| 310 |
+
# --------------------------------------------------------------
|
| 311 |
+
# --------------------------------------------------------------
|
| 312 |
+
def _output_instruction(self):
|
| 313 |
+
"""
|
| 314 |
+
:return: output instructions as a string
|
| 315 |
+
"""
|
| 316 |
+
|
| 317 |
+
instructions = f"""
|
| 318 |
+
Return the content_id of the final recommendation in **JSON** format with the following structure:
|
| 319 |
+
|
| 320 |
+
{{
|
| 321 |
+
"content_id": "content_id of the recommended content from Available Contents, as an integer",
|
| 322 |
+
}}
|
| 323 |
+
|
| 324 |
+
Do not include any additional keys or text outside the JSON.
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
return instructions
|
| 328 |
+
|
| 329 |
+
def get_llm_response(self, prompt, max_retries=4):
    """
    Send *prompt* to the LLM and return the chosen content id.

    :param prompt: user prompt describing the recommendation task
    :param max_retries: attempts before giving up on bad output / API errors
    :return: tuple ``(content_id, tokens)`` where ``tokens`` is the usage
             dict of the successful call; ``(0, 0)`` when every attempt
             failed (callers must handle this sentinel).
    """
    import time  # local import: only needed for the retry back-off

    openai.api_key = self.Core.api_key
    instructions = self._system_instructions()
    client = OpenAI(api_key=self.Core.api_key)

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=self.Core.model,
                response_format={"type": "json_object"},
                messages=[
                    {"role": "system", "content": instructions},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=20,
                n=1,
                temperature=0.7
            )

            tokens = {
                'prompt_tokens': response.usage.prompt_tokens,
                'completion_tokens': response.usage.completion_tokens,
                'total_tokens': response.usage.total_tokens
            }

            try:
                content = response.choices[0].message.content
                output = json.loads(content)

                # int(...) may raise ValueError/TypeError on a garbage id;
                # previously only JSONDecodeError was caught, so a
                # non-numeric content_id crashed the whole run.
                if 'content_id' in output and self.is_valid_content_id(int(output['content_id'])):
                    return int(output['content_id']), tokens

                print(f"'content_id' missing or invalid in response on attempt {attempt + 1}. Retrying...")
                continue  # Continue to next attempt

            except (json.JSONDecodeError, ValueError, TypeError):
                print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

        except openai.APIConnectionError as e:
            print("The server could not be reached")
            print(e.__cause__)  # an underlying Exception, likely raised within httpx.
        except openai.RateLimitError:
            print("A 429 status code was received; we should back off a bit.")
            time.sleep(2 ** attempt)  # fix: actually back off instead of retrying immediately
        except openai.APIStatusError as e:
            print("Another non-200-range status code was received")
            print(e.status_code)
            print(e.response)

    print("Max retries exceeded. Returning empty response.")
    return 0, 0
|
Messaging_system/Message_generator.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
THis class will generate message or messages based on the number of requested.
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import time
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
from Messaging_system.MultiMessage import MultiMessage
|
| 11 |
+
from Messaging_system.protection_layer import ProtectionLayer
|
| 12 |
+
import openai
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class MessageGenerator:
|
| 16 |
+
|
| 17 |
+
def __init__(self, CoreConfig):
    """
    :param CoreConfig: shared CoreConfig instance; this class reads its
        ``users_df``, API credentials, token counters, protection-layer
        config and messaging settings.
    """
    self.Core = CoreConfig
|
| 19 |
+
|
| 20 |
+
# --------------------------------------------------------------
|
| 21 |
+
# --------------------------------------------------------------
|
| 22 |
+
def generate_messages(self, progress_callback):
    """
    Generate a personalized message for every user in ``self.Core.users_df``.

    For each row: ask the LLM for a first message, run it through the
    ProtectionLayer critic, store the parsed result in the "message"
    column, then either expand it into a multi-message sequence (when
    more than one subsequence message is configured) or wrap the single
    message into a ``{"messages_sequence": [...]}`` JSON envelope.

    :param progress_callback: optional callable ``(progress, total)`` used
        to report progress to the UI; skipped when None.
    :return: the (mutated) CoreConfig instance
    """

    total_users = len(self.Core.users_df)
    st.write("Generating messages ... ")

    # Wall-clock start used elsewhere for rate limiting / reporting.
    self.Core.start_time = time.time()
    for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages")):
        # if we have a prompt to generate a personalized message
        # Update progress if callback is provided
        if progress_callback is not None:
            progress_callback(progress, total_users)

        if row["prompt"] is not None:
            first_message = self.get_llm_response(row["prompt"])

            if first_message is not None:
                # adding protection layer (critic pass over the raw draft)
                protect = ProtectionLayer(config_file=self.Core.config_file,
                                          messaging_mode=self.Core.messaging_mode)
                message, total_tokens = protect.criticize(message=first_message, user=row)

                # updating tokens (critic's usage, on top of the draft's)
                self.Core.total_tokens['prompt_tokens'] += total_tokens['prompt_tokens']
                self.Core.total_tokens['completion_tokens'] += total_tokens['completion_tokens']
                self.Core.temp_token_counter += total_tokens['prompt_tokens'] + total_tokens['completion_tokens']

                # double check output structure
                if isinstance(message, dict) and "message" in message and isinstance(message["message"], str):
                    # parsing output result (enriches with recsys/playlist data)
                    message = self.parsing_output_message(message, row)
                    self.Core.users_df.at[idx, "message"] = message
                    # row is a copy; mirrored so the checks below see it too
                    row["message"] = message
                else:
                    self.Core.users_df.at[idx, "message"] = None
                self.Core.checkpoint()
                self.Core.respect_request_ratio()
            else:
                self.Core.users_df.at[idx, "message"] = None

        # generating subsequence messages if needed:
        # NOTE(review): when row["prompt"] is None, row["message"] was never
        # assigned — presumably the column pre-exists in users_df; confirm.
        if isinstance(self.Core.subsequence_messages, dict) and len(self.Core.subsequence_messages.keys()) > 1 and \
                self.Core.users_df.at[idx, "message"] is not None and row["message"] is not None:
            MM = MultiMessage(self.Core)
            message = MM.generate_multi_messages(row)
            self.Core.users_df.at[idx, "message"] = message

        else:
            # ---------------------------------------------------------
            # SINGLE-MESSAGE path: wrap the lone message in the same
            # {"messages_sequence": [...]} envelope as multi-message mode
            # ---------------------------------------------------------
            single_msg = row["message"] or self.Core.users_df.at[idx, "message"]
            if single_msg is not None:
                # If the single message is still a JSON string, turn it into a dict first
                if isinstance(single_msg, str):
                    try:
                        single_msg = json.loads(single_msg)
                    except json.JSONDecodeError:
                        # leave it as-is if it’s not valid JSON
                        pass

                msg_wrapper = {"messages_sequence": [single_msg]}
                # Again, store a proper JSON string
                self.Core.users_df.at[idx, "message"] = json.dumps(msg_wrapper,
                                                                   ensure_ascii=False)

            else:
                self.Core.users_df.at[idx, "message"] = None

    return self.Core
|
| 95 |
+
|
| 96 |
+
# --------------------------------------------------------------
|
| 97 |
+
# --------------------------------------------------------------
|
| 98 |
+
def parsing_output_message(self, message, user):
    """
    Parse the LLM output dict and enrich it with additional content
    information depending on the messaging mode.

    :param message: output dict from the LLM (expected to contain at least
        "header" and "message"; also "playlist_id" in playlist mode)
    :param user: the user row (provides recsys data when needed)
    :return: JSON string of the enriched message, or None when required
        keys are missing or enrichment fails
    """
    if self.Core.involve_recsys_result:
        output_message = self.fetch_recommendation_data(user, message)
        # fetch_recommendation_data returns None when the recommended
        # content cannot be located; fix: don't serialize that to "null".
        if output_message is None:
            return None
    elif self.Core.messaging_mode == "recommend_playlist":
        # adding playlist url to the message
        # fix: also require "header" (it was read unchecked below) and
        # handle the missing-key case — previously output_message stayed
        # unbound and the return line raised UnboundLocalError.
        if "playlist_id" in message and "message" in message and "header" in message:
            playlist_id = str(message["playlist_id"])
            web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
            # Add these to the message dict
            output_message = {
                "header": message["header"],
                "message": message["message"],
                "playlist_id": int(message["playlist_id"]),
                "web_url_path": web_url_path,
            }
        else:
            print("LLM output is missing 'playlist_id', 'header' or 'message'.")
            return None
    else:
        # Only "message" is expected when involve_recsys_result is False and we are not recommending any other content from input
        if "message" not in message or "header" not in message:
            print("LLM output is missing 'message'.")
            return None
        output_message = {"header": message["header"], "message": message["message"]}

    return json.dumps(output_message, ensure_ascii=False)
|
| 129 |
+
|
| 130 |
+
# --------------------------------------------------------------
|
| 131 |
+
# --------------------------------------------------------------
|
| 132 |
+
def fetch_recommendation_data(self, user, message):
    """
    Locate the recommended content inside the user's recsys payload and
    merge its metadata (url, title, thumbnail) into the message dict.

    :param user: user row with "user_id", "recommendation", "recsys_result"
    :param message: dict with at least "header" and "message"
    :return: enriched dict, or None when the content id is not found
    """
    user_id = user["user_id"]
    content_id = int(user["recommendation"])
    recsys_json_str = user["recsys_result"]
    recsys_data = json.loads(recsys_json_str)

    # Search through all categories in the recsys data
    found_item = None
    for category, items in recsys_data.items():
        for item in items:
            if item.get("content_id") == content_id:
                found_item = item
                break  # Exit inner loop if item is found
        if found_item:
            break  # Exit outer loop if item is found

    if not found_item:
        print(f"content_id {content_id} not found in recsys_data for user_id {user_id}.")
        return None

    # Extract required fields from found_item
    web_url_path = found_item.get("web_url_path")
    title = found_item.get("title")
    thumbnail_url = found_item.get("thumbnail_url")

    # Strip escape characters from the body. Fix: the old code called
    # str.replace without using the result — strings are immutable, so it
    # was a no-op; MultiMessage's sibling method already applies it.
    clean_message = message.get("message", "").replace('\\', '').replace('"', '')

    # Add these to the message dict
    return {
        "header": message.get("header"),
        "message": clean_message,
        "content_id": content_id,
        "web_url_path": web_url_path,
        "title": title,
        "thumbnail_url": thumbnail_url
    }
|
| 172 |
+
|
| 173 |
+
# --------------------------------------------------------------
|
| 174 |
+
# --------------------------------------------------------------
|
| 175 |
+
|
| 176 |
+
def get_llm_response(self, prompt, max_retries=4):
    """
    Send *prompt* to the LLM and return the parsed JSON message.

    Retries up to *max_retries* times when the response is not valid JSON,
    misses "header"/"message", or exceeds the configured character limits.

    :param prompt: user prompt string
    :param max_retries: maximum number of attempts
    :return: dict with "header" and "message", or None on failure
    """
    openai.api_key = self.Core.api_key
    instructions = self.llm_instructions()
    client = OpenAI(api_key=self.Core.api_key)

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=self.Core.model,
                response_format={"type": "json_object"},
                messages=[
                    {"role": "system", "content": instructions},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=500,
                n=1,
                temperature=0.6
            )

            tokens = {
                'prompt_tokens': response.usage.prompt_tokens,
                'completion_tokens': response.usage.completion_tokens,
                'total_tokens': response.usage.total_tokens
            }

            try:
                content = response.choices[0].message.content
                output = json.loads(content)

                if 'message' not in output or 'header' not in output:
                    print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
                    continue  # Continue to next attempt

                if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
                        output["message"].strip()) > self.Core.config_file["message_limit"]:
                    print(
                        f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
                    continue

                # Valid output: account the tokens of the successful call only
                self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
                self.Core.temp_token_counter += tokens['total_tokens']
                return output

            except json.JSONDecodeError:
                print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

        except openai.APIConnectionError as e:
            print("The server could not be reached")
            print(e.__cause__)  # an underlying Exception, likely raised within httpx.
        except openai.RateLimitError:
            print("A 429 status code was received; we should back off a bit.")
            time.sleep(2 ** attempt)  # fix: actually back off (``time`` is imported at module level)
        except openai.APIStatusError as e:
            print("Another non-200-range status code was received")
            print(e.status_code)
            print(e.response)

    print("Max retries exceeded. Returning empty response.")
    return None
|
| 245 |
+
|
| 246 |
+
# --------------------------------------------------------------
|
| 247 |
+
# --------------------------------------------------------------
|
| 248 |
+
def llm_instructions(self):
    """
    Build the system instruction given to the LLM.

    :return: instruction string
    """
    # Base system prompt shared by every generation request.
    return """You are an AI assistant that receives information of a music student and generate personalized
    motivation message. """
|
Messaging_system/MultiMessage.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import time
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from Messaging_system.protection_layer import ProtectionLayer
|
| 5 |
+
import openai
|
| 6 |
+
|
| 7 |
+
class MultiMessage:
|
| 8 |
+
def __init__(self, CoreConfig):
    """
    Class that generates a sequence of messages (multi-step push notifications)
    for each user, building on previously generated messages.

    :param CoreConfig: shared CoreConfig instance; this class reads its
        ``config_file`` limits, ``subsequence_messages`` plan, messaging
        mode, API credentials and token counters.
    """
    self.Core = CoreConfig
|
| 14 |
+
|
| 15 |
+
# --------------------------------------------------------------
|
| 16 |
+
def generate_multi_messages(self, user):
    """
    Generates multiple messages per user, storing them in a single JSON structure.
    The first message is assumed to already exist in user["message"].
    Subsequent messages are generated by referencing all previously generated ones.

    :param user: A row (dictionary-like) containing user data and the first message.
    :return: JSON string containing the entire sequence of messages
             (or None if something goes wrong).
    """
    # 1) Get the first message if it exists
    first_message_str = user.get("message", None)
    if not first_message_str:
        print("No initial message found; cannot build a multi-message sequence.")
        return None

    # Parse the first message as JSON
    try:
        first_message_dict = json.loads(first_message_str)
    except (json.JSONDecodeError, TypeError):
        print("Could not parse the first message as JSON. Returning None.")
        return None

    # Start our sequence with the first message
    message_sequence = [first_message_dict]

    # We'll reuse the same ProtectionLayer (critic) for every step
    protect = ProtectionLayer(
        config_file=self.Core.config_file,
        messaging_mode=self.Core.messaging_mode
    )

    # If user requested multiple messages, generate the rest
    # number_of_messages is the *total* number of messages requested
    total_to_generate = len(self.Core.subsequence_messages.keys())

    # Already have the first message, so generate the next (n-1) messages
    for step in range(2, total_to_generate + 1):
        # 2) Generate the next message referencing all so-far messages
        next_msg_raw = self.generate_next_messages(message_sequence, step)
        if not next_msg_raw:
            print(f"Could not generate the message for step {step}. Stopping.")
            break

        # 3) Pass it through the protection layer
        criticized_msg, tokens_used = protect.criticize(
            message=next_msg_raw,
            user=user
        )

        # Update token usage stats
        self.Core.total_tokens['prompt_tokens'] += tokens_used['prompt_tokens']
        self.Core.total_tokens['completion_tokens'] += tokens_used['completion_tokens']
        self.Core.temp_token_counter += tokens_used['prompt_tokens'] + tokens_used['completion_tokens']

        # 4) Parse & validate the next message (we do the same as the single-message pipeline)
        parsed_output_str = self.parsing_output_message(criticized_msg, user)
        if not parsed_output_str:
            print(f"Parsing output failed for step {step}. Stopping.")
            break

        try:
            parsed_output_dict = json.loads(parsed_output_str)
        except json.JSONDecodeError:
            print(f"Could not parse the new message as JSON for step {step}. Stopping.")
            break

        # Add this next message to our sequence
        message_sequence.append(parsed_output_dict)

    # 5) Return the entire sequence so it can be stored back in the DataFrame or elsewhere
    # NOTE(review): on an early ``break`` the partial sequence is still
    # returned — presumably intended (best effort); confirm with callers.
    final_structure = {"messages_sequence": message_sequence}
    return json.dumps(final_structure, ensure_ascii=False)
|
| 89 |
+
|
| 90 |
+
# --------------------------------------------------------------
|
| 91 |
+
def generate_next_messages(self, previous_messages, step):
    """
    Produce the next message of the sequence from all prior messages.

    :param previous_messages: list of dicts with at least "header" and "message"
    :param step: 1-based index of the message being generated
    :return: raw LLM dict (with 'header' and 'message'), or None on failure
    """
    # Build the prompt from the history, then ask the LLM for the next one
    # (same LLM routine as in MessageGenerator).
    return self.get_llm_response(self.generate_prompt(previous_messages, step))
|
| 104 |
+
|
| 105 |
+
# --------------------------------------------------------------
|
| 106 |
+
def generate_prompt(self, previous_messages, step):
    """
    Build the LLM prompt for the next push notification, embedding all
    previously generated messages plus the length constraints and the
    per-step tone from ``self.Core.subsequence_messages``.

    :param previous_messages: list of dicts with 'header' and 'message'
    :param step: sequence index used to pick the per-step tone
    :return: prompt string for the LLM
    """
    # Summarize the history as "Message i: (Header) ...\n (Body) ..."
    previous_text_str = "\n\n".join(
        f"Message {i}: (Header) {m.get('header', '').strip()}\n (Body) {m.get('message', '').strip()}"
        for i, m in enumerate(previous_messages, start=1)
    )

    # Character budgets for the next notification (config with fallbacks).
    header_limit = self.Core.config_file.get("header_limit", 50)
    message_limit = self.Core.config_file.get("message_limit", 200)

    return f"""
    We have previously sent these push notifications to the user:
    {previous_text_str}

    The user has still not re-engaged. Generate the *next* push notification to motivate the user
    to return and continue their music learning.

    Constraints:
    - "header" must be fewer than {header_limit} characters.
    - "message" must be fewer than {message_limit} characters.
    - Output must be valid JSON with exactly two keys: "header" and "message".
    - Do NOT repeat the exact same wording as prior messages; keep the same overall style.
    - The user is a music student who hasn't been active recently.

    Tune:
    - {self.Core.subsequence_messages[step]}

    Return only JSON of the form:
    {{
        "header": "...",
        "message": "..."
    }}
    """.strip()
|
| 153 |
+
|
| 154 |
+
# --------------------------------------------------------------
|
| 155 |
+
def parsing_output_message(self, message, user):
    """
    Parses the output JSON from the LLM and enriches it with additional content
    information if needed (e.g., from recsys). Re-uses the logic from the
    single-message pipeline to keep the results consistent.

    :param message: Output JSON *dictionary* from the LLM (with at least "message" and "header").
    :param user: The user row dictionary.
    :return: A valid JSON string or None if the structure is invalid.
    """
    if self.Core.involve_recsys_result:
        # If recsys is used, fetch recommendation data
        output_message = self.fetch_recommendation_data(user, message)
        # Fix: fetch_recommendation_data returns None when the content id
        # is absent; previously that was serialized to the string "null"
        # instead of signalling failure to the caller.
        if output_message is None:
            return None
    elif self.Core.messaging_mode == "recommend_playlist":
        # If recommending a playlist, add the relevant fields
        if "playlist_id" in message and "message" in message:
            playlist_id = str(message["playlist_id"])
            web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
            output_message = {
                "header": message.get("header", ""),
                "message": message.get("message", ""),
                "playlist_id": int(message["playlist_id"]),
                "web_url_path": web_url_path,
            }
        else:
            print("LLM output is missing either 'playlist_id' or 'message'.")
            return None
    else:
        # Basic scenario: Only 'header' and 'message' expected
        if "message" not in message or "header" not in message:
            print("LLM output is missing 'header' or 'message'.")
            return None
        output_message = {
            "header": message["header"],
            "message": message["message"]
        }

    return json.dumps(output_message, ensure_ascii=False)
|
| 193 |
+
|
| 194 |
+
# --------------------------------------------------------------
|
| 195 |
+
def fetch_recommendation_data(self, user, message):
    """
    Merge metadata of the recommended content (url, title, thumbnail)
    from the user's recsys payload into the message dict. Identical to
    the single-message pipeline's enrichment step.

    :param user: The user row (with 'recsys_result', 'recommendation', etc.).
    :param message: Dictionary with at least "header" and "message".
    :return: Enriched dict (header, message, content_id, web_url_path,
             title, thumbnail_url), or None when the id is not found.
    """
    target_id = int(user["recommendation"])
    payload = json.loads(user["recsys_result"])

    # First item across all categories whose content_id matches.
    match = next(
        (item for items in payload.values() for item in items
         if item.get("content_id") == target_id),
        None,
    )

    if match is None:
        print(f"content_id {target_id} not found in recsys_data for user_id {user['user_id']}.")
        return None

    # Construct the final dictionary; strip escape characters from the body.
    return {
        "header": message.get("header"),
        "message": message.get("message", "").replace('\\', '').replace('"', ''),
        "content_id": target_id,
        "web_url_path": match.get("web_url_path"),
        "title": match.get("title"),
        "thumbnail_url": match.get("thumbnail_url"),
    }
|
| 237 |
+
|
| 238 |
+
# --------------------------------------------------------------
|
| 239 |
+
def get_llm_response(self, prompt, max_retries=4):
|
| 240 |
+
"""
|
| 241 |
+
Calls the LLM (similar to MessageGenerator) with the prompt, returning a dict
|
| 242 |
+
with keys like 'header' and 'message' if successful, or None otherwise.
|
| 243 |
+
|
| 244 |
+
:param prompt: The text prompt for the LLM.
|
| 245 |
+
:param max_retries: Number of retries for potential LLM/connection failures.
|
| 246 |
+
:return: Dictionary with 'header' and 'message', or None if unsuccessful.
|
| 247 |
+
"""
|
| 248 |
+
openai.api_key = self.Core.api_key
|
| 249 |
+
instructions = self.llm_instructions()
|
| 250 |
+
client = OpenAI(api_key=self.Core.api_key)
|
| 251 |
+
|
| 252 |
+
for attempt in range(max_retries):
|
| 253 |
+
try:
|
| 254 |
+
response = client.chat.completions.create(
|
| 255 |
+
model=self.Core.model,
|
| 256 |
+
response_format={"type": "json_object"},
|
| 257 |
+
messages=[
|
| 258 |
+
{"role": "system", "content": instructions},
|
| 259 |
+
{"role": "user", "content": prompt}
|
| 260 |
+
],
|
| 261 |
+
max_tokens=500,
|
| 262 |
+
n=1,
|
| 263 |
+
temperature=0.6
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
tokens = {
|
| 267 |
+
'prompt_tokens': response.usage.prompt_tokens,
|
| 268 |
+
'completion_tokens': response.usage.completion_tokens,
|
| 269 |
+
'total_tokens': response.usage.total_tokens
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
try:
|
| 273 |
+
content = response.choices[0].message.content
|
| 274 |
+
output = json.loads(content)
|
| 275 |
+
|
| 276 |
+
# Validate output keys
|
| 277 |
+
if 'message' not in output or 'header' not in output:
|
| 278 |
+
print(f"'message' or 'header' missing in response (attempt {attempt+1}). Retrying...")
|
| 279 |
+
continue
|
| 280 |
+
|
| 281 |
+
# Check character length constraints
|
| 282 |
+
if (len(output["header"].strip()) > self.Core.config_file["header_limit"] or
|
| 283 |
+
len(output["message"].strip()) > self.Core.config_file["message_limit"]):
|
| 284 |
+
print(f"Header or message exceeded character limits (attempt {attempt+1}). Retrying...")
|
| 285 |
+
continue
|
| 286 |
+
|
| 287 |
+
# If we're good here, update token usage
|
| 288 |
+
self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
|
| 289 |
+
self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
|
| 290 |
+
self.Core.temp_token_counter += tokens['total_tokens']
|
| 291 |
+
|
| 292 |
+
return output
|
| 293 |
+
|
| 294 |
+
except json.JSONDecodeError:
|
| 295 |
+
print(f"Invalid JSON from LLM (attempt {attempt+1}). Retrying...")
|
| 296 |
+
|
| 297 |
+
except openai.APIConnectionError as e:
|
| 298 |
+
print("The server could not be reached")
|
| 299 |
+
print(e.__cause__)
|
| 300 |
+
except openai.RateLimitError as e:
|
| 301 |
+
print("Received a 429 status code; backing off might be needed.")
|
| 302 |
+
except openai.APIStatusError as e:
|
| 303 |
+
print("A non-200 status code was received")
|
| 304 |
+
print(e.status_code)
|
| 305 |
+
print(e.response)
|
| 306 |
+
|
| 307 |
+
print("Max retries exceeded. Returning None.")
|
| 308 |
+
return None
|
| 309 |
+
|
| 310 |
+
# --------------------------------------------------------------
|
| 311 |
+
def llm_instructions(self):
|
| 312 |
+
"""
|
| 313 |
+
System instructions for the LLM, focusing on generating motivational messages
|
| 314 |
+
for a returning music student. Extended or adapted as needed.
|
| 315 |
+
|
| 316 |
+
:return: A string with top-level instructions for the model.
|
| 317 |
+
"""
|
| 318 |
+
instructions = """
|
| 319 |
+
You are an AI assistant helping to create push notification messages for a music student
|
| 320 |
+
who has not been active recently. Each new message should build on previously sent
|
| 321 |
+
messages. Provide short, motivational text that encourages the user to come back.
|
| 322 |
+
Ensure the final output is valid JSON with keys "header" and "message."
|
| 323 |
+
""".strip()
|
| 324 |
+
return instructions
|
Messaging_system/Permes.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
The flow of the program starts from the create_personalize_messages function.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
import time
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
from Messaging_system.DataCollector import DataCollector
|
| 9 |
+
from Messaging_system.CoreConfig import CoreConfig
|
| 10 |
+
from Messaging_system.LLMR import LLMR
|
| 11 |
+
import streamlit as st
|
| 12 |
+
from Messaging_system.Message_generator import MessageGenerator
|
| 13 |
+
from Messaging_system.PromptGenerator import PromptGenerator
|
| 14 |
+
from Messaging_system.SnowFlakeConnection import SnowFlakeConn
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Permes:
    """
    LLM-based personalized message generator.

    Entry point is create_personalize_messages(), which configures a CoreConfig
    object from the caller's options and then runs the pipeline
    (data collection -> optional recommendations -> prompt generation ->
    message generation) via _create_personalized_message().
    """

    def create_personalize_messages(self, session, users, brand, config_file, openai_api_key, CTA, segment_info,
                                    platform="push", number_of_messages=1, instructionset=None,
                                    message_style=None, selected_input_features=None, selected_source_features=None
                                    , recsys_contents=None,
                                    additional_instructions=None, identifier_column="user_id",
                                    sample_example=None, number_of_samples=None, involve_recsys_result=False,
                                    messaging_mode="message", target_column=None, ongoing_df=None,
                                    progress_callback=None, segment_name="no_recent_activity"):
        """
        Create personalized messages for the input users for the app or push platform.

        :param session: snowflake connection object
        :param users: users dataframe
        :param brand: brand identifier passed through to CoreConfig
        :param config_file: parsed messaging-system configuration
        :param openai_api_key: OpenAI API key
        :param CTA: call to action for the messages
        :param segment_info: common information about the users
        :param platform: "push" or "app"
        :param number_of_messages: how many messages to generate per user
        :param instructionset: extra instruction set forwarded to CoreConfig
        :param message_style: style of the message
        :param selected_input_features: input columns to keep from `users`
        :param selected_source_features: source features forwarded to CoreConfig
        :param recsys_contents: recommended-content catalog, if any
        :param additional_instructions: extra free-form prompt instructions
        :param identifier_column: column that identifies a user ("user_id" or "EMAIL")
        :param sample_example: a sample for one shot prompting
        :param number_of_samples: number of sample messages
        :param involve_recsys_result: whether to include recommendations in messages
        :param messaging_mode: messaging mode; anything other than "message" is forwarded
        :param target_column: column naming a target content to recommend
        :param ongoing_df: NOTE(review): accepted but never used in this method — confirm intent
        :param progress_callback: optional UI progress callback
        :param segment_name: name of the user segment
        :return: users dataframe enriched with generated messages
        """

        # primary processing
        # NOTE(review): identify_users renames identifier_column -> "USER_ID" in place
        # (mutates the caller's DataFrame) unless the identifier is EMAIL.
        users = self.identify_users(users_df=users, identifier_column=identifier_column)

        if selected_input_features is None:
            selected_input_features = []
            selected_input_features.append(identifier_column)
        else:
            # NOTE(review): membership is tested with the original casing but the
            # UPPER-cased name is appended — looks inconsistent; confirm intended.
            # Also mutates the caller's list in place.
            if identifier_column not in selected_input_features and selected_input_features is not None:
                selected_input_features.append(identifier_column.upper())
            # NOTE(review): after the rename above, selecting by the original
            # identifier_column would KeyError unless it is already "USER_ID" /
            # upper-cased in the frame — TODO confirm against callers.
            users = users[selected_input_features]

        # Central configuration object shared by every pipeline stage.
        personalize_message = CoreConfig(session=session,
                                         users_df=users,
                                         brand=brand,
                                         platform=platform,
                                         config_file=config_file)

        personalize_message.set_CTA(CTA)
        personalize_message.set_segment_info(segment_info)
        personalize_message.set_openai_api(openai_api_key)
        personalize_message.set_segment_name(segment_name=segment_name)
        personalize_message.set_number_of_messages(number_of_messages=number_of_messages, instructionset=instructionset)

        if message_style:  # Check if message_style is not empty
            personalize_message.set_message_style(message_style)

        if sample_example:  # Check if sample_example is not empty
            personalize_message.set_sample_example(sample_example)

        if additional_instructions:
            personalize_message.set_additional_instructions(additional_instructions)

        if number_of_samples:
            personalize_message.set_number_of_samples(number_of_samples)

        if selected_source_features:
            personalize_message.set_features_to_use(selected_source_features)

        if involve_recsys_result:
            personalize_message.set_messaging_mode("recsys_result")
            personalize_message.set_involve_recsys_result(involve_recsys_result)

        if target_column:
            personalize_message.set_target_feature(target_column)

        # NOTE(review): when involve_recsys_result is True this can overwrite the
        # "recsys_result" mode set above — confirm precedence is intended.
        if messaging_mode != "message":
            personalize_message.set_messaging_mode(messaging_mode)

        if recsys_contents:
            personalize_message.set_recsys_contents(recsys_contents)

        users_df = self._create_personalized_message(CoreConfig=personalize_message, progress_callback=progress_callback)

        total_prompt_tokens = personalize_message.total_tokens["prompt_tokens"]
        total_completion_tokens = personalize_message.total_tokens["completion_tokens"]

        # Cost estimate using per-million-token pricing (0.15 prompt / 0.6 completion).
        total_cost = ((total_prompt_tokens / 1000000) * 0.15) + (
                (total_completion_tokens / 1000000) * 0.6)  # Cost calculation estimation
        print(f"Estimated Cost (USD): {total_cost:.5f}")

        # Storing process can also happen after some evaluation steps
        # snowflake_conn = SnowFlakeConn(session=session, brand=brand)
        # query = snowflake_conn.generate_write_sql_query(table_name="AI_generated_messages", dataframe=users_df)
        # snowflake_conn.run_write_query(query=query, table_name="AI_generated_messages", dataframe=users_df)
        # snowflake_conn.close_connection()

        return users_df

    # -----------------------------------------------------
    def identify_users(self, users_df, identifier_column):
        """
        Normalize the user-identifier column name.

        EMAIL identifiers are left untouched; any other identifier column is
        renamed to "USER_ID" *in place* (mutates the caller's DataFrame).

        :param users_df: users dataframe
        :param identifier_column: name of the identifying column
        :return: updated users dataframe
        """

        if identifier_column.upper() == "EMAIL":
            return users_df
        else:
            # NOTE(review): inplace rename mutates the caller's DataFrame.
            users_df.rename(columns={identifier_column: "USER_ID"}, inplace=True)
            return users_df

    # ------------------------------------------------------------------
    def _create_personalized_message(self, CoreConfig, progress_callback):
        """
        Main pipeline: data collection -> (optional) recommendations ->
        prompt generation -> message generation -> filtering.

        :param CoreConfig: configured CoreConfig instance
        :param progress_callback: optional UI progress callback
        :return: updated users_df with extracted information and personalized messages.
        """
        # Collecting all the data that we need to personalize messages
        datacollect = DataCollector(CoreConfig)
        CoreConfig = datacollect.gather_data()

        # generating recommendations for users, if we want to include recommendations in the message
        if CoreConfig.involve_recsys_result:
            Recommender = LLMR(CoreConfig)
            CoreConfig = Recommender.get_recommendations(progress_callback)

        # generating proper prompt for each user
        prompt = PromptGenerator(CoreConfig)
        CoreConfig = prompt.generate_prompts()

        # generating messages for each user
        message_generator = MessageGenerator(CoreConfig)
        CoreConfig = message_generator.generate_messages(progress_callback)

        # Eliminating rows where we don't have a valid message (empty or whitespace only).
        # NOTE(review): .str.strip() yields NaN for null messages and bool(NaN) is True,
        # so truly-null messages may survive this filter — confirm upstream fills them.
        CoreConfig.users_df = CoreConfig.users_df[CoreConfig.users_df["message"].str.strip().astype(bool)]
        CoreConfig.checkpoint()

        # closing snowflake connection
        # CoreConfig.session.close()

        return CoreConfig.users_df
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
|
Messaging_system/PromptGenerator.py
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This class generates the proper prompts for the messaging system.
|
| 3 |
+
"""
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class PromptGenerator:
|
| 9 |
+
|
| 10 |
+
def __init__(self, Core):
|
| 11 |
+
self.Core = Core
|
| 12 |
+
|
| 13 |
+
# --------------------------------------------------------------
|
| 14 |
+
# --------------------------------------------------------------
|
| 15 |
+
def generate_prompts(self):
|
| 16 |
+
"""
|
| 17 |
+
generates a personalized message for each student
|
| 18 |
+
:return:
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
# if we have personalized information about them, we generate a personalized prompt
|
| 22 |
+
for idx, row in tqdm(self.Core.users_df.iterrows(), desc="generating prompts"):
|
| 23 |
+
# check if we have enough information to generate a personalized message
|
| 24 |
+
prompt = self.generate_personalized_prompt(user=row)
|
| 25 |
+
# message = self.call_llm(prompt)
|
| 26 |
+
self.Core.users_df.at[idx, "prompt"] = prompt
|
| 27 |
+
self.Core.users_df.at[idx, "source"] = "AI-generated"
|
| 28 |
+
|
| 29 |
+
return self.Core
|
| 30 |
+
|
| 31 |
+
# --------------------------------------------------------------
|
| 32 |
+
def safe_get(self, value):
|
| 33 |
+
return str(value) if pd.notna(value) else "Not available"
|
| 34 |
+
|
| 35 |
+
# ==============================================================
|
| 36 |
+
def get_user_profile(self, user):
|
| 37 |
+
|
| 38 |
+
additional_info = self.user_additional_info(user)
|
| 39 |
+
|
| 40 |
+
user_info = f"""
|
| 41 |
+
### **User Information:**
|
| 42 |
+
|
| 43 |
+
Here is the information about the user:
|
| 44 |
+
{self.safe_get(self.Core.segment_info)}
|
| 45 |
+
|
| 46 |
+
**User profile:**
|
| 47 |
+
first name: {self.safe_get(user.get("first_name"))}
|
| 48 |
+
{self.safe_get(user.get("user_info"))}
|
| 49 |
+
last completed content: {self.safe_get(user.get("last_completed_content"))}
|
| 50 |
+
{self.safe_get(additional_info)}
|
| 51 |
+
Weeks since Last interaction:{self.safe_get(user.get("weeks_since_last_interaction"))}
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
return user_info
|
| 55 |
+
|
| 56 |
+
# --------------------------------------------------------------
|
| 57 |
+
def generate_personalized_prompt(self, user):
|
| 58 |
+
"""
|
| 59 |
+
generate a personalized prompt by putting the information from the user into a template prompt
|
| 60 |
+
:return: Personalized prompt (string)
|
| 61 |
+
"""
|
| 62 |
+
context = self.input_context()
|
| 63 |
+
cta = self.CTA_instructions()
|
| 64 |
+
|
| 65 |
+
if self.Core.involve_recsys_result or self.Core.target_content is not None:
|
| 66 |
+
if user["recommendation"] is not None or user["recommendation_info"] is not None:
|
| 67 |
+
recommendations_instructions = self.recommendations_instructions(user=user) + "\n"
|
| 68 |
+
else:
|
| 69 |
+
recommendations_instructions = ""
|
| 70 |
+
|
| 71 |
+
user_info = self.get_user_profile(user=user)
|
| 72 |
+
|
| 73 |
+
personalize_message_instructions = self.personalize_message_instructions(user)
|
| 74 |
+
|
| 75 |
+
output_instructions = self.output_instruction()
|
| 76 |
+
|
| 77 |
+
task_instructions = self.task_instructions()
|
| 78 |
+
|
| 79 |
+
prompt = f"""
|
| 80 |
+
{context}
|
| 81 |
+
{cta}
|
| 82 |
+
|
| 83 |
+
{personalize_message_instructions}
|
| 84 |
+
{recommendations_instructions}
|
| 85 |
+
{task_instructions}
|
| 86 |
+
|
| 87 |
+
{user_info}
|
| 88 |
+
{output_instructions}
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
return prompt
|
| 92 |
+
|
| 93 |
+
# --------------------------------------------------------------
|
| 94 |
+
# --------------------------------------------------------------
|
| 95 |
+
def input_context(self):
|
| 96 |
+
"""
|
| 97 |
+
:return: input instructions as a string
|
| 98 |
+
"""
|
| 99 |
+
|
| 100 |
+
context = f""" You are a helpful assistant at Musora, an online music education platform that helps users
|
| 101 |
+
learn music. Your goal is to generate a fully personalized message specifically tailored to the user, to increase
|
| 102 |
+
their engagement with the message.
|
| 103 |
+
|
| 104 |
+
"""
|
| 105 |
+
|
| 106 |
+
return context
|
| 107 |
+
|
| 108 |
+
# --------------------------------------------------------------
|
| 109 |
+
# --------------------------------------------------------------
|
| 110 |
+
def CTA_instructions(self):
|
| 111 |
+
"""
|
| 112 |
+
define CTA instructions
|
| 113 |
+
:return: CTA instructions (str)
|
| 114 |
+
"""
|
| 115 |
+
|
| 116 |
+
instructions = f"""
|
| 117 |
+
Create a clear header, and a message considering the call to action we want the user to hear from us:
|
| 118 |
+
|
| 119 |
+
**Call to Action:**
|
| 120 |
+
- **{self.Core.CTA}** \n
|
| 121 |
+
"""
|
| 122 |
+
|
| 123 |
+
return instructions
|
| 124 |
+
|
| 125 |
+
# --------------------------------------------------------------
|
| 126 |
+
# --------------------------------------------------------------
|
| 127 |
+
def user_additional_info(self, user):
|
| 128 |
+
"""
|
| 129 |
+
providing additional information given in the input data
|
| 130 |
+
:param user:
|
| 131 |
+
:return:
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
if pd.notna(user["additional_info"]) and user["additional_info"] not in [None, [], {}] and (
|
| 135 |
+
not isinstance(user["additional_info"], str) or user["additional_info"].strip()):
|
| 136 |
+
additional_info = user["additional_info"]
|
| 137 |
+
else:
|
| 138 |
+
additional_info = ""
|
| 139 |
+
|
| 140 |
+
return additional_info
|
| 141 |
+
|
| 142 |
+
# --------------------------------------------------------------
|
| 143 |
+
# --------------------------------------------------------------
|
| 144 |
+
def recommendations_instructions(self, user):
|
| 145 |
+
"""
|
| 146 |
+
instructions about target recommendation for the user
|
| 147 |
+
:param user:
|
| 148 |
+
:return:
|
| 149 |
+
"""
|
| 150 |
+
|
| 151 |
+
instructions_for_recsys = f"""
|
| 152 |
+
### ** Recommendations instructions **:
|
| 153 |
+
Below is the content that we want to recommend to the user:
|
| 154 |
+
|
| 155 |
+
Recommended content: {user["recommendation_info"]}
|
| 156 |
+
|
| 157 |
+
- Use the **CONTENT_TITLE** naturally in the message if capable, but do not use the exact title verbatim or put it in quotes.
|
| 158 |
+
- Naturally mention the **CONTENT_TYPE** for course, workout, and quicktips if capable.
|
| 159 |
+
- If the recommended content has an **Artist** with a known full name, use the ** FULL NAME ** naturally in the message if capable. If only the first name of the Artist is available, ** DO NOT ** use it at all.
|
| 160 |
+
"""
|
| 161 |
+
|
| 162 |
+
# need to adjust
|
| 163 |
+
instructions_for_target_content = """
|
| 164 |
+
- Considering the information about the user, and the content that we want to recommend, include the **TITLE** inside single quotes, or use the title naturally without the exact title name and quotes if capable.
|
| 165 |
+
Naturally mention the **CONTENT_TYPE** for course, workout, quicktips if capable and shortly provide a reasoning why the content is helpful for them.
|
| 166 |
+
|
| 167 |
+
**Target recommended Content**:
|
| 168 |
+
"""
|
| 169 |
+
|
| 170 |
+
instructions = ""
|
| 171 |
+
|
| 172 |
+
if self.Core.involve_recsys_result:
|
| 173 |
+
instructions += f"""
|
| 174 |
+
{instructions_for_recsys}
|
| 175 |
+
"""
|
| 176 |
+
|
| 177 |
+
elif self.Core.target_content is not None:
|
| 178 |
+
# fetching the information related to the target content from content_table
|
| 179 |
+
target_info = self.get_target_content_info(user)
|
| 180 |
+
instructions += f"""
|
| 181 |
+
{instructions_for_target_content}
|
| 182 |
+
{target_info}
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
+
return instructions
|
| 186 |
+
|
| 187 |
+
# --------------------------------------------------------------
|
| 188 |
+
# --------------------------------------------------------------
|
| 189 |
+
def get_target_content_info(self, user):
|
| 190 |
+
"""
|
| 191 |
+
fetching information about the target content that we want to recommend to the user
|
| 192 |
+
:param user: target user
|
| 193 |
+
:return:
|
| 194 |
+
"""
|
| 195 |
+
|
| 196 |
+
# checking that user[self.target_content] contains a content_id:
|
| 197 |
+
target_id = int(user[self.Core.target_content])
|
| 198 |
+
|
| 199 |
+
try:
|
| 200 |
+
|
| 201 |
+
# fetching the data for target content (self.target_content column in user)
|
| 202 |
+
content_info_row = self.Core.content_info.loc[self.Core.content_info['content_id'] == target_id]
|
| 203 |
+
|
| 204 |
+
text = f"""
|
| 205 |
+
**content_id** : {str(content_info_row["content_id"])}"
|
| 206 |
+
**content_info** : \n {content_info_row["content_info"]} \n\n"
|
| 207 |
+
"""
|
| 208 |
+
return text
|
| 209 |
+
except:
|
| 210 |
+
print(f"Target content cannot be found in the content database: content_id = {target_id}")
|
| 211 |
+
|
| 212 |
+
# --------------------------------------------------------------
|
| 213 |
+
# --------------------------------------------------------------
|
| 214 |
+
def personalize_message_instructions(self, user):
|
| 215 |
+
"""
|
| 216 |
+
:return: personalized message instructions as a string
|
| 217 |
+
"""
|
| 218 |
+
|
| 219 |
+
general_instructions = self.message_type_instructions()
|
| 220 |
+
|
| 221 |
+
instructions = """
|
| 222 |
+
### ** Personalized Message Specifications **
|
| 223 |
+
|
| 224 |
+
Based on the available information about the user, create a personalized message for the user:
|
| 225 |
+
\n
|
| 226 |
+
"""
|
| 227 |
+
|
| 228 |
+
# Name
|
| 229 |
+
if "first_name" in self.Core.list_of_features and pd.notna(user["first_name"]) and user["first_name"] not in [
|
| 230 |
+
None,
|
| 231 |
+
[],
|
| 232 |
+
{}] and (
|
| 233 |
+
not isinstance(user["first_name"], str) or user["first_name"].strip()):
|
| 234 |
+
instructions += f"""
|
| 235 |
+
- Address the user by their first name (only first letter capital) to make the message more personal. \n
|
| 236 |
+
"""
|
| 237 |
+
else:
|
| 238 |
+
instructions += """
|
| 239 |
+
- If the user's name is not available or invalid (e.g. email), proceed without addressing them by name. \n
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
# Birthday reminder
|
| 243 |
+
if "birthday_reminder" in self.Core.list_of_features and pd.notna(user["birthday_reminder"]) and user[
|
| 244 |
+
"birthday_reminder"] not in [None, [], {}] and (
|
| 245 |
+
not isinstance(user["birthday_reminder"], str) or user["birthday_reminder"].strip()):
|
| 246 |
+
instructions += """
|
| 247 |
+
- **Include a short message to remind them that their birthday is coming up.** \n
|
| 248 |
+
|
| 249 |
+
"""
|
| 250 |
+
|
| 251 |
+
# Additional instructions for input columns
|
| 252 |
+
if self.Core.additional_instructions is not None or str(self.Core.additional_instructions).strip() != '':
|
| 253 |
+
instructions += str(self.Core.additional_instructions)
|
| 254 |
+
|
| 255 |
+
instructions += self.fire_wall() + "\n"
|
| 256 |
+
|
| 257 |
+
final_instructions = f"""
|
| 258 |
+
{general_instructions}
|
| 259 |
+
|
| 260 |
+
{instructions}
|
| 261 |
+
|
| 262 |
+
"""
|
| 263 |
+
|
| 264 |
+
return final_instructions
|
| 265 |
+
|
| 266 |
+
# --------------------------------------------------------------
|
| 267 |
+
# --------------------------------------------------------------
|
| 268 |
+
|
| 269 |
+
def message_type_instructions(self):
|
| 270 |
+
"""
|
| 271 |
+
create a proper instruction for the message type, regarding the input platform
|
| 272 |
+
:return: message instructions as a string
|
| 273 |
+
"""
|
| 274 |
+
|
| 275 |
+
instructions = ""
|
| 276 |
+
message_style = self.message_style_instructions()
|
| 277 |
+
|
| 278 |
+
if self.Core.platform == "push":
|
| 279 |
+
instructions = f"""
|
| 280 |
+
### ** General Specifications: **
|
| 281 |
+
|
| 282 |
+
- The message is a **mobile push notification**.
|
| 283 |
+
- Make all parts of the message highly **personalized**, **eye-catching**, and **bring curiosity**
|
| 284 |
+
- ** Keep the First sentence as "header": short and less than 30 character **.
|
| 285 |
+
- ** For the "header", Use a space following with a proper emoji at the end (e.g. Great work John 😍) **
|
| 286 |
+
- Use drum emoji or general music emojis (e.g. 🥁, 🎶, 🎵), and Other emojis that relate to motivation, progress, inspiration, and create curiosity can also be used (like 🔥, 🚀, 💪, 🎉, 👀)
|
| 287 |
+
- **Keep the "message" concise and under 100 characters**.
|
| 288 |
+
- Every word should contribute to maximizing impact and engagement, so start directly with the message content without greetings or closing phrases.
|
| 289 |
+
- Avoid using same or similar words so close together in "message" and "header", and make sure there is no grammar problem.
|
| 290 |
+
- ****.
|
| 291 |
+
{message_style}
|
| 292 |
+
|
| 293 |
+
"""
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
elif self.Core.platform == "app":
|
| 297 |
+
instructions = f"""
|
| 298 |
+
Message Specifications:
|
| 299 |
+
- The message is an **in app notification**.
|
| 300 |
+
- ** Keep the First sentence as "header" that should be a short personalized eye catching sentence less than 40 character **.
|
| 301 |
+
- ** For the "header", don't use exclamation mark at the end, instead, use a space following with a proper emoji at the end of the "header" (e.g. Great work John 😍) **
|
| 302 |
+
- **Keep the message concise and straightforward**.
|
| 303 |
+
- **Start directly with the message content**; do not include greetings (e.g., "Hello") or closing phrases.
|
| 304 |
+
- Make the message highly **personalized** and **eye-catching**.
|
| 305 |
+
- "Personalized" means the user should feel the message is specifically crafted for them and not generic.
|
| 306 |
+
- **Every word should contribute to maximizing impact and engagement**.
|
| 307 |
+
- {message_style}
|
| 308 |
+
"""
|
| 309 |
+
|
| 310 |
+
return instructions
|
| 311 |
+
|
| 312 |
+
# --------------------------------------------------------------
|
| 313 |
+
# --------------------------------------------------------------
|
| 314 |
+
def message_style_instructions(self):
|
| 315 |
+
"""
|
| 316 |
+
defines the style of the message: e.g. friendly, kind, tone, etc.
|
| 317 |
+
:return: style_instructions(str)
|
| 318 |
+
"""
|
| 319 |
+
|
| 320 |
+
if self.Core.message_style is None and self.Core.sample_example is None:
|
| 321 |
+
message_style = f"""
|
| 322 |
+
- Keep the tone **kind**, **friendly causal**, and **encouraging**.
|
| 323 |
+
"""
|
| 324 |
+
|
| 325 |
+
else:
|
| 326 |
+
message_style = f"""
|
| 327 |
+
- {self.Core.message_style}.
|
| 328 |
+
"""
|
| 329 |
+
|
| 330 |
+
return message_style
|
| 331 |
+
|
| 332 |
+
# --------------------------------------------------------------
|
| 333 |
+
# --------------------------------------------------------------
|
| 334 |
+
def fire_wall(self):
|
| 335 |
+
"""
|
| 336 |
+
Provide explicit instructions to ensure that sensitive information is not included in the generated message.
|
| 337 |
+
:return: string
|
| 338 |
+
"""
|
| 339 |
+
fire_wall = f"""
|
| 340 |
+
### Restrictions:
|
| 341 |
+
|
| 342 |
+
- **Do not include** any personal sensitive or confidential information.
|
| 343 |
+
- **Avoid AI Jargon:** Skip overused phrases like: {self.Core.config_file["AI_Jargon"]}.
|
| 344 |
+
"""
|
| 345 |
+
return fire_wall
|
| 346 |
+
|
| 347 |
+
# --------------------------------------------------------------
|
| 348 |
+
# --------------------------------------------------------------
|
| 349 |
+
def output_instruction(self):
|
| 350 |
+
"""
|
| 351 |
+
:return: output instructions as a string
|
| 352 |
+
"""
|
| 353 |
+
|
| 354 |
+
example_output = self.example_output()
|
| 355 |
+
general_instructions = """
|
| 356 |
+
- The "header" must be less than 30 character.
|
| 357 |
+
- The "message" must be less than 100 character.
|
| 358 |
+
- Do not include any links in the message.
|
| 359 |
+
- Preserve special characters and emojis in the message.
|
| 360 |
+
- Ensure that the output is a valid JSON.
|
| 361 |
+
- Do not include any text outside the JSON code block.
|
| 362 |
+
"""
|
| 363 |
+
|
| 364 |
+
instructions = f"""
|
| 365 |
+
Your response should be in JSON format with the following structure:
|
| 366 |
+
|
| 367 |
+
{{
|
| 368 |
+
"header": "Generated title",
|
| 369 |
+
"message": "Generated message",
|
| 370 |
+
}}
|
| 371 |
+
|
| 372 |
+
{general_instructions}
|
| 373 |
+
"""
|
| 374 |
+
|
| 375 |
+
output_instructions = f"""
|
| 376 |
+
### **Output instructions**:
|
| 377 |
+
|
| 378 |
+
{example_output}
|
| 379 |
+
{instructions}
|
| 380 |
+
"""
|
| 381 |
+
|
| 382 |
+
return output_instructions
|
| 383 |
+
|
| 384 |
+
# --------------------------------------------------------------
|
| 385 |
+
# --------------------------------------------------------------
|
| 386 |
+
def example_output(self):
    """
    Return a one-shot example section to steer the LLM's style.

    :return: example section string, or "" when no sample example is
        configured on the core object
    """
    sample = self.Core.sample_example
    if sample is None:
        return ""

    # one-shot prompting: show the model what a good answer looks like
    return f"""
    Based on the examples below, create a header and message that follows the same style, tone, characteristic, and creativity.

    ### **Examples:**
    {sample}
    """
|
| 406 |
+
|
| 407 |
+
# --------------------------------------------------------------
|
| 408 |
+
# --------------------------------------------------------------
|
| 409 |
+
|
| 410 |
+
def task_instructions(self):
    """
    Build the "Tasks" section of the prompt.

    When recommender-system output is involved, extra instructions about
    weaving the recommended content into the message are included.

    :return: task instructions as a string
    """
    recsys_task = ""
    if self.Core.involve_recsys_result:
        recsys_task = """
    - Create a perfect message and the header following the instructions, using the user's information and the content that we want to recommend.
    - Use the instructions to include the recommended content in the message.
    - Follow the instructions to create the messages.
    """

    message_task = """
    - Create a perfect personalized message considering the information and instructions mentioned. Your output format should be based on **Output instructions**."""

    return f"""
    ### Tasks:
    {recsys_task}
    {message_task}
    """
|
Messaging_system/SnowFlakeConnection.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This class create a connection to Snowflake, run queries (read and write)
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from snowflake.snowpark import Session
|
| 9 |
+
from sympy.strategies.branch import condition
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SnowFlakeConn:
    """
    Thin wrapper around a Snowflake Snowpark session.

    Builds and runs the read queries the messaging system needs (users,
    contents, interactions, recsys results, popular contents) and writes
    result dataframes back to Snowflake.
    """

    def __init__(self, session, brand):
        # Snowpark session created by the caller; this class never opens one.
        self.session = session
        self.brand = brand

        # Canonical column set (and order) for dataframes written back to Snowflake.
        self.final_columns = ['user_id', "email", "user_info", "permission", "expiration_date", "recsys_result", "message", "brand", "recommendation", "segment_name", "timestamp"]

    # ---------------------------------------------------------------
    def run_read_query(self, query, data):
        """
        Execute a read query on Snowflake and return the result.

        :param query: SQL string to execute
        :param data: short label used only for logging
        :return: pandas DataFrame with lower-cased column names, or None
            when the query fails (the error is printed, not raised)
        """
        try:
            dataframe = self.session.sql(query).to_pandas()
            dataframe.columns = dataframe.columns.str.lower()
            print(f"reading {data} table successfully")
            return dataframe
        except Exception as e:
            # Deliberately swallowed so one bad query does not kill a batch
            # run; callers must handle a None result.
            print(f"Error in creating/updating table: {e}")
            return None

    # ---------------------------------------------------------------
    def is_json_parsed_to_collection(self, s):
        """Return True when ``s`` is JSON text that parses to a dict or list."""
        try:
            return isinstance(json.loads(s), (dict, list))
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers
            # non-string input. (Was a bare except.)
            return False

    # ---------------------------------------------------------------
    def store_df_to_snowflake(self, table_name, dataframe, database="ONLINE_RECSYS", schema="GENERATED_DATA"):
        """
        Overwrite (or auto-create) a Snowflake table with a dataframe.

        :param table_name: destination table name (upper-cased before writing)
        :param dataframe: pandas DataFrame to store
        :param database: target Snowflake database
        :param schema: target Snowflake schema
        :return: None (errors are printed, not raised)
        """
        try:
            self.session.use_database(database)
            self.session.use_schema(schema)

            dataframe = dataframe.reset_index(drop=True)
            # Snowflake identifiers are conventionally upper-case.
            dataframe.columns = dataframe.columns.str.upper()

            self.session.write_pandas(df=dataframe,
                                      table_name=table_name.strip().upper(),
                                      auto_create_table=True,
                                      overwrite=True,
                                      use_logical_type=True)
            print(f"Data inserted into {table_name} successfully.")

        except Exception as e:
            print(f"Error in creating/updating/inserting table: {e}")

    # ---------------------------------------------------------------
    def get_data(self, data, list_of_ids=None):
        """
        Fetch one of the supported datasets.

        :param data: one of {'users', 'contents', 'interactions', 'recsys',
            'popular_contents'}
        :param list_of_ids: optional list of user ids to filter on
        :return: pandas DataFrame (or None if the read failed)
        :raises ValueError: when ``data`` is not a supported dataset name
        :raises NotImplementedError: when no query builder exists for ``data``
        """
        valid_data = {'users', 'contents', 'interactions', 'recsys', 'popular_contents'}

        if data not in valid_data:
            raise ValueError(f"Invalid data type: {data}")

        # Dispatch to the matching private query builder (e.g. _get_users).
        method_name = f"_get_{data}"
        method = getattr(self, method_name, None)
        if method is None:
            raise NotImplementedError(f"The method {method_name} is not implemented.")

        query = method(list_of_ids)
        return self.run_read_query(query, data)

    # ---------------------------------------------------------------
    @staticmethod
    def _ids_condition(list_of_ids, keyword="AND"):
        """
        Build a ``USER_ID in (...)`` SQL fragment, or "" when no ids given.

        :param list_of_ids: ids to filter on, or None for no filter
        :param keyword: "AND" or "WHERE", depending on the surrounding query
        """
        if list_of_ids is None:
            return ""
        ids_str = "(" + ", ".join(map(str, list_of_ids)) + ")"
        return f"{keyword} USER_ID in {ids_str}"

    # ---------------------------------------------------------------
    def _get_contents(self, list_of_ids=None):
        """Query for all vectorized contents of this brand."""
        query = f"""
            select CONTENT_ID, CONTENT_TYPE, CONTENT_PROFILE as content_info, CONTENT_PROFILE_VECTOR
            from ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT
            where BRAND = '{self.brand}'
        """
        return query

    # ---------------------------------------------------------------
    def _get_users(self, list_of_ids=None):
        """Query for user profiles plus a days-until-birthday reminder column."""
        condition = self._ids_condition(list_of_ids)

        query = f"""
            select USER_ID, BRAND, FIRST_NAME, BIRTHDAY, TIMEZONE, EMAIL, CURRENT_TIMESTAMP() AS TIMESTAMP, DIFFICULTY, SELF_REPORT_DIFFICULTY, USER_PROFILE as user_info, PERMISSION, EXPIRATION_DATE,
            DATEDIFF(
                day,
                CURRENT_DATE(),
                CASE
                    WHEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY)) < CURRENT_DATE()
                    THEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()) + 1, EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
                    ELSE DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
                END) AS birthday_reminder
            from ONLINE_RECSYS.PREPROCESSED.USERS
            where BRAND = '{self.brand}' {condition}
        """
        return query

    # ---------------------------------------------------------------
    def _get_interactions(self, list_of_ids=None):
        """Query for each user's most recent video interaction and its content profile."""
        condition = self._ids_condition(list_of_ids)

        query = f"""
            WITH latest_interactions AS(
                SELECT
                    USER_ID, CONTENT_ID, CONTENT_TYPE, EVENT_TEXT, TIMESTAMP,
                    ROW_NUMBER() OVER(PARTITION BY USER_ID ORDER BY TIMESTAMP DESC) AS rn
                FROM ONLINE_RECSYS.PREPROCESSED.RECSYS_INTEACTIONS
                WHERE BRAND = '{self.brand}' AND EVENT_TEXT IN('Video Completed', 'Video Playing') {condition})

            SELECT i.USER_ID, i.CONTENT_ID, i.CONTENT_TYPE, c.content_profile as last_completed_content, i.EVENT_TEXT, i.TIMESTAMP, DATEDIFF('week', i.TIMESTAMP, CURRENT_TIMESTAMP) AS weeks_since_last_interaction
            FROM latest_interactions i
            LEFT JOIN
                ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT c ON c.CONTENT_ID = i.CONTENT_ID
            WHERE rn = 1;
        """
        return query

    # ---------------------------------------------------------------
    def _get_recsys(self, list_of_ids=None):
        """Query for recsys-v2 recommendations for this brand's users."""
        # This query has no other WHERE clause, so the id filter starts one.
        condition = self._ids_condition(list_of_ids, keyword="WHERE")

        recsys_col = f"{self.brand}_recsys_v2"
        query = f"""
            select USER_ID, {recsys_col} as recsys_result
            from RECSYS_V2.RECSYS_V2_CIO.RECSYS_V2_CUSTOMER_IO
            {condition}
        """
        return query

    # ---------------------------------------------------------------
    def _get_popular_contents(self, list_of_ids=None):
        """Query for the brand's popular contents (id filter not applicable)."""
        query = f"""
            select POPULAR_CONTENT
            from RECSYS_V2.RECSYS_V2_CIO.POPULAR_CONTENT_CUSTOMER_IO
            where brand = '{self.brand.lower()}'
        """
        return query

    # ---------------------------------------------------------------
    def extract_id_from_email(self, emails):
        """
        Look up user ids for a collection of email addresses.

        :param emails: iterable of email strings
        :return: DataFrame with USER_ID and EMAIL columns (or None on failure)
        """
        # Escape single quotes so an address cannot break (or inject into)
        # the IN (...) list; there is no client-side bind here.
        email_list_str = ', '.join("'" + str(email).replace("'", "''") + "'" for email in emails)
        # NOTE(review): table name USORA_USERS looks like a possible typo for
        # MUSORA_USERS — confirm against the warehouse before changing.
        query = f"""
            SELECT id as USER_ID, email as EMAIL
            FROM STITCH.MUSORA_ECOM_DB.USORA_USERS
            WHERE email IN ({email_list_str})
        """

        user_ids_df = self.run_read_query(query, data="User_ids")
        return user_ids_df

    # ---------------------------------------------------------------
    def adjust_dataframe(self, dataframe):
        """
        Conform a dataframe to ``self.final_columns``.

        Keeps only the expected columns, adds any missing ones as None, and
        returns them in the canonical order with canonical names.

        :param dataframe: input DataFrame (not modified)
        :return: new DataFrame with exactly ``self.final_columns``
        """
        # Work with a copy so that we don't modify the original input.
        final_df = dataframe.copy()

        # Normalize column names to lower-case for matching.
        final_df.columns = final_df.columns.str.lower()
        expected_cols = [col.lower() for col in self.final_columns]

        # Keep only those columns in the expected list.
        available = [col for col in final_df.columns if col in expected_cols]
        final_df = final_df[available]

        # Add missing columns with None values.
        for col in expected_cols:
            if col not in final_df.columns:
                final_df[col] = None

        # Reorder the columns to the desired order.
        final_df = final_df[expected_cols]

        # Rename back to the exact (case-sensitive) names in final_columns.
        rename_mapping = {col.lower(): col for col in self.final_columns}
        final_df.rename(columns=rename_mapping, inplace=True)

        return final_df

    # ---------------------------------------------------------------
    def close_connection(self):
        """Close the underlying Snowpark session."""
        self.session.close()
|
Messaging_system/StoreLayer.py
ADDED
|
File without changes
|
Messaging_system/context_validator.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import time
|
| 3 |
+
import openai
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Validator:
    """
    LLM-based moderator that decides whether user-generated text is valid.

    Sends each text to OpenAI together with moderation instructions and
    expects a JSON answer of the form {"valid": "True"|"False"}.
    """

    def __init__(self, api_key):
        """
        :param api_key: OpenAI API key
        """
        # Assembled later via set_validator_instructions().
        self.validator_instructions = None
        self.api_key = api_key
        self.model = "gpt-4o-mini"

        # Token bookkeeping for rate limiting / cost estimation.
        self.temp_token_counter = 0
        self.total_tokens = {
            'prompt_tokens': 0,
            'completion_tokens': 0,
        }

    # -------------------------------------------------------------------
    def set_openai_api(self, openai_key):
        """
        Set (or replace) the OpenAI API key.

        :param openai_key: OpenAI API key string
        """
        # (Docstring fixed: the original was copied from an unrelated
        # template setter.)
        self.api_key = openai_key

    # -------------------------------------------------------------------
    def context_prompt(self):
        """System-role instructions describing the moderation task."""
        instructions = """
        You are a text moderator and you should parse the input text. based on below instructions. you should decide if
        the input text is a valid input or not.
        """
        return instructions

    # -------------------------------------------------------------------
    def initial_prompt(self):
        """Opening paragraph that frames the platform and the moderation task."""
        instructions = """You are a helpful assistant at Musora, an online music education platform that helps users
        learn music. Our students will provide user-generated-context such as comments and forums on engaging musical
        contents like songs, lessons, workouts or other type of musical and educational content. Your task is
        to determine if the input text provided by our student is a valid text or not.

        """
        return instructions

    # -------------------------------------------------------------------
    def set_validator_instructions(self, valid_instructions="", invalid_instructions=""):
        """
        Assemble and store the validation rules on self.validator_instructions.

        :param valid_instructions: caller-supplied criteria for valid text
            (the built-in defaults are appended)
        :param invalid_instructions: caller-supplied criteria for invalid
            text (the built-in firewall rules are appended)
        """
        instructions = f"""
        ** The text is INValid if it falls into any of the below criteria **:

        {invalid_instructions}
        {self.fire_wall()}
        --------------------------

        Please ensure that the text meets the following criteria to be considered **valid**:

        {valid_instructions}
        {self.default_valid_text()}
        """

        self.validator_instructions = instructions

    # -------------------------------------------------------------------
    def output_instruction(self):
        """
        :return: output instructions as a string
        """
        output_instructions = """
        ** Task: **
        - **Based on the input text, the music educational nature of our contents, and instructions about validating the student's input, check if the text is a valid input or not.**
        - **Your output should be strictly "True" if it is a Valid text, or "False" if it not a valid text.**
        - **You should provide the output in JSON format where the key is "valid"** - **Do not include any text outside the JSON code block**.

        Your response should be in JSON format with the following structure:

        example of a VALID text:

        {
            "valid": "True",
        }

        Example of an INVALID text:

        {
            "valid": "False",
        }
        """
        return output_instructions

    # -------------------------------------------------------------------
    def get_llm_response(self, prompt, max_retries=3):
        """
        Send the prompt to the LLM and return the parsed JSON response.

        Retries up to ``max_retries`` times on invalid JSON, missing/bad
        "valid" keys, or API errors.

        :param prompt: full user prompt to send
        :param max_retries: maximum number of attempts
        :return: dict containing a "valid" key. On total failure returns
            {"valid": "False"} so that callers indexing ["valid"] never
            crash. (BUG FIX: the original returned a tuple ``[], {}`` here,
            which broke validate_text and validate_dataframe.)
        """
        # Kept for any legacy code still using the module-level client.
        openai.api_key = self.api_key
        instructions = self.context_prompt()
        client = OpenAI(api_key=self.api_key)

        for attempt in range(max_retries):
            try:
                response = client.chat.completions.create(
                    model=self.model,
                    response_format={"type": "json_object"},
                    messages=[
                        {"role": "system", "content": instructions},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=500,
                    n=1,
                    temperature=0.7
                )

                tokens = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                try:
                    content = response.choices[0].message.content
                    # Parse the JSON code block.
                    output = json.loads(content)

                    if 'valid' not in output:
                        print(f"'valid' key is missing in response on attempt {attempt + 1}. Retrying...")
                        continue  # Continue to next attempt

                    if output["valid"] not in ["True", "False"]:
                        print(f"True or False value missing in response on attempt {attempt + 1}. Retrying...")
                        continue

                    # Valid answer: account for the tokens and return.
                    self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
                    self.total_tokens['completion_tokens'] += tokens['completion_tokens']
                    self.temp_token_counter += tokens['prompt_tokens'] + tokens['completion_tokens']
                    return output

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")

            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # an underlying Exception, likely raised within httpx.
            except openai.RateLimitError:
                print("A 429 status code was received; we should back off a bit.")
                # Actually back off before retrying (the original only printed).
                time.sleep(2 ** attempt)
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)

        print("Max retries exceeded. Returning failure response.")
        # BUG FIX: was `return [], {}` — a tuple, while every caller does
        # response["valid"]; treat an exhausted retry budget as invalid text.
        return {"valid": "False"}

    # -------------------------------------------------------------------
    def create_validation_prompt(self, input_text):
        """
        Wrap the input text with the framing, rules and output instructions.

        :param input_text: the student-provided text to validate
        :return: complete prompt string
        """
        prompt = f"""
        {self.initial_prompt()}

        **Input text provided by the Student:**
        {input_text}

        {self.validator_instructions}
        {self.output_instruction()}
        """

        return prompt

    # -------------------------------------------------------------------
    def validate_dataframe(self, dataframe, target_column, progress_callback=None):
        """
        Validate every row of ``dataframe[target_column]`` with the LLM.

        Writes the result ("True"/"False") into a new "valid" column and
        respects a ~200k tokens/minute budget by sleeping when needed.

        :param dataframe: input DataFrame (modified in place and returned)
        :param target_column: column holding the text to validate
        :param progress_callback: optional fn(progress, total) for UI updates
        :return: the updated dataframe
        """
        dataframe["valid"] = None
        start_time = time.time()
        total_users = len(dataframe)

        for progress, (idx, row) in enumerate(tqdm(dataframe.iterrows(), desc="generating prompts")):

            if progress_callback is not None:
                progress_callback(progress, total_users)
            input_text = row[target_column]
            prompt = self.create_validation_prompt(input_text)
            response = self.get_llm_response(prompt)
            dataframe.at[idx, "valid"] = response["valid"]

            delta = time.time() - start_time

            # BUG FIX: the original tested `delta >= 60`, which never sleeps
            # while still inside the one-minute window; we must pause exactly
            # when the token budget was burned in under a minute.
            if self.temp_token_counter > 195000 and delta < 60:  # safe margin under 200k/min
                print("Sleeping for 60 seconds to respect the token limit...")
                self.temp_token_counter = 0
                time.sleep(60)  # Sleep for a minute before making new requests
                start_time = time.time()

        return dataframe

    # -------------------------------------------------------------------
    def validate_text(self, text):
        """
        Validate a single text with the LLM.

        :param text: the text to validate
        :return: "True" or "False"
        """
        prompt = self.create_validation_prompt(text)
        response = self.get_llm_response(prompt)
        return response["valid"]

    # -------------------------------------------------------------------
    def fire_wall(self):
        """
        Provide explicit instructions to ensure that sensitive or inappropriate information is identified in the text.
        :return: string
        """
        fire_wall = """
        As a content moderator, please review the text and ensure it does not contain any of the following:

        **Disallowed Content Categories:**

        1. **Sensitive Personal Information**: personal data such as phone numbers, email addresses, or other identifying information.

        2. **Offensive or Discriminatory Language**: Hate speech, harassment, bullying, or any derogatory remarks targeting individuals or groups based on race, ethnicity, nationality, religion, gender, sexual orientation, age, disability, or any other characteristic.

        3. **Sensitive Topics**: Content that discusses or promotes extremist views, political propaganda, or divisive religious beliefs in a manner that could incite hostility.

        4. **Removed or Restricted Content**: Mentions of songs, media, or features that have been removed or are restricted on our platform.

        5. **Technical Issues or Bugs**: Any references to glitches, errors, crashes, or other technical problems experienced on the platform.

        6. ** Language that is excessively angry, aggressive, or includes profanity or vulgar expressions. **

        7. **Privacy Violations**: Sharing of confidential information or content that infringes on someone's privacy rights.

        8. **Intellectual Property Violations**: Unauthorized use or distribution of copyrighted material.

        9. **Defamation**: False statements presented as facts that harm the reputation of an individual or organization.

        **Examples of Invalid Content:**

        - "This app is useless and the developers are idiots!"
        - "They removed my favorite song; it sucks"
        - "People who follow [specific religion] are all wrong and should be banned."

        If the text contains any of the above issues, please flag it as invalid.

        """
        return fire_wall

    # -------------------------------------------------------------------
    def default_valid_text(self):
        """
        Provide explicit instructions to ensure that the text is appropriate and meets the content guidelines.
        :return: string
        """
        valid_text = """

        **Allowed Content Criteria:**

        1. **Positive Sentiment**: The text should be encouraging, uplifting, or convey a positive emotion.

        2. **Constructive and Helpful**: Provides valuable insights, advice, or shares personal experiences that could
        benefit others. This can be sharing struggling in practices, challenges or other type of difficulties that might need our attention.

        3. **Respectful Language**: Uses polite and appropriate language, fostering a friendly and inclusive community environment.

        **Examples of Valid Content:**

        - "I love how this app helps me discover new music every day!"
        - "Here's a tip: creating themed playlists can really enhance your listening experience."
        - "I had a great time using this feature during my commute today."
        - "This session is so challenging for me and I'm feeling so much pain in my foot, might go over the workout couple more"
        """
        return valid_text
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
|
Messaging_system/protection_layer.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
protection layer on top of the messaging system to make sure the messages are as expected.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import openai
|
| 8 |
+
from openai import OpenAI
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# -----------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
class ProtectionLayer:
|
| 16 |
+
"""
|
| 17 |
+
Protection layer to double check the generated message:
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, config_file, messaging_mode):
    """
    Initialize the protection layer.

    :param config_file: parsed messaging-system config; get_general_rules
        reads its "AI_Jargon" entry
    :param messaging_mode: messaging mode label stored for later use
        (its consumers are not visible in this part of the file)
    """
    self.config_file = config_file
    self.messaging_mode = messaging_mode

    # LLM configs
    # api_key falls back to the OPENAI_API environment variable (loaded via
    # dotenv at module import); may be None if unset.
    self.api_key = os.environ.get("OPENAI_API")  # will be set by user
    self.model = "gpt-4o-mini"  # will be set by user

    # to trace the number of tokens and estimate the cost if needed
    self.total_tokens = {
        'prompt_tokens': 0,
        'completion_tokens': 0,
    }
|
| 34 |
+
|
| 35 |
+
# --------------------------------------------------------------
|
| 36 |
+
def llm_instructions(self):
|
| 37 |
+
"""
|
| 38 |
+
Setting instructions for the LLM for the second pass.
|
| 39 |
+
"""
|
| 40 |
+
instructions = (
|
| 41 |
+
"You are a protection layer AI. Your task is to check the given JSON message "
|
| 42 |
+
"against specific rules. If it violates any rule, fix only those errors. If "
|
| 43 |
+
"it does not violate any rule, return it exactly as is. You must respond ONLY "
|
| 44 |
+
"with valid JSON in the specified structure, and no additional text."
|
| 45 |
+
)
|
| 46 |
+
return instructions
|
| 47 |
+
|
| 48 |
+
# --------------------------------------------------------------
|
| 49 |
+
def get_general_rules(self):
|
| 50 |
+
"""
|
| 51 |
+
Core rules to apply when checking or modifying the message.
|
| 52 |
+
"""
|
| 53 |
+
return f"""
|
| 54 |
+
1. No two consecutive sentences should end with exclamation points, change one of them to dot.
|
| 55 |
+
2. ONLY Capitalize the first word of the 'header' as well as names or any proper nouns. Other words in the 'header' must be lowercase. (e.g. Jump back in, David! 🥁)
|
| 56 |
+
3. If there is any grammar error in the message, you must fix it.
|
| 57 |
+
4. Always use "the" before proper nouns, including any titles of the recommended content, if the title was in the message.
|
| 58 |
+
5. Do not include any words that explicitly or implicitly reference a time-related concept (e.g., “new,” “recent,” “latest,” “upcoming,” etc.).
|
| 59 |
+
6. If the **Artist** name from the recommended content is referenced in the message, it MUST be the **FULL NAME**. If only the first name is available, ** DO NOT ** use the artist name at all.
|
| 60 |
+
7. If the message contains any AI_Jargon words (from below list) you MUST replace it with a more user-friendly synonym that makes sense.
|
| 61 |
+
AI_Jargon words are: {self.config_file["AI_Jargon"]}
|
| 62 |
+
|
| 63 |
+
8. Preserve the original JSON structure: {{"header": "...", "message": "..."}}
|
| 64 |
+
9. If no rule is violated, return the exact same JSON unchanged.
|
| 65 |
+
10. The output must be strictly valid JSON with no extra commentary or text.
|
| 66 |
+
"""
|
| 67 |
+
|
| 68 |
+
# --------------------------------------------------------------
|
| 69 |
+
def output_instruction(self):
|
| 70 |
+
"""
|
| 71 |
+
:return: output instructions as a string
|
| 72 |
+
"""
|
| 73 |
+
instructions = f"""
|
| 74 |
+
**You must output only valid JSON in the form:**
|
| 75 |
+
|
| 76 |
+
{{
|
| 77 |
+
"header": "Original header or modified version",
|
| 78 |
+
"message": "Original header or modified version"
|
| 79 |
+
}}
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
**Constraints:**
|
| 83 |
+
- The "header" must be less than 30 character.
|
| 84 |
+
- The "message" must be less than 100 character.
|
| 85 |
+
- No text is allowed outside this JSON structure.\n"
|
| 86 |
+
"""
|
| 87 |
+
|
| 88 |
+
return instructions
|
| 89 |
+
|
| 90 |
+
# --------------------------------------------------------------
|
| 91 |
+
def get_llm_response(self, prompt, max_retries=3):
|
| 92 |
+
"""
|
| 93 |
+
sending the prompt to the LLM and get back the response
|
| 94 |
+
"""
|
| 95 |
+
|
| 96 |
+
openai.api_key = self.api_key
|
| 97 |
+
instructions = self.llm_instructions()
|
| 98 |
+
client = OpenAI(api_key=self.api_key)
|
| 99 |
+
|
| 100 |
+
for attempt in range(max_retries):
|
| 101 |
+
try:
|
| 102 |
+
response = client.chat.completions.create(
|
| 103 |
+
model=self.model,
|
| 104 |
+
response_format={"type": "json_object"},
|
| 105 |
+
messages=[
|
| 106 |
+
{"role": "system", "content": instructions},
|
| 107 |
+
{"role": "user", "content": prompt}
|
| 108 |
+
],
|
| 109 |
+
max_tokens=500,
|
| 110 |
+
n=1,
|
| 111 |
+
temperature=0.5
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
tokens = {
|
| 115 |
+
'prompt_tokens': response.usage.prompt_tokens,
|
| 116 |
+
'completion_tokens': response.usage.completion_tokens,
|
| 117 |
+
'total_tokens': response.usage.total_tokens
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
try:
|
| 121 |
+
content = response.choices[0].message.content
|
| 122 |
+
# Extract JSON code block
|
| 123 |
+
|
| 124 |
+
output = json.loads(content)
|
| 125 |
+
# output = json.loads(response.choices[0].message.content)
|
| 126 |
+
|
| 127 |
+
if 'message' not in output or 'header' not in output:
|
| 128 |
+
print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
|
| 129 |
+
continue # Continue to next attempt
|
| 130 |
+
|
| 131 |
+
else:
|
| 132 |
+
if len(output["header"].strip()) > self.config_file["header_limit"] or len(output["message"].strip()) > self.config_file["message_limit"]:
|
| 133 |
+
print(f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
|
| 134 |
+
continue
|
| 135 |
+
|
| 136 |
+
# validating the JSON
|
| 137 |
+
self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
|
| 138 |
+
self.total_tokens['completion_tokens'] += tokens['completion_tokens']
|
| 139 |
+
return output
|
| 140 |
+
|
| 141 |
+
except json.JSONDecodeError:
|
| 142 |
+
print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
|
| 143 |
+
|
| 144 |
+
except openai.APIConnectionError as e:
|
| 145 |
+
print("The server could not be reached")
|
| 146 |
+
print(e.__cause__) # an underlying Exception, likely raised within httpx.
|
| 147 |
+
except openai.RateLimitError as e:
|
| 148 |
+
print("A 429 status code was received; we should back off a bit.")
|
| 149 |
+
except openai.APIStatusError as e:
|
| 150 |
+
print("Another non-200-range status code was received")
|
| 151 |
+
print(e.status_code)
|
| 152 |
+
print(e.response)
|
| 153 |
+
|
| 154 |
+
print("Max retries exceeded. Returning empty response.")
|
| 155 |
+
return [], {}
|
| 156 |
+
|
| 157 |
+
# --------------------------------------------------------------
|
| 158 |
+
def get_context(self):
|
| 159 |
+
"""
|
| 160 |
+
context for the LLM
|
| 161 |
+
:return: the context string
|
| 162 |
+
"""
|
| 163 |
+
context = (
|
| 164 |
+
"We created a personalized message for a user "
|
| 165 |
+
"considering the provided information. Your task is to double-check "
|
| 166 |
+
"the message and correct or improve the output, according to instructions."
|
| 167 |
+
)
|
| 168 |
+
return context
|
| 169 |
+
|
| 170 |
+
# --------------------------------------------------------------
|
| 171 |
+
def generate_prompt(self, message, user):
|
| 172 |
+
"""
|
| 173 |
+
generating the prompt for criticizing
|
| 174 |
+
:param query: input query
|
| 175 |
+
:param message: llm response
|
| 176 |
+
:return: new prompt
|
| 177 |
+
"""
|
| 178 |
+
recommended_content = ""
|
| 179 |
+
if self.messaging_mode == "recsys_result":
|
| 180 |
+
recommended_content = f"""
|
| 181 |
+
### ** Recommended Content **
|
| 182 |
+
{user['recommendation_info']}
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
+
prompt = f"""
|
| 186 |
+
|
| 187 |
+
### System Instruction:
|
| 188 |
+
{self.llm_instructions()}
|
| 189 |
+
|
| 190 |
+
### Context:
|
| 191 |
+
We created a personalized message for a user based on available information.
|
| 192 |
+
Your job is to check the message and correct only if it violates rules. Otherwise, leave it unchanged.
|
| 193 |
+
|
| 194 |
+
### Original JSON Message:
|
| 195 |
+
{message}
|
| 196 |
+
|
| 197 |
+
{recommended_content}
|
| 198 |
+
|
| 199 |
+
### Rules:
|
| 200 |
+
{self.get_general_rules()}
|
| 201 |
+
|
| 202 |
+
### Output Requirements:
|
| 203 |
+
{self.output_instruction()}
|
| 204 |
+
"""
|
| 205 |
+
return prompt
|
| 206 |
+
|
| 207 |
+
# --------------------------------------------------------------
|
| 208 |
+
def criticize(self, message, user):
|
| 209 |
+
"""
|
| 210 |
+
criticize the llm response by using additional layer of query
|
| 211 |
+
:return: updated users_df with extracted information and personalize messages.
|
| 212 |
+
"""
|
| 213 |
+
|
| 214 |
+
prompt = self.generate_prompt(message, user)
|
| 215 |
+
response = self.get_llm_response(prompt)
|
| 216 |
+
|
| 217 |
+
return response, self.total_tokens
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|
Messaging_system/sending_time.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
calculating sending time for each individual user
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
from snowflake.snowpark import Session
|
| 7 |
+
import json
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import os
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
class PersonalizedTime:
    """
    Calculate the best time to send a message for each individual user.
    """

    def calculate_sending_time(self):
        """
        Fetch each user's activity data from Snowflake.

        NOTE(review): the per-user time computation itself is not implemented
        yet — this currently only fetches the raw activity data.

        :return: pandas DataFrame of user activity, or None when the read fails.
        """
        session = self.snowflake_connection()
        users_activity = self.fetch_users_time(session)
        # Return the fetched data so callers can use it (previously discarded).
        return users_activity

    def fetch_users_time(self, session):
        """
        Fetch the user's activity data via the given Snowpark session.

        :param session: an open snowflake.snowpark.Session.
        :return: pandas DataFrame with the query result, or None on failure.
        """
        query = self.get_query()

        # Connect to Snowflake and run the query
        try:
            rows = session.sql(query).collect()
            dataframe = pd.DataFrame(rows)
            print("reading content table successfully")
            return dataframe
        except Exception as e:
            print(f"Error in reading table: {e}")

    def get_query(self):
        """
        Build the SQL query used to fetch user activity.

        NOTE(review): the query body is still an empty placeholder — fill it
        in before running against Snowflake.

        :return: the SQL query string.
        """
        query = """

        """
        # BUG FIX: the query string was never returned, so fetch_users_time()
        # received None and session.sql(None) would fail.
        return query

    def snowflake_connection(self):
        """
        Create a Snowpark session from environment credentials.

        :return: an open snowflake.snowpark.Session.
        """
        conn = {
            "user": os.getenv('snowflake_user'),
            "password": os.getenv('snowflake_password'),
            "account": os.getenv('snowflake_account'),
            "role": os.getenv('snowflake_role'),
            "database": os.getenv('snowflake_database'),
            "warehouse": os.getenv('snowflake_warehouse'),
            "schema": os.getenv('snowflake_schema'),
        }

        session = Session.builder.configs(conn).create()
        return session
README.md
CHANGED
|
@@ -12,9 +12,5 @@ short_description: 'UI for AI Messaging system '
|
|
| 12 |
license: apache-2.0
|
| 13 |
---
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 18 |
-
|
| 19 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 20 |
-
forums](https://discuss.streamlit.io).
|
|
|
|
| 12 |
license: apache-2.0
|
| 13 |
---
|
| 14 |
|
| 15 |
+
AI messaging system UI
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json, os
|
| 2 |
+
from io import StringIO
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from snowflake.snowpark import Session
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
|
| 8 |
+
from Messaging_system.Permes import Permes
|
| 9 |
+
from Messaging_system.context_validator import Validator
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 14 |
+
# Helpers
|
| 15 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 16 |
+
@st.cache_data
def load_data(buf) -> pd.DataFrame:
    """Parse the uploaded CSV buffer into a DataFrame (cached by Streamlit)."""
    frame = pd.read_csv(buf)
    return frame
|
| 19 |
+
|
| 20 |
+
def load_config_(file_path: str) -> dict:
    """Load a JSON config file.

    :param file_path: path to the JSON configuration file.
    :return: parsed configuration dict.
    """
    # Explicit encoding so parsing does not depend on the platform default.
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
|
| 23 |
+
|
| 24 |
+
def get_credential(key):
    """Look up a credential in Streamlit secrets first, then the environment."""
    secret_value = st.secrets.get(key)
    if secret_value:
        return secret_value
    return os.getenv(key)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def init_state() -> None:
    """Seed st.session_state with default values (only for keys not yet set)."""
    defaults = {
        "involve_recsys_result": False,
        "involve_last_interaction": False,
        "valid_instructions": "",
        "invalid_instructions": "",
        "messaging_type": "push",
        "generated": False,
        "include_recommendation": False,
        "data": None, "brand": None, "recsys_contents": [], "csv_output": None,
        "users_message": None, "messaging_mode": None, "target_column": None,
        "ugc_column": None, "identifier_column": None, "input_validator": None,
        "selected_input_features": None, "selected_features": None,
        "additional_instructions": None, "segment_info": "", "message_style": "",
        "sample_example": "", "CTA": "", "all_features": None, "number_of_messages": 1,
        "instructionset": {}, "segment_name": "", "number_of_samples": 20,
        "selected_source_features": [], "platform": None, "generate_clicked": False,
    }
    for key, value in defaults.items():
        # setdefault leaves any value the user already produced untouched
        st.session_state.setdefault(key, value)
|
| 48 |
+
|
| 49 |
+
# ──────────────────────────────────────────────────────────────────────────────
# PAGE CONFIG + THEME
# ──────────────────────────────────────────────────────────────────────────────
# Global page settings — Streamlit requires this to be the first st.* call.
st.set_page_config(
    page_title="Personalized Message Generator",
    page_icon="📬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Gold-on-black theme injected as raw CSS; unsafe_allow_html is required so
# Streamlit renders the <style> tag instead of escaping it.
st.markdown(
    """
    <style>
    html, body, [class*="css"] {
        background-color:#0d0d0d;
        color:#ffd700;
    }
    .stButton>button, .stDownloadButton>button {
        border-radius:8px;
        background:#ffd700;
        color:#0d0d0d;
        font-weight:600;
    }
    .stTabs [data-baseweb="tab"] {
        font-weight:600;
    }
    .stTabs [aria-selected="true"] {
        color:#ffd700;
    }
    h1, h2, h3 {color:#ffd700;}
    .small {font-size:0.85rem; opacity:0.7;}
    </style>
    """,
    unsafe_allow_html=True
)
|
| 84 |
+
|
| 85 |
+
# ──────────────────────────────────────────────────────────────────────────────
# SIDEBAR – the “control panel”
# ──────────────────────────────────────────────────────────────────────────────
# All generation parameters are collected here; widgets with a key= argument
# write their value directly into st.session_state.
init_state()
with st.sidebar:
    st.header("📂 Upload your CSV")
    uploaded_file = st.file_uploader("Choose file", type="csv")
    if uploaded_file:
        # load_data is cached, so re-runs with the same file are cheap
        st.session_state.data = load_data(uploaded_file)
        st.success("File loaded!")

    st.markdown("---")

    # Parameter widgets only appear once a CSV has been loaded
    if st.session_state.data is not None:
        # ─ Identifier
        id_col = st.selectbox(
            "Identifier column",
            st.session_state.data.columns,
            key="identifier_column"
        )

        # ─ Brand
        st.selectbox(
            "Brand",
            ["drumeo", "pianote", "guitareo", "singeo"],
            key="brand"
        )

        # ─ Personalisation (fields marked * are validated before generation)
        st.text_area("Segment info *", key="segment_info")
        st.text_area("CTA *", key="CTA")
        with st.expander("🔧 Optional tone & examples"):
            st.text_area("Message style", key="message_style",
                         placeholder="Be kind and friendly…")
            st.text_area("Additional instructions", key="additional_instructions",
                         placeholder="e.g. Mention the number weeks since their last practice")
            st.text_area("Sample example", key="sample_example",
                         placeholder="Hello! We have crafted…")
            st.number_input("Number of samples", 1, 100, 20,
                            key="number_of_samples")

        # ─ Sequential messages
        st.number_input("Sequential messages / user", 1, 10, 1,
                        key="number_of_messages")
        st.text_input("Segment name", key="segment_name",
                      placeholder="no_recent_activity")
        if st.session_state.number_of_messages > 1:
            # One free-text instruction per message in the sequence
            st.caption("Additional per-message instructions")
            for i in range(1, st.session_state.number_of_messages + 1):
                st.text_input(f"Message {i} instruction",
                              key=f"instr_{i}")

        # ─ Source feature selection
        st.multiselect(
            "Source features",
            ["instrument", "weeks_since_last_interaction",
             "birthday_reminder"],
            default=["instrument"],
            key="selected_source_features"
        )

        # ─ Rec-sys
        st.checkbox("Include content recommendation", key="include_recommendation")
        if st.session_state.include_recommendation:
            st.multiselect(
                "Recommendation types",
                ["song", "workout", "quick_tips", "course"],
                key="recsys_contents"
            )

    st.markdown("---")
    # The button value is True only on the run triggered by the click
    generate = st.button("🚀 Generate messages")
    st.session_state["generate_clicked"] = generate
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ──────────────────────────────────────────────────────────────────────────────
# MAIN AREA – three tabs
# ──────────────────────────────────────────────────────────────────────────────
tab0, tab1, tab2 = st.tabs(
    ["📊 Data preview", "🛠️ Configure", "📨 Results"])

# ------------------------------------------------------------------ TAB 0 ---#
with tab0:
    st.header("📊 Data preview")
    if st.session_state.data is not None:
        # Cap the preview so huge CSVs don't stall the UI
        st.dataframe(st.session_state.data.head(100))
    else:
        st.info("Upload a CSV to preview it here.")

# ------------------------------------------------------------------ TAB 1 ---#
with tab1:
    st.header("🛠️ Configure & launch")
    if st.session_state.data is None:
        st.warning("Upload a CSV first ⬅")
    elif not generate:
        st.info("Adjust settings in the sidebar, then hit *Generate*.")
    else:
        st.success("Parameters captured – see **Results** tab.")

# ------------------------------------------------------------------ TAB 2 ---#
with tab2:
    st.header("📨 Generated messages")
    # Run generation only once per click ("generated" latches until reset)
    if st.session_state.generate_clicked and not st.session_state.generated:

        # ─ simple validation of the mandatory fields
        if not st.session_state.CTA.strip() or not st.session_state.segment_info.strip():
            st.error("CTA and Segment info are mandatory 🚫")
            st.stop()

        # ─ build Snowflake session from secrets/env credentials
        conn = dict(
            user=get_credential("snowflake_user"),
            password=get_credential("snowflake_password"),
            account=get_credential("snowflake_account"),
            role=get_credential("snowflake_role"),
            database=get_credential("snowflake_database"),
            warehouse=get_credential("snowflake_warehouse"),
            schema=get_credential("snowflake_schema")
        )
        config = load_config_("Config_files/message_system_config.json")
        session = Session.builder.configs(conn).create()

        # ─ prepare parameters
        st.session_state.messaging_mode = (
            "recsys_result" if st.session_state.include_recommendation
            else "message"
        )
        st.session_state.involve_recsys_result = st.session_state.include_recommendation
        # Collect only the non-empty per-message instructions
        st.session_state.instructionset = {
            i: st.session_state.get(f"instr_{i}")
            for i in range(1, st.session_state.number_of_messages + 1)
            if st.session_state.get(f"instr_{i}", "").strip()
        }

        # ─ progress callback wired into the generator
        prog = st.progress(0)
        status = st.empty()

        def cb(done, total):
            # Called by Permes as users are processed; updates the progress bar.
            pct = int(done / total * 100)
            prog.progress(pct)
            status.write(f"{pct}%")

        permes = Permes()
        df_msg = permes.create_personalize_messages(
            session=session,
            users=st.session_state.data,
            brand=st.session_state.brand,
            config_file=config,
            openai_api_key=get_credential("OPENAI_API"),
            CTA=st.session_state.CTA,
            segment_info=st.session_state.segment_info,
            number_of_samples=st.session_state.number_of_samples,
            message_style=st.session_state.message_style,
            sample_example=st.session_state.sample_example,
            selected_input_features=st.session_state.selected_features,
            selected_source_features=st.session_state.selected_source_features,
            additional_instructions=st.session_state.additional_instructions,
            platform=st.session_state.messaging_type,
            involve_recsys_result=st.session_state.involve_recsys_result,
            messaging_mode=st.session_state.messaging_mode,
            identifier_column=st.session_state.identifier_column,
            target_column=st.session_state.target_column,
            recsys_contents=st.session_state.recsys_contents,
            progress_callback=cb,
            number_of_messages=st.session_state.number_of_messages,
            instructionset=st.session_state.instructionset,
            segment_name=st.session_state.segment_name
        )

        # ─ cache output so later reruns can show results without regenerating
        st.session_state.users_message = df_msg
        st.session_state.csv_output = df_msg.to_csv(
            index=False, encoding="utf-8-sig")
        st.session_state.generated = True
        prog.empty(); status.empty()
        st.balloons()

    # -------- show results (if any)
    if st.session_state.generated:
        df = st.session_state.users_message
        id_col = st.session_state.identifier_column

        # expandable per-user cards
        # NOTE(review): row[id_col.lower()] assumes the result DataFrame has
        # lowercased column names — confirm against Permes' output schema.
        for i, (_, row) in enumerate(df.iterrows(), 1):
            with st.expander(f"{i}. User ID: {row[id_col.lower()]}", expanded=(i == 1)):
                st.write("##### 👤 Features")
                feats = st.session_state.selected_source_features
                cols = st.columns(3)
                for idx, f in enumerate(feats):
                    cols[idx % 3].markdown(f"**{f}**: {row.get(f, '—')}")

                st.markdown("---")
                st.write("##### ✉️ Messages")
                try:
                    # "message" holds JSON: either a list of messages or a dict
                    # with a "messages_sequence" list.
                    blob = json.loads(row["message"])
                    seq = (blob.get("messages_sequence", blob)
                           if isinstance(blob, dict) else blob)

                    for j, msg in enumerate(seq, 1):
                        st.markdown(f"**{j}. {msg.get('header', '(no header)')}**")
                        thumb = (msg.get("thumbnail_url")  # per-message
                                 or row.get("thumbnail_url"))  # per-user fallback
                        if thumb:
                            st.image(thumb, width=150)
                        # ---------------------------------------------------------

                        st.markdown(msg.get("message", ""))
                        st.markdown(f"[Read more]({msg.get('web_url_path', '#')})")
                        st.markdown("---")

                except Exception as e:
                    st.error(f"Failed to parse JSON: {e}")
|
| 300 |
+
|
local_llm/LocalLM.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import time
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import ollama
|
| 6 |
+
|
| 7 |
+
class LocalLM:
    """
    Thin wrapper around a local Ollama model that returns validated JSON.
    """

    def __init__(self, model):
        """
        :param model: name of the Ollama model to query.
        """
        # Initialize the Ollama client
        self.client = ollama.Client()
        self.model = model

    def preprocess_and_parse_json(self, response):
        """
        Strip an optional ```json fenced block and parse the result.

        :param response: raw text returned by the model.
        :return: parsed JSON object, or None when parsing fails.
        """
        cleaned_response = response.strip()
        # BUG FIX: the original only assigned cleaned_response inside the
        # fenced-block branch, so a plain (unfenced) JSON reply raised
        # NameError instead of being parsed.
        if cleaned_response.startswith('```json') and cleaned_response.endswith('```'):
            cleaned_response = cleaned_response[len('```json'):-len('```')].strip()

        # Parse the cleaned response into a JSON object
        try:
            return json.loads(cleaned_response)
        except json.JSONDecodeError as e:
            print(f"Failed to parse JSON: {e}")
            return None

    def get_llm_response(self, prompt, mode, max_retries=10):
        """
        Send the prompt to the LLM and get back the response.
        Includes handling for GPU memory issues by clearing cache and waiting before retry.

        :param prompt: prompt string for the model.
        :param mode: validation mode; only "rating" is supported (the parsed
                     dict must map integer-like keys to integer-like values).
        :param max_retries: maximum number of attempts before giving up.
        :return: (parsed output, token-count dict); ([], {}) after exhausting
                 retries, (None, tokens) for an invalid mode.
        """
        for attempt in range(max_retries):
            try:
                # Try generating the response
                response = self.client.generate(model=self.model, prompt=prompt)
            except Exception as e:
                # This catches errors like the connection being forcibly closed
                print(f"Error on attempt {attempt + 1}: {e}.")
                try:
                    # Clear GPU cache if you're using PyTorch; this may help free up memory
                    torch.cuda.empty_cache()
                    print("Cleared GPU cache.")
                except Exception as cache_err:
                    print("Failed to clear GPU cache:", cache_err)
                # Wait a bit before retrying to allow memory to recover
                time.sleep(2)
                continue

            try:
                # Ollama does not report token usage here; keep the shape the
                # callers expect with zeroed counts.
                tokens = {
                    'prompt_tokens': 0,
                    'completion_tokens': 0,
                    'total_tokens': 0
                }

                try:
                    output = self.preprocess_and_parse_json(response.response)
                    if output is None:
                        continue

                    if mode == "rating":
                        # Check if all keys and values are integers (or convertible to integers)
                        all_int = True
                        for k, v in output.items():
                            try:
                                int(k)
                                int(v)
                            except ValueError:
                                all_int = False
                                break
                        if all_int:
                            return output, tokens
                        else:
                            print(f"Keys and values are not integers on attempt {attempt + 1}. Retrying...")
                            continue  # Continue to next attempt
                    else:
                        print(f"Invalid mode: {mode}")
                        return None, tokens

                except json.JSONDecodeError:
                    print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
            except Exception as parse_error:
                print("Error processing output:", parse_error)

        print("Max retries exceeded. Returning empty response.")
        return [], {}
|
requirements.txt
ADDED
|
Binary file (6.39 kB). View file
|
|
|