Messaging_system_variant_B

Sleeping

App Files Files Community

Danialebrat committed on May 20, 2025

Commit

514a1ba

1 Parent(s): 29b98c7

- Adding For you section as default

Browse files

Files changed (9) hide show

Data/not_active_drumeo_camp.csv +0 -0
Messaging_system/Homepage_Recommender.py +32 -0
Messaging_system/LLMR.py +6 -2
Messaging_system/Message_generator.py +3 -3
Messaging_system/Permes.py +7 -1
Messaging_system/PromptGenerator.py +19 -3
app.py +73 -30
app_V1.py +527 -0
messaging_main_test.py +205 -0

Data/not_active_drumeo_camp.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Messaging_system/Homepage_Recommender.py ADDED Viewed

	@@ -0,0 +1,32 @@

+"""
+This class is a Default recommender that redirect the user to For You Section.
+"""
+# -----------------------------------------------------------------------
+class DefaultRec:
+    def __init__(self, CoreConfig):
+        self.Core = CoreConfig
+        self.user = None
+        self.for_you_url = f"https://www.musora.com/{self.Core.brand.lower()}/lessons/recommended"
+        self.recommendation = "for_you"
+        self.recommendation_info = "Redirecting user to their personalized Recommendations"
+    def get_recommendations(self):
+        """
+        selecting the recommended content for each user
+        :return:
+        """
+        self.Core.users_df["recommendation"] = self.recommendation
+        self.Core.users_df["recommendation_info"] = self.recommendation_info
+        self.Core.users_df["recsys_result"] = self.for_you_url # URL to for you section
+        return self.Core.users_df

Messaging_system/LLMR.py CHANGED Viewed

@@ -11,7 +11,7 @@ from dotenv import load_dotenv
 import time
 import streamlit as st
 from tqdm import tqdm
 load_dotenv()
@@ -29,6 +29,7 @@ class LLMR:
         selecting the recommended content for each user
         :return:
         """
         self.Core.users_df["recommendation"] = None
         self.Core.users_df["recommendation_info"] = None
@@ -48,7 +49,10 @@ class LLMR:
             content_id, content_info, recsys_json, token = self._get_recommendation()
             if content_id is None:  # error in selecting a content to recommend
-                continue
             else:
                 # updating tokens

 import time
 import streamlit as st
 from tqdm import tqdm
+from Messaging_system.Homepage_Recommender import DefaultRec
 load_dotenv()
         selecting the recommended content for each user
         :return:
         """
+        default = DefaultRec(self.Core)
         self.Core.users_df["recommendation"] = None
         self.Core.users_df["recommendation_info"] = None
             content_id, content_info, recsys_json, token = self._get_recommendation()
             if content_id is None:  # error in selecting a content to recommend
+                self.Core.users_df.at[idx, "recommendation"] = default.recommendation
+                self.Core.users_df.at[idx, "recommendation_info"] = default.recommendation_info
+                self.Core.users_df.at[idx, "recsys_result"] = default.for_you_url
             else:
                 # updating tokens

Messaging_system/Message_generator.py CHANGED Viewed

@@ -103,7 +103,7 @@ class MessageGenerator:
         :param user: The user row
         :return: Parsed and enriched output as a JSON object
         """
-        if self.Core.involve_recsys_result:
             output_message = self.fetch_recommendation_data(user, message)
         elif self.Core.messaging_mode == "recommend_playlist":
             # adding playlist url to the message
@@ -119,11 +119,11 @@ class MessageGenerator:
                 }
         else:
-            # Only "message" is expected when involve_recsys_result is False and we are not recommending any other content from input
             if "message" not in message or "header" not in message:
                 print("LLM output is missing 'message'.")
                 return None
-            output_message = {"header": message["header"], "message": message["message"]}
         return json.dumps(output_message, ensure_ascii=False)

         :param user: The user row
         :return: Parsed and enriched output as a JSON object
         """
+        if self.Core.messaging_mode == "recsys_result":
             output_message = self.fetch_recommendation_data(user, message)
         elif self.Core.messaging_mode == "recommend_playlist":
             # adding playlist url to the message
                 }
         else:
+            # Only "message" is expected when messaging mode is message and we are not recommending any other content from input
             if "message" not in message or "header" not in message:
                 print("LLM output is missing 'message'.")
                 return None
+            output_message = {"header": message["header"], "message": message["message"], "web_url_path": user["recsys_result"]}
         return json.dumps(output_message, ensure_ascii=False)

Messaging_system/Permes.py CHANGED Viewed

@@ -12,6 +12,7 @@ import streamlit as st
 from Messaging_system.Message_generator import MessageGenerator
 from Messaging_system.PromptGenerator import PromptGenerator
 from Messaging_system.SnowFlakeConnection import SnowFlakeConn
@@ -135,10 +136,15 @@ class Permes:
         CoreConfig = datacollect.gather_data()
         # generating recommendations for users, if we want to include recommendations in the message
-        if CoreConfig.involve_recsys_result:
             Recommender = LLMR(CoreConfig)
             CoreConfig = Recommender.get_recommendations(progress_callback)
         # generating proper prompt for each user
         prompt = PromptGenerator(CoreConfig)
         CoreConfig = prompt.generate_prompts()

 from Messaging_system.Message_generator import MessageGenerator
 from Messaging_system.PromptGenerator import PromptGenerator
 from Messaging_system.SnowFlakeConnection import SnowFlakeConn
+from Messaging_system.Homepage_Recommender import DefaultRec
         CoreConfig = datacollect.gather_data()
         # generating recommendations for users, if we want to include recommendations in the message
+        if CoreConfig.involve_recsys_result and CoreConfig.messaging_mode != "message":
             Recommender = LLMR(CoreConfig)
             CoreConfig = Recommender.get_recommendations(progress_callback)
+        else:
+            # We only want to generate the message and redirect them to For You section or Homepage
+            Recommender = DefaultRec(CoreConfig)
+            CoreConfig = Recommender.get_recommendations()
         # generating proper prompt for each user
         prompt = PromptGenerator(CoreConfig)
         CoreConfig = prompt.generate_prompts()

Messaging_system/PromptGenerator.py CHANGED Viewed

@@ -62,11 +62,13 @@ class PromptGenerator:
         context = self.input_context()
         cta = self.CTA_instructions()
-        if self.Core.involve_recsys_result or self.Core.target_content is not None:
             if user["recommendation"] is not None or user["recommendation_info"] is not None:
                 recommendations_instructions = self.recommendations_instructions(user=user) + "\n"
         else:
-            recommendations_instructions = ""
         user_info = self.get_user_profile(user=user)
@@ -413,7 +415,7 @@ class PromptGenerator:
         :return:
         """
-        if self.Core.involve_recsys_result:
             recsys_task = """
     - Create a perfect message and the header following the instructions, using the user's information and the content that we want to recommend.
     - Use the instructions to include the recommended content in the message.
@@ -432,3 +434,17 @@ class PromptGenerator:
     """
         return instructions

         context = self.input_context()
         cta = self.CTA_instructions()
+        if (self.Core.involve_recsys_result and self.Core.messaging_mode !="message") or self.Core.target_content is not None:
             if user["recommendation"] is not None or user["recommendation_info"] is not None:
                 recommendations_instructions = self.recommendations_instructions(user=user) + "\n"
+            else:
+                recommendations_instructions = self.redirect_to_for_you()
         else:
+            recommendations_instructions = self.redirect_to_for_you()
         user_info = self.get_user_profile(user=user)
         :return:
         """
+        if self.Core.involve_recsys_result and self.Core.messaging_mode != "message":
             recsys_task = """
     - Create a perfect message and the header following the instructions, using the user's information and the content that we want to recommend.
     - Use the instructions to include the recommended content in the message.
     """
         return instructions
+    # =======================================================
+    def redirect_to_for_you(self):
+        """
+        instructions to redirect the user to For you section
+        :return:
+        """
+        instructions = f"""
+        ** Note: **
+        We don't recommend a specific conten and by opening the message, the user will be redirected to a page where we have personalized content recommendations for them.
+        \n
+        """
+        return instructions

app.py CHANGED Viewed

@@ -105,14 +105,14 @@ with st.sidebar:
         # ─ Brand
         st.selectbox(
-            "Brand",
             ["drumeo", "pianote", "guitareo", "singeo"],
-            key="brand"
         )
         # ─ Personalisation
         st.text_area("Segment info *", key="segment_info")
-        st.text_area("CTA *", key="CTA")
         with st.expander("🔧 Optional tone & examples"):
             st.text_area("Message style", key="message_style",
                          placeholder="Be kind and friendly…")
@@ -188,8 +188,8 @@ with tab2:
     if st.session_state.generate_clicked and not st.session_state.generated:
         # ─ simple validation
-        if not st.session_state.CTA.strip() or not st.session_state.segment_info.strip():
-            st.error("CTA and Segment info are mandatory 🚫")
             st.stop()
         # ─ build Snowflake session
@@ -261,40 +261,83 @@ with tab2:
         prog.empty(); status.empty()
         st.balloons()
-    # -------- show results (if any)
-    # -------- show results (if any)
     if st.session_state.generated:
         df = st.session_state.users_message
-        id_col = st.session_state.identifier_column
         # expandable per-user cards
-        for i, (_, row) in enumerate(df.iterrows(), 1):
-            with st.expander(f"{i}. User ID: {row[id_col.lower()]}", expanded=(i == 1)):
                 st.write("##### 👤 Features")
-                feats = st.session_state.selected_source_features
                 cols = st.columns(3)
-                for idx, f in enumerate(feats):
-                    cols[idx % 3].markdown(f"**{f}**: {row.get(f, '—')}")
                 st.markdown("---")
                 st.write("##### ✉️ Messages")
-                try:
-                    blob = json.loads(row["message"])
-                    seq = (blob.get("messages_sequence", blob)
-                           if isinstance(blob, dict) else blob)
-                    for j, msg in enumerate(seq, 1):
-                        st.markdown(f"**{j}. {msg.get('header', '(no header)')}**")
-                        thumb = (msg.get("thumbnail_url")  # per-message
-                                 or row.get("thumbnail_url"))  # per-user fallback
-                        if thumb:
-                            st.image(thumb, width=150)
-                        # ---------------------------------------------------------
-                        st.markdown(msg.get("message", ""))
-                        st.markdown(f"[Read more]({msg.get('web_url_path', '#')})")
                         st.markdown("---")
-                except Exception as e:
-                    st.error(f"Failed to parse JSON: {e}")

         # ─ Brand
         st.selectbox(
+            "Brand *",
             ["drumeo", "pianote", "guitareo", "singeo"],
+            key="brand",
         )
         # ─ Personalisation
         st.text_area("Segment info *", key="segment_info")
+        st.text_area("CTA (Call to Action) *", key="CTA")
         with st.expander("🔧 Optional tone & examples"):
             st.text_area("Message style", key="message_style",
                          placeholder="Be kind and friendly…")
     if st.session_state.generate_clicked and not st.session_state.generated:
         # ─ simple validation
+        if not st.session_state.CTA.strip() or not st.session_state.segment_info.strip() or not st.session_state.brand.strip():
+            st.error("CTA, Segment info, and brand are mandatory 🚫")
             st.stop()
         # ─ build Snowflake session
         prog.empty(); status.empty()
         st.balloons()
+    # =============================================================
     if st.session_state.generated:
         df = st.session_state.users_message
+        id_col = st.session_state.identifier_column or ""
+        id_col_lower = id_col.lower()
         # expandable per-user cards
+        for i, (_, row) in enumerate(df.iterrows(), start=1):
+            user_id = row.get(id_col_lower, "(no ID)")
+            with st.expander(f"{i}. User ID: {user_id}", expanded=(i == 1)):
+                # --- Features
                 st.write("##### 👤 Features")
+                feats = st.session_state.selected_source_features or []
                 cols = st.columns(3)
+                for idx, feature in enumerate(feats):
+                    val = row.get(feature, "—")
+                    cols[idx % 3].markdown(f"**{feature}**: {val}")
                 st.markdown("---")
+                # --- Messages
                 st.write("##### ✉️ Messages")
+                raw = row.get("message", "")
+                # try to parse JSON if it's a str
+                if isinstance(raw, str):
+                    try:
+                        blob = json.loads(raw)
+                    except json.JSONDecodeError:
+                        st.error(f"Could not parse JSON for user {user_id}")
+                        continue
+                elif isinstance(raw, dict) or isinstance(raw, list):
+                    blob = raw
+                else:
+                    blob = {}
+                # extract sequence
+                if isinstance(blob, dict):
+                    seq = blob.get("messages_sequence", [])
+                elif isinstance(blob, list):
+                    seq = blob
+                else:
+                    seq = []
+                # make sure it's a list
+                if not isinstance(seq, list):
+                    seq = [seq]
+                # render each message
+                for j, msg in enumerate(seq, start=1):
+                    if not isinstance(msg, dict):
+                        # if it's just a string or number, render it plainly
+                        st.markdown(f"**{j}. (no header)**")
+                        st.markdown(str(msg))
                         st.markdown("---")
+                        continue
+                    header = msg.get("header", "(no header)")
+                    st.markdown(f"**{j}. {header}**")
+                    # optional title
+                    title = msg.get("title")
+                    if title:
+                        st.markdown(f"**Title:** {title}")
+                    # thumbnail (per-message or fallback per-user)
+                    thumb = msg.get("thumbnail_url") or row.get("thumbnail_url")
+                    if thumb:
+                        st.image(thumb, width=150)
+                    # the main message body
+                    body = msg.get("message", "")
+                    st.markdown(body)
+                    # optional "read more" link
+                    url = msg.get("web_url_path")
+                    if url:
+                        st.markdown(f"[Read more]({url})")
+                    st.markdown("---")

app_V1.py ADDED Viewed

	@@ -0,0 +1,527 @@

+import html
+import json
+import os
+from io import StringIO
+import streamlit as st
+import pandas as pd
+from bs4 import BeautifulSoup
+from snowflake.snowpark import Session
+from Messaging_system.Permes import Permes
+from Messaging_system.context_validator import Validator
+from dotenv import load_dotenv
+load_dotenv()
+# -----------------------------------------------------------------------
+# Load CSV file
+@st.cache_data
+def load_data(file_path):
+    return pd.read_csv(file_path)
+# -----------------------------------------------------------------------
+def load_config_(file_path):
+    """
+    Loads configuration JSON files from the local space. (mostly for loading the Snowflake connection parameters)
+    :param file_path: local path to the JSON file
+    :return: JSON file
+    """
+    with open(file_path, 'r') as file:
+        return json.load(file)
+# -----------------------------------------------------------------------
+# Set page configuration and apply custom CSS for black and gold theme
+# Page metadata: browser-tab title, icon and a wide layout.
+st.set_page_config(page_title="Personalized Message Generator", page_icon=":mailbox_with_mail:", layout="wide")
+# Inject a raw <style> block for the black (#000000) and gold (#FFD700) theme.
+# unsafe_allow_html=True is passed so the markup is emitted as HTML rather than shown as text.
+st.markdown(
+    """
+    <style>
+    body {
+        background-color: #000000;
+        color: #FFD700;
+    }
+    .stButton > button {
+        background-color: #FFD700;
+        color: #000000;
+    }
+    h1, h2, h3, h4, h5, h6 {
+        color: #FFD700;
+    }
+    .section {
+        margin-bottom: 30px;
+    }
+    .input-label {
+        font-size: 18px;
+        font-weight: bold;
+        margin-top: 10px;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+# -----------------------------------------------------------------------
+def filter_validated_users(users):
+    """
+    Filters the input DataFrame by removing rows where the 'valid' column has the value 'False'.
+    Parameters:
+    users (DataFrame): A pandas DataFrame with a 'valid' column containing strings 'True' or 'False'.
+    Returns:
+    DataFrame: A filtered DataFrame containing only rows where 'valid' is 'True'.
+    """
+    # Convert the 'valid' column to boolean for easier filtering
+    users['valid'] = users['valid'].map({'True': True, 'False': False})
+    # Filter the DataFrame to include only rows where 'valid' is True
+    filtered_users = users[users['valid']]
+    # Optional: Reset the index of the filtered DataFrame
+    filtered_users = filtered_users.reset_index(drop=True)
+    return filtered_users
+# -----------------------------------------------------------------------
+# --------------------------------------------------------------
+# --------------------------------------------------------------
+def clean_html_tags(users_df):
+    """
+    accept the data as a Pandas Dataframe and return the preprocessed dataframe.
+    This function has access to the columns that contain HTML tags and codes, Therefore it will apply cleaning
+    procedures to those columns.
+    functions to preprocess the data
+    :return: updates users_df
+    """
+    for col in users_df.columns:
+        # Apply the cleaning function to each cell in the column
+        users_df[col] = users_df[col].apply(clean_text)
+    return users_df
+# --------------------------------------------------------------
+def clean_text(text):
+    if isinstance(text, str):
+        # Unescape HTML entities
+        text = html.unescape(text)
+        # Parse HTML and get text
+        soup = BeautifulSoup(text, "html.parser")
+        return soup.get_text()
+    else:
+        return text
+# ----------------------------------------------------------------------------
+# Read the OpenAI API key from the OPENAI_API environment variable (load_dotenv() is
+# called earlier in this file, so a local .env file may supply it) and keep it in the
+# session state. os.environ.get returns None when the variable is not set.
+openai_api_key = os.environ.get('OPENAI_API')
+st.session_state["openai_api_key"] = openai_api_key
+# ----------------------------------------------------------------------------
+# Main function
+def initialize_session_state():
+    # Initialize session state variables if not already set
+    st.session_state["involve_recsys_result"] = False
+    st.session_state["involve_last_interaction"] = False
+    st.session_state.valid_instructions = ""
+    st.session_state.invalid_instructions = ""
+    # Initialize session state variables if not already set
+    for key in [
+        "data", "brand","recsys_contents", "generated", "csv_output", "users_message", "messaging_mode",
+        "messaging_type", "target_column", "ugc_column", "identifier_column", "input_validator", "selected_input_features"
+        "selected_features", "additional_instructions", "segment_info", "message_style", "sample_example",
+        "CTA", "all_features", "number_of_messages", "instructionset", "segment_name", "number_of_samples",
+        "selected_source_features", "platform"
+    ]:
+        if key not in st.session_state:
+            st.session_state[key] = None
+def upload_csv_file():
+    st.header("Upload CSV File")
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        users = load_data(uploaded_file)
+        st.write(f"Data loaded from {uploaded_file.name}")
+        st.session_state.data = users
+        columns = users.columns.tolist()
+        st.subheader("Available Columns in Uploaded CSV")
+        st.write(columns)
+        return users
+    else:
+        return None
+def select_identifier_column(users):
+    st.header("Select Identifier Column")
+    columns = users.columns.tolist()
+    identifier_column = st.selectbox("Select the identifier column", columns)
+    st.session_state.identifier_column = identifier_column
+    st.markdown("---")
+def select_target_audience():
+    st.header("Select Target Audience")
+    options = ["drumeo", "pianote", "guitareo", "singeo"]
+    brand = st.selectbox("Choose the brand for the users", options)
+    st.session_state.brand = brand
+    st.markdown("---")
+def select_target_messaging_type():
+    st.header("Select Target Messaging Type")
+    messaging_type = st.selectbox("Choose the target messaging type", ["Push Notification", "In-App Notification"])
+    st.session_state.messaging_type = "push" if messaging_type == "Push Notification" else "app"
+    st.markdown("---")
+def input_personalization_parameters():
+    st.header("Personalization Parameters")
+    st.session_state.segment_info = st.text_area("Segment Info", "", placeholder="Tell us more about the users...")
+    st.session_state.CTA = st.text_area("CTA", "", placeholder="e.g., check out 'Inspired by your activity' that we have crafted just for you!")
+    st.session_state.message_style = st.text_area("Message Style", "", placeholder="(optional) e.g., be kind and friendly (it's better to be as specific as possible)")
+    st.session_state.sample_example = st.text_area("Sample Example", "", placeholder="(optional) e.g., Hello! We have crafted a perfect set of courses just for you!")
+    number_of_samples = st.text_input("Number of samples to generate messages", "20", placeholder="(optional) default is 20")
+    st.session_state.number_of_samples = int(number_of_samples) if number_of_samples else 20
+    st.markdown("---")
+def input_message_sequence_parameters():
+    """Collect settings for sequential message generation (new feature)."""
+    st.header("Sequential Messaging Parameters")
+    # Number of sequential messages
+    number_of_messages = st.number_input(
+        "Number of sequential messages to generate (per user)",
+        min_value=1, max_value=10, value=1, step=1, key="num_seq_msgs"
+    )
+    st.session_state.number_of_messages = number_of_messages
+    # Segment name for storage / tracking
+    segment_name = st.text_input(
+        "Segment Name", value="", placeholder="e.g., no_recent_activity", key="segment_name_input"
+    )
+    st.session_state.segment_name = segment_name
+    # Instruction set for each message
+    st.subheader("Instructions per Message")
+    st.caption("Provide additional tone or style instructions for each sequential message. Leave blank to inherit the main instructions.")
+    instructionset = {}
+    cols = st.columns(number_of_messages)
+    for i in range(1, number_of_messages + 1):
+        with cols[(i - 1) % number_of_messages]:
+            instr = st.text_input(
+                f"Message {i} instructions", value="", placeholder="e.g., Be Cheerful & Motivational", key=f"instr_{i}"
+            )
+            if instr.strip():
+                instructionset[i] = instr.strip()
+    # Save to session state
+    st.session_state.instructionset = instructionset
+    st.markdown("---")
+def select_features_from_source_info():
+    st.header("Select Features from Available Source Information")
+    available_features = ["first_name", "biography", "birthday_reminder", "goals", "Minutes_practiced", "Last_completed_content"]
+    selected_source_features = st.multiselect("Select features to use from available source information", available_features)
+    selected_source_features.append("instrument")
+    st.session_state.selected_source_features = selected_source_features
+    st.markdown("---")
+def select_features_from_input_file(users):
+    st.header("Select Features from your Input file")
+    columns = users.columns.tolist()
+    selected_features = st.multiselect("Select features to use in generated messages from the input file", columns)
+    st.session_state.selected_features = selected_features
+    st.markdown("---")
+def provide_additional_instructions():
+    st.header("Additional Instructions")
+    additional_instructions = st.text_area("Provide additional instructions on how to use selected features in the generated message", "")
+    st.session_state.additional_instructions = additional_instructions
+    st.markdown("---")
+def parse_user_generated_context(users):
+    st.header("Parsing User-Generated Context")
+    user_generated_context = st.checkbox("Do we have a user-generated context provided in the input that you wish to filter?")
+    st.session_state.user_generated_context = user_generated_context
+    if user_generated_context:
+        columns = users.columns.tolist()
+        ugc_column = st.selectbox("Select the column that contains User-Generated Context", columns)
+        st.session_state.ugc_column = ugc_column
+        st.subheader("Provide Additional Instructions for Validation (Optional)")
+        valid_instructions = st.text_area("Instructions for valid context", placeholder="Provide instructions for what constitutes valid context...")
+        invalid_instructions = st.text_area("Instructions for invalid context", placeholder="Provide instructions for what constitutes invalid context...")
+        st.session_state.valid_instructions = valid_instructions
+        st.session_state.invalid_instructions = invalid_instructions
+        input_validator = Validator(api_key=st.session_state.openai_api_key)
+        st.session_state.input_validator = input_validator
+    st.markdown("---")
+def include_content_recommendations():
+    st.header("Include Content Recommendations")
+    include_recommendation = st.checkbox("Would you like to include content in the message to recommend to the students?")
+    st.session_state.include_recommendation = include_recommendation
+    if include_recommendation:
+        recommendation_source = st.radio("Select recommendation source", ["Input File", "Musora Recommender System"])
+        st.session_state.recommendation_source = recommendation_source
+        if recommendation_source == "Musora Recommender System":
+            st.session_state.involve_recsys_result = True
+            st.session_state.messaging_mode = "recsys_result"
+            list_of_content_types = ["song", "workout", "quick_tips", "course"]
+            selected_content_types = st.multiselect("Select content_types that you would like to recommend", list_of_content_types)
+            st.session_state.recsys_contents = selected_content_types
+        else:
+            st.session_state.involve_recsys_result = False
+            st.session_state.messaging_mode = "message"
+            columns = st.session_state.data.columns.tolist()
+            target_column = st.selectbox("Select the target column for recommendations", columns)
+            st.session_state.target_column = target_column
+    else:
+        st.session_state.messaging_mode = "message"
+        st.session_state.target_column = None
+    st.markdown("---")
+def generate_personalized_messages(users):
+    st.header("Generate Personalized Messages")
+    if st.button("Generate Personalized Messages"):
+        if st.session_state.CTA.strip() == "" or st.session_state.segment_info.strip() == "":
+            st.error("CTA and Segment Info are mandatory fields and cannot be left empty.")
+        else:
+            conn = {
+                "user": os.environ.get("snowflake_user"),
+                "password": os.environ.get("snowflake_password"),
+                "account": os.environ.get("snowflake_account"),
+                "role": os.environ.get("snowflake_role"),
+                "database": os.environ.get("snowflake_database"),
+                "warehouse": os.environ.get("snowflake_warehouse"),
+                "schema": os.environ.get("snowflake_schema")
+            }
+            config_file_path = 'Config_files/message_system_config.json'
+            config_file = load_config_(config_file_path)
+            session = Session.builder.configs(conn).create()
+            if st.session_state.user_generated_context:
+                if st.session_state.valid_instructions.strip() or st.session_state.invalid_instructions.strip():
+                    st.session_state.input_validator.set_validator_instructions(
+                        valid_instructions=st.session_state.valid_instructions,
+                        invalid_instructions=st.session_state.invalid_instructions
+                    )
+                else:
+                    st.session_state.input_validator.set_validator_instructions()
+                # Create a progress bar
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+                # Define a callback function to update the progress bar
+                def progress_callback(progress, total):
+                    percent_complete = int(progress / total * 100)
+                    progress_bar.progress(percent_complete)
+                    status_text.text(f"Validating user_generated_context: {percent_complete}%")
+                st.info("Validating user-generated content. This may take a few moments...")
+                users = st.session_state.input_validator.validate_dataframe(
+                    dataframe=users, target_column=st.session_state.ugc_column, progress_callback=progress_callback)
+                users = filter_validated_users(users)
+                st.success("User-generated content has been validated and filtered.")
+            st.session_state.all_features = st.session_state.selected_source_features + st.session_state.selected_features
+            if "Last_completed_content" in st.session_state.selected_source_features:
+                st.session_state.involve_last_interaction = True
+            else:
+                st.session_state.involve_last_interaction = False
+            # Create a progress bar
+            progress_bar = st.progress(0)
+            status_text = st.empty()
+            # Define a callback function to update the progress bar
+            def progress_callback(progress, total):
+                percent_complete = int(progress / total * 100)
+                progress_bar.progress(percent_complete)
+                status_text.text(f"Processing: {percent_complete}%")
+            permes = Permes()
+            users_message = permes.create_personalize_messages(
+                session=session,
+                users=users,
+                brand=st.session_state.brand,
+                config_file=config_file,
+                openai_api_key=os.environ.get('OPENAI_API'),
+                CTA=st.session_state.CTA,
+                segment_info=st.session_state.segment_info,
+                number_of_samples=st.session_state.number_of_samples,
+                message_style=st.session_state.message_style,
+                sample_example=st.session_state.sample_example,
+                selected_input_features=st.session_state.selected_features,
+                selected_source_features=st.session_state.selected_source_features,
+                additional_instructions=st.session_state.additional_instructions,
+                platform=st.session_state.messaging_type,
+                involve_last_interaction=st.session_state.involve_last_interaction,
+                involve_recsys_result=st.session_state.involve_recsys_result,
+                messaging_mode=st.session_state.messaging_mode,
+                identifier_column=st.session_state.identifier_column,
+                target_column=st.session_state.target_column,
+                recsys_contents=st.session_state.recsys_contents,
+                progress_callback=progress_callback,
+                # NEW PARAMETERS
+                number_of_messages=st.session_state.number_of_messages,
+                instructionset=st.session_state.instructionset,
+                segment_name=st.session_state.segment_name
+            )
+            # Clear the progress bar and status text after completion
+            progress_bar.empty()
+            status_text.empty()
+            csv_output = users_message.to_csv(encoding='utf-8-sig', index=False)
+            st.session_state.csv_output = csv_output
+            st.session_state.users_message = users_message
+            st.session_state.generated = True
+            st.success("Personalized messages have been generated.")
+    st.markdown("---")
def download_generated_messages():
    """
    Offer the generated messages as a downloadable CSV file.

    Nothing is rendered until ``st.session_state['generated']`` is truthy.
    The CSV is built from ``st.session_state.users_message`` and handed to the
    download button as bytes encoded with ``utf-8-sig``.
    """
    if not st.session_state.get('generated', False):
        return

    st.header("Download Generated Messages")

    messages_df = st.session_state.users_message

    # Render the CSV as text in memory, then encode the whole payload once;
    # the 'utf-8-sig' codec is what puts the BOM at the start of the bytes.
    text_buffer = StringIO()
    messages_df.to_csv(text_buffer, index=False, encoding='utf-8-sig')
    payload = text_buffer.getvalue().encode('utf-8-sig')

    st.download_button(
        label="Download output messages as a CSV file",
        data=payload,
        file_name='personalized_messages.csv',
        mime='text/csv'
    )
def view_generated_messages():
    """
    Render a per-user review of the generated push notifications.

    Nothing is rendered until ``st.session_state['generated']`` is truthy.
    For every row of ``st.session_state.users_message`` an expander shows the
    selected user features followed by the messages stored in the row's
    ``message`` cell. That cell may hold a JSON string (or an already-decoded
    value) shaped either as a list of message objects or as
    ``{"messages_sequence": [...]}``.

    Malformed cells never abort the page: an unparsable or non-string cell is
    reported with ``st.error``, an unexpected structure with ``st.warning``,
    and the remaining users are still displayed.
    """
    # Only run if messages have been generated
    if not st.session_state.get('generated', False):
        return

    st.title("🔔 Generated Push Notifications Review")
    df = st.session_state.users_message
    identifier = st.session_state.identifier_column.lower()
    features = st.session_state.all_features

    for idx, (_, user_row) in enumerate(df.iterrows(), start=1):
        user_id = user_row.get(identifier, "N/A")

        # Collapsible container per user; only the first one starts open
        with st.expander(f"{idx}. User ID: {user_id}", expanded=(idx == 1)):
            st.markdown("##### 👤 User Features")
            # 3-column layout for user metadata, filled round-robin
            feature_cols = st.columns(3)
            for i, feat in enumerate(features):
                val = user_row.get(feat, "N/A")
                feature_cols[i % 3].write(f"**{feat}**: {val}")

            st.markdown("---")
            st.markdown("##### 📝 Generated Messages")

            raw = user_row.get('message', '[]')
            try:
                # Accept an already-decoded container as-is; json.loads raises
                # TypeError for any non-string input such as None or NaN.
                parsed = raw if isinstance(raw, (dict, list)) else json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                st.error("Could not parse message JSON")
                messages = []
            else:
                # Nested form {"messages_sequence": [...]}: use the inner value.
                if isinstance(parsed, dict) and 'messages_sequence' in parsed:
                    messages = parsed['messages_sequence']
                else:
                    messages = parsed
                if not isinstance(messages, list):
                    st.warning(
                        "Unexpected JSON structure for messages; expected a list or {'messages_sequence': [...]}")
                    messages = []

            # Display each push notification
            for m_idx, msg in enumerate(messages, start=1):
                if not isinstance(msg, dict):
                    # A non-object entry has no fields to show; skip it
                    # instead of failing on msg.get below.
                    st.warning(f"Message {m_idx} is not a JSON object and was skipped.")
                    continue

                c_img, c_text = st.columns([1, 3])
                with c_img:
                    thumb = msg.get('thumbnail_url')
                    if thumb:
                        st.image(thumb, width=80)
                    else:
                        st.write("No image")
                with c_text:
                    header = msg.get('header', '')
                    body = msg.get('message', '')
                    link = msg.get('web_url_path', '#')
                    st.markdown(f"**{m_idx}. {header}**")
                    st.markdown(body)
                    st.markdown(f"[Read more →]({link})")
                st.markdown("---")
if __name__ == "__main__":
    st.title("Personalized Message Generator")

    # Session state must exist before any widget reads from it
    initialize_session_state()

    # Everything else depends on an uploaded CSV file
    users = upload_csv_file()
    if users is not None:
        # Page sections in the order they are rendered; each entry is the
        # section function and the positional arguments it is called with.
        ui_steps = [
            (select_identifier_column, (users,)),
            (select_target_audience, ()),
            (select_target_messaging_type, ()),
            (input_personalization_parameters, ()),
            (input_message_sequence_parameters, ()),
            (select_features_from_source_info, ()),
            (select_features_from_input_file, (users,)),
            (provide_additional_instructions, ()),
            (parse_user_generated_context, (users,)),
            (include_content_recommendations, ()),
            (generate_personalized_messages, (users,)),
            (download_generated_messages, ()),
            (view_generated_messages, ()),
        ]
        for render_step, step_args in ui_steps:
            render_step(*step_args)

messaging_main_test.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import html
+import json
+import os
+import pandas as pd
+from snowflake.snowpark import Session
+from bs4 import BeautifulSoup
+from Messaging_system.Permes import Permes
+import streamlit as st
+from Messaging_system.context_validator import Validator
+# --------------------------------------------------------------
def load_config_(file_path):
    """
    Load a JSON configuration file from the local file system
    (mostly used for the Snowflake connection parameters).

    The file is read as UTF-8 so the result does not depend on the platform's
    default text encoding.

    :param file_path: local path to the JSON file
    :return: the decoded JSON content
    :raises OSError: if the file cannot be opened
    :raises json.JSONDecodeError: if the file does not contain valid JSON
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
+# --------------------------------------------------------------
def clean_html_tags(users_df):
    """
    Strip HTML markup from every column of a pandas DataFrame.

    Each cell of each column is passed through ``clean_text``. The columns are
    overwritten on the DataFrame that was passed in, and that same DataFrame
    is returned.

    :param users_df: pandas DataFrame to clean
    :return: users_df with its columns replaced by the cleaned values
    """
    for column_name in users_df.columns:
        # Element-wise cleaning; clean_text decides per cell what to change
        cleaned_column = users_df[column_name].apply(clean_text)
        users_df[column_name] = cleaned_column
    return users_df
+# --------------------------------------------------------------
def clean_text(text):
    """
    Return the plain text of an HTML fragment.

    Non-string values are returned unchanged. For strings, HTML entities are
    unescaped first and the result is parsed with BeautifulSoup's
    ``html.parser`` to extract its text.

    :param text: cell value to clean
    :return: extracted text for strings, the original value otherwise
    """
    if not isinstance(text, str):
        return text

    unescaped = html.unescape(text)
    return BeautifulSoup(unescaped, "html.parser").get_text()
+# =============================================================
def get_credential(key):
    """
    Look up a credential by name.

    The Streamlit secrets store is consulted first; when it yields no truthy
    value, the environment variable of the same name is returned instead.

    :param key: name of the secret / environment variable
    :return: the value found, or None when neither source provides one
    """
    secret_value = st.secrets.get(key)
    if secret_value:
        return secret_value
    return os.getenv(key)
+# --------------------------------------------------------------
def filter_validated_users(users):
    """
    Keep only the rows whose 'valid' column marks them as valid.

    A row is valid when its 'valid' value is the boolean True or a string
    that reads "true" ignoring case and surrounding whitespace. Every other
    value (including 'False', missing values and unexpected text) is treated
    as invalid rather than raising while filtering.

    The input DataFrame is left untouched; the result is a new DataFrame
    whose 'valid' column holds real booleans and whose index is reset.

    Parameters:
    users (DataFrame): A pandas DataFrame with a 'valid' column.
    Returns:
    DataFrame: A filtered DataFrame containing only the valid rows.
    Raises:
    KeyError: if the DataFrame has no 'valid' column.
    """
    # Normalize through str() so booleans and strings are handled alike and
    # missing values simply compare unequal to "true".
    is_valid = users['valid'].map(
        lambda value: str(value).strip().lower() == 'true'
    ).astype(bool)

    # Work on a copy so the caller's 'valid' column is not rewritten.
    normalized = users.copy()
    normalized['valid'] = is_valid

    filtered_users = normalized[is_valid]
    # Renumber the remaining rows from 0
    return filtered_users.reset_index(drop=True)
+# --------------------------------------------------------------
if __name__ == "__main__":
    # Manual end-to-end run: generate re-engagement push messages for a sample
    # of inactive Drumeo users and write the result to a CSV file.

    # path to sample data
    path = "Data/not_active_drumeo_camp.csv"
    # loading sample data
    users = pd.read_csv(path)
    users = clean_html_tags(users)

    config_file_path = 'Config_files/message_system_config.json'
    config_file = load_config_(config_file_path)

    openai_api_key = get_credential("OPENAI_API")
    # Snowflake connection parameters
    conn = dict(
        user=get_credential("snowflake_user"),
        password=get_credential("snowflake_password"),
        account=get_credential("snowflake_account"),
        role=get_credential("snowflake_role"),
        database=get_credential("snowflake_database"),
        warehouse=get_credential("snowflake_warehouse"),
        schema=get_credential("snowflake_schema")
    )

    # --------------------
    # Optional step, currently disabled: validate user-generated context
    # before generating messages.
    # user_generated_context = True
    # input_validator = Validator(api_key=openai_api_key)
    # input_validator.set_validator_instructions()
    # users = input_validator.validate_dataframe(dataframe=users, target_column="forum_content")
    # users = filter_validated_users(users)
    # --------------------

    brand = "drumeo"
    identifier_column = "user_id"
    segment_info = """This is a Drumeo user who have been inactive for 30 days or more. This means
    they have not taken any actions on the platform (e.g. lesson, songs, etc.) that would cause them to advance their
    musical skills. We want to re-motivate the user so they can get back on track to reach their drumming goals"""

    # sample inputs
    CTA = """Re-engage the user by reminding them of their goals, and by recommending content that will get them back on track. """
    # sample_example = """we have crafted a perfect set of courses just for you! come and check it out!"""
    # The feature name must match the entry in selected_source_features below.
    additional_instructions = """Include weeks_since_last_interaction in the message if you can create a better message to re-engage the user."""
    recsys_contents = ["workout", "course", "quick_tips"]

    # number_of_samples = users.shape[0]
    number_of_samples = 5
    # number of messages to generate
    number_of_messages = 3
    # One tone instruction per message in the sequence, keyed by position
    instructionset = {
        1: "Be Cheerful & Motivational",
        2: "Be Passive aggressive, sarcastic and mean",
        3: "Be sad and disappointed"
    }

    involve_recsys_result = True
    # NOTE(review): defined but not passed to create_personalize_messages
    # below, so the callee's default applies — confirm that is intended.
    involve_last_interaction = False
    # messaging_mode = "recommend_playlist"

    sample_example = """
    Below are sample messages from us. make the generated message close to our sound in terms of style, tune, and the way we write messages.
    Example 1
    header: The Drums Miss You, [first_name]
    message: 🥁 It’s been a while. Jump back in with this quick lesson!
    Example 2
    Header: Time To Practice 🥁
    message: A quick workout will help you reach your musical goals.
    Example 3
    header: 🥁 Don’t Stop The Beat
    message: A few minutes of practice makes a huge difference. You’ll love this lesson!
    Example 4
    header: It’s Been A While
    message: 🥁We haven’t seen you in 7 weeks. Get back on track with a quick lesson!
    Example 5:
    header: Miss Playing, Michael? 🥁
    message: It's been 5 weeks! Jump back in with a cool workout to learn new beats!
    """
    # The examples above are overwritten here, so no sample messages are sent.
    sample_example = None

    platform = "push"
    selected_source_features = ["first_name", "weeks_since_last_interaction"]
    selected_input_features = None
    segment_name = "no_recent_activity"

    permes = Permes()

    # Open the Snowflake session as late as possible and always close it,
    # even when message generation fails.
    session = Session.builder.configs(conn).create()
    try:
        users_message = permes.create_personalize_messages(session=session,
                                                           users=users,
                                                           brand=brand,
                                                           config_file=config_file,
                                                           openai_api_key=openai_api_key,
                                                           CTA=CTA,
                                                           segment_info=segment_info,
                                                           number_of_samples=number_of_samples,
                                                           number_of_messages=number_of_messages,
                                                           instructionset=instructionset,
                                                           selected_source_features=selected_source_features,
                                                           selected_input_features=selected_input_features,
                                                           additional_instructions=additional_instructions,
                                                           platform=platform,
                                                           involve_recsys_result=involve_recsys_result,
                                                           identifier_column=identifier_column,
                                                           recsys_contents=recsys_contents,
                                                           sample_example=sample_example,
                                                           segment_name=segment_name)
    finally:
        session.close()

    users_message.to_csv("drumeo_not_active_complete.csv", encoding='utf-8-sig', index=False)