| import html |
| import json |
| import os |
|
|
| import pandas as pd |
| from snowflake.snowpark import Session |
| from bs4 import BeautifulSoup |
| from Messaging_system.Permes import Permes |
| import streamlit as st |
|
|
| from Messaging_system.SnowFlakeConnection import SnowFlakeConn |
| from Messaging_system.context_validator import Validator |
|
|
|
|
| |
def load_config_(file_path):
    """
    Load a JSON configuration file from the local filesystem.

    Mostly used for loading the Snowflake connection parameters.

    :param file_path: local path to the JSON file
    :return: the parsed JSON content (whatever ``json.load`` produces for the file)
    :raises OSError: if the file cannot be opened (e.g. it does not exist)
    :raises json.JSONDecodeError: if the file does not contain valid JSON
    """
    # Decode explicitly as UTF-8: without an encoding argument open() falls back
    # to the platform's locale encoding, which mis-reads non-ASCII characters on
    # systems whose default is not UTF-8.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
|
|
| |
def clean_html_tags(users_df):
    """
    Clean HTML tags and entities out of a pandas DataFrame.

    Every column of the DataFrame is processed: each cell is passed through
    ``clean_text``, and the column is replaced by the cleaned result.

    :param users_df: pandas DataFrame to preprocess; it is updated in place
    :return: the same users_df object, after cleaning
    """
    for column_name in users_df.columns:
        cleaned_column = users_df[column_name].apply(clean_text)
        users_df[column_name] = cleaned_column

    return users_df
|
|
|
|
| |
def clean_text(text):
    """
    Remove HTML entities and tags from a single value.

    :param text: value to clean; only ``str`` instances are processed
    :return: for strings, the text content left after unescaping HTML entities
             and parsing the result with BeautifulSoup's "html.parser";
             any non-string value is returned unchanged
    """
    # Non-string cells (numbers, None, NaN, ...) pass through untouched.
    if not isinstance(text, str):
        return text

    # Decode entities first so that escaped markup is also seen by the parser.
    unescaped = html.unescape(text)
    return BeautifulSoup(unescaped, "html.parser").get_text()
|
|
| |
def get_credential(key):
    """
    Look up a credential by name, preferring Streamlit secrets over the environment.

    :param key: name of the secret / environment variable
    :return: the value from ``st.secrets`` when it is present and truthy,
             otherwise ``os.getenv(key)`` (``None`` when neither source has it)
    """
    try:
        secret = st.secrets.get(key)
    except FileNotFoundError:
        # Streamlit raises a FileNotFoundError-family error when no secrets
        # file is configured; treat that as "no secret" so the
        # environment-variable fallback below is still reached.
        secret = None
    return secret or os.getenv(key)
|
|
| |
def filter_validated_users(users):
    """
    Return only the rows of the input DataFrame whose 'valid' flag is true.

    Parameters:
    users (DataFrame): A pandas DataFrame with a 'valid' column. The values may be
        the strings 'True' / 'False' (compared case-insensitively, surrounding
        whitespace ignored) or real booleans, which is what pandas.read_csv
        produces for such a column. Any other value, including missing values,
        is treated as not valid.

    Returns:
    DataFrame: A new DataFrame containing only the valid rows, with the index
        reset and the 'valid' column converted to booleans. The input DataFrame
        is not modified.

    Raises:
    KeyError: if the DataFrame has no 'valid' column.
    """
    def _is_valid(value):
        # str() lets booleans and the strings 'True'/'False' share one code path;
        # NaN/None become 'nan'/'None' and therefore count as not valid.
        return str(value).strip().lower() == 'true'

    # Build the boolean mask without writing back into the caller's DataFrame.
    keep = users['valid'].map(_is_valid).astype(bool)

    # assign() returns a copy, so the caller's 'valid' column keeps its original values.
    with_flags = users.assign(valid=keep)
    filtered_users = with_flags[keep]

    return filtered_users.reset_index(drop=True)
|
|
|
|
| |
if __name__ == "__main__":
    # Ad-hoc driver: reads a CSV of test users, calls
    # Permes.create_personalize_messages for them and writes the returned
    # table to "Singeo_camp.csv".

    # --- Inputs read from disk -------------------------------------------
    path = "Data/Test_users.csv"
    users = pd.read_csv(path)

    config_file_path = 'Config_files/message_system_config.json'
    config_file = load_config_(config_file_path)

    # --- Credentials (resolved through get_credential) --------------------
    openai_api_key = get_credential("OPENAI_API")

    conn = dict(
        user=get_credential("snowflake_user"),
        password=get_credential("snowflake_password"),
        account=get_credential("snowflake_account"),
        role=get_credential("snowflake_role"),
        database=get_credential("snowflake_database"),
        warehouse=get_credential("snowflake_warehouse"),
        schema=get_credential("snowflake_schema"),
    )

    # --- Campaign settings -------------------------------------------------
    brand = "singeo"
    identifier_column = "email"

    segment_info = """Student who haven't practiced for a few days"""

    # NOTE(review): CTA and additional_instructions are defined here but are
    # not passed to create_personalize_messages below - confirm whether they
    # should be.
    CTA = """The goal is to tell them to practice singing"""
    additional_instructions = None

    recsys_contents = ["workout", "course", "quick_tips"]

    number_of_samples = 10
    number_of_messages = 3

    # Example messages: ex1 is passed as sample_example, ex2 / ex3 are passed
    # (keyed by 2 and 3) as subsequent_examples.
    ex1 = """
Header: Sing your heart out
Message: Your next lesson is waiting. Get back to singing today!


Header: Here Comes The Sun
Message: A quick practice session will light up your day. Let’s get right back at it.

Header: Ain’t No Mountain High Enough
Message: Daily practice makes you unstoppable. Time to build your skills!
"""

    sample_example = ex1

    ex2 = """
Header: Get Back On Track!
Message: Join thousands of singers in reaching their goals. Take a lesson today!


Header: It’s Been A While
Message: We haven’t seen you in a bit. Slide back into your practice sessions now.


Header: It Only Takes 10 Minutes
Message: Build your momentum and get you back into the groove!


Header: Let’s Sing!
Message: Haven’t practiced yet? This will get you going.

"""

    ex3 = """
Header: We Miss Your Singing!
Message: You haven’t practiced for 25 days. It’s time to dive back in!


Header: Lost In Your Singing Journey?
Message: These lessons are curated just for you. Start singing today!

"""

    subsequent_examples = {
        2: ex2,
        3: ex3,
    }

    involve_recsys_result = True
    # NOTE(review): involve_last_interaction, selected_source_features and
    # selected_input_features are not used by the call below.
    involve_last_interaction = False

    platform = "push"

    selected_source_features = None
    selected_input_features = None

    segment_name = "no_recent_activity"

    # --- Run ---------------------------------------------------------------
    session = Session.builder.configs(conn).create()
    try:
        # NOTE(review): this object is not used again in this script; it is
        # kept because constructing it may have side effects - confirm.
        snowflake = SnowFlakeConn(session=session, brand=brand)

        permes = Permes()

        users_message = permes.create_personalize_messages(
            session=session,
            model="gpt-4.1-mini",
            users=users,
            brand=brand,
            config_file=config_file,
            openai_api_key=openai_api_key,
            segment_info=segment_info,
            number_of_samples=number_of_samples,
            number_of_messages=number_of_messages,
            subsequent_examples=subsequent_examples,
            platform=platform,
            involve_recsys_result=involve_recsys_result,
            identifier_column=identifier_column,
            recsys_contents=recsys_contents,
            sample_example=sample_example,
            segment_name=segment_name,
            personalization=True,
        )

        users_message.to_csv("Singeo_camp.csv", encoding='utf-8-sig', index=False)
    finally:
        # Always release the Snowflake session, even when message generation
        # or the CSV export fails.
        session.close()
|
|
|
|