# predata_deployment/common/UserProfile.py
# Last change: "Update common/UserProfile.py" by elaineaishophouse (commit dc840fa, verified)
import re
import datetime
import textwrap
from common.Config import Config
import pandas as pd
import numpy as np
# Category class
class Category:
    """Flat record of the category-related fields of a user profile.

    Every field starts as ``None`` and is normally filled in through
    :meth:`set_field`.
    """

    def __init__(self):
        self.Proactiveness_around_health = None
        self.Satisfaction_level_with_current_health = None
        self.Perceptions_Around_Home_Remedies = None
        self.Perceptions_Around_Supplements = None
        self.Health_Concern_1 = None
        self.Health_Concern_2 = None
        self.Health_Concern_3 = None
        self.Bone_Health_Importance = None
        self.Bone_Health_Rating = None
        self.Physical_Activity = None
        self.Price_Sensitivity = None
        self.Propensity_To_Influence_With_Ads = None
        self.Propensity_To_Be_Influenced_By_Others = None
        self.Health_Conciousness = None
        self.Brand_Trust_Loyalty = None
        self.Household_Professional_Industry = None
        self.Womens_Horlicks_Perception = None

    def set_field(self, field_name, value):
        """Assign ``value`` to the data field called ``field_name``.

        An unknown name is reported on stdout and otherwise ignored; no
        exception is raised.
        """
        # Check the instance dict instead of hasattr(): hasattr() is also true
        # for methods (e.g. "to_dict"), so a stray field name coming from a
        # spreadsheet could silently overwrite one of them.
        if field_name in self.__dict__:
            setattr(self, field_name, value)
        else:
            print(f"Field {field_name} does not exist in {self.__class__.__name__} class.")

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. ``0`` and ``False`` are real values
        and are kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    def to_dict(self):
        """Return a shallow copy of all fields as ``{field_name: value}``."""
        return dict(self.__dict__)
# Demographics class
class Demographics:
    """Flat record of the demographic fields of a user profile.

    Every field starts as ``None`` and is normally filled in through
    :meth:`set_field`.
    """

    def __init__(self):
        self.Age_Group_classification = None
        self.Age = None
        self.Gender = None
        self.Marital_Status = None
        self.Family_Structure = None
        self.Country_of_Current_Residence = None
        self.Region_of_Current_Residence = None
        self.Income_class = None
        self.LSM = None
        self.Country_of_Birth = None
        self.Region_of_Birth = None
        self.Education_Level = None
        self.Number_of_Children = None
        self.Health_Status = None
        self.Occupation_Categories = None
        self.Professional_Expertise_level = None
        self.Lifestage_Segment = None
        self.Parental_Status = None
        self.Has_Dependents = None

    def set_field(self, field_name, value):
        """Assign ``value`` to the data field called ``field_name``.

        An unknown name is reported on stdout and otherwise ignored; no
        exception is raised.
        """
        # Check the instance dict instead of hasattr(): hasattr() is also true
        # for methods (e.g. "to_dict"), so a stray field name coming from a
        # spreadsheet could silently overwrite one of them.
        if field_name in self.__dict__:
            setattr(self, field_name, value)
        else:
            print(f"Field {field_name} does not exist in {self.__class__.__name__} class.")

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. ``0`` and ``False`` are real values
        (e.g. zero children, no dependents) and are kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    def to_dict(self):
        """Return a shallow copy of all fields as ``{field_name: value}``."""
        return dict(self.__dict__)
# Preferences class
class Preferences:
    """Flat record of the preference fields of a user profile.

    Every field starts as ``None`` and is normally filled in through
    :meth:`set_field`.
    """

    def __init__(self):
        self.Exercise_For_Bone_Health = None
        self.Home_Remedies_For_Bone_Health = None
        self.Limits_Caffeine_For_Bone_Health = None
        self.Regular_Scans_For_Bone_Health = None
        self.Consumes_HFDs_For_Bone_Health = None
        self.Consumes_Supplements_For_Bone_Health = None
        self.HFD_Brand_Awareness = None
        self.HFD_Brand_Consumption = None
        self.HFD_Usage_Period = None
        self.HFD_Weekly_Usage = None
        self.Recommended_Womens_Horlicks_Within_6_Months = None
        self.Womens_Horlicks_Recommendations_Count = None
        self.Aware_Of_Womens_Horlicks = None
        self.Accompaniments_with_Horlicks = None
        self.HFD_Preparation_Preference = None
        self.HFD_Taste_Preference = None
        self.Method_Of_Consuming_Horlicks = None
        self.Consumes_Womens_Horlicks = None
        self.Consumes_Horlicks_Lite = None
        self.Health_Management_Proactivity = None
        self.Wellness_Savvy_Influencer = None
        self.Digital_Consumption_Propensity = None
        self.Physical_Vitality_Index = None
        self.Stress_Management_Capacity = None
        self.Fitness_Level = None

    def set_field(self, field_name, value):
        """Assign ``value`` to the data field called ``field_name``.

        An unknown name is reported on stdout and otherwise ignored; no
        exception is raised.
        """
        # Check the instance dict instead of hasattr(): hasattr() is also true
        # for methods (e.g. "to_dict"), so a stray field name coming from a
        # spreadsheet could silently overwrite one of them.
        if field_name in self.__dict__:
            setattr(self, field_name, value)
        else:
            print(f"Field {field_name} does not exist in {self.__class__.__name__} class.")

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. ``0`` and ``False`` are real values
        and are kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    def to_dict(self):
        """Return a shallow copy of all fields as ``{field_name: value}``."""
        return dict(self.__dict__)
# Values class
class Values:
    """Flat record of the value/trait scores of a user profile.

    Every field starts as ``None`` and is normally filled in through
    :meth:`set_field` or :meth:`read_from_excel`.
    """

    def __init__(self):
        self.Self_Direction = None
        self.Stimulation = None
        self.Hedonism = None
        self.Achievement = None
        self.Power = None
        self.Security = None
        self.Conformity = None
        self.Tradition = None
        self.Benevolence = None
        self.Universalism = None
        self.Assertiveness = None
        self.Emotional_Responsiveness = None
        self.Directness_Clarity = None
        self.Focus = None
        self.Detail_Orientation = None

    def set_field(self, field_name, value):
        """Assign ``value`` to the data field called ``field_name``.

        An unknown name is reported on stdout and otherwise ignored; no
        exception is raised.
        """
        # Check the instance dict instead of hasattr(): hasattr() is also true
        # for methods (e.g. "to_dict"), so a stray field name coming from a
        # spreadsheet could silently overwrite one of them.
        if field_name in self.__dict__:
            setattr(self, field_name, value)
        else:
            print(f"Field {field_name} does not exist in {self.__class__.__name__} class.")

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. A score of ``0`` is a real value
        and is kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    def to_dict(self):
        """Return a shallow copy of all fields as ``{field_name: value}``."""
        return dict(self.__dict__)

    @staticmethod
    def read_from_excel(values_file):
        """Build a ``Values`` instance from an Excel sheet.

        The sheet must have a ``Value`` column holding the field name (spaces
        are converted to underscores) and a ``Score`` column holding the value
        to store.

        Args:
            values_file: Passed unchanged to ``pandas.read_excel``.

        Returns:
            Values: Instance with every row applied through ``set_field``.
        """
        df = pd.read_excel(values_file)
        values_instance = Values()
        for _, row in df.iterrows():
            raw_name = row['Value']
            # A blank cell is read as NaN (a float), which has no .replace();
            # skip such rows instead of crashing on them.
            if not isinstance(raw_name, str):
                continue
            # Normalise the label so that it matches the attribute names.
            field_name = raw_name.strip().replace(" ", "_")
            values_instance.set_field(field_name, row['Score'])
        return values_instance
# Style class
class Style:
    """Flat record of the writing-style assessments of a user profile.

    Every field starts as ``None`` and is normally filled in through
    :meth:`set_field` or :meth:`read_from_excel`.
    """

    def __init__(self):
        self.Cultural_References = None
        self.Descriptiveness = None
        self.Sentence_Structure = None
        self.Repetition = None
        self.Dialogue_Usage = None

    def set_field(self, field_name, value):
        """Assign ``value`` to the data field called ``field_name``.

        An unknown name is reported on stdout and otherwise ignored; no
        exception is raised.
        """
        # Check the instance dict instead of hasattr(): hasattr() is also true
        # for methods (e.g. "to_dict"), so a stray criterion name coming from
        # a spreadsheet could silently overwrite one of them.
        if field_name in self.__dict__:
            setattr(self, field_name, value)
        else:
            print(f"Field {field_name} does not exist in {self.__class__.__name__} class.")

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. ``0`` and ``False`` are real values
        and are kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    def to_dict(self):
        """Return a shallow copy of all fields as ``{field_name: value}``."""
        return dict(self.__dict__)

    @staticmethod
    def read_from_excel(style_tone_file):
        """Build a ``Style`` instance from a combined style/tone Excel sheet.

        Only rows whose ``Category`` column equals ``'Style'`` are used. For
        those rows ``Criterion`` names the field (spaces are converted to
        underscores) and ``Assessment`` is the value to store.

        Args:
            style_tone_file: Passed unchanged to ``pandas.read_excel``.

        Returns:
            Style: Instance with every matching row applied through
            ``set_field``.
        """
        df = pd.read_excel(style_tone_file)
        style_rows = df[df['Category'] == 'Style']
        style_instance = Style()
        for _, row in style_rows.iterrows():
            raw_name = row['Criterion']
            # A blank cell is read as NaN (a float), which has no .replace();
            # skip such rows instead of crashing on them.
            if not isinstance(raw_name, str):
                continue
            # Normalise the label so that it matches the attribute names.
            criterion = raw_name.strip().replace(" ", "_")
            style_instance.set_field(criterion, row['Assessment'])
        return style_instance
# Tone class
class Tone:
    """Flat record of the tone assessments of a user profile.

    Every field starts as ``None`` and is normally filled in through
    :meth:`set_field` or :meth:`read_from_excel`.
    """

    def __init__(self):
        self.Emotional_Tone = None
        self.Humor = None
        self.Subjectivity_vs_Objectivity = None
        self.Persuasiveness = None
        self.Optimism_vs_Pessimism = None
        self.Tone_Specificity = None
        self.Tension_Level = None
        self.Intensity = None
        self.Cultural_Tone = None

    def set_field(self, field_name, value):
        """Assign ``value`` to the data field called ``field_name``.

        An unknown name is reported on stdout and otherwise ignored; no
        exception is raised.
        """
        # Check the instance dict instead of hasattr(): hasattr() is also true
        # for methods (e.g. "to_dict"), so a stray criterion name coming from
        # a spreadsheet could silently overwrite one of them.
        if field_name in self.__dict__:
            setattr(self, field_name, value)
        else:
            print(f"Field {field_name} does not exist in {self.__class__.__name__} class.")

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. ``0`` and ``False`` are real values
        and are kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    def to_dict(self):
        """Return a shallow copy of all fields as ``{field_name: value}``."""
        return dict(self.__dict__)

    @staticmethod
    def read_from_excel(style_tone_file):
        """Build a ``Tone`` instance from a combined style/tone Excel sheet.

        Only rows whose ``Category`` column equals ``'Tone'`` are used. For
        those rows ``Criterion`` names the field (spaces are converted to
        underscores) and ``Assessment`` is the value to store.

        Args:
            style_tone_file: Passed unchanged to ``pandas.read_excel``.

        Returns:
            Tone: Instance with every matching row applied through
            ``set_field``.
        """
        df = pd.read_excel(style_tone_file)
        tone_rows = df[df['Category'] == 'Tone']
        tone_instance = Tone()
        for _, row in tone_rows.iterrows():
            raw_name = row['Criterion']
            # A blank cell is read as NaN (a float), which has no .replace();
            # skip such rows instead of crashing on them.
            if not isinstance(raw_name, str):
                continue
            # Normalise the label so that it matches the attribute names.
            criterion = raw_name.strip().replace(" ", "_")
            tone_instance.set_field(criterion, row['Assessment'])
        return tone_instance
class FastFacts:
    """Ordered collection of free-text facts attached to a user profile."""

    def __init__(self):
        self.facts = []

    def add_fact(self, fact):
        """Append ``fact`` when it is a string; otherwise print a notice."""
        if not isinstance(fact, str):
            print("Only strings are allowed as facts.")
            return
        self.facts.append(fact)

    def __repr__(self):
        """Return ``ClassName: <fact>, <fact>, ...``."""
        wrapped = [f"<{item}>" for item in self.facts]
        return "{}: {}".format(self.__class__.__name__, ", ".join(wrapped))

    def to_dict(self):
        """Return ``{"facts": <the stored list>}`` (the list is not copied)."""
        return dict(facts=self.facts)

    @staticmethod
    def read_from_excel(fact_file):
        """Load facts from the ``FastFacts`` column of an Excel sheet.

        Args:
            fact_file: Passed unchanged to ``pandas.read_excel``.

        Returns:
            FastFacts | None: The populated object, or ``None`` when anything
            goes wrong while reading (the error is printed).
        """
        try:
            frame = pd.read_excel(fact_file)
            # The facts are expected in a column named 'FastFacts'.
            collected = FastFacts()
            for entry in frame["FastFacts"].tolist():
                collected.add_fact(entry)
        except Exception as e:
            print(f"An error occurred while reading from the Excel file: {e}")
            return None
        return collected
class UserProfile:
    """Aggregate of everything known about one user.

    Holds a few identity fields plus one object per profile section
    (demographics, category, preferences, values, style, tone, fast facts).
    """

    def __init__(self):
        self.ID = None
        self.name = None
        self.interview_date = None
        self.transcript_file = None
        self.demographics = Demographics()  # Contains demographic-related fields
        self.category = Category()  # Contains health-related fields
        self.preferences = Preferences()  # Contains user preferences
        self.values = Values()  # Contains user values
        self.style = Style()  # Contains style-related fields
        self.tone = Tone()  # Contains tone-related fields
        self.fast_facts = FastFacts()  # Contains fast facts as a list of strings

    def set_ID(self, ID):
        """Store the profile identifier."""
        self.ID = ID

    def set_name(self, name):
        """Store the user's name."""
        self.name = name

    def set_interview_date(self, interview_date):
        """Store the interview date.

        read_user_profiles_from_excel() calls this setter for the
        ``Interview_Date`` column. The value is kept on the instance only; it
        is not part of ``to_dict()`` or ``__repr__``.
        """
        self.interview_date = interview_date

    def set_transcript_file(self, transcript_file):
        """Store the transcript file reference."""
        self.transcript_file = transcript_file

    def set_field(self, type, field_name, value):
        """Set ``field_name`` on the section named by ``type``.

        Args:
            type: One of 'Demographics', 'Category', 'Preferences', 'Values',
                'Style' or 'Tone'.
            field_name: Attribute name inside that section.
            value: Value to store.

        An unknown section name is reported on stdout and otherwise ignored.
        """
        sections = {
            'Demographics': self.demographics,
            'Category': self.category,
            'Preferences': self.preferences,
            'Values': self.values,
            'Style': self.style,
            'Tone': self.tone,
        }
        target = sections.get(type)
        if target is None:
            print(f"Section {type} does not exist in {self.__class__.__name__} class.")
            return
        target.set_field(field_name, value)

    def set_demographics_field(self, field_name, value):
        """Set a field on the demographics section."""
        self.demographics.set_field(field_name, value)

    def set_category_field(self, field_name, value):
        """Set a field on the category section."""
        self.category.set_field(field_name, value)

    def set_preferences_field(self, field_name, value):
        """Set a field on the preferences section."""
        self.preferences.set_field(field_name, value)

    def set_values_field(self, field_name, value):
        """Set a field on the values section."""
        self.values.set_field(field_name, value)

    def set_style_field(self, field_name, value):
        """Set a field on the style section."""
        self.style.set_field(field_name, value)

    def set_tone_field(self, field_name, value):
        """Set a field on the tone section."""
        self.tone.set_field(field_name, value)

    def set_fast_facts(self, facts_list):
        """Replace the stored facts with ``facts_list``.

        Anything other than a list of strings is rejected with a printed
        message and the current facts are left unchanged.
        """
        if isinstance(facts_list, list) and all(isinstance(fact, str) for fact in facts_list):
            self.fast_facts.facts = facts_list
        else:
            print("FastFacts must be a list of strings.")

    def __repr__(self):
        """Return a multi-line summary with one line per profile section."""
        return (f"UserProfile(ID='{self.ID}', Name='{self.name}', Transcript File='{self.transcript_file}'\n"
                f"  Demographics={self.demographics},\n"
                f"  Category={self.category},\n"
                f"  Preferences={self.preferences},\n"
                f"  Values={self.values},\n"
                f"  Style={self.style},\n"
                f"  Tone={self.tone},\n"
                f"  FastFacts={self.fast_facts}\n"
                f")")

    def to_dict(self):
        """
        Converts the UserProfile object to a dictionary for easy CSV export.

        The identity fields come first, followed by the fields of every
        section merged into the same flat dictionary, and finally the
        ``facts`` list.
        """
        user_dict = {
            'ID': self.ID,
            'Name': self.name,
            'Transcript File': self.transcript_file,
        }
        for section in (self.demographics, self.category, self.preferences,
                        self.values, self.style, self.tone, self.fast_facts):
            user_dict.update(section.to_dict())
        return user_dict

    @staticmethod
    def write_user_profiles_to_excel(user_profiles, filename):
        """
        Writes a list of UserProfile objects to an Excel file.

        Args:
            user_profiles (list): List of UserProfile objects.
            filename (str): Path to the Excel file.

        Nothing is written when ``user_profiles`` is empty.
        """
        if not user_profiles:
            print("No user profiles to write.")
            return
        # One row per profile, one column per key of to_dict().
        df = pd.DataFrame([user_profile.to_dict() for user_profile in user_profiles])
        df.to_excel(filename, index=False)
        print(f"User profiles successfully written to {filename}")

    @staticmethod
    def _is_missing(value):
        """Return True for an absent or blank cell (None, NaN, NaT, pandas NA)."""
        return value is None or (pd.api.types.is_scalar(value) and bool(pd.isna(value)))

    @staticmethod
    def read_user_profiles_from_excel(filename):
        """
        Reads a list of UserProfile objects from an Excel file.

        Each row becomes one profile. Columns that are absent, and cells that
        are blank, leave the corresponding field at ``None``. Fast facts are
        not read back.

        Args:
            filename (str): Path to the Excel file.

        Returns:
            list: List of UserProfile objects.
        """
        df = pd.read_excel(filename)
        user_profiles = []
        for _, row in df.iterrows():
            user_profile = UserProfile()

            # Identity columns. The transcript is accepted under the name this
            # reader has always used and under the name to_dict() writes, so a
            # file produced by write_user_profiles_to_excel() round-trips.
            identity_columns = (
                (('ID',), user_profile.set_ID),
                (('Name',), user_profile.set_name),
                (('Interview_Date',), user_profile.set_interview_date),
                (('Transcript_Input_File', 'Transcript File'), user_profile.set_transcript_file),
            )
            for columns, setter in identity_columns:
                for column in columns:
                    value = row.get(column)
                    if not UserProfile._is_missing(value):
                        setter(value)
                        break

            # Section columns: every field name of every section is looked up
            # as a column of the same name.
            section_setters = (
                (user_profile.demographics, user_profile.set_demographics_field),
                (user_profile.category, user_profile.set_category_field),
                (user_profile.preferences, user_profile.set_preferences_field),
                (user_profile.values, user_profile.set_values_field),
                (user_profile.style, user_profile.set_style_field),
                (user_profile.tone, user_profile.set_tone_field),
            )
            for section, setter in section_setters:
                for field_name in section.to_dict():
                    value = row.get(field_name)
                    if not UserProfile._is_missing(value):
                        setter(field_name, value)

            user_profiles.append(user_profile)
        print(f"User profiles successfully read from {filename}")
        return user_profiles
class UserProfileDetail:
    """One reviewed profile entry: a key, its original value, the QA verdict
    and the revised value."""

    def __init__(self, key, original_value, qa_check, value):
        """
        Initialize a UserProfileDetail entry.

        Args:
            key: Name of the profile field the entry refers to.
            original_value: Value before review.
            qa_check: QA check status recorded for the entry.
            value: Revised value.
        """
        self.key = key
        self.original_value = original_value
        self.qa_check = qa_check
        self.value = value

    def __repr__(self):
        """Return ``ClassName: field='value', ...`` for the populated fields.

        Fields that are ``None``, an empty string or the placeholder
        ``"Unable to map"`` are left out. ``0`` and ``False`` are real values
        and are kept.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if val is not None and val != "" and val != "Unable to map"
        ]
        return f"{self.__class__.__name__}: " + ", ".join(shown)

    @staticmethod
    def filter_profiles(profiles, key=None, qa_check=None, value=None):
        """
        Static method to filter user profiles by key, QA check status, or value.

        A criterion left at ``None`` is not applied; the remaining criteria
        must all match.

        Args:
            profiles (list): List of UserProfileDetail objects.
            key (str, optional): The key to filter by.
            qa_check (str, optional): The QA check status to filter by.
            value (str, optional): The value to filter by.

        Returns:
            list: A list of UserProfileDetail entries that match the criteria.
        """
        return [
            profile for profile in profiles
            if (key is None or profile.key == key)
            and (qa_check is None or profile.qa_check == qa_check)
            and (value is None or profile.value == value)
        ]

    @staticmethod
    def generate_user_profiles(file_path):
        """
        Static method to generate a list of UserProfileDetail entries from an Excel (.xlsx) file.

        The sheet must contain the columns ``Key``, ``Value``, ``QA Check``
        and ``Revised Value``; each row becomes one entry.

        Args:
            file_path (str): The path to the Excel file containing user profile entries.

        Returns:
            list: A list of UserProfileDetail objects generated from the file.
        """
        df = pd.read_excel(file_path)
        return [
            UserProfileDetail(
                key=row['Key'],
                original_value=row['Value'],
                qa_check=row['QA Check'],
                value=row['Revised Value'],
            )
            for _, row in df.iterrows()
        ]