import pandas as pd

# Textual performance ratings and the numeric score each one maps to.
_PERFORMANCE_SCORE_MAP = {
    "Exceeds": 5,
    "Fully Meets": 4,
    "Needs Improvement": 3,
    "PIP": 2,
}

# Columns that are parsed as dates when present in the input frame.
_DATE_COLUMNS = ('Survey Date', 'StartDate', 'DOB')


def _clean_performance_score(value):
    """Map one raw performance rating to a number, or None if unrecognized."""
    if isinstance(value, (int, float)):
        # Already numeric (NaN included): pass through untouched.
        return value
    if isinstance(value, str):
        # Labels are matched exactly after trimming surrounding whitespace.
        return _PERFORMANCE_SCORE_MAP.get(value.strip())
    return None


def _age_in_years(dob: pd.Series, today: pd.Timestamp) -> pd.Series:
    """Return completed calendar years between each date in *dob* and *today*."""
    # A birthday counts as reached once today's (month, day) is on or after
    # the birth (month, day).  Comparing calendar fields avoids the leap-day
    # drift of dividing an elapsed day count by 365.
    birthday_reached = (dob.dt.month < today.month) | (
        (dob.dt.month == today.month) & (dob.dt.day <= today.day)
    )
    age = today.year - dob.dt.year - (~birthday_reached).astype(int)
    # Missing birth dates (NaT) must stay missing in the result.
    return age.where(dob.notna())


def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
    """Clean and preprocess HR analytics data.

    The frame is modified in place and the same object is returned.

    Steps, each applied only when the relevant column exists:

    * 'Survey Date', 'StartDate' and 'DOB' are parsed with
      ``pd.to_datetime``; unparseable values become ``NaT``.
    * 'Age' is derived from 'DOB' as the number of completed calendar
      years as of today; rows with a missing 'DOB' get a missing age.
    * 'Performance Score' is converted to numbers: numeric values are
      kept, known labels ("Exceeds", "Fully Meets", "Needs Improvement",
      "PIP") map to 5, 4, 3, 2, and anything else becomes NaN.

    Args:
        data: Raw HR records.

    Returns:
        The same ``data`` object, after the transformations above.
    """
    # Convert date columns
    for col in _DATE_COLUMNS:
        if col in data.columns:
            data[col] = pd.to_datetime(data[col], errors='coerce')

    # Calculate Age from DOB
    if 'DOB' in data.columns:
        today = pd.Timestamp.today().normalize()
        data['Age'] = _age_in_years(data['DOB'], today)

    # Convert Performance Score to Numeric
    if 'Performance Score' in data.columns:
        cleaned = data['Performance Score'].apply(_clean_performance_score)
        data['Performance Score'] = pd.to_numeric(cleaned, errors='coerce')

    return data