Spaces:
Sleeping
Sleeping
| import pandas as pd | |
# Columns parsed as dates when they are present in the input frame.
_DATE_COLUMNS = ('Survey Date', 'StartDate', 'DOB')

# Textual performance ratings and their numeric equivalents.
_PERFORMANCE_SCORE_MAP = {
    "Exceeds": 5,
    "Fully Meets": 4,
    "Needs Improvement": 3,
    "PIP": 2,
}


def _clean_performance_score(value):
    """Return the numeric score for one raw 'Performance Score' cell.

    Numbers pass through unchanged; strings are stripped of surrounding
    whitespace and looked up in ``_PERFORMANCE_SCORE_MAP``. Unknown labels
    and any other type yield ``None``.
    """
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        return _PERFORMANCE_SCORE_MAP.get(value.strip())
    return None


def _age_in_years(dob: pd.Series, today: pd.Timestamp) -> pd.Series:
    """Return completed years between each date in *dob* and *today*.

    Uses calendar fields instead of ``days // 365`` so leap days do not
    push the result up by a year shortly before a birthday. Missing dates
    (NaT) produce NaN.
    """
    birth_month = dob.dt.month
    birth_day = dob.dt.day
    # True where this year's birthday is still ahead of `today`.
    # Comparisons against NaN are False, so NaT rows stay NaN below.
    birthday_pending = (birth_month > today.month) | (
        (birth_month == today.month) & (birth_day > today.day)
    )
    return today.year - dob.dt.year - birthday_pending.astype(int)


def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
    """Clean and preprocess HR analytics data.

    The frame is modified in place and also returned. Every step is skipped
    when the column it needs is absent.

    * 'Survey Date', 'StartDate' and 'DOB' are parsed with
      ``pd.to_datetime``; unparseable values become NaT.
    * 'Age' is derived from 'DOB' as the number of completed years as of
      today.
    * 'Performance Score' labels ("Exceeds", "Fully Meets",
      "Needs Improvement", "PIP") are mapped to 5, 4, 3 and 2; existing
      numbers are kept and anything else becomes NaN.

    Args:
        data: Raw HR records.

    Returns:
        The same DataFrame object, with the columns above cleaned/added.
    """
    # Convert date columns
    for col in _DATE_COLUMNS:
        if col in data.columns:
            data[col] = pd.to_datetime(data[col], errors='coerce')

    # Calculate Age from DOB
    if 'DOB' in data.columns:
        data['Age'] = _age_in_years(data['DOB'], pd.Timestamp.today())

    # Convert Performance Score to Numeric
    if 'Performance Score' in data.columns:
        cleaned = data['Performance Score'].apply(_clean_performance_score)
        # apply() may leave an object column (None mixed with numbers);
        # to_numeric turns it into a proper numeric dtype with NaN.
        data['Performance Score'] = pd.to_numeric(cleaned, errors='coerce')

    return data