File size: 1,088 Bytes
79d167d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd

def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
    """Clean and preprocess HR analytics data.

    Each step is applied only when the corresponding column is present:

    * ``Survey Date``, ``StartDate`` and ``DOB`` are parsed to datetimes;
      values that cannot be parsed become ``NaT``.
    * ``Age`` is derived from ``DOB`` as the number of completed calendar
      years as of today (``NaN`` where ``DOB`` is missing or unparseable).
    * ``Performance Score`` is converted to numbers: existing numeric values
      are kept, the labels "Exceeds", "Fully Meets", "Needs Improvement" and
      "PIP" map to 5, 4, 3 and 2 (surrounding whitespace is ignored), and
      anything else becomes ``NaN``.

    Args:
        data: Raw HR data. The frame is modified in place.

    Returns:
        The same ``data`` object, after the transformations above.
    """
    # Convert date columns; errors='coerce' turns bad values into NaT
    # instead of raising.
    for column in ('Survey Date', 'StartDate', 'DOB'):
        if column in data.columns:
            data[column] = pd.to_datetime(data[column], errors='coerce')

    # Calculate Age from DOB.
    if 'DOB' in data.columns:
        today = pd.Timestamp.today()
        dob = data['DOB']
        # Compare calendar fields rather than using ``days // 365``: that
        # shortcut ignores leap days and reports people as a year older
        # during the days leading up to their birthday.
        birthday_still_ahead = (dob.dt.month > today.month) | (
            (dob.dt.month == today.month) & (dob.dt.day > today.day)
        )
        # For NaT rows the year is NaN and both comparisons are False, so the
        # result stays NaN.
        data['Age'] = (today.year - dob.dt.year) - birthday_still_ahead.astype(int)

    # Convert Performance Score to numeric.
    if 'Performance Score' in data.columns:
        # Built once per call instead of once per cell.
        score_map = {"Exceeds": 5, "Fully Meets": 4, "Needs Improvement": 3, "PIP": 2}

        def clean_performance_score(value):
            """Map one raw rating to its numeric score, or None if unknown."""
            if isinstance(value, (int, float)):
                return value
            if isinstance(value, str):
                return score_map.get(value.strip())
            return None

        cleaned = data['Performance Score'].apply(clean_performance_score)
        # Normalise the result (which may be object dtype with None) to a
        # numeric dtype with NaN for missing values.
        data['Performance Score'] = pd.to_numeric(cleaned, errors='coerce')

    return data