|
|
|
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
class InputValidationError(Exception):
    """Raised when prediction input fails validation.

    Signals that the data handed to the model is unusable as given (wrong
    container type, wrong set of columns, or non-numeric columns), so callers
    can tell bad input apart from unrelated runtime errors.
    """
|
|
|
|
|
|
|
|
def validate_and_prepare_input(input_df: pd.DataFrame, model) -> pd.DataFrame:
    """Validate ``input_df`` against the feature names reported by ``model``.

    The expected features are read from ``model.get_booster().feature_names``.
    The input must contain exactly those columns (no more, no fewer, no
    duplicates) and each of them must have a numeric dtype.

    Args:
        input_df: Candidate input data.
        model: Object exposing ``get_booster()`` whose result has a
            ``feature_names`` attribute listing the expected columns.

    Returns:
        A DataFrame holding only the expected columns, arranged in the order
        the model reports them. ``input_df`` itself is not modified.

    Raises:
        InputValidationError: If ``input_df`` is not a DataFrame, the feature
            names cannot be obtained from the model, columns are missing,
            unexpected or duplicated, or an expected column is not numeric.
    """
    if not isinstance(input_df, pd.DataFrame):
        raise InputValidationError("Input must be a pandas DataFrame.")

    try:
        feature_names = model.get_booster().feature_names
    except Exception as err:
        # Chain the cause so the underlying model failure stays visible.
        raise InputValidationError(
            "Unable to retrieve model feature names."
        ) from err

    # NOTE(review): a booster may report no feature names at all (presumably
    # when it was trained on unnamed arrays); without this guard the set()
    # calls below would fail with a bare TypeError.
    if feature_names is None:
        raise InputValidationError("Unable to retrieve model feature names.")
    expected_features = list(feature_names)

    expected_set = set(expected_features)
    provided_set = set(input_df.columns)

    # Sort (by string form, so mixed label types cannot break the sort) to
    # keep the error messages deterministic; plain set order is not.
    missing_cols = expected_set - provided_set
    if missing_cols:
        raise InputValidationError(
            f"Missing required columns: {sorted(missing_cols, key=str)}"
        )

    extra_cols = provided_set - expected_set
    if extra_cols:
        raise InputValidationError(
            f"Unexpected columns provided: {sorted(extra_cols, key=str)}"
        )

    # With duplicated labels ``input_df[col]`` yields a DataFrame rather than
    # a Series, which would surface below as a misleading "must be numeric"
    # error, so report the real problem explicitly.
    if not input_df.columns.is_unique:
        duplicated = input_df.columns[input_df.columns.duplicated()].unique()
        raise InputValidationError(
            f"Duplicate columns provided: {sorted(duplicated, key=str)}"
        )

    for col in expected_features:
        if not pd.api.types.is_numeric_dtype(input_df[col]):
            raise InputValidationError(
                f"Column '{col}' must be numeric."
            )

    # Select with a list so the result follows the model's feature order.
    return input_df[expected_features]
|
|
|