| |
|
| | import pandas as pd |
| |
|
| |
|
class InputValidationError(Exception):
    """Raised when prediction input fails validation against the model."""
| |
|
| |
|
def validate_and_prepare_input(input_df: pd.DataFrame, model) -> pd.DataFrame:
    """Validate *input_df* against the model's features and order its columns.

    The expected feature list is read from
    ``model.get_booster().feature_names``. The input must contain exactly
    those columns (no missing, no extra, no duplicates) and each of them
    must have a numeric dtype.

    Args:
        input_df: Candidate prediction input.
        model: Object exposing ``get_booster()`` whose result has a
            ``feature_names`` attribute.

    Returns:
        A DataFrame holding the columns of ``input_df`` selected in the
        order given by the model's feature names.

    Raises:
        InputValidationError: If ``input_df`` is not a DataFrame, the
            feature names cannot be obtained (including when they are
            ``None``), columns are missing, unexpected or duplicated, or a
            required column is not numeric.
    """
    if not isinstance(input_df, pd.DataFrame):
        raise InputValidationError("Input must be a pandas DataFrame.")

    try:
        feature_names = model.get_booster().feature_names
    except Exception as err:
        # Boundary conversion to the domain error; keep the original cause
        # attached so the underlying failure is still visible in tracebacks.
        raise InputValidationError(
            "Unable to retrieve model feature names."
        ) from err

    # A booster without feature names yields None here; without this guard
    # the set() calls below would fail with a raw TypeError.
    if feature_names is None:
        raise InputValidationError("Unable to retrieve model feature names.")

    expected_features = list(feature_names)
    expected = set(expected_features)
    provided = set(input_df.columns)

    # Sorted (by str, so mixed label types cannot break the sort) to keep
    # the error messages deterministic across runs.
    missing_cols = sorted(expected - provided, key=str)
    if missing_cols:
        raise InputValidationError(
            f"Missing required columns: {missing_cols}"
        )

    extra_cols = sorted(provided - expected, key=str)
    if extra_cols:
        raise InputValidationError(
            f"Unexpected columns provided: {extra_cols}"
        )

    # Duplicated labels pass the set-based checks above, but then
    # input_df[col] is a DataFrame rather than a Series and the dtype check
    # below would not be meaningful.
    if not input_df.columns.is_unique:
        duplicate_mask = input_df.columns.duplicated()
        duplicate_cols = sorted(set(input_df.columns[duplicate_mask]), key=str)
        raise InputValidationError(
            f"Duplicate columns provided: {duplicate_cols}"
        )

    for col in expected_features:
        if not pd.api.types.is_numeric_dtype(input_df[col]):
            raise InputValidationError(
                f"Column '{col}' must be numeric."
            )

    # Reorder to the model's feature order; list indexing returns a new
    # DataFrame, so the caller's frame is left untouched.
    return input_df[expected_features]
| |
|