"""Predict accessibility scores with an MLP regressor fetched from the Hugging Face Hub.

Importing this module downloads the model and imputer artifacts named below
and loads them with joblib. If the download fails, the error is printed and
both artifacts stay ``None``; ``predict_accessibility_score`` then raises
``RuntimeError`` when called.
"""
import pandas as pd
import joblib
from huggingface_hub import hf_hub_download
from sklearn.impute import SimpleImputer
import numpy as np

# Define the Hugging Face repository ID and filenames
REPO_ID = "DP1110/mlp-accessibility-model"
MODEL_FILENAME = 'mlp_regressor_model.joblib'
IMPUTER_FILENAME = 'simple_imputer.joblib'

# Define the feature columns, matching the training data order.
# NOTE: some labels differ only by a trailing space. They are distinct column
# names and are kept verbatim; do not strip or de-duplicate them.
FEATURE_COLUMNS = [
    '% ASF (Euclidean)',
    '% Built-Up Area',
    '% ASF (Network)',
    '% ASF from Bus Stops ',
    '% ASF from Bus Stops',
    '% ASF (Network) ',
]

# Download the model and imputer from Hugging Face Hub.
# Best-effort: a failure is reported and leaves both paths unset so that the
# module can still be imported.
try:
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
    imputer_path = hf_hub_download(repo_id=REPO_ID, filename=IMPUTER_FILENAME)
except Exception as e:
    print('Error downloading files from Hugging Face Hub:', e)
    model_path = None
    imputer_path = None

# Load the model and imputer.
# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code, so REPO_ID must point at a repository you trust.
loaded_mlp_model = None
loaded_imputer = None
if model_path:
    loaded_mlp_model = joblib.load(model_path)
    print('MLP model loaded from', model_path)
if imputer_path:
    loaded_imputer = joblib.load(imputer_path)
    print('Imputer loaded from', imputer_path)


def predict_accessibility_score(new_data_df: pd.DataFrame) -> pd.Series:
    """Predict the overall accessibility score for new, raw input data.

    The input is conformed to ``FEATURE_COLUMNS``: columns missing from the
    input are added filled with NaN, extra columns are dropped, and the
    column order is set to the order in ``FEATURE_COLUMNS``. The loaded
    imputer is then applied and the result is passed to the loaded model.

    The caller's DataFrame is not modified.

    Args:
        new_data_df: Raw (not yet imputed) feature data, one row per sample.

    Returns:
        A Series named ``'Predicted_Overall_Accessibility_Score'`` holding one
        prediction per input row. It carries a fresh default index, not the
        index of ``new_data_df``.

    Raises:
        RuntimeError: If the model or the imputer was not loaded at import.
    """
    if loaded_mlp_model is None or loaded_imputer is None:
        raise RuntimeError('Model or imputer not loaded. Cannot make predictions.')

    # reindex returns a new frame, so missing columns are added as NaN and the
    # training order is applied without writing into the caller's DataFrame.
    features = new_data_df.reindex(columns=FEATURE_COLUMNS)

    # Apply the loaded imputer to handle missing values in new data
    imputed_values = loaded_imputer.transform(features)
    imputed_df = pd.DataFrame(imputed_values, columns=FEATURE_COLUMNS)

    # Make predictions using the loaded MLP model
    predictions = loaded_mlp_model.predict(imputed_df)
    return pd.Series(predictions, name='Predicted_Overall_Accessibility_Score')


if __name__ == '__main__':
    print("\n--- Demonstrating prediction with sample data ---")

    # Build a one-row sample frame with every feature column present.
    # The values are arbitrary and vary slightly per column, for demonstration.
    sample_data_dict = {
        col_name: [0.5 + (i * 0.005) % 0.1]
        for i, col_name in enumerate(FEATURE_COLUMNS)
    }
    new_sample_data = pd.DataFrame(sample_data_dict)

    # Make predictions using the defined function
    try:
        predictions = predict_accessibility_score(new_sample_data)
        # Display the new sample data and the predictions
        print("\n--- New Sample Data for Prediction ---")
        print(new_sample_data)
        print("\n--- Predicted Overall Accessibility Score ---")
        print(predictions)
    except Exception as e:
        print('Error during prediction:', e)