DP1110
/

mlp-accessibility-model

Joblib

Model card Files Files and versions

xet

Community

DP1110 committed 19 days ago

Commit

d31755b

verified ·

1 Parent(s): 3cc1637

Upload inference_script.py with huggingface_hub

Browse files

Files changed (1) hide show

inference_script.py +90 -0

inference_script.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import pandas as pd
+import joblib
+from huggingface_hub import hf_hub_download
+from sklearn.impute import SimpleImputer
+import numpy as np
+# Define the Hugging Face repository ID and filenames
+REPO_ID = "DP1110/mlp-accessibility-model"
+MODEL_FILENAME = 'mlp_regressor_model.joblib'
+IMPUTER_FILENAME = 'simple_imputer.joblib'
+# Define the feature columns, matching the training data order
+FEATURE_COLUMNS = ['% ASF (Euclidean)', '% Built-Up Area', '% ASF (Network)', '% ASF from Bus Stops ', '% ASF from Bus Stops', '% ASF (Network) ']
+# Download the model and imputer from Hugging Face Hub
+try:
+    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
+    imputer_path = hf_hub_download(repo_id=REPO_ID, filename=IMPUTER_FILENAME)
+except Exception as e:
+    print('Error downloading files from Hugging Face Hub:', e)
+    model_path = None
+    imputer_path = None
+# Load the model and imputer
+loaded_mlp_model = None
+loaded_imputer = None
+if model_path:
+    loaded_mlp_model = joblib.load(model_path)
+    print('MLP model loaded from', model_path)
+if imputer_path:
+    loaded_imputer = joblib.load(imputer_path)
+    print('Imputer loaded from', imputer_path)
+def predict_accessibility_score(new_data_df: pd.DataFrame) -> pd.Series:
+    """
+    Predicts the overall accessibility score for new, raw input data.
+    Args:
+        new_data_df (pd.DataFrame): A DataFrame containing new data with the same
+                                    feature columns as the training data, before imputation.
+    Returns:
+        pd.Series: Predicted overall accessibility scores.
+    """
+    if loaded_mlp_model is None or loaded_imputer is None:
+        raise RuntimeError('Model or imputer not loaded. Cannot make predictions.')
+    # Ensure the order of columns matches the training data
+    # Handle cases where new_data_df might have different columns or order
+    missing_cols = set(FEATURE_COLUMNS) - set(new_data_df.columns)
+    for c in missing_cols:
+        new_data_df[c] = np.nan  # Or appropriate default value
+    # Reorder columns to match the training features
+    new_data_df = new_data_df[FEATURE_COLUMNS]
+    # Apply the loaded imputer to handle missing values in new data
+    new_data_imputed = loaded_imputer.transform(new_data_df)
+    new_data_imputed_df = pd.DataFrame(new_data_imputed, columns=FEATURE_COLUMNS)
+    # Make predictions using the loaded MLP model
+    predictions = loaded_mlp_model.predict(new_data_imputed_df)
+    return pd.Series(predictions, name='Predicted_Overall_Accessibility_Score')
+if __name__ == '__main__':
+    print("\n--- Demonstrating prediction with sample data ---")
+    # Create a sample DataFrame for new raw data.
+    # This should mimic the structure of the features used for training.
+    sample_data_dict = {}
+    for i, col_name in enumerate(FEATURE_COLUMNS):
+        # Assign arbitrary values for demonstration
+        sample_data_dict[col_name] = [0.5 + (i * 0.005) % 0.1]  # Varying slightly for demonstration
+    new_sample_data = pd.DataFrame(sample_data_dict)
+    # Make predictions using the defined function
+    try:
+        predictions = predict_accessibility_score(new_sample_data)
+        # Display the new sample data and the predictions
+        print("\n--- New Sample Data for Prediction ---")
+        print(new_sample_data)
+        print("\n--- Predicted Overall Accessibility Score ---")
+        print(predictions)
+    except Exception as e:
+        print('Error during prediction:', e)