Upload inference_script.py with huggingface_hub
Browse files- inference_script.py +90 -0
inference_script.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import joblib
|
| 3 |
+
from huggingface_hub import hf_hub_download
|
| 4 |
+
from sklearn.impute import SimpleImputer
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
# Hugging Face Hub location of the published artifacts.
REPO_ID = "DP1110/mlp-accessibility-model"
MODEL_FILENAME = 'mlp_regressor_model.joblib'
IMPUTER_FILENAME = 'simple_imputer.joblib'

# Feature columns, in the order the model expects them.
# NOTE(review): two pairs of names differ only by a trailing space
# ('% ASF from Bus Stops ' / '% ASF from Bus Stops' and
# '% ASF (Network)' / '% ASF (Network) '). Presumably this mirrors the
# training data's headers exactly -- confirm before "cleaning" them up.
FEATURE_COLUMNS = [
    '% ASF (Euclidean)',
    '% Built-Up Area',
    '% ASF (Network)',
    '% ASF from Bus Stops ',
    '% ASF from Bus Stops',
    '% ASF (Network) ',
]

# Fetch both artifacts from the Hub. This is all-or-nothing: if either
# download fails, neither path is kept and the failure is only reported,
# so the module can still be imported (predictions will then raise).
try:
    model_path, imputer_path = (
        hf_hub_download(repo_id=REPO_ID, filename=artifact_name)
        for artifact_name in (MODEL_FILENAME, IMPUTER_FILENAME)
    )
except Exception as err:
    print('Error downloading files from Hugging Face Hub:', err)
    model_path = imputer_path = None

# Deserialize whichever artifacts were downloaded; the rest stay None.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- this is only safe as long as REPO_ID is a trusted repository.
loaded_mlp_model = loaded_imputer = None

if model_path:
    loaded_mlp_model = joblib.load(model_path)
    print('MLP model loaded from', model_path)

if imputer_path:
    loaded_imputer = joblib.load(imputer_path)
    print('Imputer loaded from', imputer_path)
|
| 35 |
+
|
| 36 |
+
def predict_accessibility_score(new_data_df: pd.DataFrame) -> pd.Series:
    """
    Predict the overall accessibility score for new, raw input data.

    The input is aligned to ``FEATURE_COLUMNS`` (missing feature columns are
    added as NaN, extra columns are ignored), passed through the loaded
    imputer, and then fed to the loaded MLP model. The caller's DataFrame is
    never modified.

    Args:
        new_data_df (pd.DataFrame): Rows to score, before imputation. Any
            column listed in ``FEATURE_COLUMNS`` that is absent is treated
            as entirely missing.

    Returns:
        pd.Series: One prediction per input row, in input row order, named
        'Predicted_Overall_Accessibility_Score'. The Series carries a fresh
        positional index (0..n-1), not the index of ``new_data_df``.

    Raises:
        RuntimeError: If the model or the imputer was not loaded at import
            time (for example because the download failed).
    """
    if loaded_mlp_model is None or loaded_imputer is None:
        raise RuntimeError('Model or imputer not loaded. Cannot make predictions.')

    # Work on a copy: adding the missing columns directly to new_data_df
    # would silently change the caller's DataFrame.
    features = new_data_df.copy()
    for column in FEATURE_COLUMNS:
        if column not in features.columns:
            # Left as NaN so the imputer below fills it in.
            features[column] = np.nan

    # Keep only the model's features, in the expected order.
    features = features[FEATURE_COLUMNS]

    # Fill missing values with the loaded imputer, then restore the column
    # labels that transform() drops when it returns a plain array.
    imputed_values = loaded_imputer.transform(features)
    imputed_features = pd.DataFrame(imputed_values, columns=FEATURE_COLUMNS)

    predictions = loaded_mlp_model.predict(imputed_features)

    return pd.Series(predictions, name='Predicted_Overall_Accessibility_Score')
|
| 67 |
+
|
| 68 |
+
if __name__ == '__main__':
    print("\n--- Demonstrating prediction with sample data ---")

    # Build a one-row demo frame with one entry per training feature.
    # The values are arbitrary and shift slightly from column to column.
    demo_values = {
        feature_name: [0.5 + (position * 0.005) % 0.1]
        for position, feature_name in enumerate(FEATURE_COLUMNS)
    }
    new_sample_data = pd.DataFrame(demo_values)

    # Run the demo frame through the prediction function and show the
    # input next to the output. Any failure is reported, not raised.
    try:
        predictions = predict_accessibility_score(new_sample_data)

        print("\n--- New Sample Data for Prediction ---")
        print(new_sample_data)
        print("\n--- Predicted Overall Accessibility Score ---")
        print(predictions)
    except Exception as e:
        print('Error during prediction:', e)
|