|
|
import pandas as pd |
|
|
import joblib |
|
|
from huggingface_hub import hf_hub_download |
|
|
from sklearn.impute import SimpleImputer |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
# Hugging Face Hub repository and the artifact file names fetched from it.
REPO_ID = "DP1110/mlp-accessibility-model"
MODEL_FILENAME = "mlp_regressor_model.joblib"
IMPUTER_FILENAME = "simple_imputer.joblib"
|
|
|
|
|
|
|
|
# Column names, in order, that are selected from the input frame and handed to
# the imputer and the model.
# NOTE(review): two pairs of names differ only by a trailing space
# ('% ASF from Bus Stops ' / '% ASF from Bus Stops' and
# '% ASF (Network)' / '% ASF (Network) '). They are distinct labels here;
# presumably they mirror the training data's columns -- confirm before
# "cleaning" them, since the fitted artifacts may depend on these exact names.
FEATURE_COLUMNS = [
    '% ASF (Euclidean)',
    '% Built-Up Area',
    '% ASF (Network)',
    '% ASF from Bus Stops ',
    '% ASF from Bus Stops',
    '% ASF (Network) ',
]
|
|
|
|
|
|
|
|
# Fetch both artifacts from the Hub. This is best-effort: if either download
# fails, the error is printed and BOTH paths are set to None, so the loading
# step below is skipped for both files.
try:
    # The tuple is evaluated left to right (model first, then imputer); the
    # names are only bound when both downloads succeed.
    model_path, imputer_path = (
        hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME),
        hf_hub_download(repo_id=REPO_ID, filename=IMPUTER_FILENAME),
    )
except Exception as download_error:
    print('Error downloading files from Hugging Face Hub:', download_error)
    model_path = imputer_path = None
|
|
|
|
|
|
|
|
# Module-level handles read by predict_accessibility_score(); they stay None
# when the corresponding path is unavailable.
loaded_mlp_model = loaded_imputer = None

# SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary code
# contained in the file. The files loaded here come from a remote repository,
# so only point REPO_ID at a source you trust.
if model_path:
    loaded_mlp_model = joblib.load(model_path)
    print('MLP model loaded from', model_path)

if imputer_path:
    loaded_imputer = joblib.load(imputer_path)
    print('Imputer loaded from', imputer_path)
|
|
|
|
|
def predict_accessibility_score(new_data_df: pd.DataFrame) -> pd.Series:
    """Predict the overall accessibility score for new, raw input data.

    The input is aligned to ``FEATURE_COLUMNS`` (columns outside that list are
    dropped, columns missing from the input are added as NaN), passed through
    the loaded imputer, and the imputed values are fed to the loaded model.

    Args:
        new_data_df: Raw feature data, before imputation. The frame is not
            modified by this function.

    Returns:
        The model's predictions as a Series named
        ``'Predicted_Overall_Accessibility_Score'``. The Series is built from
        the prediction values alone, so it carries a default positional index
        rather than the index of ``new_data_df``.

    Raises:
        RuntimeError: If the model or the imputer was not loaded.
    """
    if loaded_mlp_model is None or loaded_imputer is None:
        raise RuntimeError('Model or imputer not loaded. Cannot make predictions.')

    # reindex returns a NEW frame with exactly FEATURE_COLUMNS, in that order,
    # filling absent columns with NaN. Assigning the missing columns onto
    # new_data_df directly would leak extra columns into the caller's frame.
    features = new_data_df.reindex(columns=FEATURE_COLUMNS)

    # Re-wrap the imputer output in a DataFrame so the model receives the
    # feature columns by name.
    imputed_values = loaded_imputer.transform(features)
    imputed_features = pd.DataFrame(imputed_values, columns=FEATURE_COLUMNS)

    predictions = loaded_mlp_model.predict(imputed_features)
    return pd.Series(predictions, name='Predicted_Overall_Accessibility_Score')
|
|
|
|
|
if __name__ == '__main__':
    # Demo run: build a one-row frame covering every feature column and print
    # the resulting prediction.
    print("\n--- Demonstrating prediction with sample data ---")

    # Each column gets a single value, offset slightly by its position in
    # FEATURE_COLUMNS.
    new_sample_data = pd.DataFrame(
        {
            col_name: [0.5 + (position * 0.005) % 0.1]
            for position, col_name in enumerate(FEATURE_COLUMNS)
        }
    )

    try:
        predictions = predict_accessibility_score(new_sample_data)

        print("\n--- New Sample Data for Prediction ---")
        print(new_sample_data)
        print("\n--- Predicted Overall Accessibility Score ---")
        print(predictions)
    except Exception as demo_error:
        # Report any failure (including unloaded artifacts) instead of
        # letting the demo crash with a traceback.
        print('Error during prediction:', demo_error)
|
|
|