DP1110 committed on
Commit
d31755b
·
verified ·
1 Parent(s): 3cc1637

Upload inference_script.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference_script.py +90 -0
inference_script.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import joblib
3
+ from huggingface_hub import hf_hub_download
4
+ from sklearn.impute import SimpleImputer
5
+ import numpy as np
6
+
7
# Hugging Face Hub repository that hosts the artifacts, and the artifact names.
REPO_ID = "DP1110/mlp-accessibility-model"
MODEL_FILENAME = 'mlp_regressor_model.joblib'
IMPUTER_FILENAME = 'simple_imputer.joblib'

# Feature columns, in the order used for the training data.
# NOTE(review): two pairs of names differ only by a trailing space. Presumably
# this mirrors the training data's column headers -- confirm before
# normalizing, since the names are used verbatim to select input columns.
FEATURE_COLUMNS = [
    '% ASF (Euclidean)',
    '% Built-Up Area',
    '% ASF (Network)',
    '% ASF from Bus Stops ',
    '% ASF from Bus Stops',
    '% ASF (Network) ',
]
14
+
15
# Fetch both artifacts from the Hugging Face Hub. This is best-effort: if
# either download fails, the error is printed and both paths are cleared so
# that nothing is loaded below.
try:
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
    imputer_path = hf_hub_download(repo_id=REPO_ID, filename=IMPUTER_FILENAME)
except Exception as download_error:
    print('Error downloading files from Hugging Face Hub:', download_error)
    model_path = imputer_path = None

# Deserialize whatever was downloaded; both objects stay None otherwise.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code, so
# REPO_ID must point at a repository you trust.
loaded_mlp_model = loaded_imputer = None

if model_path:
    loaded_mlp_model = joblib.load(model_path)
    print('MLP model loaded from', model_path)

if imputer_path:
    loaded_imputer = joblib.load(imputer_path)
    print('Imputer loaded from', imputer_path)
35
+
36
def predict_accessibility_score(new_data_df: pd.DataFrame) -> pd.Series:
    """
    Predicts the overall accessibility score for new, raw input data.

    The input is aligned to FEATURE_COLUMNS, run through the loaded imputer,
    and then passed to the loaded MLP model. The caller's DataFrame is never
    modified.

    Args:
        new_data_df (pd.DataFrame): New data before imputation. Columns are
            matched to FEATURE_COLUMNS by name: feature columns that are
            absent are treated as all-NaN, columns not in FEATURE_COLUMNS
            are ignored, and column order does not matter.

    Returns:
        pd.Series: The model's predictions, named
            'Predicted_Overall_Accessibility_Score'. The Series is built
            without an explicit index, so it does not carry over the index
            of new_data_df.

    Raises:
        RuntimeError: If the model or the imputer has not been loaded.
    """
    if loaded_mlp_model is None or loaded_imputer is None:
        raise RuntimeError('Model or imputer not loaded. Cannot make predictions.')

    # reindex returns a new frame with exactly FEATURE_COLUMNS, in that order,
    # filling any absent feature with NaN. Unlike assigning the missing
    # columns onto new_data_df, this leaves the caller's object untouched.
    features = new_data_df.reindex(columns=FEATURE_COLUMNS)

    # Apply the loaded imputer to handle missing values in the new data, then
    # restore the column labels that transform() drops.
    imputed_values = loaded_imputer.transform(features)
    imputed_features = pd.DataFrame(imputed_values, columns=FEATURE_COLUMNS)

    # Make predictions using the loaded MLP model
    predictions = loaded_mlp_model.predict(imputed_features)

    return pd.Series(predictions, name='Predicted_Overall_Accessibility_Score')
67
+
68
if __name__ == '__main__':
    print("\n--- Demonstrating prediction with sample data ---")

    # Build a one-row sample frame that mimics the structure of the training
    # features. The values are arbitrary and only vary slightly from column to
    # column for demonstration purposes.
    sample_data_dict = {
        col_name: [0.5 + (i * 0.005) % 0.1]
        for i, col_name in enumerate(FEATURE_COLUMNS)
    }
    new_sample_data = pd.DataFrame(sample_data_dict)

    # Run the prediction and show the input next to the result; any failure is
    # reported instead of raised, since this is only a demo entry point.
    try:
        predictions = predict_accessibility_score(new_sample_data)

        print("\n--- New Sample Data for Prediction ---")
        print(new_sample_data)
        print("\n--- Predicted Overall Accessibility Score ---")
        print(predictions)
    except Exception as prediction_error:
        print('Error during prediction:', prediction_error)