DP1110
/

mlp-accessibility-model

Model card Files Files and versions

mlp-accessibility-model / inference_script.py

DP1110's picture

Upload inference_script.py with huggingface_hub

d31755b verified 19 days ago

history blame contribute delete

3.49 kB

	import pandas as pd
	import joblib
	from huggingface_hub import hf_hub_download
	from sklearn.impute import SimpleImputer
	import numpy as np

	# Hugging Face Hub repository and the artifact filenames stored in it.
	REPO_ID = "DP1110/mlp-accessibility-model"
	MODEL_FILENAME = "mlp_regressor_model.joblib"
	IMPUTER_FILENAME = "simple_imputer.joblib"

	# Feature columns, listed in the order used for the training data.
	# NOTE(review): two pairs of names differ only by a trailing space
	# ('% ASF from Bus Stops ' / '% ASF from Bus Stops' and
	# '% ASF (Network)' / '% ASF (Network) '). Presumably this mirrors the
	# training headers exactly -- confirm before normalizing any of them.
	FEATURE_COLUMNS = [
	    "% ASF (Euclidean)",
	    "% Built-Up Area",
	    "% ASF (Network)",
	    "% ASF from Bus Stops ",
	    "% ASF from Bus Stops",
	    "% ASF (Network) ",
	]

	# Fetch both artifacts from the Hugging Face Hub. Any failure is reported
	# and both paths fall back to None, so the rest of the module still runs
	# and prediction later fails with an explicit error instead.
	try:
	    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
	    imputer_path = hf_hub_download(repo_id=REPO_ID, filename=IMPUTER_FILENAME)
	except Exception as download_error:
	    print('Error downloading files from Hugging Face Hub:', download_error)
	    model_path = imputer_path = None

	# Deserialize whichever artifacts were downloaded; anything missing stays None.
	# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
	# while loading -- only point REPO_ID at a repository you trust.
	loaded_mlp_model = loaded_imputer = None

	if model_path:
	    loaded_mlp_model = joblib.load(model_path)
	    print('MLP model loaded from', model_path)

	if imputer_path:
	    loaded_imputer = joblib.load(imputer_path)
	    print('Imputer loaded from', imputer_path)

	def predict_accessibility_score(new_data_df: pd.DataFrame) -> pd.Series:
	    """
	    Predict the overall accessibility score for new, raw input data.

	    The input is aligned to ``FEATURE_COLUMNS`` (missing feature columns are
	    added as NaN, columns not in the list are dropped, order is fixed),
	    passed through the loaded imputer, and then fed to the loaded MLP model.
	    The caller's DataFrame is left unmodified.

	    Args:
	        new_data_df (pd.DataFrame): Raw (pre-imputation) data holding some or
	            all of the columns named in ``FEATURE_COLUMNS``.

	    Returns:
	        pd.Series: Predictions named 'Predicted_Overall_Accessibility_Score',
	            in input row order. The Series gets a fresh default index; the
	            index of ``new_data_df`` is not carried over.

	    Raises:
	        RuntimeError: If the model or the imputer was not loaded.
	    """
	    if loaded_mlp_model is None or loaded_imputer is None:
	        raise RuntimeError('Model or imputer not loaded. Cannot make predictions.')

	    # reindex returns a new frame: it adds any missing feature column as NaN
	    # and puts the columns in training order without writing to the
	    # caller's DataFrame (the previous in-place column assignment did).
	    features = new_data_df.reindex(columns=FEATURE_COLUMNS)

	    # Fill missing values with the loaded imputer, then restore the column
	    # labels on the transformed values before predicting.
	    imputed_values = loaded_imputer.transform(features)
	    imputed_df = pd.DataFrame(imputed_values, columns=FEATURE_COLUMNS)

	    predictions = loaded_mlp_model.predict(imputed_df)

	    return pd.Series(predictions, name='Predicted_Overall_Accessibility_Score')

	if __name__ == '__main__':
	    print("\n--- Demonstrating prediction with sample data ---")

	    # Build a one-row demo frame with one arbitrary value per feature
	    # column; the value shifts slightly with the column position.
	    demo_values = {
	        feature: [0.5 + (position * 0.005) % 0.1]
	        for position, feature in enumerate(FEATURE_COLUMNS)
	    }
	    new_sample_data = pd.DataFrame(demo_values)

	    # Run the prediction and show the input next to the output. Any failure
	    # (including an unloaded model) is reported instead of raising.
	    try:
	        predictions = predict_accessibility_score(new_sample_data)

	        print("\n--- New Sample Data for Prediction ---")
	        print(new_sample_data)
	        print("\n--- Predicted Overall Accessibility Score ---")
	        print(predictions)
	    except Exception as err:
	        print('Error during prediction:', err)