Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| import os | |
| from recommendation import generate_recommendations | |
# --- Categorical encodings ---------------------------------------------------
# These integer codes MUST stay consistent with CalibrateLikelihood.ipynb,
# otherwise the models receive features encoded differently than at train time.
gender_mapping = {"Male": 0, "Female": 1}
experience_mapping = {"Beginner": 0, "Intermediate": 1, "Advanced": 2, "Professional": 3}
injury_type_mapping = {"None": 0, "Sprain": 1, "Ligament Tear": 2, "Tendonitis": 3, "Strain": 4, "Fracture": 5}
sport_type_mapping = {"Football": 0, "Basketball": 1, "Swimming": 2, "Tennis": 3, "Running": 4}
# Class index -> human-readable risk label (order produced by the target encoders).
risk_level_mapping = {0: "High", 1: "Low", 2: "Medium"}

# Model artifacts are expected one directory up from this file, in ../model.
MODEL_DIR = os.path.join(os.path.dirname(__file__), "..", "model")

# Load models, encoders, and the calibration threshold once at import time.
try:
    rf_model = joblib.load(os.path.join(MODEL_DIR, "rf_injury_model.pkl"))
    xgb_model = joblib.load(os.path.join(MODEL_DIR, "xgboost_injury_model.pkl"))
    calibrator = joblib.load(os.path.join(MODEL_DIR, "likelihood_calibrator.pkl"))
    rf_encoder = joblib.load(os.path.join(MODEL_DIR, "rf_target_encoder.pkl"))
    xgb_encoder = joblib.load(os.path.join(MODEL_DIR, "xgb_target_encoder.pkl"))
    low_threshold = joblib.load(os.path.join(MODEL_DIR, "calibration_threshold.pkl"))
except FileNotFoundError as e:
    # Chain the original exception so the missing file's path is preserved
    # in the traceback (the original re-raise discarded the cause).
    raise FileNotFoundError(
        f"Model file not found: {e}. Ensure all model files are in {MODEL_DIR}."
    ) from e

# Both models must agree on the class-index -> label mapping; otherwise the
# ensemble average of their probability columns would mix incompatible classes.
if not (rf_encoder.classes_ == xgb_encoder.classes_).all():
    raise ValueError("RandomForest and XGBoost encoders have inconsistent class mappings.")
def preprocess_data(data_dict):
    """
    Preprocess the input data consistently with CalibrateLikelihood.ipynb.

    Categorical fields are integer-encoded via the module-level mappings
    (unknown/unmapped values fall back to 0), two derived ratio features are
    added, and the columns are ordered exactly as the models expect.

    Args:
        data_dict (dict): Input dictionary containing athlete data.

    Returns:
        pd.DataFrame: Single-row frame of preprocessed features ready for prediction.

    Raises:
        ValueError: If required features are missing or preprocessing fails.
    """
    try:
        df = pd.DataFrame([data_dict])
        # Encode categoricals; unmapped values default to code 0.
        df["Gender"] = df["Gender"].map(gender_mapping).fillna(0).astype(int)
        df["Sport_Type"] = df["Sport_Type"].map(sport_type_mapping).fillna(0).astype(int)
        df["Experience_Level"] = df["Experience_Level"].map(experience_mapping).fillna(0).astype(int)
        # Missing injury type is treated as "None" before encoding.
        df["Previous_Injury_Type"] = df["Previous_Injury_Type"].fillna("None")
        df["Previous_Injury_Type"] = df["Previous_Injury_Type"].map(injury_type_mapping).fillna(0).astype(int)
        # Guard against division by zero in the derived ratios below.
        df["Total_Weekly_Training_Hours"] = df["Total_Weekly_Training_Hours"].replace(0, 0.1)
        df["Intensity_Ratio"] = df["High_Intensity_Training_Hours"] / df["Total_Weekly_Training_Hours"]
        df["Recovery_Per_Training"] = df["Recovery_Time_Between_Sessions"] / df["Total_Weekly_Training_Hours"]
        # Exact feature order the models were trained on.
        features = [
            "Age", "Gender", "Sport_Type", "Experience_Level", "Flexibility_Score",
            "Total_Weekly_Training_Hours", "High_Intensity_Training_Hours", "Strength_Training_Frequency",
            "Recovery_Time_Between_Sessions", "Training_Load_Score", "Sprint_Speed", "Endurance_Score",
            "Agility_Score", "Fatigue_Level", "Previous_Injury_Count", "Previous_Injury_Type",
            "Intensity_Ratio", "Recovery_Per_Training"
        ]
        missing_features = [f for f in features if f not in df.columns]
        if missing_features:
            raise ValueError(f"Missing required features: {missing_features}")
        return df[features]
    except Exception as e:
        # Raise a specific exception type and chain the cause instead of
        # wrapping everything in a bare Exception (which destroyed the
        # original type and traceback). Callers catching Exception still work.
        raise ValueError(f"Error in preprocessing data: {str(e)}") from e
def predict_injury_risk(user_input: dict) -> dict:
    """
    Predict injury risk using the ensemble of RandomForest and XGBoost models with calibrated probabilities.

    Both models' class probabilities are averaged, the calibrator maps the
    ensemble probabilities to an injury likelihood, and a data-driven
    threshold from calibration may reclassify the prediction as "Low".

    Args:
        user_input (dict): Input dictionary containing athlete data.

    Returns:
        dict: Prediction results including risk level, likelihood, and recommendations.
    """
    print("User Input Received:\n", user_input)

    # Build the exact feature frame the models were trained on.
    X = preprocess_data(user_input)
    print("FINAL Features After Preprocessing:", list(X.columns))
    print("Preprocessed Input:\n", X)

    # Class probabilities from each model (columns follow the shared encoder order).
    probs_rf = rf_model.predict_proba(X)
    probs_xgb = xgb_model.predict_proba(X)
    print("RandomForest Probabilities (High, Low, Medium):", probs_rf)
    print("XGBoost Probabilities (High, Low, Medium):", probs_xgb)

    # Unweighted average of the two models.
    ensemble_probs = (probs_rf + probs_xgb) / 2
    row = ensemble_probs[0]
    predicted_class = row.argmax()
    confidence = row.max()
    predicted_label = rf_encoder.inverse_transform([predicted_class])[0]
    print("Ensemble Probabilities (High, Low, Medium):", ensemble_probs)
    print("Predicted Class:", predicted_class, "Label:", predicted_label, "Confidence:", confidence)

    # Feed the raw ensemble probabilities to the likelihood calibrator.
    prob_low = row[1]
    calib_frame = pd.DataFrame({
        "prob_high": [row[0]],
        "prob_low": [prob_low],
        "prob_medium": [row[2]],
    })
    injury_likelihood = calibrator.predict_proba(calib_frame)[:, 1][0] * 100
    print("Calibrated Injury Likelihood (%):", injury_likelihood)

    # Dynamic threshold: a sufficiently high raw "Low" probability overrides
    # the argmax label.
    if prob_low > low_threshold and predicted_label != "Low":
        print(f"⚠️ Calibration adjustment: Low probability ({prob_low:.2f}) above threshold ({low_threshold:.2f}) — classifying as Low.")
        predicted_label = "Low"

    recommendations = generate_recommendations(user_input)
    return {
        "predicted_risk_level": predicted_label,
        "injury_likelihood_percent": round(injury_likelihood, 2),
        "model_class_probability": round(confidence * 100, 2),
        "recommendations": recommendations,
    }