import gradio as gr import pickle import pandas as pd import numpy as np from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import BaggingClassifier, RandomForestClassifier from xgboost import XGBClassifier # --- Global Dictionaries & Lists --- LOADED_OBJECTS_LOGREG = {} LOADED_OBJECTS_TREE = {} MODEL_CACHE = {} logreg_ui_feature_order = [] tree_ui_feature_order = [] logreg_inputs_ui_components = [] tree_inputs_ui_components = [] # --- Default Sample Input Data --- # These should contain values for *all* features in their respective UI feature orders # For features that are often imputed, you can put a typical non-NaN value or np.nan if you want the default to show "blank then imputed" # (Gradio handles np.nan for gr.Number by showing it as blank) # Sample for Logistic Regression UI features # Keys should match elements in `logreg_ui_feature_order` DEFAULT_LOGREG_INPUTS = { 'AGE': 28, 'AVG_SPEED': 4.1, # Imputable 'DIST_MILES': 2.6, # Imputable 'DRIVE_FGA': 3.0, 'GP': 70, 'PACE': 99.5, 'PAINT_TOUCHES': 5.5, 'PLAYER_HEIGHT_INCHES': 78, # Imputable 'PLAYER_WEIGHT': 215, # Imputable 'POSS': 7100, 'PULL_UP_FG3A': 1.8 # Imputable # Add any other features that end up in logreg_ui_feature_order } # Sample for Tree Models UI features (this will be longer) # Keys should match elements in `tree_ui_feature_order` DEFAULT_TREE_INPUTS = { 'AGE': 26, 'PLAYER_HEIGHT_INCHES': 79, # Imputable 'PLAYER_WEIGHT': 220, # Imputable 'GP': 65, 'MIN': 32.0, 'USG_PCT': 0.22, 'PACE': 100.0, 'POSS': 7200, 'FGA_PG': 16.0, 'DRIVES': 11.0, 'DRIVE_FGA': 5.5, 'DRIVE_PASSES': 5.0, 'DIST_MILES': 2.7, # Imputable 'AVG_SPEED': 4.2, # Imputable 'PULL_UP_FGA': 5.5, 'PULL_UP_FG3A': 2.2, # Imputable 'TOUCHES': 65.0, 'FRONT_CT_TOUCHES': 33.0, 'AVG_SEC_PER_TOUCH': 2.8, 'AVG_DRIB_PER_TOUCH': 2.1, 'ELBOW_TOUCHES': 2.3, 'POST_TOUCHES': 3.5, 'PAINT_TOUCHES': 6.0 # Add all other features that are in tree_ui_feature_order } # --- Loading Functions (Modified to define UI feature orders earlier) --- def load_logreg_dependencies(): global logreg_ui_feature_order if not LOADED_OBJECTS_LOGREG: print("Loading Logistic Regression dependencies...") try: with open('simple_imputer_logreg.pkl', 'rb') as f: LOADED_OBJECTS_LOGREG['imputer'] = pickle.load(f) with open('cols_to_impute_logreg.pkl', 'rb') as f: LOADED_OBJECTS_LOGREG['cols_to_impute'] = pickle.load(f) with open('stepwise_features_logreg.pkl', 'rb') as f: LOADED_OBJECTS_LOGREG['features'] = pickle.load(f) with open('logisticregression.pkl', 'rb') as f: MODEL_CACHE['logistic_regression'] = pickle.load(f) _model_feats = LOADED_OBJECTS_LOGREG.get('features', []) _impute_cols = LOADED_OBJECTS_LOGREG.get('cols_to_impute', []) # All unique features needed for the UI, then sort for consistent order logreg_ui_feature_order = sorted(list(set(_model_feats + _impute_cols))) print("Logistic Regression dependencies loaded successfully.") print(f"LogReg UI features order set: {logreg_ui_feature_order}") return True except FileNotFoundError as e: # ... (rest of error handling) print(f"Error loading LogReg file: {e.filename}") gr.Warning(f"Required file for Logistic Regression not found: {e.filename}. This model will not work.") return False except Exception as e: print(f"Error loading LogReg dependencies: {e}") gr.Warning(f"Error loading dependencies for Logistic Regression: {e}") return False return True def load_tree_dependencies(): global tree_ui_feature_order if not LOADED_OBJECTS_TREE: print("Loading Tree-based model dependencies...") try: with open('simple_imputer_tree.pkl', 'rb') as f: LOADED_OBJECTS_TREE['imputer'] = pickle.load(f) with open('cols_to_impute_tree.pkl', 'rb') as f: LOADED_OBJECTS_TREE['cols_to_impute'] = pickle.load(f) with open('tree_model_feature_columns.pkl', 'rb') as f: LOADED_OBJECTS_TREE['features'] = pickle.load(f) tree_ui_feature_order = LOADED_OBJECTS_TREE['features'] tree_model_files = { # ... (model loading) "Optimized Decision Tree": "optimized_decision_tree_model.pkl", "Bagging Classifier": "bagging_classifier_model.pkl", "XGBoost Classifier": "xgboost_classifier_model.pkl", "Random Forest Classifier": "random_forest_classifier_model.pkl" } for name, path in tree_model_files.items(): with open(path, 'rb') as f: MODEL_CACHE[name.lower().replace(" ", "_")] = pickle.load(f) print("Tree-based model dependencies loaded successfully.") print(f"Tree UI features order set: {tree_ui_feature_order}") return True except FileNotFoundError as e: # ... (rest of error handling) print(f"Error loading Tree model file: {e.filename}") gr.Warning(f"Required file for Tree models not found: {e.filename}. Some tree models may not work.") return False except Exception as e: print(f"Error loading Tree dependencies: {e}") gr.Warning(f"Error loading dependencies for Tree models: {e}") return False return True LOGREG_READY = load_logreg_dependencies() TREE_READY = load_tree_dependencies() # --- Prediction Function (no changes needed here from the previous version for default values) --- def predict_injury(model_choice, *all_ui_values): # ... (Prediction function remains the same as the previous version) print(f"Model choice: {model_choice}") # all_ui_values contains values from logreg_inputs_ui_components then tree_inputs_ui_components raw_input_data = {} # This will hold {feature_name: value} for the chosen model's UI features if model_choice == "Logistic Regression": if not LOGREG_READY or 'logistic_regression' not in MODEL_CACHE: return "Logistic Regression model not loaded. Check logs.", "", "" num_logreg_ui_fields = len(logreg_ui_feature_order) current_args = all_ui_values[:num_logreg_ui_fields] current_ui_feature_order_for_dict_zip = logreg_ui_feature_order # Order of features as in UI imputer = LOADED_OBJECTS_LOGREG['imputer'] cols_to_impute_for_model = LOADED_OBJECTS_LOGREG['cols_to_impute'] final_model_features = LOADED_OBJECTS_LOGREG['features'] # Stepwise features model = MODEL_CACHE['logistic_regression'] raw_input_data = dict(zip(current_ui_feature_order_for_dict_zip, current_args)) elif model_choice in ["Optimized Decision Tree", "Bagging Classifier", "XGBoost Classifier", "Random Forest Classifier"]: if not TREE_READY or model_choice.lower().replace(" ", "_") not in MODEL_CACHE: return f"{model_choice} model not loaded. Check logs.", "", "" num_logreg_ui_fields = len(logreg_ui_feature_order) # num_tree_ui_fields = len(tree_ui_feature_order) # Not needed directly for slicing here current_args = all_ui_values[num_logreg_ui_fields:] # Get the rest of args for tree models current_ui_feature_order_for_dict_zip = tree_ui_feature_order # Order of features as in UI imputer = LOADED_OBJECTS_TREE['imputer'] cols_to_impute_for_model = LOADED_OBJECTS_TREE['cols_to_impute'] final_model_features = LOADED_OBJECTS_TREE['features'] # Full feature list model = MODEL_CACHE[model_choice.lower().replace(" ", "_")] raw_input_data = dict(zip(current_ui_feature_order_for_dict_zip, current_args)) else: return "Invalid model choice.", "", "" print(f"Raw input data mapped for model: {raw_input_data}") processed_values = {} for key, value in raw_input_data.items(): if isinstance(value, str) and value.strip() == "": processed_values[key] = np.nan elif value is None: # Gradio number input might return None if empty and no default processed_values[key] = np.nan else: try: processed_values[key] = float(value) except (ValueError, TypeError): if key not in cols_to_impute_for_model: return f"Invalid input for '{key}'. Please provide a number.", f"Error with input '{key}'", "" processed_values[key] = np.nan # Ensure it's np.nan for imputer df = pd.DataFrame([processed_values]) # Ensure df columns match the order expected by processing steps (usually current_ui_feature_order) # This ensures that when we select `cols_to_impute_for_model` or `final_model_features`, they exist. # We need to ensure all keys from current_ui_feature_order_for_dict_zip are columns in df df = df.reindex(columns=current_ui_feature_order_for_dict_zip) print("DataFrame before imputation:\n", df.head()) # Imputation try: imputable_cols_in_df = [col for col in cols_to_impute_for_model if col in df.columns] if imputable_cols_in_df: # Create a copy for transform to avoid SettingWithCopyWarning if df[imputable_cols_in_df] is a view df_subset_to_impute = df[imputable_cols_in_df].copy() transformed_subset = imputer.transform(df_subset_to_impute) df[imputable_cols_in_df] = transformed_subset print("DataFrame after imputation:\n", df.head()) except Exception as e: print(f"Error during imputation: {e}") return f"Error during data imputation: {e}", str(e), "" # Feature selection and ordering for the *final model input* try: final_df = df[final_model_features] print("Final DataFrame for model prediction:\n", final_df.head()) except KeyError as e: print(f"Error: Missing feature column for the model's final input: {e}") return f"Internal error: Missing expected feature {e}. Check model feature lists.", f"KeyError: {e}", "" except Exception as e: print(f"Error during final feature selection: {e}") return f"Error during data preparation: {e}", str(e), "" # Prediction try: pred_proba = model.predict_proba(final_df)[0] prediction = model.predict(final_df)[0] proba_injured = pred_proba[1] proba_not_injured = pred_proba[0] if prediction == 1: result_text = "Prediction: Likely Injured" probability_text = f"Probability of Injury: {proba_injured:.2%}\nProbability of No Injury: {proba_not_injured:.2%}" else: result_text = "Prediction: Likely Not Injured" probability_text = f"Probability of No Injury: {proba_not_injured:.2%}\nProbability of Injury: {proba_injured:.2%}" # Added a third output for more detailed info/status if needed return result_text, probability_text, "Prediction successful." except Exception as e: print(f"Error during prediction: {e}") return f"Error during prediction: {e}", str(e), "" # --- Create Gradio UI components with default values --- if LOGREG_READY and logreg_ui_feature_order: # Check if order is populated print(f"Generating UI components for LogReg with defaults. UI Features: {logreg_ui_feature_order}") for feature in logreg_ui_feature_order: label = feature.replace("_", " ").title() default_value = DEFAULT_LOGREG_INPUTS.get(feature, None) # Get default, or None if feature in LOADED_OBJECTS_LOGREG.get('cols_to_impute', []): label += " (can be blank for mean imputation)" logreg_inputs_ui_components.append( gr.Number(label=label, value=default_value, elem_id=f"logreg_{feature}") ) else: if not LOGREG_READY: print("Logistic Regression dependencies not loaded.") if not logreg_ui_feature_order: print("LogReg UI feature order not set.") if TREE_READY and tree_ui_feature_order: # Check if order is populated print(f"Generating UI components for Tree Models with defaults. UI Features: {tree_ui_feature_order}") for feature in tree_ui_feature_order: label = feature.replace("_", " ").title() default_value = DEFAULT_TREE_INPUTS.get(feature, None) if feature in LOADED_OBJECTS_TREE.get('cols_to_impute', []): label += " (can be blank for mean imputation)" tree_inputs_ui_components.append( gr.Number(label=label, value=default_value, elem_id=f"tree_{feature}") ) else: if not TREE_READY: print("Tree model dependencies not loaded.") if not tree_ui_feature_order: print("Tree UI feature order not set.") # --- Create Gradio Interface --- with gr.Blocks(theme=gr.themes.Soft()) as app: gr.Markdown("