Spaces:
Build error
Build error
#!/usr/bin/env python3
"""Debug preprocessing pipeline.

Pushes one hard-coded questionnaire response through the same steps the
saved artifacts expect (encode -> scale -> select features -> predict) and
prints the outcome of every step, so a mismatch between the input and the
pickled encoders/scaler/model can be located quickly.
"""
import pickle

import numpy as np
import pandas as pd

# Single sample used to exercise the pipeline end to end.
TEST_INPUT = {
    'A1_prefer_detail_not_big_picture': 0,
    'A2_must_have_sameness': 0,
    'A3_prefer_reading_systematically': 0,
    'A4_feel_anxious_in_social': 0,
    'A5_prefer_talking_one_to_one': 0,
    'A6_notice_small_changes': 0,
    'A7_trouble_focus_on_changing': 0,
    'A8_often_daydream': 0,
    'A9_focused_on_one_topic': 0,
    'A10_difficult_small_talk': 0,
    'age': 30,
    'gender': 'M',
    'ethnicity': 'White',
    'jundice': 'no',
    'autism_family_member': 'no',
    'country': 'USA',
    'used_app_before': 'no',
    'screening_type': 'Questionnaire',
}


def load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point this script at artifacts you produced yourself.
    with open(path, 'rb') as f:
        return pickle.load(f)


def encode_categoricals(df, le_dict):
    """Encode the first row of every column of *df* that has an encoder.

    *le_dict* maps column names to objects exposing ``transform(values)``.
    Columns named in *le_dict* but absent from *df* are ignored. A column
    whose encoder raises is reported and left unchanged so the remaining
    columns are still checked.

    Returns ``(encoded_copy, failures)`` where *failures* maps each column
    that could not be encoded to the exception that was raised. *df* itself
    is not modified.
    """
    df_encoded = df.copy()
    failures = {}
    for col in le_dict:
        if col not in df_encoded.columns:
            continue
        val = df_encoded[col].values[0]
        print(f" {col}: '{val}' ->", end=" ")
        try:
            df_encoded[col] = le_dict[col].transform([val])[0]
        except Exception as e:  # debug boundary: report and keep going
            print(f"ERROR: {e}")
            failures[col] = e
        else:
            print(f"{df_encoded[col].values[0]} ✓")
    return df_encoded, failures


def numeric_columns(feature_names):
    """Return ``'age'`` followed by every feature whose name starts with 'A'."""
    return ['age'] + [c for c in feature_names if c.startswith('A')]


def scale_numeric(df_encoded, scaler, numeric_cols):
    """Apply ``scaler.transform`` to *numeric_cols* of *df_encoded*.

    Returns ``(df_scaled, error)``. On success *error* is ``None``. On
    failure the problem is printed, *error* is the exception, and
    *df_scaled* is an unscaled copy of *df_encoded*.
    """
    df_scaled = df_encoded.copy()
    try:
        df_scaled[numeric_cols] = scaler.transform(df_encoded[numeric_cols])
    except Exception as e:  # debug boundary: report instead of crashing
        print(f"Scaling ERROR: {e}")
        print(" Scaler expects these features:", scaler.get_feature_names_out() if hasattr(scaler, 'get_feature_names_out') else "N/A")
        return df_scaled, e
    print("Scaling successful ✓")
    return df_scaled, None


def select_features(df_scaled, feature_names):
    """Return a copy of *df_scaled* restricted to *feature_names*, in order.

    Returns ``None`` (after printing the available columns) when the
    selection fails, e.g. because a required column is missing.
    """
    try:
        df_final = df_scaled[feature_names].copy()
    except Exception as e:  # debug boundary: report instead of crashing
        print(f"Feature selection ERROR: {e}")
        print(" Available columns:", list(df_scaled.columns))
        return None
    print("Feature selection successful ✓")
    print("Final shape:", df_final.shape)
    print("Final columns:", list(df_final.columns))
    return df_final


def predict(model, df_final):
    """Print and return the first row of ``model.predict_proba(df_final)``.

    Returns ``None`` when *df_final* is ``None`` (an earlier step produced
    no model input) or when the prediction itself fails.
    """
    if df_final is None:
        # Without this guard the failure would surface as an unrelated
        # "name is not defined" style error and hide the real cause.
        print("Prediction SKIPPED: feature selection failed, nothing to score")
        return None
    try:
        pred = model.predict_proba(df_final)[0]
        print("Prediction successful ✓")
        print(f" No Autism: {pred[0]:.2%}")
        print(f" Autism: {pred[1]:.2%}")
    except Exception as e:  # debug boundary: report instead of crashing
        print(f"Prediction ERROR: {e}")
        return None
    return pred


def main():
    """Load the saved artifacts and run ``TEST_INPUT`` through every step."""
    # Load models
    model = load_pickle('models/rf_model.pkl')
    scaler = load_pickle('models/scaler.pkl')
    le_dict = load_pickle('models/le_dict.pkl')
    feature_names = load_pickle('models/feature_names.pkl')

    print("Expected feature names:", feature_names)
    print("\nLE Dict keys:", list(le_dict.keys()))
    print("Scaler n_features:", scaler.n_features_in_)

    print("\n" + "="*70)
    print("STEP 1: Create DataFrame")
    df = pd.DataFrame([TEST_INPUT])
    print("Columns:", list(df.columns))
    print("Shape:", df.shape)

    print("\n" + "="*70)
    print("STEP 2: Encode categorical variables")
    df_encoded, _ = encode_categoricals(df, le_dict)
    print("\nEncoded DataFrame:")
    print(df_encoded)

    print("\n" + "="*70)
    print("STEP 3: Scale numeric features")
    numeric_cols = numeric_columns(feature_names)
    print("Numeric columns for scaling:", numeric_cols)
    # Check if all numeric cols exist
    for col in numeric_cols:
        if col not in df_encoded.columns:
            print(f" ERROR: {col} not in DataFrame!")
        else:
            print(f" {col}: {df_encoded[col].values[0]} ✓")
    print("\nScaling...")
    df_scaled, scale_error = scale_numeric(df_encoded, scaler, numeric_cols)
    if scale_error is not None:
        # Later steps still run so their own problems are visible, but the
        # numbers they produce must not be mistaken for a real prediction.
        print(" WARNING: continuing with unscaled values; any prediction below is not meaningful")

    print("\n" + "="*70)
    print("STEP 4: Select features in exact order")
    print("Required feature order:", feature_names)
    df_final = select_features(df_scaled, feature_names)

    print("\n" + "="*70)
    print("STEP 5: Predict")
    predict(model, df_final)


if __name__ == "__main__":
    main()