Spaces:
Build error
Build error
File size: 3,384 Bytes
90bbde0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | #!/usr/bin/env python3
"""Debug preprocessing pipeline"""
import pickle
import pandas as pd
import numpy as np
# Load the four pickled artifacts from the models/ directory
def _unpickle(path):
    """Open *path* in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = _unpickle('models/rf_model.pkl')
scaler = _unpickle('models/scaler.pkl')
le_dict = _unpickle('models/le_dict.pkl')
feature_names = _unpickle('models/feature_names.pkl')

# Summarise what was loaded before running the pipeline steps.
print("Expected feature names:", feature_names)
print("\nLE Dict keys:", list(le_dict.keys()))
print("Scaler n_features:", scaler.n_features_in_)
# Sample record: the ten questionnaire answers are all 0, followed by the
# demographic / metadata fields. Key order is significant because it becomes
# the DataFrame column order in STEP 1.
test_input = {
    **dict.fromkeys(
        (
            'A1_prefer_detail_not_big_picture',
            'A2_must_have_sameness',
            'A3_prefer_reading_systematically',
            'A4_feel_anxious_in_social',
            'A5_prefer_talking_one_to_one',
            'A6_notice_small_changes',
            'A7_trouble_focus_on_changing',
            'A8_often_daydream',
            'A9_focused_on_one_topic',
            'A10_difficult_small_talk',
        ),
        0,
    ),
    'age': 30,
    'gender': 'M',
    'ethnicity': 'White',
    'jundice': 'no',
    'autism_family_member': 'no',
    'country': 'USA',
    'used_app_before': 'no',
    'screening_type': 'Questionnaire',
}

print("\n" + "=" * 70)
print("STEP 1: Create DataFrame")
# A one-element list of dicts yields a single-row frame, one column per key.
df = pd.DataFrame([test_input])
print("Columns:", df.columns.tolist())
print("Shape:", df.shape)
print("\n" + "="*70)
print("STEP 2: Encode categorical variables")
# Work on a copy so the raw frame from STEP 1 stays available for comparison.
df_encoded = df.copy()
for col, encoder in le_dict.items():
    if col not in df_encoded.columns:
        # Report the skip instead of passing over it silently: an encoder
        # with no matching input column is exactly the kind of name mismatch
        # this debug script is meant to surface.
        print(f" {col}: skipped (no such column in input)")
        continue
    val = df_encoded[col].values[0]
    print(f" {col}: '{val}' ->", end=" ")
    try:
        # Only the transform call is guarded; it is the step that can reject
        # a value (e.g. a label the encoder was not fitted on).
        encoded = encoder.transform([val])[0]
    except Exception as e:
        # Debug boundary: show the failure and continue with the remaining
        # columns. The column keeps its raw value in this case.
        print(f"ERROR: {e}")
    else:
        df_encoded[col] = encoded
        print(f"{encoded} ✓")
print("\nEncoded DataFrame:")
print(df_encoded)
print("\n" + "=" * 70)
print("STEP 3: Scale numeric features")
# 'age' comes first, followed by every expected feature whose name starts
# with an upper-case 'A', in feature_names order.
numeric_cols = ['age']
numeric_cols.extend(name for name in feature_names if name.startswith('A'))
print("Numeric columns for scaling:", numeric_cols)

# Report, column by column, whether it is present in the encoded frame.
for name in numeric_cols:
    if name in df_encoded.columns:
        print(f" {name}: {df_encoded[name].values[0]} ✓")
    else:
        print(f" ERROR: {name} not in DataFrame!")

print("\nScaling...")
df_scaled = df_encoded.copy()
try:
    scaled_values = scaler.transform(df_encoded[numeric_cols])
    df_scaled[numeric_cols] = scaled_values
    print("Scaling successful ✓")
except Exception as e:
    print(f"Scaling ERROR: {e}")
    # Show what the scaler itself reports, when it offers that method.
    if hasattr(scaler, 'get_feature_names_out'):
        expected = scaler.get_feature_names_out()
    else:
        expected = "N/A"
    print(" Scaler expects these features:", expected)
print("\n" + "="*70)
print("STEP 4: Select features in exact order")
print("Required feature order:", feature_names)
# Bind a sentinel up front so STEP 5 can tell that selection failed, rather
# than hitting an unbound name and reporting a misleading NameError as a
# prediction error.
df_final = None
try:
    # Indexing with the full list both selects and reorders the columns.
    df_final = df_scaled[feature_names].copy()
except Exception as e:
    print(f"Feature selection ERROR: {e}")
    print(" Available columns:", list(df_scaled.columns))
else:
    print("Feature selection successful ✓")
    print("Final shape:", df_final.shape)
    print("Final columns:", list(df_final.columns))

print("\n" + "="*70)
print("STEP 5: Predict")
if df_final is None:
    # Nothing to feed the model; the real error was already printed in STEP 4.
    print("Prediction skipped: feature selection failed in STEP 4")
else:
    try:
        # First (and only) row of class probabilities.
        pred = model.predict_proba(df_final)[0]
        print("Prediction successful ✓")
        # NOTE(review): the labels assume class index 0 is "No Autism" and
        # index 1 is "Autism" — confirm against model.classes_.
        print(f" No Autism: {pred[0]:.2%}")
        print(f" Autism: {pred[1]:.2%}")
    except Exception as e:
        print(f"Prediction ERROR: {e}")
|