Spaces:

harshith1411
/

autism-screening

Build error

App Files Files Community

autism-screening / debug_preprocessing.py

harshith1411

Upload 10 files

90bbde0 verified 2 months ago

raw

history blame contribute delete

3.38 kB

	#!/usr/bin/env python3
	"""Debug preprocessing pipeline"""

	import pickle
	import pandas as pd
	import numpy as np

	# Load models
	# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
	# only run this script against artifacts you produced or otherwise trust.
	def _load_pickle(path):
	    """Return the object unpickled from the binary file at *path*."""
	    with open(path, 'rb') as fh:
	        return pickle.load(fh)


	model = _load_pickle('models/rf_model.pkl')
	scaler = _load_pickle('models/scaler.pkl')
	le_dict = _load_pickle('models/le_dict.pkl')
	feature_names = _load_pickle('models/feature_names.pkl')

	# Dump what the artifacts expect so mismatches with the test input are
	# visible before any transformation is attempted.
	print("Expected feature names:", feature_names)
	print("\nLE Dict keys:", list(le_dict.keys()))
	print("Scaler n_features:", scaler.n_features_in_)

	# Test input: a single hand-written record used to exercise the pipeline.
	# The ten questionnaire items (A1..A10) are all answered 0; the remaining
	# fields follow in the same insertion order as before.
	test_input = {
	    **dict.fromkeys(
	        (
	            'A1_prefer_detail_not_big_picture',
	            'A2_must_have_sameness',
	            'A3_prefer_reading_systematically',
	            'A4_feel_anxious_in_social',
	            'A5_prefer_talking_one_to_one',
	            'A6_notice_small_changes',
	            'A7_trouble_focus_on_changing',
	            'A8_often_daydream',
	            'A9_focused_on_one_topic',
	            'A10_difficult_small_talk',
	        ),
	        0,
	    ),
	    'age': 30,
	    'gender': 'M',
	    'ethnicity': 'White',
	    # 'jundice' (sic): presumably mirrors the column name used when the
	    # encoders were fitted -- confirm before "correcting" the spelling.
	    'jundice': 'no',
	    'autism_family_member': 'no',
	    'country': 'USA',
	    'used_app_before': 'no',
	    'screening_type': 'Questionnaire',
	}

	# STEP 1: wrap the single test record in a one-row DataFrame and show its
	# column labels and shape.
	section_rule = "=" * 70
	print("\n" + section_rule)
	print("STEP 1: Create DataFrame")
	df = pd.DataFrame([test_input])
	print("Columns:", df.columns.tolist())
	print("Shape:", df.shape)

	# STEP 2: replace each categorical value with the code produced by the
	# matching encoder from le_dict, reporting success or failure per column.
	print("\n" + "=" * 70)
	print("STEP 2: Encode categorical variables")
	# Work on a copy so the raw frame from STEP 1 stays untouched.
	df_encoded = df.copy()
	for col, encoder in le_dict.items():
	    # le_dict may contain entries for columns the test input does not have;
	    # those are skipped silently.
	    if col not in df_encoded.columns:
	        continue
	    val = df_encoded[col].values[0]
	    # end=" " keeps the outcome on the same output line as the input value.
	    print(f" {col}: '{val}' ->", end=" ")
	    try:
	        df_encoded[col] = encoder.transform([val])[0]
	    except Exception as e:
	        # Broad on purpose: this is a diagnostic script, so any failure is
	        # reported and the run continues. The column keeps its raw value.
	        print(f"ERROR: {e}")
	    else:
	        print(f"{df_encoded[col].values[0]} ✓")

	print("\nEncoded DataFrame:")
	print(df_encoded)

	# STEP 3: run the numeric columns through the fitted scaler.
	print("\n" + "=" * 70)
	print("STEP 3: Scale numeric features")
	# Numeric columns are 'age' plus every expected feature whose name starts
	# with an upper-case 'A'.
	# NOTE(review): this order presumably has to match what the scaler was
	# fitted on -- confirm against the training code.
	numeric_cols = ['age'] + [c for c in feature_names if c.startswith('A')]
	print("Numeric columns for scaling:", numeric_cols)

	# Check if all numeric cols exist
	for col in numeric_cols:
	    if col in df_encoded.columns:
	        print(f" {col}: {df_encoded[col].values[0]} ✓")
	    else:
	        print(f" ERROR: {col} not in DataFrame!")

	print("\nScaling...")
	# Start from a copy so that, if scaling fails, df_scaled still exists (with
	# unscaled values) and the later steps can keep reporting.
	df_scaled = df_encoded.copy()
	try:
	    df_scaled[numeric_cols] = scaler.transform(df_encoded[numeric_cols])
	except Exception as e:
	    # Broad on purpose: report whatever went wrong and carry on.
	    print(f"Scaling ERROR: {e}")
	    print(
	        " Scaler expects these features:",
	        scaler.get_feature_names_out()
	        if hasattr(scaler, 'get_feature_names_out')
	        else "N/A",
	    )
	    # A count mismatch is the quickest hint at a wrong column selection.
	    print(
	        f" Scaler was fitted on {scaler.n_features_in_} features;"
	        f" {len(numeric_cols)} supplied"
	    )
	else:
	    print("Scaling successful ✓")

	# STEP 4: pick exactly the expected columns, in the expected order.
	print("\n" + "=" * 70)
	print("STEP 4: Select features in exact order")
	print("Required feature order:", feature_names)

	# None marks "selection failed", so STEP 5 can skip with a clear message
	# instead of tripping over an unbound name.
	df_final = None
	try:
	    df_final = df_scaled[feature_names].copy()
	except Exception as e:
	    # Broad on purpose: diagnostic script, report and continue.
	    print(f"Feature selection ERROR: {e}")
	    print(" Available columns:", list(df_scaled.columns))
	else:
	    print("Feature selection successful ✓")
	    print("Final shape:", df_final.shape)
	    print("Final columns:", list(df_final.columns))

	# STEP 5: ask the model for class probabilities on the prepared row.
	print("\n" + "=" * 70)
	print("STEP 5: Predict")
	if df_final is None:
	    print("Prediction ERROR: skipped because feature selection failed in STEP 4")
	else:
	    try:
	        pred = model.predict_proba(df_final)[0]
	        # Indexing stays inside the try so an unexpected number of classes
	        # is reported rather than raised.
	        # NOTE(review): assumes index 0 is "no autism" and index 1 is
	        # "autism" -- verify against model.classes_.
	        p_negative, p_positive = pred[0], pred[1]
	    except Exception as e:
	        print(f"Prediction ERROR: {e}")
	    else:
	        print("Prediction successful ✓")
	        print(f" No Autism: {p_negative:.2%}")
	        print(f" Autism: {p_positive:.2%}")