Spaces:

harshith1411
/

autism-screening

Build error

File size: 5,221 Bytes

90bbde0

#!/usr/bin/env python3
"""Test the autism screening model with different test cases"""

import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the persisted artifacts the rest of this script relies on.
# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source.
def _load_pickle(path):
    """Return the object unpickled from the binary file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Classifier; later queried through ``predict_proba``.
model = _load_pickle('models/rf_model.pkl')
# Fitted scaler; later applied through ``transform`` to the numeric columns.
scaler = _load_pickle('models/scaler.pkl')
# Mapping of column name -> encoder exposing ``transform``.
le_dict = _load_pickle('models/le_dict.pkl')
# Column labels used to select and order the frame passed to the model.
feature_names = _load_pickle('models/feature_names.pkl')

# ---------------------------------------------------------------------------
# Smoke-test harness: push three hand-written profiles through the same
# encode -> scale -> reorder -> predict pipeline and compare the predicted
# risk band with the band each profile was written to represent.
# ---------------------------------------------------------------------------

BANNER_WIDTH = 70

# Probability cut-offs (applied to the class at index 1) that separate the
# three reported risk bands.
HIGH_RISK_THRESHOLD = 0.7
MEDIUM_RISK_THRESHOLD = 0.5

# Icon printed next to each risk band.
RISK_ICONS = {'HIGH': '🔴', 'MEDIUM': '🟡', 'LOW': '🟢'}

# The ten 0/1 questionnaire answers. Their sum is the "x/10" score shown in
# each test-case header.
question_cols = [
    'A1_prefer_detail_not_big_picture',
    'A2_must_have_sameness',
    'A3_prefer_reading_systematically',
    'A4_feel_anxious_in_social',
    'A5_prefer_talking_one_to_one',
    'A6_notice_small_changes',
    'A7_trouble_focus_on_changing',
    'A8_often_daydream',
    'A9_focused_on_one_topic',
    'A10_difficult_small_talk',
]

# Columns handed to the scaler, in this exact order.
# NOTE(review): assumes the scaler was fitted on these columns in this
# order -- confirm against the training script.
numeric_cols = question_cols + ['age']

# (expected risk band, raw profile) pairs, reported as test cases 1..N.
# NOTE(review): the key spelling 'jundice' presumably matches the column name
# used at training time -- verify before "fixing" it.
TEST_CASES = [
    (
        'HIGH',
        {
            'A1_prefer_detail_not_big_picture': 1,
            'A2_must_have_sameness': 1,
            'A3_prefer_reading_systematically': 1,
            'A4_feel_anxious_in_social': 1,
            'A5_prefer_talking_one_to_one': 1,
            'A6_notice_small_changes': 1,
            'A7_trouble_focus_on_changing': 1,
            'A8_often_daydream': 0,
            'A9_focused_on_one_topic': 1,
            'A10_difficult_small_talk': 1,
            'age': 28,
            'gender': 'M',
            'ethnicity': 'White',
            'jundice': 'no',
            'autism_family_member': 'yes',
            'country': 'USA',
            'used_app_before': 'no',
            'screening_type': 'Questionnaire',
        },
    ),
    (
        'MEDIUM',
        {
            'A1_prefer_detail_not_big_picture': 1,
            'A2_must_have_sameness': 0,
            'A3_prefer_reading_systematically': 1,
            'A4_feel_anxious_in_social': 0,
            'A5_prefer_talking_one_to_one': 1,
            'A6_notice_small_changes': 0,
            'A7_trouble_focus_on_changing': 1,
            'A8_often_daydream': 1,
            'A9_focused_on_one_topic': 0,
            'A10_difficult_small_talk': 1,
            'age': 35,
            'gender': 'F',
            'ethnicity': 'Asian',
            'jundice': 'yes',
            'autism_family_member': 'no',
            'country': 'India',
            'used_app_before': 'yes',
            'screening_type': 'Interview',
        },
    ),
    (
        'LOW',
        {
            'A1_prefer_detail_not_big_picture': 0,
            'A2_must_have_sameness': 0,
            'A3_prefer_reading_systematically': 0,
            'A4_feel_anxious_in_social': 0,
            'A5_prefer_talking_one_to_one': 0,
            'A6_notice_small_changes': 0,
            'A7_trouble_focus_on_changing': 0,
            'A8_often_daydream': 1,
            'A9_focused_on_one_topic': 0,
            'A10_difficult_small_talk': 0,
            'age': 22,
            'gender': 'F',
            'ethnicity': 'Others',
            'jundice': 'no',
            'autism_family_member': 'no',
            'country': 'UK',
            'used_app_before': 'no',
            'screening_type': 'Questionnaire',
        },
    ),
]


def classify_risk(autism_probability):
    """Map a probability to the 'HIGH', 'MEDIUM' or 'LOW' risk band."""
    if autism_probability >= HIGH_RISK_THRESHOLD:
        return 'HIGH'
    if autism_probability >= MEDIUM_RISK_THRESHOLD:
        return 'MEDIUM'
    return 'LOW'


def predict_probabilities(profile):
    """Return the model's class probabilities for one raw profile dict.

    The profile is turned into a one-row frame, every column that has an
    entry in ``le_dict`` is encoded with it, the numeric columns are scaled,
    and the columns are selected/ordered by ``feature_names`` before calling
    ``model.predict_proba``.
    """
    raw = pd.DataFrame([profile])
    encoded = raw.copy()
    for col in raw.columns:
        if col in le_dict:
            encoded[col] = le_dict[col].transform(raw[col])
    encoded[numeric_cols] = scaler.transform(encoded[numeric_cols])
    return model.predict_proba(encoded[feature_names])[0]


def run_test_case(number, expected_band, profile):
    """Print the report for one test case; return True if the band matched.

    Args:
        number: 1-based position shown in the header.
        expected_band: 'HIGH', 'MEDIUM' or 'LOW' -- the band the profile was
            written to represent.
        profile: raw feature dict for a single person.
    """
    # Derive the displayed score from the answers so the header can never
    # drift out of sync with the profile data.
    score = sum(profile[col] for col in question_cols)
    print(
        f"\n📊 TEST CASE {number}: {expected_band} RISK PROFILE "
        f"(Score: {score}/{len(question_cols)})"
    )
    print("-" * BANNER_WIDTH)

    probabilities = predict_probabilities(profile)
    # Index 1 is reported as the autism class and index 0 as the negative
    # class. NOTE(review): assumes model.classes_ is ordered that way --
    # confirm.
    print(f"Autism Probability: {probabilities[1]*100:.2f}%")
    print(f"NO Autism Probability: {probabilities[0]*100:.2f}%")

    predicted_band = classify_risk(probabilities[1])
    label = f"{RISK_ICONS[predicted_band]} {predicted_band} RISK"
    if predicted_band == expected_band:
        print(f"✅ Prediction: {label} - CORRECT!")
        return True
    # An unexpected MEDIUM is reported as a warning; any other mismatch is
    # reported as a failure.
    marker = "⚠️" if predicted_band == 'MEDIUM' else "❌"
    print(f"{marker} Prediction: {label}")
    return False


print("=" * BANNER_WIDTH)
print("🧪 TESTING AUTISM SCREENING MODEL WITH TEST CASES")
print("=" * BANNER_WIDTH)

mismatch_count = 0
for case_number, (expected_band, profile) in enumerate(TEST_CASES, start=1):
    if not run_test_case(case_number, expected_band, profile):
        mismatch_count += 1

# Only claim success when every case landed in its expected band; previously
# the success line was printed unconditionally. The process exit status is
# intentionally left unchanged.
print("\n" + "=" * BANNER_WIDTH)
if mismatch_count == 0:
    print("✅ TESTING COMPLETE - MODEL IS WORKING CORRECTLY!")
else:
    print(
        f"❌ TESTING COMPLETE - {mismatch_count} OF {len(TEST_CASES)} "
        "TEST CASES DID NOT MATCH THE EXPECTED RISK BAND"
    )
print("=" * BANNER_WIDTH)