#!/usr/bin/env python
"""
Print the exact feature columns found in each model's training dataset,
followed by the patient-profile feature names.
"""
import pandas as pd
# Report banner: the title framed above and below by a 70-character rule.
print("=" * 70, "FEATURE ANALYSIS FOR MODEL TRAINING DATA", "=" * 70, sep="\n")
# Diabetes dataset: every column except 'Diabetes' is reported as a feature.
diabetes_label = 'Diabetes'
df_diabetes = pd.read_csv('diabetes_data.csv')
diabetes_features = [name for name in df_diabetes.columns if name != diabetes_label]
print(
    "\nDIABETES MODEL:",
    f" Features ({len(diabetes_features)}): {diabetes_features}",
    sep="\n",
)
# Cardiovascular dataset: the file is parsed with ';' as the field separator,
# and every column except 'cardio' is reported as a feature.
df_cardio = pd.read_csv('cardio_train.csv', sep=';')
cardio_features = list(filter(lambda name: name != 'cardio', df_cardio.columns))
print(
    "\nCARDIOVASCULAR MODEL:",
    f" Features ({len(cardio_features)}): {cardio_features}",
    sep="\n",
)
# Chronic disease dataset: the 'Patient_ID' column is dropped right after
# loading, then every remaining column except 'HasChronicDisease' is reported
# as a feature.
# NOTE(review): the output is labelled "HYPERTENSION MODEL" although the file
# and the excluded column are named for chronic disease -- confirm the label.
df_chronic = pd.read_csv('chronic_disease_prediction_dataset.csv').drop(columns='Patient_ID')
hypertension_features = [
    name for name in df_chronic.columns if name != 'HasChronicDisease'
]
print(
    "\nHYPERTENSION MODEL:",
    f" Features ({len(hypertension_features)}): {hypertension_features}",
    sep="\n",
)
# Patient profile section: print a framed heading, then the hard-coded list of
# patient-profile field names together with its length.
# (A stray trailing "|" left over from the page extraction was removed here;
# it made the file unparseable.)
print("\n" + "=" * 70)
print("PATIENT PROFILE FEATURES:")
print("=" * 70)

# Patient profile features
patient_features = [
    'age',
    'sex',
    'blood_pressure_systolic',
    'blood_pressure_diastolic',
    'blood_sugar_fasting',
    'cholesterol_total',
    'hdl_cholesterol',
    'bmi',
    'sleep_hours',
    'stress_level',
    'smoking',
    'alcohol',
    'exercise_days_per_week',
    'iron_level',
    'cortisol',
    'family_history',
]
print(f" Patient Features ({len(patient_features)}): {patient_features}")