| |
| """ |
| Test to identify exact feature columns used in each dataset |
| """ |
|
|
| import pandas as pd |
|
|
| print("=" * 70) |
| print("FEATURE ANALYSIS FOR MODEL TRAINING DATA") |
| print("=" * 70) |
|
|
| |
| df_diabetes = pd.read_csv('diabetes_data.csv') |
| diabetes_features = [col for col in df_diabetes.columns if col != 'Diabetes'] |
| print(f"\nDIABETES MODEL:") |
| print(f" Features ({len(diabetes_features)}): {diabetes_features}") |
|
|
| |
| df_cardio = pd.read_csv('cardio_train.csv', sep=';') |
| cardio_features = [col for col in df_cardio.columns if col != 'cardio'] |
| print(f"\nCARDIOVASCULAR MODEL:") |
| print(f" Features ({len(cardio_features)}): {cardio_features}") |
|
|
| |
| df_chronic = pd.read_csv('chronic_disease_prediction_dataset.csv') |
| df_chronic = df_chronic.drop('Patient_ID', axis=1) |
| hypertension_features = [col for col in df_chronic.columns if col != 'HasChronicDisease'] |
| print(f"\nHYPERTENSION MODEL:") |
| print(f" Features ({len(hypertension_features)}): {hypertension_features}") |
|
|
| print("\n" + "=" * 70) |
| print("PATIENT PROFILE FEATURES:") |
| print("=" * 70) |
|
|
| |
| patient_features = [ |
| 'age', 'sex', 'blood_pressure_systolic', 'blood_pressure_diastolic', |
| 'blood_sugar_fasting', 'cholesterol_total', 'hdl_cholesterol', 'bmi', |
| 'sleep_hours', 'stress_level', 'smoking', 'alcohol', 'exercise_days_per_week', |
| 'iron_level', 'cortisol', 'family_history' |
| ] |
| print(f" Patient Features ({len(patient_features)}): {patient_features}") |
|
|