Spaces:
Sleeping
Sleeping
| import great_expectations as gx | |
| import pandas as pd | |
| from predicting_outcomes_in_heart_failure.config import ( | |
| ASSET_NAME, | |
| RAW_PATH, | |
| SOURCE_NAME, | |
| SUITE_NAME, | |
| ) | |
| from util import set_gx, show_results | |
def run_test():
    """Build the raw-data expectation suite and validate ``df`` against it.

    Takes no arguments. It reads the module-level names ``context``,
    ``suite``, ``batch_definition``, ``df``, ``features_columns`` and
    ``expected_columns``, which this file assigns under its
    ``if __name__ == "__main__":`` guard before calling this function.
    Calling it while any of those names is undefined raises ``NameError``.

    Expectations are added to ``suite`` in this order: table column count,
    per-column existence and non-null checks, column types, allowed value
    sets, compound uniqueness, and a lower bound on ``Age``. The suite, a
    validation definition and a checkpoint are then registered on
    ``context``; the checkpoint is run on ``df`` and its result is handed
    to ``show_results``.
    """
    expect = gx.expectations

    # Schema: exact column count, every expected column present and non-null.
    suite.add_expectation(
        expect.ExpectTableColumnCountToEqual(value=len(expected_columns))
    )
    for col in expected_columns:
        suite.add_expectation(expect.ExpectColumnToExist(column=col))
        suite.add_expectation(expect.ExpectColumnValuesToNotBeNull(column=col))

    # Expected type per column (dict order is the order expectations are added).
    # NOTE(review): "ExerciseAngina" has a value-set check below but no type
    # check here -- confirm whether that omission is intentional.
    column_types = {
        "Age": "int",
        "RestingBP": "int",
        "Cholesterol": "int",
        "MaxHR": "int",
        "Oldpeak": "float",
        "Sex": "str",
        "ChestPainType": "str",
        "RestingECG": "str",
        "ST_Slope": "str",
    }
    for col, type_name in column_types.items():
        suite.add_expectation(
            expect.ExpectColumnValuesToBeOfType(column=col, type_=type_name)
        )

    # Allowed values per column.
    allowed_values = {
        "FastingBS": [0, 1],
        "HeartDisease": [0, 1],
        "Sex": ["M", "F"],
        "ExerciseAngina": ["N", "Y"],
        "ST_Slope": ["Flat", "Up", "Down"],
        "RestingECG": ["Normal", "LVH", "ST"],
        "ChestPainType": ["ASY", "NAP", "ATA", "TA"],
    }
    for col, value_set in allowed_values.items():
        suite.add_expectation(
            expect.ExpectColumnValuesToBeInSet(column=col, value_set=value_set)
        )

    # Uniqueness is checked on the feature columns alone and again on all
    # expected columns; both are kept so each appears in the results.
    for column_list in (features_columns, expected_columns):
        suite.add_expectation(
            expect.ExpectCompoundColumnsToBeUnique(column_list=column_list)
        )

    suite.add_expectation(
        expect.ExpectColumnValuesToBeBetween(column="Age", min_value=18)
    )

    context.suites.add_or_update(suite)

    # Use add_or_update (as for the suite above) rather than add: add raises
    # when an object with the same name already exists in the context, which
    # would make a second run against a persistent context fail.
    validation_definition = context.validation_definitions.add_or_update(
        gx.core.validation_definition.ValidationDefinition(
            name=ASSET_NAME + "_validation_raw",
            data=batch_definition,
            suite=suite,
        )
    )
    checkpoint = context.checkpoints.add_or_update(
        gx.checkpoint.checkpoint.Checkpoint(
            name=ASSET_NAME + "_checkpoint_raw",
            validation_definitions=[validation_definition],
        )
    )

    checkpoint_result = checkpoint.run(batch_parameters={"dataframe": df})
    show_results(checkpoint_result)
if __name__ == "__main__":
    # Script entry point. Every name assigned here is a module-level global
    # that run_test() reads directly, so the names must stay as they are.

    # Feature columns, plus the HeartDisease column, make up the full set of
    # columns expected in the raw CSV.
    features_columns = [
        "Age",
        "Sex",
        "ChestPainType",
        "RestingBP",
        "Cholesterol",
        "FastingBS",
        "RestingECG",
        "MaxHR",
        "ExerciseAngina",
        "Oldpeak",
        "ST_Slope",
    ]
    expected_columns = [*features_columns, "HeartDisease"]

    # Load the raw data to validate.
    df = pd.read_csv(RAW_PATH)

    # Source, asset and suite names all carry the "_raw" suffix, in that order.
    context, suite, batch_definition = set_gx(
        *(base + "_raw" for base in (SOURCE_NAME, ASSET_NAME, SUITE_NAME))
    )

    run_test()