Spaces:
Sleeping
Sleeping
Update cancer.py
Browse files
cancer.py
CHANGED
|
@@ -15,26 +15,29 @@ from xgboost import XGBClassifier
|
|
| 15 |
def load_data():
|
| 16 |
return pd.read_csv('cancer_prediction_data (2).csv')
|
| 17 |
|
| 18 |
-
# Data Preprocessing
|
| 19 |
# Data Preprocessing
|
| 20 |
def preprocess_data(df):
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
|
| 25 |
preprocess = ColumnTransformer([
|
| 26 |
('num', Pipeline([
|
| 27 |
-
('imputer', SimpleImputer(strategy='mean')),
|
| 28 |
('scaler', StandardScaler())
|
| 29 |
-
]),
|
| 30 |
-
('
|
| 31 |
-
('imputer', SimpleImputer(strategy='most_frequent')),
|
| 32 |
-
('encoder',
|
| 33 |
-
]),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
], remainder='passthrough')
|
| 35 |
|
| 36 |
-
x = df.drop('Cancer_Present', axis=1)
|
| 37 |
-
y = df['Cancer_Present']
|
| 38 |
return train_test_split(x, y, test_size=0.2, random_state=23), preprocess
|
| 39 |
|
| 40 |
# Train Model
|
|
|
|
| 15 |
def load_data():
|
| 16 |
return pd.read_csv('cancer_prediction_data (2).csv')
|
| 17 |
|
|
|
|
| 18 |
# Data Preprocessing
|
| 19 |
def preprocess_data(df):
|
| 20 |
+
numeric = ['Age', 'Tumor_Size']
|
| 21 |
+
ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
|
| 22 |
+
nominal = ['Gender', 'Family_History', 'Smoking_History']
|
| 23 |
|
| 24 |
preprocess = ColumnTransformer([
|
| 25 |
('num', Pipeline([
|
| 26 |
+
('imputer', SimpleImputer(strategy='mean')),
|
| 27 |
('scaler', StandardScaler())
|
| 28 |
+
]), numeric),
|
| 29 |
+
('ord', Pipeline([
|
| 30 |
+
('imputer', SimpleImputer(strategy='most_frequent')),
|
| 31 |
+
('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
|
| 32 |
+
]), ordinal),
|
| 33 |
+
('nom', Pipeline([
|
| 34 |
+
('imputer', SimpleImputer(strategy='most_frequent')),
|
| 35 |
+
('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
|
| 36 |
+
]), nominal)
|
| 37 |
], remainder='passthrough')
|
| 38 |
|
| 39 |
+
x = df.drop('Cancer_Present', axis=1)
|
| 40 |
+
y = df['Cancer_Present']
|
| 41 |
return train_test_split(x, y, test_size=0.2, random_state=23), preprocess
|
| 42 |
|
| 43 |
# Train Model
|