Spaces:
Sleeping
Sleeping
Update cancer.py
Browse files
cancer.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.pipeline import Pipeline
|
| 4 |
-
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
| 5 |
from sklearn.impute import SimpleImputer
|
| 6 |
from sklearn.compose import ColumnTransformer
|
| 7 |
from sklearn.model_selection import train_test_split
|
|
@@ -13,22 +13,27 @@ from xgboost import XGBClassifier
|
|
| 13 |
|
| 14 |
# Load dataset
|
| 15 |
def load_data():
|
| 16 |
-
return pd.read_csv('
|
| 17 |
|
| 18 |
# Data Preprocessing
|
| 19 |
def preprocess_data(df):
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
preprocess = ColumnTransformer([
|
| 24 |
('num', Pipeline([
|
| 25 |
('imputer', SimpleImputer(strategy='mean')),
|
| 26 |
('scaler', StandardScaler())
|
| 27 |
-
]),
|
| 28 |
-
('
|
| 29 |
('imputer', SimpleImputer(strategy='most_frequent')),
|
| 30 |
-
('encoder',
|
| 31 |
-
]),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
], remainder='passthrough')
|
| 33 |
|
| 34 |
x = df.drop('Cancer_Present', axis=1)
|
|
@@ -44,10 +49,6 @@ def train_model(x_train, y_train, preprocess, model_name):
|
|
| 44 |
'Random Forest': RandomForestClassifier(),
|
| 45 |
'XGBoost': XGBClassifier()
|
| 46 |
}
|
| 47 |
-
|
| 48 |
-
if model_name not in models:
|
| 49 |
-
raise ValueError(f"Model '{model_name}' not recognized. Available models: {list(models.keys())}")
|
| 50 |
-
|
| 51 |
pipeline = Pipeline([
|
| 52 |
('preprocessor', preprocess),
|
| 53 |
('classifier', models[model_name])
|
|
@@ -62,26 +63,19 @@ with st.sidebar:
|
|
| 62 |
st.markdown("### Select Machine Learning Model")
|
| 63 |
model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
|
| 64 |
if st.button("Train Model"):
|
| 65 |
-
# Load and preprocess data
|
| 66 |
df = load_data()
|
| 67 |
(x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
st.session_state['trained_model'] = model
|
| 74 |
-
st.session_state['x_train_columns'] = x_train.columns # Save column names for future prediction
|
| 75 |
-
st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
|
| 76 |
-
except ValueError as e:
|
| 77 |
-
st.error(f"Error: {e}")
|
| 78 |
|
| 79 |
st.title("🎗️ Cancer Prediction")
|
| 80 |
|
| 81 |
st.markdown("""<style>.big-font {font-size:20px !important;}</style>
|
| 82 |
<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
|
| 83 |
|
| 84 |
-
# Patient input fields
|
| 85 |
col1, col2 = st.columns(2)
|
| 86 |
with col1:
|
| 87 |
age = st.slider("Age", 18, 100, 30)
|
|
@@ -91,27 +85,20 @@ with col1:
|
|
| 91 |
|
| 92 |
with col2:
|
| 93 |
smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
|
| 94 |
-
alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low',
|
| 95 |
-
exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly',
|
| 96 |
-
gender = st.selectbox("Gender", [
|
| 97 |
family_history = st.selectbox("Family History", ["No", "Yes"])
|
| 98 |
|
| 99 |
input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
|
| 100 |
alcohol_consumption, exercise_frequency, gender, family_history]]
|
| 101 |
|
| 102 |
-
# Predict cancer presence
|
| 103 |
if st.button("Predict Cancer Presence"):
|
| 104 |
if 'trained_model' in st.session_state:
|
| 105 |
model = st.session_state['trained_model']
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
# Prepare input data for prediction
|
| 109 |
-
input_df = pd.DataFrame(input_data, columns=x_train_columns)
|
| 110 |
-
|
| 111 |
-
# Align input data with the model's expected columns
|
| 112 |
input_transformed = model.named_steps['preprocessor'].transform(input_df)
|
| 113 |
-
|
| 114 |
-
# Make prediction
|
| 115 |
prediction = model.named_steps['classifier'].predict(input_transformed)
|
| 116 |
|
| 117 |
if prediction[0] == 1:
|
|
@@ -121,4 +108,4 @@ if st.button("Predict Cancer Presence"):
|
|
| 121 |
st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
|
| 122 |
st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
|
| 123 |
else:
|
| 124 |
-
st.error("Please train a model first!")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.pipeline import Pipeline
|
| 4 |
+
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
|
| 5 |
from sklearn.impute import SimpleImputer
|
| 6 |
from sklearn.compose import ColumnTransformer
|
| 7 |
from sklearn.model_selection import train_test_split
|
|
|
|
| 13 |
|
| 14 |
# Load dataset
|
| 15 |
def load_data():
|
| 16 |
+
return pd.read_csv('cancer_prediction_data (2).csv')
|
| 17 |
|
| 18 |
# Data Preprocessing
|
| 19 |
def preprocess_data(df):
|
| 20 |
+
numeric = ['Age', 'Tumor_Size']
|
| 21 |
+
ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
|
| 22 |
+
nominal = ['Gender', 'Family_History', 'Smoking_History']
|
| 23 |
|
| 24 |
preprocess = ColumnTransformer([
|
| 25 |
('num', Pipeline([
|
| 26 |
('imputer', SimpleImputer(strategy='mean')),
|
| 27 |
('scaler', StandardScaler())
|
| 28 |
+
]), numeric),
|
| 29 |
+
('ord', Pipeline([
|
| 30 |
('imputer', SimpleImputer(strategy='most_frequent')),
|
| 31 |
+
('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
|
| 32 |
+
]), ordinal),
|
| 33 |
+
('nom', Pipeline([
|
| 34 |
+
('imputer', SimpleImputer(strategy='most_frequent')),
|
| 35 |
+
('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
|
| 36 |
+
]), nominal)
|
| 37 |
], remainder='passthrough')
|
| 38 |
|
| 39 |
x = df.drop('Cancer_Present', axis=1)
|
|
|
|
| 49 |
'Random Forest': RandomForestClassifier(),
|
| 50 |
'XGBoost': XGBClassifier()
|
| 51 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
pipeline = Pipeline([
|
| 53 |
('preprocessor', preprocess),
|
| 54 |
('classifier', models[model_name])
|
|
|
|
| 63 |
st.markdown("### Select Machine Learning Model")
|
| 64 |
model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
|
| 65 |
if st.button("Train Model"):
|
|
|
|
| 66 |
df = load_data()
|
| 67 |
(x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
|
| 68 |
+
model = train_model(x_train, y_train, preprocess, model_name)
|
| 69 |
+
accuracy = model.score(x_test, y_test)
|
| 70 |
+
st.session_state['trained_model'] = model
|
| 71 |
+
st.session_state['x_train'] = x_train
|
| 72 |
+
st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
st.title("🎗️ Cancer Prediction")
|
| 75 |
|
| 76 |
st.markdown("""<style>.big-font {font-size:20px !important;}</style>
|
| 77 |
<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
|
| 78 |
|
|
|
|
| 79 |
col1, col2 = st.columns(2)
|
| 80 |
with col1:
|
| 81 |
age = st.slider("Age", 18, 100, 30)
|
|
|
|
| 85 |
|
| 86 |
with col2:
|
| 87 |
smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
|
| 88 |
+
alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
|
| 89 |
+
exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
|
| 90 |
+
gender = st.selectbox("Gender", [0, 1])
|
| 91 |
family_history = st.selectbox("Family History", ["No", "Yes"])
|
| 92 |
|
| 93 |
input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
|
| 94 |
alcohol_consumption, exercise_frequency, gender, family_history]]
|
| 95 |
|
|
|
|
| 96 |
if st.button("Predict Cancer Presence"):
|
| 97 |
if 'trained_model' in st.session_state:
|
| 98 |
model = st.session_state['trained_model']
|
| 99 |
+
x_train = st.session_state['x_train']
|
| 100 |
+
input_df = pd.DataFrame(input_data, columns=x_train.columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
input_transformed = model.named_steps['preprocessor'].transform(input_df)
|
|
|
|
|
|
|
| 102 |
prediction = model.named_steps['classifier'].predict(input_transformed)
|
| 103 |
|
| 104 |
if prediction[0] == 1:
|
|
|
|
| 108 |
st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
|
| 109 |
st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
|
| 110 |
else:
|
| 111 |
+
st.error("Please train a model first!")
|