varshitha22 committed on
Commit
370eb34
·
verified ·
1 Parent(s): bf257f0

Update cancer.py

Browse files
Files changed (1) hide show
  1. cancer.py +24 -37
cancer.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
  from sklearn.pipeline import Pipeline
4
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.compose import ColumnTransformer
7
  from sklearn.model_selection import train_test_split
@@ -13,22 +13,27 @@ from xgboost import XGBClassifier
13
 
14
  # Load dataset
15
  def load_data():
16
- return pd.read_csv('https://huggingface.co/spaces/varshitha22/Cancer_Prediction/resolve/main/cancer_prediction_data%20(2).csv')
17
 
18
  # Data Preprocessing
19
  def preprocess_data(df):
20
- categorical_features = df.select_dtypes(include=['object']).columns
21
- numerical_features = df.select_dtypes(include=['int64', 'float64']).columns
 
22
 
23
  preprocess = ColumnTransformer([
24
  ('num', Pipeline([
25
  ('imputer', SimpleImputer(strategy='mean')),
26
  ('scaler', StandardScaler())
27
- ]), numerical_features),
28
- ('cat', Pipeline([
29
  ('imputer', SimpleImputer(strategy='most_frequent')),
30
- ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
31
- ]), categorical_features)
 
 
 
 
32
  ], remainder='passthrough')
33
 
34
  x = df.drop('Cancer_Present', axis=1)
@@ -44,10 +49,6 @@ def train_model(x_train, y_train, preprocess, model_name):
44
  'Random Forest': RandomForestClassifier(),
45
  'XGBoost': XGBClassifier()
46
  }
47
-
48
- if model_name not in models:
49
- raise ValueError(f"Model '{model_name}' not recognized. Available models: {list(models.keys())}")
50
-
51
  pipeline = Pipeline([
52
  ('preprocessor', preprocess),
53
  ('classifier', models[model_name])
@@ -62,26 +63,19 @@ with st.sidebar:
62
  st.markdown("### Select Machine Learning Model")
63
  model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
64
  if st.button("Train Model"):
65
- # Load and preprocess data
66
  df = load_data()
67
  (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
68
-
69
- # Train model
70
- try:
71
- model = train_model(x_train, y_train, preprocess, model_name)
72
- accuracy = model.score(x_test, y_test)
73
- st.session_state['trained_model'] = model
74
- st.session_state['x_train_columns'] = x_train.columns # Save column names for future prediction
75
- st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
76
- except ValueError as e:
77
- st.error(f"Error: {e}")
78
 
79
  st.title("🎗️ Cancer Prediction")
80
 
81
  st.markdown("""<style>.big-font {font-size:20px !important;}</style>
82
  <p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
83
 
84
- # Patient input fields
85
  col1, col2 = st.columns(2)
86
  with col1:
87
  age = st.slider("Age", 18, 100, 30)
@@ -91,27 +85,20 @@ with col1:
91
 
92
  with col2:
93
  smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
94
- alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low', 'Moderate', 'High'])
95
- exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
96
- gender = st.selectbox("Gender", ['Male', "Female"])
97
  family_history = st.selectbox("Family History", ["No", "Yes"])
98
 
99
  input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
100
  alcohol_consumption, exercise_frequency, gender, family_history]]
101
 
102
- # Predict cancer presence
103
  if st.button("Predict Cancer Presence"):
104
  if 'trained_model' in st.session_state:
105
  model = st.session_state['trained_model']
106
- X_train_columns = st.session_state['x_train_columns'] # Get saved column names
107
-
108
- # Prepare input data for prediction
109
- input_df = pd.DataFrame(input_data, columns=x_train_columns)
110
-
111
- # Align input data with the model's expected columns
112
  input_transformed = model.named_steps['preprocessor'].transform(input_df)
113
-
114
- # Make prediction
115
  prediction = model.named_steps['classifier'].predict(input_transformed)
116
 
117
  if prediction[0] == 1:
@@ -121,4 +108,4 @@ if st.button("Predict Cancer Presence"):
121
  st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
122
  st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
123
  else:
124
- st.error("Please train a model first!")
 
1
  import streamlit as st
2
  import pandas as pd
3
  from sklearn.pipeline import Pipeline
4
+ from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.compose import ColumnTransformer
7
  from sklearn.model_selection import train_test_split
 
13
 
14
  # Load dataset
15
  def load_data():
16
+ return pd.read_csv('cancer_prediction_data (2).csv')
17
 
18
  # Data Preprocessing
19
  def preprocess_data(df):
20
+ numeric = ['Age', 'Tumor_Size']
21
+ ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
22
+ nominal = ['Gender', 'Family_History', 'Smoking_History']
23
 
24
  preprocess = ColumnTransformer([
25
  ('num', Pipeline([
26
  ('imputer', SimpleImputer(strategy='mean')),
27
  ('scaler', StandardScaler())
28
+ ]), numeric),
29
+ ('ord', Pipeline([
30
  ('imputer', SimpleImputer(strategy='most_frequent')),
31
+ ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
32
+ ]), ordinal),
33
+ ('nom', Pipeline([
34
+ ('imputer', SimpleImputer(strategy='most_frequent')),
35
+ ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
36
+ ]), nominal)
37
  ], remainder='passthrough')
38
 
39
  x = df.drop('Cancer_Present', axis=1)
 
49
  'Random Forest': RandomForestClassifier(),
50
  'XGBoost': XGBClassifier()
51
  }
 
 
 
 
52
  pipeline = Pipeline([
53
  ('preprocessor', preprocess),
54
  ('classifier', models[model_name])
 
63
  st.markdown("### Select Machine Learning Model")
64
  model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
65
  if st.button("Train Model"):
 
66
  df = load_data()
67
  (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
68
+ model = train_model(x_train, y_train, preprocess, model_name)
69
+ accuracy = model.score(x_test, y_test)
70
+ st.session_state['trained_model'] = model
71
+ st.session_state['x_train'] = x_train
72
+ st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
 
 
 
 
 
73
 
74
  st.title("🎗️ Cancer Prediction")
75
 
76
  st.markdown("""<style>.big-font {font-size:20px !important;}</style>
77
  <p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
78
 
 
79
  col1, col2 = st.columns(2)
80
  with col1:
81
  age = st.slider("Age", 18, 100, 30)
 
85
 
86
  with col2:
87
  smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
88
+ alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
89
+ exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
90
+ gender = st.selectbox("Gender", [0, 1])
91
  family_history = st.selectbox("Family History", ["No", "Yes"])
92
 
93
  input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
94
  alcohol_consumption, exercise_frequency, gender, family_history]]
95
 
 
96
  if st.button("Predict Cancer Presence"):
97
  if 'trained_model' in st.session_state:
98
  model = st.session_state['trained_model']
99
+ x_train = st.session_state['x_train']
100
+ input_df = pd.DataFrame(input_data, columns=x_train.columns)
 
 
 
 
101
  input_transformed = model.named_steps['preprocessor'].transform(input_df)
 
 
102
  prediction = model.named_steps['classifier'].predict(input_transformed)
103
 
104
  if prediction[0] == 1:
 
108
  st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
109
  st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
110
  else:
111
+ st.error("Please train a model first!")