varshitha22 committed on
Commit
54d3c5a
·
verified ·
1 Parent(s): b1ff86e

Update cancer.py

Browse files
Files changed (1) hide show
  1. cancer.py +53 -40
cancer.py CHANGED
@@ -13,68 +13,81 @@ from xgboost import XGBClassifier
13
 
14
  # Load dataset
15
  def load_data():
16
- return pd.read_csv('cancer_prediction_data (2).csv')
17
 
18
  # Data Preprocessing
19
  def preprocess_data(df):
20
- numeric = ['Age', 'Tumor_Size']
21
- ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
22
- nominal = ['Gender', 'Family_History', 'Smoking_History']
23
 
24
  preprocess = ColumnTransformer([
25
  ('num', Pipeline([
26
  ('imputer', SimpleImputer(strategy='mean')),
27
  ('scaler', StandardScaler())
28
- ]), numeric),
29
- ('ord', Pipeline([
30
  ('imputer', SimpleImputer(strategy='most_frequent')),
31
- ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
32
- ]), ordinal),
33
- ('nom', Pipeline([
34
- ('imputer', SimpleImputer(strategy='most_frequent')),
35
- ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
36
- ]), nominal)
37
  ], remainder='passthrough')
38
 
39
- x = df.drop('Cancer_Present', axis=1)
40
  y = df['Cancer_Present']
41
- return train_test_split(x, y, test_size=0.2, random_state=23), preprocess
42
-
43
- # Train Model
44
- def train_model(x_train, y_train, preprocess, model_name):
45
- models = {
46
- 'Decision Tree': DecisionTreeClassifier(),
47
- 'Logistic Regression': LogisticRegression(),
48
- 'KNN': KNeighborsClassifier(),
49
- 'Random Forest': RandomForestClassifier(),
50
- 'XGBoost': XGBClassifier()
51
- }
52
  pipeline = Pipeline([
53
  ('preprocessor', preprocess),
54
- ('classifier', models[model_name])
55
  ])
56
- pipeline.fit(x_train, y_train)
57
  return pipeline
58
 
59
  # Streamlit UI
60
  st.set_page_config(page_title='Cancer Prediction App', layout='wide')
61
 
62
  with st.sidebar:
 
63
  st.markdown("### Select Machine Learning Model")
64
  model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
65
- if st.button("Train Model"):
66
- df = load_data()
67
- (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
68
- model = train_model(x_train, y_train, preprocess, model_name)
69
- accuracy = model.score(x_test, y_test)
70
- st.session_state['trained_model'] = model
71
- st.session_state['x_train'] = x_train
72
- st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
73
 
74
- st.title("🎗️ Cancer Prediction")
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- st.markdown("""<style>.big-font {font-size:20px !important;}</style>
77
- <p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  col1, col2 = st.columns(2)
80
  with col1:
@@ -87,7 +100,7 @@ with col2:
87
  smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
88
  alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
89
  exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
90
- gender = st.selectbox("Gender", [0, 1])
91
  family_history = st.selectbox("Family History", ["No", "Yes"])
92
 
93
  input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
@@ -96,8 +109,8 @@ input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
96
  if st.button("Predict Cancer Presence"):
97
  if 'trained_model' in st.session_state:
98
  model = st.session_state['trained_model']
99
- x_train = st.session_state['x_train']
100
- input_df = pd.DataFrame(input_data, columns=x_train.columns)
101
  input_transformed = model.named_steps['preprocessor'].transform(input_df)
102
  prediction = model.named_steps['classifier'].predict(input_transformed)
103
 
 
13
 
14
  # Load dataset
def load_data(path='.csv'):
    """Read the cancer dataset from a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. The default is the path this
            function previously hard-coded, so existing zero-argument
            callers behave exactly as before.
            NOTE(review): a bare '.csv' looks like a file name whose stem
            was lost — confirm the dataset's real name in the repository.

    Returns:
        pandas.DataFrame with the file's contents.

    Raises:
        FileNotFoundError: If no file exists at ``path`` (raised by
            ``pd.read_csv``).
    """
    return pd.read_csv(path)
17
 
18
  # Data Preprocessing
def preprocess_data(df):
    """Split the dataset and build the (unfitted) preprocessing transformer.

    The target column ``Cancer_Present`` is separated from the features
    first, and the numeric / categorical column lists are derived from the
    feature frame only. Deriving them from the full frame would list the
    target among the transformer's columns, and fitting on the feature
    frame (which no longer has that column) would then fail.

    Args:
        df: DataFrame containing the feature columns and a
            ``Cancer_Present`` target column.

    Returns:
        A 2-tuple ``(split, preprocess)`` where ``split`` is the
        ``[X_train, X_test, y_train, y_test]`` result of
        ``train_test_split`` (20% test, ``random_state=23``) and
        ``preprocess`` is the unfitted ``ColumnTransformer``.

    Raises:
        KeyError: If ``df`` has no ``Cancer_Present`` column.
    """
    X = df.drop('Cancer_Present', axis=1)
    y = df['Cancer_Present']

    # Select feature columns by dtype from X, never from df, so the target
    # cannot leak into the transformer's column lists.
    categorical_features = list(X.select_dtypes(include=['object']).columns)
    numerical_features = list(X.select_dtypes(include=['int64', 'float64']).columns)

    preprocess = ColumnTransformer([
        ('num', Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical_features),
        ('cat', Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
        ]), categorical_features)
    ], remainder='passthrough')  # columns of any other dtype pass through untouched

    return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
37
+
38
+ # Train Models
def train_model(model, X_train, y_train, preprocess):
    """Fit a preprocessing + classifier pipeline on the training data.

    Args:
        model: Estimator placed in the pipeline's ``'classifier'`` step.
        X_train: Training features passed to ``Pipeline.fit``.
        y_train: Training labels passed to ``Pipeline.fit``.
        preprocess: Transformer placed in the ``'preprocessor'`` step.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    steps = [
        ('preprocessor', preprocess),
        ('classifier', model),
    ]
    fitted = Pipeline(steps)
    fitted.fit(X_train, y_train)
    return fitted
46
 
# Streamlit UI
# NOTE(review): the diff view this section was recovered from does not keep
# indentation. The nesting below (only the image / heading / radio widgets
# inside the sidebar) is the most plausible reading — confirm against the
# real cancer.py.
st.set_page_config(page_title='Cancer Prediction App', layout='wide')

with st.sidebar:
    st.image('https://via.placeholder.com/300x150.png?text=Cancer+Prediction')
    st.markdown("### Select Machine Learning Model")
    model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])

# Load Data
df = load_data()
(X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)

# Define the models
models = {
    'Decision Tree': DecisionTreeClassifier(),
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(),
    'XGBoost': XGBClassifier()
}

# Streamlit re-executes this script on every interaction, so plain
# module-level variables would be reset before a "best so far" could ever
# accumulate. Keep the running best in session state instead.
if 'best_accuracy' not in st.session_state:
    st.session_state['best_accuracy'] = 0
    st.session_state['best_model'] = None

# Train and evaluate the selected model
if st.button("Train Model"):
    st.write("Training the model...")
    model = models[model_name]
    pipeline = train_model(model, X_train, y_train, preprocess)
    accuracy = pipeline.score(X_test, y_test)
    st.session_state['trained_model'] = pipeline
    # The prediction handler builds its input DataFrame from
    # st.session_state['X_train'].columns, so the training features must be
    # stored alongside the model; otherwise that lookup raises KeyError.
    st.session_state['X_train'] = X_train

    if accuracy > st.session_state['best_accuracy']:
        st.session_state['best_accuracy'] = accuracy
        st.session_state['best_model'] = model_name

    st.success(f"Model Trained! Accuracy: {accuracy:.2f}")

# Mirror the persisted values into the original module-level names so any
# later code that reads them keeps working.
best_accuracy = st.session_state['best_accuracy']
best_model = st.session_state['best_model']

# Show the best model and its accuracy
if best_model:
    st.write(f"The best model so far is **{best_model}** with an accuracy of **{best_accuracy:.2f}**")
88
+
89
+ # Input form for prediction
90
+ st.title("🎗️ Cancer Prediction")
91
 
92
  col1, col2 = st.columns(2)
93
  with col1:
 
100
  smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
101
  alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
102
  exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
103
+ gender = st.selectbox("Gender", ['Male', "Female"])
104
  family_history = st.selectbox("Family History", ["No", "Yes"])
105
 
106
  input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
 
109
  if st.button("Predict Cancer Presence"):
110
  if 'trained_model' in st.session_state:
111
  model = st.session_state['trained_model']
112
+ X_train = st.session_state['X_train']
113
+ input_df = pd.DataFrame(input_data, columns=X_train.columns)
114
  input_transformed = model.named_steps['preprocessor'].transform(input_df)
115
  prediction = model.named_steps['classifier'].predict(input_transformed)
116