varshitha22 committed on
Commit
23e27fd
·
verified ·
1 Parent(s): 90e17d8

Update cancer.py

Browse files
Files changed (1) hide show
  1. cancer.py +33 -41
cancer.py CHANGED
@@ -13,7 +13,7 @@ from xgboost import XGBClassifier
13
 
14
  # Load dataset
15
  def load_data():
16
- return pd.read_csv("https://huggingface.co/spaces/varshitha22/Cancer_Prediction/resolve/main/cancer_prediction_data%20(2).csv")
17
 
18
  # Data Preprocessing
19
  def preprocess_data(df):
@@ -35,9 +35,8 @@ def preprocess_data(df):
35
  y = df['Cancer_Present']
36
  return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
37
 
38
- # Train Models
39
  # Train Model
40
- def train_model(x_train, y_train, preprocess, model_name):
41
  models = {
42
  'Decision Tree': DecisionTreeClassifier(),
43
  'Logistic Regression': LogisticRegression(),
@@ -45,11 +44,16 @@ def train_model(x_train, y_train, preprocess, model_name):
45
  'Random Forest': RandomForestClassifier(),
46
  'XGBoost': XGBClassifier()
47
  }
 
 
 
 
48
  pipeline = Pipeline([
49
  ('preprocessor', preprocess),
50
  ('classifier', models[model_name])
51
  ])
52
- pipeline.fit(x_train, y_train)
 
53
 
54
  # Streamlit UI
55
  st.set_page_config(page_title='Cancer Prediction App', layout='wide')
@@ -58,44 +62,27 @@ with st.sidebar:
58
  st.image('https://via.placeholder.com/300x150.png?text=Cancer+Prediction')
59
  st.markdown("### Select Machine Learning Model")
60
  model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- # Load Data
63
- df = load_data()
64
- (X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
65
-
66
- # Define the models
67
- models = {
68
- 'Decision Tree': DecisionTreeClassifier(),
69
- 'Logistic Regression': LogisticRegression(),
70
- 'KNN': KNeighborsClassifier(),
71
- 'Random Forest': RandomForestClassifier(),
72
- 'XGBoost': XGBClassifier()
73
- }
74
-
75
- best_accuracy = 0
76
- best_model = None
77
-
78
- # Train and evaluate the selected model
79
- if st.button("Train Model"):
80
- st.write("Training the model...")
81
- model = models[model_name]
82
- pipeline = train_model(model, X_train, y_train, preprocess)
83
- accuracy = pipeline.score(X_test, y_test)
84
- st.session_state['trained_model'] = pipeline
85
-
86
- if accuracy > best_accuracy:
87
- best_accuracy = accuracy
88
- best_model = model_name
89
-
90
- st.success(f"Model Trained! Accuracy: {accuracy:.2f}")
91
-
92
- # Show the best model and its accuracy
93
- if best_model:
94
- st.write(f"The best model so far is **{best_model}** with an accuracy of **{best_accuracy:.2f}**")
95
-
96
- # Input form for prediction
97
  st.title("🎗️ Cancer Prediction")
98
 
 
 
 
 
99
  col1, col2 = st.columns(2)
100
  with col1:
101
  age = st.slider("Age", 18, 100, 30)
@@ -105,20 +92,25 @@ with col1:
105
 
106
  with col2:
107
  smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
108
- alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
109
- exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
110
  gender = st.selectbox("Gender", ['Male', "Female"])
111
  family_history = st.selectbox("Family History", ["No", "Yes"])
112
 
113
  input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
114
  alcohol_consumption, exercise_frequency, gender, family_history]]
115
 
 
116
  if st.button("Predict Cancer Presence"):
117
  if 'trained_model' in st.session_state:
118
  model = st.session_state['trained_model']
119
  X_train = st.session_state['X_train']
 
 
120
  input_df = pd.DataFrame(input_data, columns=X_train.columns)
121
  input_transformed = model.named_steps['preprocessor'].transform(input_df)
 
 
122
  prediction = model.named_steps['classifier'].predict(input_transformed)
123
 
124
  if prediction[0] == 1:
 
13
 
14
  # Load dataset
15
  def load_data():
16
+ return pd.read_csv('cancer_prediction_data (2).csv')
17
 
18
  # Data Preprocessing
19
  def preprocess_data(df):
 
35
  y = df['Cancer_Present']
36
  return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
37
 
 
38
  # Train Model
39
+ def train_model(X_train, y_train, preprocess, model_name):
40
  models = {
41
  'Decision Tree': DecisionTreeClassifier(),
42
  'Logistic Regression': LogisticRegression(),
 
44
  'Random Forest': RandomForestClassifier(),
45
  'XGBoost': XGBClassifier()
46
  }
47
+
48
+ if model_name not in models:
49
+ raise ValueError(f"Model '{model_name}' not recognized. Available models: {list(models.keys())}")
50
+
51
  pipeline = Pipeline([
52
  ('preprocessor', preprocess),
53
  ('classifier', models[model_name])
54
  ])
55
+ pipeline.fit(X_train, y_train)
56
+ return pipeline
57
 
58
  # Streamlit UI
59
  st.set_page_config(page_title='Cancer Prediction App', layout='wide')
 
62
  st.image('https://via.placeholder.com/300x150.png?text=Cancer+Prediction')
63
  st.markdown("### Select Machine Learning Model")
64
  model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
65
+ if st.button("Train Model"):
66
+ # Load and preprocess data
67
+ df = load_data()
68
+ (X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
69
+
70
+ # Train model
71
+ try:
72
+ model = train_model(X_train, y_train, preprocess, model_name)
73
+ accuracy = model.score(X_test, y_test)
74
+ st.session_state['trained_model'] = model
75
+ st.session_state['X_train'] = X_train
76
+ st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
77
+ except ValueError as e:
78
+ st.error(f"Error: {e}")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  st.title("🎗️ Cancer Prediction")
81
 
82
+ st.markdown("""<style>.big-font {font-size:20px !important;}</style>
83
+ <p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
84
+
85
+ # Patient input fields
86
  col1, col2 = st.columns(2)
87
  with col1:
88
  age = st.slider("Age", 18, 100, 30)
 
92
 
93
  with col2:
94
  smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
95
+ alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low', 'Moderate', 'High'])
96
+ exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
97
  gender = st.selectbox("Gender", ['Male', "Female"])
98
  family_history = st.selectbox("Family History", ["No", "Yes"])
99
 
100
  input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
101
  alcohol_consumption, exercise_frequency, gender, family_history]]
102
 
103
+ # Predict cancer presence
104
  if st.button("Predict Cancer Presence"):
105
  if 'trained_model' in st.session_state:
106
  model = st.session_state['trained_model']
107
  X_train = st.session_state['X_train']
108
+
109
+ # Prepare input data for prediction
110
  input_df = pd.DataFrame(input_data, columns=X_train.columns)
111
  input_transformed = model.named_steps['preprocessor'].transform(input_df)
112
+
113
+ # Make prediction
114
  prediction = model.named_steps['classifier'].predict(input_transformed)
115
 
116
  if prediction[0] == 1: