Spaces:

varshitha22
/

Cancer_Prediction

Sleeping

App Files Files Community

varshitha22 committed on Feb 25, 2025

Commit

54d3c5a

verified ·

1 Parent(s): b1ff86e

Update cancer.py

Browse files

Files changed (1) hide show

cancer.py +53 -40

cancer.py CHANGED Viewed

@@ -13,68 +13,81 @@ from xgboost import XGBClassifier
 # Load dataset
 def load_data():
-    return pd.read_csv('cancer_prediction_data (2).csv')
 # Data Preprocessing
 def preprocess_data(df):
-    numeric = ['Age', 'Tumor_Size']
-    ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
-    nominal = ['Gender', 'Family_History', 'Smoking_History']
     preprocess = ColumnTransformer([
         ('num', Pipeline([
             ('imputer', SimpleImputer(strategy='mean')),
             ('scaler', StandardScaler())
-        ]), numeric),
-        ('ord', Pipeline([
             ('imputer', SimpleImputer(strategy='most_frequent')),
-            ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
-        ]), ordinal),
-        ('nom', Pipeline([
-            ('imputer', SimpleImputer(strategy='most_frequent')),
-            ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
-        ]), nominal)
     ], remainder='passthrough')
-    x = df.drop('Cancer_Present', axis=1)
     y = df['Cancer_Present']
-    return train_test_split(x, y, test_size=0.2, random_state=23), preprocess
-# Train Model
-def train_model(x_train, y_train, preprocess, model_name):
-    models = {
-        'Decision Tree': DecisionTreeClassifier(),
-        'Logistic Regression': LogisticRegression(),
-        'KNN': KNeighborsClassifier(),
-        'Random Forest': RandomForestClassifier(),
-        'XGBoost': XGBClassifier()
-    }
     pipeline = Pipeline([
         ('preprocessor', preprocess),
-        ('classifier', models[model_name])
     ])
-    pipeline.fit(x_train, y_train)
     return pipeline
 # Streamlit UI
 st.set_page_config(page_title='Cancer Prediction App', layout='wide')
 with st.sidebar:
     st.markdown("### Select Machine Learning Model")
     model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
-    if st.button("Train Model"):
-        df = load_data()
-        (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
-        model = train_model(x_train, y_train, preprocess, model_name)
-        accuracy = model.score(x_test, y_test)
-        st.session_state['trained_model'] = model
-        st.session_state['x_train'] = x_train
-        st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
-st.title("🎗️ Cancer Prediction")
-st.markdown("""<style>.big-font {font-size:20px !important;}</style>
-<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
 col1, col2 = st.columns(2)
 with col1:
@@ -87,7 +100,7 @@ with col2:
     smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
     alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
     exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
-    gender = st.selectbox("Gender", [0, 1])
     family_history = st.selectbox("Family History", ["No", "Yes"])
 input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
@@ -96,8 +109,8 @@ input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
 if st.button("Predict Cancer Presence"):
     if 'trained_model' in st.session_state:
         model = st.session_state['trained_model']
-        x_train = st.session_state['x_train']
-        input_df = pd.DataFrame(input_data, columns=x_train.columns)
         input_transformed = model.named_steps['preprocessor'].transform(input_df)
         prediction = model.named_steps['classifier'].predict(input_transformed)

 # Load dataset
 def load_data():
+    return pd.read_csv('.csv')
 # Data Preprocessing
 def preprocess_data(df):
+    categorical_features = df.select_dtypes(include=['object']).columns
+    numerical_features = df.select_dtypes(include=['int64', 'float64']).columns
     preprocess = ColumnTransformer([
         ('num', Pipeline([
             ('imputer', SimpleImputer(strategy='mean')),
             ('scaler', StandardScaler())
+        ]), numerical_features),
+        ('cat', Pipeline([
             ('imputer', SimpleImputer(strategy='most_frequent')),
+            ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
+        ]), categorical_features)
     ], remainder='passthrough')
+    X = df.drop('Cancer_Present', axis=1)
     y = df['Cancer_Present']
+    return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
+# Train Models
+def train_model(model, X_train, y_train, preprocess):
     pipeline = Pipeline([
         ('preprocessor', preprocess),
+        ('classifier', model)
     ])
+    pipeline.fit(X_train, y_train)
     return pipeline
 # Streamlit UI
 st.set_page_config(page_title='Cancer Prediction App', layout='wide')
 with st.sidebar:
+    st.image('https://via.placeholder.com/300x150.png?text=Cancer+Prediction')
     st.markdown("### Select Machine Learning Model")
     model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
+# Load Data
+df = load_data()
+(X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
+# Define the models
+models = {
+    'Decision Tree': DecisionTreeClassifier(),
+    'Logistic Regression': LogisticRegression(),
+    'KNN': KNeighborsClassifier(),
+    'Random Forest': RandomForestClassifier(),
+    'XGBoost': XGBClassifier()
+}
+best_accuracy = 0
+best_model = None
+# Train and evaluate the selected model
+if st.button("Train Model"):
+    st.write("Training the model...")
+    model = models[model_name]
+    pipeline = train_model(model, X_train, y_train, preprocess)
+    accuracy = pipeline.score(X_test, y_test)
+    st.session_state['trained_model'] = pipeline
+    if accuracy > best_accuracy:
+        best_accuracy = accuracy
+        best_model = model_name
+    st.success(f"Model Trained! Accuracy: {accuracy:.2f}")
+# Show the best model and its accuracy
+if best_model:
+    st.write(f"The best model so far is **{best_model}** with an accuracy of **{best_accuracy:.2f}**")
+# Input form for prediction
+st.title("🎗️ Cancer Prediction")
 col1, col2 = st.columns(2)
 with col1:
     smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
     alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
     exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
+    gender = st.selectbox("Gender", ['Male', "Female"])
     family_history = st.selectbox("Family History", ["No", "Yes"])
 input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
 if st.button("Predict Cancer Presence"):
     if 'trained_model' in st.session_state:
         model = st.session_state['trained_model']
+        X_train = st.session_state['X_train']
+        input_df = pd.DataFrame(input_data, columns=X_train.columns)
         input_transformed = model.named_steps['preprocessor'].transform(input_df)
         prediction = model.named_steps['classifier'].predict(input_transformed)