Spaces:

varshitha22
/

Cancer_Prediction

Sleeping

App Files Files Community

varshitha22 committed on Feb 25, 2025

Commit

370eb34

verified ·

1 Parent(s): bf257f0

Update cancer.py

Browse files

Files changed (1) hide show

cancer.py +24 -37

cancer.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import streamlit as st
 import pandas as pd
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.impute import SimpleImputer
 from sklearn.compose import ColumnTransformer
 from sklearn.model_selection import train_test_split
@@ -13,22 +13,27 @@ from xgboost import XGBClassifier
 # Load dataset
 def load_data():
-    return pd.read_csv('https://huggingface.co/spaces/varshitha22/Cancer_Prediction/resolve/main/cancer_prediction_data%20(2).csv')
 # Data Preprocessing
 def preprocess_data(df):
-    categorical_features = df.select_dtypes(include=['object']).columns
-    numerical_features = df.select_dtypes(include=['int64', 'float64']).columns
     preprocess = ColumnTransformer([
         ('num', Pipeline([
             ('imputer', SimpleImputer(strategy='mean')),
             ('scaler', StandardScaler())
-        ]), numerical_features),
-        ('cat', Pipeline([
             ('imputer', SimpleImputer(strategy='most_frequent')),
-            ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
-        ]), categorical_features)
     ], remainder='passthrough')
     x = df.drop('Cancer_Present', axis=1)
@@ -44,10 +49,6 @@ def train_model(x_train, y_train, preprocess, model_name):
         'Random Forest': RandomForestClassifier(),
         'XGBoost': XGBClassifier()
     }
-    if model_name not in models:
-        raise ValueError(f"Model '{model_name}' not recognized. Available models: {list(models.keys())}")
     pipeline = Pipeline([
         ('preprocessor', preprocess),
         ('classifier', models[model_name])
@@ -62,26 +63,19 @@ with st.sidebar:
     st.markdown("### Select Machine Learning Model")
     model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
     if st.button("Train Model"):
-        # Load and preprocess data
         df = load_data()
         (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
-        # Train model
-        try:
-            model = train_model(x_train, y_train, preprocess, model_name)
-            accuracy = model.score(x_test, y_test)
-            st.session_state['trained_model'] = model
-            st.session_state['x_train_columns'] = x_train.columns  # Save column names for future prediction
-            st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
-        except ValueError as e:
-            st.error(f"Error: {e}")
 st.title("🎗️ Cancer Prediction")
 st.markdown("""<style>.big-font {font-size:20px !important;}</style>
 <p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
-# Patient input fields
 col1, col2 = st.columns(2)
 with col1:
     age = st.slider("Age", 18, 100, 30)
@@ -91,27 +85,20 @@ with col1:
 with col2:
     smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
-    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low', 'Moderate', 'High'])
-    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
-    gender = st.selectbox("Gender", ['Male', "Female"])
     family_history = st.selectbox("Family History", ["No", "Yes"])
 input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
                alcohol_consumption, exercise_frequency, gender, family_history]]
-# Predict cancer presence
 if st.button("Predict Cancer Presence"):
     if 'trained_model' in st.session_state:
         model = st.session_state['trained_model']
-        X_train_columns = st.session_state['x_train_columns']  # Get saved column names
-        # Prepare input data for prediction
-        input_df = pd.DataFrame(input_data, columns=x_train_columns)
-        # Align input data with the model's expected columns
         input_transformed = model.named_steps['preprocessor'].transform(input_df)
-        # Make prediction
         prediction = model.named_steps['classifier'].predict(input_transformed)
         if prediction[0] == 1:
@@ -121,4 +108,4 @@ if st.button("Predict Cancer Presence"):
             st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
             st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
     else:
-        st.error("Please train a model first!")

 import streamlit as st
 import pandas as pd
 from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
 from sklearn.impute import SimpleImputer
 from sklearn.compose import ColumnTransformer
 from sklearn.model_selection import train_test_split
 # Load dataset
 def load_data():
+    return pd.read_csv('cancer_prediction_data (2).csv')
 # Data Preprocessing
 def preprocess_data(df):
+    numeric = ['Age', 'Tumor_Size']
+    ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
+    nominal = ['Gender', 'Family_History', 'Smoking_History']
     preprocess = ColumnTransformer([
         ('num', Pipeline([
             ('imputer', SimpleImputer(strategy='mean')),
             ('scaler', StandardScaler())
+        ]), numeric),
+        ('ord', Pipeline([
             ('imputer', SimpleImputer(strategy='most_frequent')),
+            ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
+        ]), ordinal),
+        ('nom', Pipeline([
+            ('imputer', SimpleImputer(strategy='most_frequent')),
+            ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
+        ]), nominal)
     ], remainder='passthrough')
     x = df.drop('Cancer_Present', axis=1)
         'Random Forest': RandomForestClassifier(),
         'XGBoost': XGBClassifier()
     }
     pipeline = Pipeline([
         ('preprocessor', preprocess),
         ('classifier', models[model_name])
     st.markdown("### Select Machine Learning Model")
     model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
     if st.button("Train Model"):
         df = load_data()
         (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
+        model = train_model(x_train, y_train, preprocess, model_name)
+        accuracy = model.score(x_test, y_test)
+        st.session_state['trained_model'] = model
+        st.session_state['x_train'] = x_train
+        st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
 st.title("🎗️ Cancer Prediction")
 st.markdown("""<style>.big-font {font-size:20px !important;}</style>
 <p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
 col1, col2 = st.columns(2)
 with col1:
     age = st.slider("Age", 18, 100, 30)
 with col2:
     smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
+    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
+    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
+    gender = st.selectbox("Gender", [0, 1])
     family_history = st.selectbox("Family History", ["No", "Yes"])
 input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
                alcohol_consumption, exercise_frequency, gender, family_history]]
 if st.button("Predict Cancer Presence"):
     if 'trained_model' in st.session_state:
         model = st.session_state['trained_model']
+        x_train = st.session_state['x_train']
+        input_df = pd.DataFrame(input_data, columns=x_train.columns)
         input_transformed = model.named_steps['preprocessor'].transform(input_df)
         prediction = model.named_steps['classifier'].predict(input_transformed)
         if prediction[0] == 1:
             st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
             st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
     else:
+        st.error("Please train a model first!")