Spaces:

varshitha22
/

Cancer_Prediction

Sleeping

App Files Community

varshitha22 committed on Feb 25, 2025

Commit

23e27fd

verified ·

1 Parent(s): 90e17d8

Update cancer.py

Browse files

Files changed (1) hide show

cancer.py +33 -41

cancer.py CHANGED Viewed

@@ -13,7 +13,7 @@ from xgboost import XGBClassifier
 # Load dataset
 def load_data():
-    return pd.read_csv("https://huggingface.co/spaces/varshitha22/Cancer_Prediction/resolve/main/cancer_prediction_data%20(2).csv")
 # Data Preprocessing
 def preprocess_data(df):
@@ -35,9 +35,8 @@ def preprocess_data(df):
     y = df['Cancer_Present']
     return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
-# Train Models
 # Train Model
-def train_model(x_train, y_train, preprocess, model_name):
     models = {
         'Decision Tree': DecisionTreeClassifier(),
         'Logistic Regression': LogisticRegression(),
@@ -45,11 +44,16 @@ def train_model(x_train, y_train, preprocess, model_name):
         'Random Forest': RandomForestClassifier(),
         'XGBoost': XGBClassifier()
     }
     pipeline = Pipeline([
         ('preprocessor', preprocess),
         ('classifier', models[model_name])
     ])
-    pipeline.fit(x_train, y_train)
 # Streamlit UI
 st.set_page_config(page_title='Cancer Prediction App', layout='wide')
@@ -58,44 +62,27 @@ with st.sidebar:
     st.image('https://via.placeholder.com/300x150.png?text=Cancer+Prediction')
     st.markdown("### Select Machine Learning Model")
     model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
-# Load Data
-df = load_data()
-(X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
-# Define the models
-models = {
-    'Decision Tree': DecisionTreeClassifier(),
-    'Logistic Regression': LogisticRegression(),
-    'KNN': KNeighborsClassifier(),
-    'Random Forest': RandomForestClassifier(),
-    'XGBoost': XGBClassifier()
-}
-best_accuracy = 0
-best_model = None
-# Train and evaluate the selected model
-if st.button("Train Model"):
-    st.write("Training the model...")
-    model = models[model_name]
-    pipeline = train_model(model, X_train, y_train, preprocess)
-    accuracy = pipeline.score(X_test, y_test)
-    st.session_state['trained_model'] = pipeline
-    if accuracy > best_accuracy:
-        best_accuracy = accuracy
-        best_model = model_name
-    st.success(f"Model Trained! Accuracy: {accuracy:.2f}")
-# Show the best model and its accuracy
-if best_model:
-    st.write(f"The best model so far is **{best_model}** with an accuracy of **{best_accuracy:.2f}**")
-# Input form for prediction
 st.title("🎗️ Cancer Prediction")
 col1, col2 = st.columns(2)
 with col1:
     age = st.slider("Age", 18, 100, 30)
@@ -105,20 +92,25 @@ with col1:
 with col2:
     smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
-    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
-    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
     gender = st.selectbox("Gender", ['Male', "Female"])
     family_history = st.selectbox("Family History", ["No", "Yes"])
 input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
                alcohol_consumption, exercise_frequency, gender, family_history]]
 if st.button("Predict Cancer Presence"):
     if 'trained_model' in st.session_state:
         model = st.session_state['trained_model']
         X_train = st.session_state['X_train']
         input_df = pd.DataFrame(input_data, columns=X_train.columns)
         input_transformed = model.named_steps['preprocessor'].transform(input_df)
         prediction = model.named_steps['classifier'].predict(input_transformed)
         if prediction[0] == 1:

 # Load dataset
 def load_data():
+    return pd.read_csv('cancer_prediction_data (2).csv')
 # Data Preprocessing
 def preprocess_data(df):
     y = df['Cancer_Present']
     return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
 # Train Model
+def train_model(X_train, y_train, preprocess, model_name):
     models = {
         'Decision Tree': DecisionTreeClassifier(),
         'Logistic Regression': LogisticRegression(),
         'Random Forest': RandomForestClassifier(),
         'XGBoost': XGBClassifier()
     }
+    if model_name not in models:
+        raise ValueError(f"Model '{model_name}' not recognized. Available models: {list(models.keys())}")
     pipeline = Pipeline([
         ('preprocessor', preprocess),
         ('classifier', models[model_name])
     ])
+    pipeline.fit(X_train, y_train)
+    return pipeline
 # Streamlit UI
 st.set_page_config(page_title='Cancer Prediction App', layout='wide')
     st.image('https://via.placeholder.com/300x150.png?text=Cancer+Prediction')
     st.markdown("### Select Machine Learning Model")
     model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
+    if st.button("Train Model"):
+        # Load and preprocess data
+        df = load_data()
+        (X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
+        # Train model
+        try:
+            model = train_model(X_train, y_train, preprocess, model_name)
+            accuracy = model.score(X_test, y_test)
+            st.session_state['trained_model'] = model
+            st.session_state['X_train'] = X_train
+            st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
+        except ValueError as e:
+            st.error(f"Error: {e}")
 st.title("🎗️ Cancer Prediction")
+st.markdown("""<style>.big-font {font-size:20px !important;}</style>
+<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
+# Patient input fields
 col1, col2 = st.columns(2)
 with col1:
     age = st.slider("Age", 18, 100, 30)
 with col2:
     smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
+    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low', 'Moderate', 'High'])
+    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
     gender = st.selectbox("Gender", ['Male', "Female"])
     family_history = st.selectbox("Family History", ["No", "Yes"])
 input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
                alcohol_consumption, exercise_frequency, gender, family_history]]
+# Predict cancer presence
 if st.button("Predict Cancer Presence"):
     if 'trained_model' in st.session_state:
         model = st.session_state['trained_model']
         X_train = st.session_state['X_train']
+        # Prepare input data for prediction
         input_df = pd.DataFrame(input_data, columns=X_train.columns)
         input_transformed = model.named_steps['preprocessor'].transform(input_df)
+        # Make prediction
         prediction = model.named_steps['classifier'].predict(input_transformed)
         if prediction[0] == 1: