Spaces:

varshitha22
/

Cancer_Prediction

Sleeping

App Files Files Community

varshitha22 committed on Feb 25, 2025

Commit

a41dba3

verified ·

1 Parent(s): b4a2e0d

Create cancer.py

Browse files

Files changed (1) hide show

cancer.py +106 -0

cancer.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.compose import ColumnTransformer
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestClassifier
+from xgboost import XGBClassifier
+# Load dataset
+def load_data():
+    return pd.read_csv('cancer_prediction_data (2).csv')
+# Data Preprocessing
+def preprocess_data(df):
+    numeric = ['Age', 'Tumor_Size']
+    ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
+    nominal = ['Gender', 'Family_History', 'Smoking_History']
+    preprocess = ColumnTransformer([
+        ('num', Pipeline([
+            ('imputer', SimpleImputer(strategy='mean')),
+            ('scaler', StandardScaler())
+        ]), numeric),
+        ('ord', Pipeline([
+            ('imputer', SimpleImputer(strategy='most_frequent')),
+            ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1))
+        ]), ordinal),
+        ('nom', Pipeline([
+            ('imputer', SimpleImputer(strategy='most_frequent')),
+            ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
+        ]), nominal)
+    ], remainder='passthrough')
+    X = df.drop('Cancer_Present', axis=1)
+    y = df['Cancer_Present']
+    return train_test_split(X, y, test_size=0.2, random_state=23), preprocess
+# Train Model
+def train_model(X_train, y_train, preprocess, model_name):
+    models = {
+        'Decision Tree': DecisionTreeClassifier(),
+        'Logistic Regression': LogisticRegression(),
+        'KNN': KNeighborsClassifier(),
+        'Random Forest': RandomForestClassifier(),
+        'XGBoost': XGBClassifier()
+    }
+    pipeline = Pipeline([
+        ('preprocessor', preprocess),
+        ('classifier', models[model_name])
+    ])
+    pipeline.fit(X_train, y_train)
+    return pipeline
+# Streamlit UI
+st.set_page_config(page_title='Cancer Prediction App', layout='wide')
+with st.sidebar:
+    st.markdown("### Select Machine Learning Model")
+    model_name = st.radio("Choose a Model", ['Decision Tree','Logistic Regression', 'KNN','Random Forest', 'XGBoost']])
+    if st.button("Train Model"):
+        df = load_data()
+        (X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
+        model = train_model(X_train, y_train, preprocess, model_name)
+        accuracy = model.score(X_test, y_test)
+        st.session_state['trained_model'] = model
+        st.session_state['X_train'] = X_train
+        st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")
+st.title("🎗️ Cancer Prediction Using Machine Learning 🎗️")
+st.markdown("""<style>.big-font {font-size:20px !important;}</style>
+<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)
+col1, col2 = st.columns(2)
+with col1:
+    age = st.slider("Age", 18, 100, 30)
+    tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0)
+    tumor_grade = st.radio("Tumor Grade", ['High', 'Low', 'Medium'])
+    symptoms_severity = st.radio("Symptoms Severity", ['Mild', 'Moderate', 'Severe'])
+with col2:
+    smoking_history = st.radio("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
+    alcohol_consumption = st.radio("Alcohol Consumption", ['Low', 'Moderate', 'High'])
+    exercise_frequency = st.radio("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
+    gender = st.radio("Gender", ['Male', "Female"])
+    family_history = st.radio("Family History", ["No", "Yes"])
+input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history,
+               alcohol_consumption, exercise_frequency, gender, family_history]]
+if st.button("Predict Cancer Presence"):
+    if 'trained_model' in st.session_state:
+        model = st.session_state['trained_model']
+        X_train = st.session_state['X_train']
+        input_df = pd.DataFrame(input_data, columns=X_train.columns)
+        input_transformed = model.named_steps['preprocessor'].transform(input_df)
+        prediction = model.named_steps['classifier'].predict(input_transformed)
+        st.markdown(f"*Prediction Result: {'🟥 Positive' if prediction[0] == 1 else '🟩 Negative'}*")
+    else:
+        st.error("Please train a model first!")