Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.ensemble import RandomForestClassifier | |
| from xgboost import XGBClassifier | |
| # Load dataset | |
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer-prediction dataset from a CSV file.

    Args:
        path: Location of the CSV file. The default is the file name this
            function previously hard-coded, so a bare ``load_data()`` call
            reads the same file as before.

    Returns:
        pandas.DataFrame: The parsed contents of the CSV file.
    """
    return pd.read_csv(path)
| # Data Preprocessing | |
def preprocess_data(df):
    """Split the dataset and build the (unfitted) feature preprocessor.

    Args:
        df: DataFrame holding the feature columns plus the
            ``Cancer_Present`` target column.

    Returns:
        A pair ``(split, preprocess)``:

        * ``split`` is what ``train_test_split`` returns for an 80/20 split
          with ``random_state=23``: ``x_train, x_test, y_train, y_test``.
        * ``preprocess`` is a ``ColumnTransformer`` that has not been fitted.
    """
    numeric_cols = ['Age', 'Tumor_Size']
    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric features: fill gaps with the column mean, then standardise.
    numeric_branch = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])

    # Ordinal features: fill gaps with the mode; categories not seen during
    # fitting are encoded as -1 instead of raising.
    ordinal_branch = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])

    # Nominal features: fill gaps with the mode, then one-hot encode into a
    # dense array; categories not seen during fitting are ignored.
    nominal_branch = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    # Columns not listed in any branch are passed through unchanged.
    preprocess = ColumnTransformer(
        [
            ('num', numeric_branch, numeric_cols),
            ('ord', ordinal_branch, ordinal_cols),
            ('nom', nominal_branch, nominal_cols),
        ],
        remainder='passthrough',
    )

    features = df.drop(columns='Cancer_Present')
    target = df['Cancer_Present']
    split = train_test_split(features, target, test_size=0.2, random_state=23)
    return split, preprocess
| # Train Model | |
def train_model(x_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the selected model.

    Args:
        x_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's ``'preprocessor'`` step.
        model_name: One of ``'Decision Tree'``, ``'Logistic Regression'``,
            ``'KNN'``, ``'Random Forest'`` or ``'XGBoost'``.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # All classifiers are created with their library default settings.
    classifiers = {
        'Decision Tree': DecisionTreeClassifier(),
        'Logistic Regression': LogisticRegression(),
        'KNN': KNeighborsClassifier(),
        'Random Forest': RandomForestClassifier(),
        'XGBoost': XGBClassifier(),
    }
    chosen = classifiers[model_name]

    steps = [
        ('preprocessor', preprocess),
        ('classifier', chosen),
    ]
    fitted = Pipeline(steps=steps)
    fitted.fit(x_train, y_train)
    return fitted
| # Streamlit UI | |
st.set_page_config(page_title='Cancer Prediction App', layout='wide')

# --- Sidebar: choose a model and train it -----------------------------------
with st.sidebar:
    st.markdown("### Select Machine Learning Model")
    model_name = st.radio(
        "Choose a Model",
        ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'],
    )
    if st.button("Train Model"):
        df = load_data()
        (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
        model = train_model(x_train, y_train, preprocess, model_name)
        accuracy = model.score(x_test, y_test)
        # Keep the fitted pipeline and the training frame in session state so
        # the prediction button below can read them back.
        st.session_state['trained_model'] = model
        st.session_state['x_train'] = x_train
        st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")

# --- Main page: patient form -------------------------------------------------
# NOTE(review): the title emoji was mis-encoded in the original text; it is
# assumed to have been the awareness ribbon — confirm.
st.title("🎗️ Cancer Prediction")
st.markdown("""<style>.big-font {font-size:20px !important;}</style>
<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)

col1, col2 = st.columns(2)
with col1:
    age = st.slider("Age", 18, 100, 30)
    tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0)
    tumor_grade = st.selectbox("Tumor Grade", ['High', 'Low', 'Medium'])
    symptoms_severity = st.selectbox("Symptoms Severity", ['Mild', 'Moderate', 'Severe'])
with col2:
    smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low', 'Moderate', 'High'])
    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
    gender = st.selectbox("Gender", [0, 1])
    family_history = st.selectbox("Family History", ["No", "Yes"])

# Key every form value by its dataset column name. Mapping by name (rather
# than by position) keeps each value attached to the right feature no matter
# how the columns are ordered in the training CSV.
patient = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

# --- Prediction ---------------------------------------------------------------
if st.button("Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        x_train = st.session_state['x_train']

        # One-row frame laid out in the training column order. Selecting by
        # name raises KeyError if the training data has a column this form
        # does not collect, instead of silently predicting on bad input.
        # Building from a dict lets pandas infer a numeric dtype for the
        # slider values, so no separate numeric conversion is needed.
        input_df = pd.DataFrame([patient])[list(x_train.columns)]

        # The fitted pipeline applies the preprocessor and then the
        # classifier, so a single predict call covers both steps.
        prediction = model.predict(input_df)

        if prediction[0] == 1:
            st.markdown("<h3 style='color: red;'>Cancer Prediction: Positive 🟥</h3>", unsafe_allow_html=True)
            st.write("Unfortunately, the model predicts the presence of cancer. Please consult a doctor for further advice.")
        else:
            st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
            st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
    else:
        st.error("Please train a model first!")