import streamlit as st import pandas as pd from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder from sklearn.impute import SimpleImputer from sklearn.compose import ColumnTransformer from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier from sklearn.ensemble import RandomForestClassifier from xgboost import XGBClassifier # Load dataset def load_data(): return pd.read_csv('cancer_prediction_data (2).csv') # Data Preprocessing def preprocess_data(df): numeric = ['Age', 'Tumor_Size'] ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency'] nominal = ['Gender', 'Family_History', 'Smoking_History'] preprocess = ColumnTransformer([ ('num', Pipeline([ ('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler()) ]), numeric), ('ord', Pipeline([ ('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)) ]), ordinal), ('nom', Pipeline([ ('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')) ]), nominal) ], remainder='passthrough') x = df.drop('Cancer_Present', axis=1) y = df['Cancer_Present'] return train_test_split(x, y, test_size=0.2, random_state=23), preprocess # Train Model def train_model(x_train, y_train, preprocess, model_name): models = { 'Decision Tree': DecisionTreeClassifier(), 'Logistic Regression': LogisticRegression(), 'KNN': KNeighborsClassifier(), 'Random Forest': RandomForestClassifier(), 'XGBoost': XGBClassifier() } pipeline = Pipeline([ ('preprocessor', preprocess), ('classifier', models[model_name]) ]) pipeline.fit(x_train, y_train) return pipeline # Streamlit UI st.set_page_config(page_title='Cancer Prediction App', layout='wide') with st.sidebar: st.markdown("### Select Machine Learning Model") model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost']) if st.button("Train Model"): df = load_data() (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df) model = train_model(x_train, y_train, preprocess, model_name) accuracy = model.score(x_test, y_test) st.session_state['trained_model'] = model st.session_state['x_train'] = x_train st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}") st.title("🎗️ Cancer Prediction") st.markdown("""
Provide patient details below to predict cancer presence:
""", unsafe_allow_html=True) col1, col2 = st.columns(2) with col1: age = st.slider("Age", 18, 100, 30) tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0) tumor_grade = st.selectbox("Tumor Grade", ['High', 'Low', 'Medium']) symptoms_severity = st.selectbox("Symptoms Severity", ['Mild', 'Moderate', 'Severe']) with col2: smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker']) alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High']) exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never']) gender = st.selectbox("Gender", [0, 1]) family_history = st.selectbox("Family History", ["No", "Yes"]) input_data = [[age, tumor_size, tumor_grade, symptoms_severity, smoking_history, alcohol_consumption, exercise_frequency, gender, family_history]] if st.button("Predict Cancer Presence"): if 'trained_model' in st.session_state: model = st.session_state['trained_model'] x_train = st.session_state['x_train'] # Create DataFrame for input input_df = pd.DataFrame(input_data, columns=x_train.columns) # Convert numeric inputs explicitly to float for col in ['Age', 'Tumor_Size']: input_df[col] = pd.to_numeric(input_df[col], errors='coerce') # Apply preprocessing input_transformed = model.named_steps['preprocessor'].transform(input_df) # Make prediction prediction = model.named_steps['classifier'].predict(input_transformed) if prediction[0] == 1: st.markdown("