import streamlit as st import pandas as pd import numpy as np from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder from sklearn.impute import SimpleImputer from sklearn.compose import ColumnTransformer from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier # Set dark theme and page config st.set_page_config(page_title="Cancer Prediction", page_icon="🩺", layout="centered") st.markdown(""" """, unsafe_allow_html=True) # Load dataset def load_data(): df = pd.read_csv('cancer_prediction_data (2).csv') return df # Data Preprocessing def preprocess_data(df): numeric = ['Age', 'Tumor_Size'] ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency'] nominal = ['Gender', 'Family_History', 'Smoking_History'] numeric_preprocess = Pipeline([ ('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler()) ]) ordinal_preprocess = Pipeline([ ('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)) ]) nominal_preprocess = Pipeline([ ('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')) ]) preprocess = ColumnTransformer([ ('num', numeric_preprocess, numeric), ('ord', ordinal_preprocess, ordinal), ('nom', nominal_preprocess, nominal) ], remainder='passthrough') X = df.drop('Cancer_Present', axis=1) y = df['Cancer_Present'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23) return X_train, X_test, y_train, y_test, preprocess # Train Models def train_model(X_train, y_train, preprocess, model_name): models = { 'Decision Tree': DecisionTreeClassifier(), 'SVM': SVC(), 'Logistic Regression': LogisticRegression(), 'KNN': KNeighborsClassifier() } model = models[model_name] pipeline = Pipeline([ ('preprocessor', preprocess), ('classifier', model) ]) pipeline.fit(X_train, y_train) return pipeline # Streamlit UI st.markdown("