import streamlit as st
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Set dark theme and page config
st.set_page_config(page_title="Cancer Prediction", page_icon="🩺", layout="centered")
# NOTE(review): the markdown body below is blank, so no theme CSS is actually
# injected here despite the comment above.
st.markdown(""" """, unsafe_allow_html=True)

# Single registry of selectable classifiers. Both the model selectbox and
# train_model() read from it, so the two can never drift apart. Values are
# the estimator classes; an instance is created only for the chosen model.
MODEL_FACTORIES = {
    'Decision Tree': DecisionTreeClassifier,
    'SVM': SVC,
    'Logistic Regression': LogisticRegression,
    'KNN': KNeighborsClassifier,
}


# Load dataset
def load_data():
    """Read the cancer prediction CSV from the working directory.

    Returns:
        A DataFrame with the raw contents of 'cancer_prediction_data (2).csv'.
    """
    df = pd.read_csv('cancer_prediction_data (2).csv')
    return df


# Data Preprocessing
def preprocess_data(df):
    """Build the (unfitted) preprocessing transformer and split the data.

    Numeric columns are mean-imputed and standardized, ordinal columns are
    mode-imputed and ordinal-encoded (unknown categories map to -1), and
    nominal columns are mode-imputed and one-hot encoded (unknown categories
    are ignored). Any other column is passed through unchanged.

    Args:
        df: DataFrame containing the feature columns and 'Cancer_Present'.

    Returns:
        A tuple (X_train, X_test, y_train, y_test, preprocess) where the
        split is 80/20 with random_state=23 and preprocess is the unfitted
        ColumnTransformer.
    """
    numeric = ['Age', 'Tumor_Size']
    ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal = ['Gender', 'Family_History', 'Smoking_History']

    numeric_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
    ordinal_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])
    nominal_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    preprocess = ColumnTransformer([
        ('num', numeric_preprocess, numeric),
        ('ord', ordinal_preprocess, ordinal),
        ('nom', nominal_preprocess, nominal),
    ], remainder='passthrough')

    X = df.drop('Cancer_Present', axis=1)
    y = df['Cancer_Present']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)
    return X_train, X_test, y_train, y_test, preprocess


# Train Models
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen model.

    Args:
        X_train: Training feature DataFrame.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's 'preprocessor' step.
        model_name: One of the keys of MODEL_FACTORIES.

    Returns:
        The fitted Pipeline with steps 'preprocessor' and 'classifier'.

    Raises:
        KeyError: If model_name is not a known model.
    """
    model = MODEL_FACTORIES[model_name]()
    pipeline = Pipeline([
        ('preprocessor', preprocess),
        ('classifier', model),
    ])
    pipeline.fit(X_train, y_train)
    return pipeline


# Streamlit UI
st.markdown("""

🩺 Cancer Prediction 🩺

""", unsafe_allow_html=True)
st.write("An intelligent system for early cancer detection using machine learning.")

df = load_data()
X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

st.subheader("🔬 Choose a Machine Learning Model")
model_name = st.selectbox("", list(MODEL_FACTORIES))

if st.button("🚀 Train & Evaluate Model"):
    model = train_model(X_train, y_train, preprocess, model_name)
    accuracy = model.score(X_test, y_test)
    st.success(f"Model trained successfully! Accuracy: {accuracy:.2f}")
    # Keep the fitted pipeline across Streamlit reruns so the prediction
    # button below can use it.
    st.session_state['trained_model'] = model

# Prediction Section
st.markdown("""

🔍 Make a Prediction

""", unsafe_allow_html=True)

age = st.number_input("📅 Age", min_value=18, max_value=100, value=30)
tumor_size = st.number_input("🧬 Tumor Size (cm)", min_value=1.0, max_value=10.0, value=5.0)
smoking_history = st.radio("🚬 Smoking History", ['Non-Smoker', 'Former Smoker', 'Current Smoker'])
# NOTE(review): pandas.read_csv treats the literal string "None" as a missing
# value by default, so the training data may never contain this category --
# confirm against the CSV.
alcohol_consumption = st.selectbox("🍷 Alcohol Consumption", ['None', 'Low', 'Moderate', 'High'])
tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
exercise_frequency = st.selectbox("Exercise Frequency", ['Never', 'Rarely', 'Occasionally', 'Regularly'])
gender = st.radio("Gender", ['Male', 'Female'])
family_history = st.radio("Family History", ['No', 'Yes'])

# Key every input by its training column name instead of relying on a
# positional list: a positional row silently feeds values into the wrong
# features whenever the CSV column order differs from the hand-written order.
input_record = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("🔮 Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        missing_columns = [col for col in X_train.columns if col not in input_record]
        if missing_columns:
            # The dataset has feature columns this form does not collect.
            st.error(f"Cannot predict: no input provided for columns {missing_columns}")
        else:
            # Align to the exact column order the pipeline was fitted on.
            input_df = pd.DataFrame([input_record])[list(X_train.columns)]
            # Pipeline.predict runs the fitted preprocessor and then the classifier.
            prediction = model.predict(input_df)
            st.write("Cancer Prediction:", "✅ Positive" if prediction[0] == 1 else "❌ Negative")
    else:
        st.error("Please train a model first!")