Cancer_Prediction / cancer.py
varshitha22's picture
Update cancer.py
d287fb3 verified
import streamlit as st
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
# Load dataset
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer dataset from a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to the dataset file name
            this app was written against, so existing ``load_data()`` calls
            keep working; pass another path to load a different file.

    Returns:
        pandas.DataFrame: The parsed contents of the CSV file.
    """
    return pd.read_csv(path)
# Data Preprocessing
def preprocess_data(df):
    """Split the dataset and build the (unfitted) feature transformer.

    Args:
        df: DataFrame containing the feature columns and the
            ``Cancer_Present`` target column.

    Returns:
        A 2-tuple ``(split, preprocess)``. ``split`` is the
        ``[x_train, x_test, y_train, y_test]`` list returned by
        ``train_test_split`` (20% test, ``random_state=23``) and
        ``preprocess`` is the unfitted ``ColumnTransformer``.
    """
    numeric_cols = ['Age', 'Tumor_Size']
    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric features: fill gaps with the column mean, then standardize.
    numeric_steps = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])

    # Ordinal features: fill gaps with the mode, then map levels to integers.
    # Levels not seen while fitting are encoded as -1 instead of raising.
    # NOTE(review): no explicit `categories` are given, so the integer order
    # is whatever the encoder derives from the data -- confirm that matches
    # the intended Low < Medium < High style ranking.
    ordinal_steps = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])

    # Nominal features: fill gaps with the mode, then one-hot encode densely.
    nominal_steps = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    # Columns not listed above are passed through unchanged.
    preprocess = ColumnTransformer(
        transformers=[
            ('num', numeric_steps, numeric_cols),
            ('ord', ordinal_steps, ordinal_cols),
            ('nom', nominal_steps, nominal_cols),
        ],
        remainder='passthrough',
    )

    target = df['Cancer_Present']
    features = df.drop(columns='Cancer_Present')
    split = train_test_split(features, target, test_size=0.2, random_state=23)
    return split, preprocess
# Train Model
def train_model(x_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen model.

    Args:
        x_train: Training feature DataFrame.
        y_train: Training target values.
        preprocess: Unfitted transformer used as the 'preprocessor' step.
        model_name: One of 'Decision Tree', 'Logistic Regression', 'KNN',
            'Random Forest' or 'XGBoost'.

    Returns:
        The fitted ``Pipeline`` with steps 'preprocessor' and 'classifier'.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map each display name to its estimator class; every estimator is
    # created with its default hyper-parameters.
    estimator_types = {
        'Decision Tree': DecisionTreeClassifier,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
        'Random Forest': RandomForestClassifier,
        'XGBoost': XGBClassifier,
    }
    classifier = estimator_types[model_name]()

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocess),
        ('classifier', classifier),
    ])
    # Pipeline.fit returns the pipeline itself.
    return pipeline.fit(x_train, y_train)
# Streamlit UI
# Configure the page before rendering any widgets.
st.set_page_config(page_title='Cancer Prediction App', layout='wide')

# Sidebar: choose a classifier and train it on demand.
with st.sidebar:
    st.markdown("### Select Machine Learning Model")
    model_name = st.radio(
        "Choose a Model",
        ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'],
    )
    if st.button("Train Model"):
        df = load_data()
        (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
        model = train_model(x_train, y_train, preprocess, model_name)
        accuracy = model.score(x_test, y_test)
        # Keep the fitted pipeline and the training features in session state
        # so the prediction branch below can reuse them on a later rerun.
        st.session_state['trained_model'] = model
        st.session_state['x_train'] = x_train
        st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")

# "\U0001F397\uFE0F" is the reminder-ribbon emoji; it is written as an escape
# because the literal had been corrupted by a bad re-encoding of the file.
st.title("\U0001F397\uFE0F Cancer Prediction")
st.markdown("""<style>.big-font {font-size:20px !important;}</style>
<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)

# Patient form, laid out in two columns.
col1, col2 = st.columns(2)
with col1:
    age = st.slider("Age", 18, 100, 30)
    tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0)
    tumor_grade = st.selectbox("Tumor Grade", ['High', 'Low', 'Medium'])
    symptoms_severity = st.selectbox("Symptoms Severity", ['Mild', 'Moderate', 'Severe'])
with col2:
    smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low', 'Moderate', 'High'])
    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
    gender = st.selectbox("Gender", [0, 1])
    family_history = st.selectbox("Family History", ["No", "Yes"])

# Key every form value by its feature name. Labelling a positional list with
# the training columns would silently put values under the wrong features
# whenever the CSV column order differs from the order used here.
patient_record = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    if 'trained_model' not in st.session_state:
        st.error("Please train a model first!")
    else:
        model = st.session_state['trained_model']
        feature_columns = list(st.session_state['x_train'].columns)

        missing_columns = [col for col in feature_columns if col not in patient_record]
        if missing_columns:
            # The training data has features this form never collects, so a
            # prediction would be built from incomplete input.
            st.error(
                "The trained model expects columns that this form does not collect: "
                + ", ".join(map(str, missing_columns))
            )
        else:
            # One-row frame arranged in the same column order used for training.
            # The slider values are already numeric, so no conversion is needed.
            input_df = pd.DataFrame([patient_record])[feature_columns]

            # The pipeline applies the fitted preprocessor and then the classifier.
            prediction = model.predict(input_df)

            if prediction[0] == 1:
                # "\U0001F7E5" is the red-square emoji.
                st.markdown("<h3 style='color: red;'>Cancer Prediction: Positive \U0001F7E5</h3>", unsafe_allow_html=True)
                st.write("Unfortunately, the model predicts the presence of cancer. Please consult a doctor for further advice.")
            else:
                # "\U0001F7E9" is the green-square emoji.
                st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative \U0001F7E9</h3>", unsafe_allow_html=True)
                st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")