# Page-header residue kept as a comment so the file parses:
# author "pranayshivagoud"; commit 1ffda74 ("Update app.py"), verified.
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from xgboost import XGBClassifier
# Define columns
# Each list names the input columns routed to one preprocessing branch below.
numeric = ['Age', 'Tumor_Size']
ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
nominal = ['Gender', 'Family_History', 'Smoking_History']

# Define preprocessing pipelines
# Numeric branch: fill missing values with the column mean, then standardize.
numeric_preprocess = Pipeline([
    ('Mean Imputation', SimpleImputer(strategy='mean')),
    ('Scaling', StandardScaler()),
])

# Ordinal branch: fill missing values with the most frequent label, then map
# labels to integer codes.
# A label that was never seen during fit is encoded as -1 instead of raising,
# so a UI option that is absent from the training data cannot crash predict().
# NOTE(review): no explicit `categories` order is supplied, so the integer
# codes follow the encoder's default (sorted) label order rather than a
# severity order -- confirm this is acceptable for the model.
ordinal_preprocess = Pipeline([
    ('Mode Imputation', SimpleImputer(strategy='most_frequent')),
    ('Encoding', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
])

# Nominal branch: fill missing values with the most frequent label, then
# one-hot encode into a dense array.
# handle_unknown='ignore' encodes an unseen label as an all-zero row for that
# feature instead of raising at prediction time.
nominal_preprocess = Pipeline([
    ('Mode Imputation', SimpleImputer(strategy='most_frequent')),
    ('Encoding', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
])

# Route each column group to its branch; columns not listed above are passed
# through unchanged.
preprocess = ColumnTransformer([
    ('Numerical Transformer', numeric_preprocess, numeric),
    ('Ordinal Transformer', ordinal_preprocess, ordinal),
    ('Nominal Transformer', nominal_preprocess, nominal),
], remainder='passthrough')
# Load dataset and train model
# Streamlit re-executes this script on every widget interaction. Without
# caching, the CSV would be re-read and the classifier refit on each rerun, so
# the work is wrapped in a cached function that runs once per server process.
# show_spinner=False keeps the cached call from emitting a UI element before
# st.set_page_config() runs further down the script.
@st.cache_resource(show_spinner=False)
def _load_and_train():
    """Read the training CSV, fit the full pipeline, and return the results.

    Returns:
        A ``(data, features, target, model)`` tuple: the raw DataFrame, the
        DataFrame without the 'Cancer_Present' column, the 'Cancer_Present'
        column, and the fitted preprocessing + XGBClassifier pipeline.
    """
    data = pd.read_csv("cancer_prediction_data (2).csv")
    features = data.drop('Cancer_Present', axis=1)
    target = data['Cancer_Present']
    model = Pipeline([
        ('Data Preprocessing', preprocess),
        ('Algorithm', XGBClassifier()),
    ])
    model.fit(features, target)
    return data, features, target, model


# Keep the original module-level names so the rest of the script is unchanged.
df, x, y, xgb = _load_and_train()
# Streamlit UI
st.set_page_config(page_title="Cancer Prediction App", page_icon="🩺", layout="centered")
st.title("🔬 Cancer Prediction App")
st.write("This application predicts the likelihood of having cancer based on various health parameters.")

# User input fields
# NOTE(review): the emoji in these labels were restored from mis-decoded UTF-8
# text; the ruler, numbers and magnifying-glass glyphs are best-effort
# reconstructions -- confirm against the intended icons.
age = st.number_input("📅 Age", min_value=1, max_value=120, value=30)
gender = st.selectbox("⚧️ Gender", ["Male", "Female"])
tumor_size = st.number_input("📏 Tumor Size (0-10)", min_value=0.0, max_value=10.0, value=5.0)
tumor_grade = st.selectbox("🔢 Tumor Grade", ["Low", "Medium", "High"])
symptoms_severity = st.selectbox("🤕 Symptoms Severity", ["Mild", "Moderate", "Severe"])
family_history = st.selectbox("👨‍👩‍👧 Family History", ["Yes", "No"])
smoking_history = st.selectbox("🚬 Smoking History", ["Non-Smoker", "Former Smoker", "Current Smoker"])
alcohol_consumption = st.selectbox("🍷 Alcohol Consumption", ["Low", "Moderate", "High"])
exercise_frequency = st.selectbox("🏋️ Exercise Frequency", ["Never", "Occasionally", "Rarely", "Regularly"])

# Predict button
if st.button("🔍 Predict Cancer"):
    # Key every value by its column name so the row cannot be mislabelled if
    # the CSV column order differs from the widget order.
    row = {
        'Age': age,
        'Gender': gender,
        'Tumor_Size': tumor_size,
        'Tumor_Grade': tumor_grade,
        'Symptoms_Severity': symptoms_severity,
        'Family_History': family_history,
        'Smoking_History': smoking_history,
        'Alcohol_Consumption': alcohol_consumption,
        'Exercise_Frequency': exercise_frequency,
    }
    # Selecting by x.columns reproduces the training column order and raises
    # a KeyError if the training data has a column this form does not collect.
    input_data = pd.DataFrame([row])[list(x.columns)]
    prediction = xgb.predict(input_data)[0]
    if prediction == 0:
        st.success("✅ Not a Cancer")
    else:
        st.error("⚠️ Cancer Detected! Please consult a doctor.")

# Footer shown on every run.
# NOTE(review): original indentation was lost; top-level placement is assumed.
st.write("🔍 This model is trained using XGBoost and provides predictions based on health data.")