# Page header captured with the file: Spaces: Sleeping | File size: 3,997 Bytes | e156807
import streamlit as st
import pandas as pd
import warnings
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
# Ignore every warning category from this point on.
warnings.simplefilter("ignore")

# Page heading, followed by the attribution line rendered as Markdown.
st.title("Cancer Prediction App")
st.markdown("### Powered by Innomatics Research Lab")
# Load dataset
@st.cache_data
def load_data():
    """Read the cancer prediction CSV into a DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit can reuse the
    parsed frame instead of re-reading the file on every call.

    Returns:
        The parsed contents of ``cancer_prediction_data (2).csv``, resolved
        relative to the current working directory.
    """
    csv_path = 'cancer_prediction_data (2).csv'
    frame = pd.read_csv(csv_path)
    return frame
# Column names used throughout the script: the label column and the feature
# columns that the preprocessing transformer selects by name.
target_col = 'Cancer_Present'
numerical_features = ['Age', 'Tumor_Size']
categorical_features = [
    'Gender', 'Tumor_Grade', 'Symptoms_Severity', 'Family_History',
    'Smoking_History', 'Alcohol_Consumption', 'Exercise_Frequency',
]

# A missing CSV is reported in the page, in the same way as the column checks
# below, instead of surfacing as a raw traceback.
try:
    data = load_data()
except FileNotFoundError as exc:
    st.error(f"Dataset file not found: {exc}")
    st.stop()

# Ensure target column exists
if target_col not in data.columns:
    st.error(f"Target column '{target_col}' not found in data!")
    st.stop()

# Ensure every feature column exists, so a schema mismatch is reported here
# rather than failing later while the model is being fitted.
missing_features = [
    col for col in numerical_features + categorical_features
    if col not in data.columns
]
if missing_features:
    st.error(f"Feature column(s) not found in data: {', '.join(missing_features)}")
    st.stop()

# Split features and target
X = data.drop(columns=[target_col])
y = data[target_col]
# Preprocessing pipeline
def create_preprocessing_pipeline():
    """Build the column-wise preprocessing transformer.

    Columns listed in ``numerical_features`` are mean-imputed and then
    standardised. Columns listed in ``categorical_features`` are imputed with
    the most frequent value and then one-hot encoded, with
    ``handle_unknown='ignore'`` set on the encoder.

    Returns:
        A new, unfitted ``ColumnTransformer`` whose two branches are
        registered under the names ``'num'`` and ``'cat'``.
    """
    impute_then_scale = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
    impute_then_encode = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ])
    branches = [
        ('num', impute_then_scale, numerical_features),
        ('cat', impute_then_encode, categorical_features),
    ]
    return ColumnTransformer(transformers=branches)
# Sidebar - Select Algorithm
st.sidebar.header("Model Selection")
algorithm = st.sidebar.radio("Choose an Algorithm", ["SVM", "Random Forest", "Gradient Boosting"])

# Candidate classifiers, keyed by the label shown in the sidebar
model_dict = {
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier()
}

# Hold out 20% of the rows for scoring; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


@st.cache_resource
def _fit_pipeline(algorithm_name):
    """Fit and score the pipeline for one algorithm, once per cache entry.

    Streamlit re-executes the script on every widget interaction. Without
    caching, the model would be refit on each rerun, and the unseeded
    ensemble classifiers could report a different accuracy each time.

    Args:
        algorithm_name: A key of ``model_dict``.

    Returns:
        A ``(fitted_pipeline, test_accuracy)`` tuple, where the accuracy is
        the pipeline's ``score`` on the held-out split.
    """
    fitted = Pipeline([
        ('preprocessing', create_preprocessing_pipeline()),
        ('classifier', model_dict[algorithm_name])
    ])
    fitted.fit(X_train, y_train)
    return fitted, fitted.score(X_test, y_test)


# Train (or fetch the already-trained) model for the selected algorithm
pipeline, accuracy = _fit_pipeline(algorithm)

# Keep the module-level name bound to the fitted transformer inside the pipeline.
preprocess = pipeline.named_steps['preprocessing']

st.sidebar.write(f"**{algorithm} Accuracy:** {accuracy * 100:.2f}%")
# Sidebar - User input
def user_input_features():
    """Collect one patient's feature values from sidebar widgets.

    Two sliders (age, tumor size) and seven select boxes are rendered in the
    sidebar, in the order listed below.

    Returns:
        A single-row DataFrame with one column per widget, named to match
        the feature columns used for training.
    """
    sidebar = st.sidebar
    # Dict values are evaluated top to bottom, so the widgets are created in
    # exactly this order.
    selections = {
        'Age': sidebar.slider("Age", 0, 120, 50),
        'Tumor_Size': sidebar.slider("Tumor Size", 0.0, 100.0, 5.0),
        'Gender': sidebar.selectbox("Gender", ["Male", "Female"]),
        'Tumor_Grade': sidebar.selectbox("Tumor Grade", ["Low", "Medium", "High"]),
        'Symptoms_Severity': sidebar.selectbox("Symptoms Severity", ["Mild", "Moderate", "Severe"]),
        'Family_History': sidebar.selectbox("Family History", ["Yes", "No"]),
        'Smoking_History': sidebar.selectbox("Smoking History", ["Current Smoker", "Non-Smoker"]),
        'Alcohol_Consumption': sidebar.selectbox("Alcohol Consumption", ["Low", "Moderate", "High"]),
        'Exercise_Frequency': sidebar.selectbox("Exercise Frequency", ["Never", "Rarely", "Occasionally", "Often"]),
    }
    # Wrap each value in a one-element list to form a one-row frame.
    return pd.DataFrame({column: [value] for column, value in selections.items()})
# Gather the patient's values from the sidebar and echo them on the page.
st.sidebar.markdown("### Patient Data Input")
input_df = user_input_features()

st.subheader("User Input Data")
st.write(input_df)

# Prediction
if st.button("Predict Cancer Presence"):
    prediction = pipeline.predict(input_df)
    # NOTE(review): assumes the positive class of 'Cancer_Present' is encoded
    # as 1 — confirm against the dataset.
    result = "Cancer Detected" if prediction[0] == 1 else "No Cancer Detected"
    # st.subheader already renders a heading, so the text carries no Markdown
    # "###" prefix (it would otherwise be displayed literally).
    st.subheader(f"Prediction: {result}")
|