File size: 4,863 Bytes
9089a18
 
 
 
 
 
 
 
 
 
 
 
 
28d1dfd
b990dea
28d1dfd
 
b990dea
28d1dfd
 
 
 
 
9089a18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e882ed4
9089a18
 
 
 
 
 
 
 
 
df7caa7
28d1dfd
9089a18
 
 
 
28d1dfd
 
9089a18
28d1dfd
9089a18
 
28d1dfd
9089a18
 
 
28d1dfd
 
 
 
 
9089a18
28d1dfd
 
 
 
 
9089a18
 
 
 
b990dea
9089a18
 
 
 
 
28d1dfd
 
9089a18
e882ed4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Set dark theme and page config.
# The page icon is the stethoscope emoji; it was previously stored as a
# mis-decoded (mojibake) byte sequence, which is not a valid emoji string.
st.set_page_config(page_title="Cancer Prediction", page_icon="🩺", layout="centered")

# Inject custom CSS; unsafe_allow_html is required for the <style> tag to be
# emitted as HTML instead of being escaped.
st.markdown("""
    <style>
    body { background-color: #121212; color: white; }
    .stButton>button { background-color: #ff4b4b; color: white; }
    .stSelectbox, .stRadio, .stNumberInput, .stSlider { color: white; }
    </style>
""", unsafe_allow_html=True)

# Load dataset
def load_data():
    """Read the cancer prediction dataset into a DataFrame.

    The CSV is looked up by relative path, i.e. in the current working
    directory of the process.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file.
    """
    return pd.read_csv('cancer_prediction_data (2).csv')

# Data Preprocessing
def preprocess_data(df):
    """Split the dataset and build the (unfitted) feature preprocessor.

    Args:
        df: DataFrame containing the feature columns and the
            'Cancer_Present' target column.

    Returns:
        tuple: (X_train, X_test, y_train, y_test, preprocess) where the first
        four items come from an 80/20 train/test split with a fixed seed and
        `preprocess` is a ColumnTransformer that has not been fitted yet.
    """
    numeric_cols = ['Age', 'Tumor_Size']
    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric features: fill gaps with the column mean, then standardize.
    numeric_steps = [
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]

    # Ordinal features: fill gaps with the mode; categories unseen during
    # fit are encoded as -1 instead of raising.
    ordinal_steps = [
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ]

    # Nominal features: fill gaps with the mode; categories unseen during
    # fit produce an all-zero one-hot row.
    nominal_steps = [
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ]

    # Any column not listed above is forwarded untouched.
    column_transformer = ColumnTransformer(
        [
            ('num', Pipeline(numeric_steps), numeric_cols),
            ('ord', Pipeline(ordinal_steps), ordinal_cols),
            ('nom', Pipeline(nominal_steps), nominal_cols),
        ],
        remainder='passthrough',
    )

    target = df['Cancer_Present']
    features = df.drop(columns='Cancer_Present')
    splits = train_test_split(features, target, test_size=0.2, random_state=23)
    return (*splits, column_transformer)

# Train Models
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen model.

    Args:
        X_train: training features.
        y_train: training labels.
        preprocess: transformer used as the pipeline's 'preprocessor' step.
        model_name: one of 'Decision Tree', 'SVM', 'Logistic Regression',
            'KNN'.

    Returns:
        Pipeline: the fitted pipeline with steps 'preprocessor' and
        'classifier'.

    Raises:
        KeyError: if `model_name` is not one of the supported names.
    """
    # Each classifier is created with its default hyperparameters.
    classifier_types = {
        'Decision Tree': DecisionTreeClassifier,
        'SVM': SVC,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
    }
    classifier = classifier_types[model_name]()

    fitted = Pipeline(steps=[('preprocessor', preprocess), ('classifier', classifier)])
    fitted.fit(X_train, y_train)
    return fitted

# Streamlit UI
# The fitted model is kept in st.session_state so that the "Predict" button
# can use the model produced by an earlier "Train" button press.
st.markdown("<h1 style='text-align: center; color: red;'>🩺 Cancer Prediction 🩺</h1>", unsafe_allow_html=True)
st.write("An intelligent system for early cancer detection using machine learning.")

df = load_data()
X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

st.subheader("🔬 Choose a Machine Learning Model")
# The subheader above is the visible caption, so the widget's own label is
# collapsed rather than left empty (empty labels are discouraged by Streamlit).
model_name = st.selectbox(
    "Machine learning model",
    ['Decision Tree', 'SVM', 'Logistic Regression', 'KNN'],
    label_visibility="collapsed",
)

if st.button("🚀 Train & Evaluate Model"):
    model = train_model(X_train, y_train, preprocess, model_name)
    accuracy = model.score(X_test, y_test)
    st.success(f"Model trained successfully! Accuracy: {accuracy:.2f}")
    st.session_state['trained_model'] = model

# Prediction Section
st.markdown("<h2 style='color: red;'>🔍 Make a Prediction</h2>", unsafe_allow_html=True)
age = st.number_input("📅 Age", min_value=18, max_value=100, value=30)
tumor_size = st.number_input("🧬 Tumor Size (cm)", min_value=1.0, max_value=10.0, value=5.0)
smoking_history = st.radio("🚬 Smoking History", ['Non-Smoker', 'Former Smoker', 'Current Smoker'])
alcohol_consumption = st.selectbox("🍷 Alcohol Consumption", ['None', 'Low', 'Moderate', 'High'])

# NOTE(review): the option values below are assumed to match the values (and
# dtypes) present in the training CSV; unseen values are encoded as
# unknown/all-zero by the preprocessor - confirm against the dataset.
tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
exercise_frequency = st.selectbox("Exercise Frequency", ['Never', 'Rarely', 'Occasionally', 'Regularly'])
gender = st.radio("Gender", ['Male', 'Female'])
family_history = st.radio("Family History", ['No', 'Yes'])

# Key every value by its feature name so the row does not depend on the
# column order of the CSV (a positional list silently mislabels values
# whenever that order differs from the order written here).
input_data = [{
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}]

if st.button("🔮 Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        # Align the single-row frame with the column layout used for training.
        input_df = pd.DataFrame(input_data).reindex(columns=X_train.columns)
        # The pipeline applies the fitted preprocessor before the classifier.
        prediction = model.predict(input_df)

        st.write("Cancer Prediction:", "✅ Positive" if prediction[0] == 1 else "❌ Negative")
    else:
        st.error("Please train a model first!")