varshitha22 committed on
Commit
a41dba3
·
verified ·
1 Parent(s): b4a2e0d

Create cancer.py

Browse files
Files changed (1) hide show
  1. cancer.py +106 -0
cancer.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.pipeline import Pipeline
5
+ from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
6
+ from sklearn.impute import SimpleImputer
7
+ from sklearn.compose import ColumnTransformer
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.linear_model import LogisticRegression
11
+ from sklearn.neighbors import KNeighborsClassifier
12
+ from sklearn.ensemble import RandomForestClassifier
13
+ from xgboost import XGBClassifier
14
+ # Load dataset
15
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer-prediction dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the dataset file name
            the app was originally written against, so existing calls with
            no argument keep working.

    Returns:
        A pandas DataFrame with the contents of the CSV file.

    Raises:
        FileNotFoundError: If no file exists at ``path``.
    """
    return pd.read_csv(path)
17
+
18
+ # Data Preprocessing
19
def preprocess_data(df):
    """Split the dataset and build the (unfitted) feature preprocessor.

    Args:
        df: DataFrame containing the feature columns named below and the
            ``Cancer_Present`` target column.

    Returns:
        A 2-tuple ``(splits, preprocess)`` where ``splits`` is the
        ``[X_train, X_test, y_train, y_test]`` result of an 80/20
        ``train_test_split`` with ``random_state=23`` and ``preprocess`` is
        the unfitted ``ColumnTransformer``.
    """
    numeric_cols = ['Age', 'Tumor_Size']
    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric features: fill gaps with the column mean, then standardise.
    numeric_steps = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])

    # Ordinal features: fill gaps with the mode, then map categories to
    # integers; categories not seen during fit are encoded as -1.
    # NOTE(review): no explicit `categories` are passed, so the integer order
    # comes from the data rather than a domain ordering - confirm intended.
    ordinal_steps = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])

    # Nominal features: fill gaps with the mode, then one-hot encode;
    # categories not seen during fit are ignored.
    nominal_steps = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    # Columns not listed above are passed through untouched.
    preprocess = ColumnTransformer(
        transformers=[
            ('num', numeric_steps, numeric_cols),
            ('ord', ordinal_steps, ordinal_cols),
            ('nom', nominal_steps, nominal_cols),
        ],
        remainder='passthrough',
    )

    target = df['Cancer_Present']
    features = df.drop('Cancer_Present', axis=1)
    splits = train_test_split(features, target, test_size=0.2, random_state=23)
    return splits, preprocess
42
+
43
+ # Train Model
44
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline on the training data.

    Args:
        X_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's ``'preprocessor'`` step.
        model_name: One of ``'Decision Tree'``, ``'Logistic Regression'``,
            ``'KNN'``, ``'Random Forest'`` or ``'XGBoost'``.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map each display name to its estimator class; every estimator is
    # created with its default hyper-parameters.
    estimator_classes = {
        'Decision Tree': DecisionTreeClassifier,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
        'Random Forest': RandomForestClassifier,
        'XGBoost': XGBClassifier,
    }
    classifier = estimator_classes[model_name]()

    fitted = Pipeline(steps=[
        ('preprocessor', preprocess),
        ('classifier', classifier),
    ])
    fitted.fit(X_train, y_train)
    return fitted
58
+
59
+ # Streamlit UI
60
# Page layout: the sidebar picks and trains a model; the main area collects
# patient details and shows the prediction of the most recently trained model.
st.set_page_config(page_title='Cancer Prediction App', layout='wide')

with st.sidebar:
    st.markdown("### Select Machine Learning Model")
    model_name = st.radio(
        "Choose a Model",
        ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'],
    )
    if st.button("Train Model"):
        df = load_data()
        (X_train, X_test, y_train, y_test), preprocess = preprocess_data(df)
        model = train_model(X_train, y_train, preprocess, model_name)
        accuracy = model.score(X_test, y_test)
        # Streamlit reruns the script on every interaction, so the fitted
        # pipeline and the training frame are kept in session state.
        st.session_state['trained_model'] = model
        st.session_state['X_train'] = X_train
        st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")

st.title("🎗️ Cancer Prediction Using Machine Learning 🎗️")

st.markdown("""<style>.big-font {font-size:20px !important;}</style>
<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)

col1, col2 = st.columns(2)
with col1:
    age = st.slider("Age", 18, 100, 30)
    tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0)
    tumor_grade = st.radio("Tumor Grade", ['High', 'Low', 'Medium'])
    symptoms_severity = st.radio("Symptoms Severity", ['Mild', 'Moderate', 'Severe'])

with col2:
    smoking_history = st.radio("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
    alcohol_consumption = st.radio("Alcohol Consumption", ['Low', 'Moderate', 'High'])
    exercise_frequency = st.radio("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly', 'Never'])
    gender = st.radio("Gender", ['Male', "Female"])
    family_history = st.radio("Family History", ["No", "Yes"])

# Key every value by its feature name so the row does not depend on the
# column order of the training CSV.
input_data = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        training_columns = st.session_state['X_train'].columns
        # Align the single-row frame with the columns the pipeline was fit on.
        input_df = pd.DataFrame([input_data]).reindex(columns=training_columns)
        # The pipeline applies the fitted preprocessor before the classifier.
        prediction = model.predict(input_df)
        st.markdown(f"*Prediction Result: {'🟥 Positive' if prediction[0] == 1 else '🟩 Negative'}*")
    else:
        st.error("Please train a model first!")
106
+