DasariHarshitha committed on
Commit
9089a18
·
verified ·
1 Parent(s): ed5f77e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -107
app.py CHANGED
@@ -1,108 +1,110 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from sklearn.pipeline import Pipeline
5
- from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
6
- from sklearn.impute import SimpleImputer
7
- from sklearn.compose import ColumnTransformer
8
- from sklearn.model_selection import train_test_split
9
- from sklearn.tree import DecisionTreeClassifier
10
- from sklearn.svm import SVC
11
- from sklearn.linear_model import LogisticRegression
12
- from sklearn.neighbors import KNeighborsClassifier
13
-
14
- # Load dataset
15
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer prediction dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the file name the app
            has always used, so existing ``load_data()`` calls are unchanged.

    Returns:
        pandas.DataFrame: The parsed CSV contents.

    Raises:
        FileNotFoundError: If no file exists at ``path`` (raised by pandas).
    """
    return pd.read_csv(path)
18
-
19
- # Data Preprocessing
20
def preprocess_data(df, *, target='Cancer_Present', test_size=0.2, random_state=23):
    """Split the dataset and build the (unfitted) feature preprocessor.

    Args:
        df: DataFrame holding the feature columns named below plus the
            ``target`` column.
        target: Name of the label column. Defaults to ``'Cancer_Present'``.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, preprocess)`` where
        ``preprocess`` is a ``ColumnTransformer`` that has not been fitted.

    Raises:
        KeyError: If ``target`` is not a column of ``df``.
    """
    numeric = ['Age', 'Tumor_Size']
    ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric columns: fill gaps with the column mean, then standardize.
    numeric_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
    # Ordinal columns: fill gaps with the mode; categories not seen during
    # fitting are encoded as -1 instead of raising.
    ordinal_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])
    # Nominal columns: fill gaps with the mode, then one-hot encode;
    # categories not seen during fitting are ignored instead of raising.
    nominal_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    # Columns not listed in any group are forwarded untouched.
    preprocess = ColumnTransformer([
        ('num', numeric_preprocess, numeric),
        ('ord', ordinal_preprocess, ordinal),
        ('nom', nominal_preprocess, nominal),
    ], remainder='passthrough')

    X = df.drop(target, axis=1)
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test, preprocess
50
-
51
- # Train Models
52
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen model.

    Args:
        X_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's first step.
        model_name: One of ``'Decision Tree'``, ``'SVM'``,
            ``'Logistic Regression'`` or ``'KNN'``.

    Returns:
        Pipeline: The fitted pipeline, with steps named ``'preprocessor'``
        and ``'classifier'``.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map names to classes (not instances) so only the requested estimator
    # is ever constructed.
    model_classes = {
        'Decision Tree': DecisionTreeClassifier,
        'SVM': SVC,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
    }
    try:
        model_class = model_classes[model_name]
    except KeyError:
        supported = ', '.join(model_classes)
        raise KeyError(
            f"Unknown model {model_name!r}; expected one of: {supported}"
        ) from None

    pipeline = Pipeline([
        ('preprocessor', preprocess),
        ('classifier', model_class()),
    ])
    pipeline.fit(X_train, y_train)
    return pipeline
67
-
68
- # Streamlit UI
69
st.title("Cancer Prediction Using Machine Learning")
df = load_data()
X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

model_name = st.selectbox("Select Model", ['Decision Tree', 'SVM', 'Logistic Regression', 'KNN'])

if st.button("Train Model"):
    model = train_model(X_train, y_train, preprocess, model_name)
    accuracy = model.score(X_test, y_test)
    st.write(f"Model Accuracy: {accuracy:.2f}")
    # Keep the fitted pipeline in session state so a later "Predict" click
    # (which reruns this script) can still find it.
    st.session_state['trained_model'] = model
    st.success("Model trained successfully!")

# Prediction Section
st.header("Make a Prediction")
age = st.number_input("Age", min_value=18, max_value=100, value=30)
tumor_size = st.number_input("Tumor Size", min_value=1.0, max_value=10.0, value=5.0)
tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
smoking_history = st.selectbox("Smoking History", [0, 1, 2])
alcohol_consumption = st.selectbox("Alcohol Consumption", [0, 1, 2, 3])
exercise_frequency = st.selectbox("Exercise Frequency", [0, 1, 2, 3])
gender = st.selectbox("Gender", [0, 1])
family_history = st.selectbox("Family History", [0, 1])

# Key every value by its column name so the row cannot be mislabelled when
# the CSV's column order differs from the order of the widgets above.
input_row = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        missing = [col for col in X_train.columns if col not in input_row]
        if missing:
            st.error(f"The form does not collect these training columns: {', '.join(map(str, missing))}")
        else:
            # Reorder to the training layout; the pipeline applies the
            # fitted preprocessor itself before the classifier predicts.
            input_df = pd.DataFrame([input_row])[list(X_train.columns)]
            prediction = model.predict(input_df)

            st.write("Cancer Prediction:", "Positive" if prediction[0] == 1 else "Negative")
    else:
        st.error("Please train a model first!")
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.pipeline import Pipeline
5
+ from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
6
+ from sklearn.impute import SimpleImputer
7
+ from sklearn.compose import ColumnTransformer
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.svm import SVC
11
+ from sklearn.linear_model import LogisticRegression
12
+ from sklearn.neighbors import KNeighborsClassifier
13
+
14
+ # Load dataset
15
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer prediction dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the file name the app
            has always used, so existing ``load_data()`` calls are unchanged.

    Returns:
        pandas.DataFrame: The parsed CSV contents.

    Raises:
        FileNotFoundError: If no file exists at ``path`` (raised by pandas).
    """
    return pd.read_csv(path)
18
+
19
+ # Data Preprocessing
20
def preprocess_data(df, *, target='Cancer_Present', test_size=0.2, random_state=23):
    """Split the dataset and build the (unfitted) feature preprocessor.

    Args:
        df: DataFrame holding the feature columns named below plus the
            ``target`` column.
        target: Name of the label column. Defaults to ``'Cancer_Present'``.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, preprocess)`` where
        ``preprocess`` is a ``ColumnTransformer`` that has not been fitted.

    Raises:
        KeyError: If ``target`` is not a column of ``df``.
    """
    numeric = ['Age', 'Tumor_Size']
    ordinal = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric columns: fill gaps with the column mean, then standardize.
    numeric_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
    # Ordinal columns: fill gaps with the mode; categories not seen during
    # fitting are encoded as -1 instead of raising.
    ordinal_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])
    # Nominal columns: fill gaps with the mode, then one-hot encode;
    # categories not seen during fitting are ignored instead of raising.
    nominal_preprocess = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    # Columns not listed in any group are forwarded untouched.
    preprocess = ColumnTransformer([
        ('num', numeric_preprocess, numeric),
        ('ord', ordinal_preprocess, ordinal),
        ('nom', nominal_preprocess, nominal),
    ], remainder='passthrough')

    X = df.drop(target, axis=1)
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test, preprocess
50
+
51
+ # Train Models
52
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen model.

    Args:
        X_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's first step.
        model_name: One of ``'Decision Tree'``, ``'SVM'``,
            ``'Logistic Regression'`` or ``'KNN'``.

    Returns:
        Pipeline: The fitted pipeline, with steps named ``'preprocessor'``
        and ``'classifier'``.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map names to classes (not instances) so only the requested estimator
    # is ever constructed.
    model_classes = {
        'Decision Tree': DecisionTreeClassifier,
        'SVM': SVC,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
    }
    try:
        model_class = model_classes[model_name]
    except KeyError:
        supported = ', '.join(model_classes)
        raise KeyError(
            f"Unknown model {model_name!r}; expected one of: {supported}"
        ) from None

    pipeline = Pipeline([
        ('preprocessor', preprocess),
        ('classifier', model_class()),
    ])
    pipeline.fit(X_train, y_train)
    return pipeline
67
+
68
+ # Streamlit UI
69
st.markdown("<h1 style='text-align: center; color: #1976D2;'>🎗️ Cancer Prediction Using ML 🎗️</h1>", unsafe_allow_html=True)

df = load_data()
X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

model_name = st.selectbox("Select Model", ['Decision Tree', 'SVM', 'Logistic Regression', 'KNN'])

if st.button("Train Model"):
    model = train_model(X_train, y_train, preprocess, model_name)
    accuracy = model.score(X_test, y_test)
    st.write(f"Model Accuracy: {accuracy:.2f}")
    # Keep the fitted pipeline in session state so a later "Predict" click
    # (which reruns this script) can still find it.
    st.session_state['trained_model'] = model
    st.success("Model trained successfully!")

# Prediction Section
st.markdown("<h2 style='color: #D32F2F;'>🔍 Make a Prediction</h2>", unsafe_allow_html=True)

age = st.number_input("Age", min_value=18, max_value=100, value=30)
tumor_size = st.number_input("Tumor Size", min_value=1.0, max_value=10.0, value=5.0)
tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
smoking_history = st.selectbox("Smoking History", [0, 1, 2])
alcohol_consumption = st.selectbox("Alcohol Consumption", [0, 1, 2, 3])
exercise_frequency = st.selectbox("Exercise Frequency", [0, 1, 2, 3])
gender = st.selectbox("Gender", [0, 1])
family_history = st.selectbox("Family History", [0, 1])

# Key every value by its column name so the row cannot be mislabelled when
# the CSV's column order differs from the order of the widgets above.
input_row = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        missing = [col for col in X_train.columns if col not in input_row]
        if missing:
            st.error(f"The form does not collect these training columns: {', '.join(map(str, missing))}")
        else:
            # Reorder to the training layout; the pipeline applies the
            # fitted preprocessor itself before the classifier predicts.
            input_df = pd.DataFrame([input_row])[list(X_train.columns)]
            prediction = model.predict(input_df)

            st.write("Cancer Prediction:", "Positive" if prediction[0] == 1 else "Negative")
    else:
        st.error("Please train a model first!")