prahalya committed on
Commit
b9ef127
·
verified ·
1 Parent(s): fef2fe0

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +108 -0
  2. cancer_prediction_data (2).csv +0 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.pipeline import Pipeline
5
+ from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
6
+ from sklearn.impute import SimpleImputer
7
+ from sklearn.compose import ColumnTransformer
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.svm import SVC
11
+ from sklearn.linear_model import LogisticRegression
12
+ from sklearn.neighbors import KNeighborsClassifier
13
+
14
+ # Load dataset
15
@st.cache_data
def load_data():
    """Read the cancer prediction dataset from disk.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so the result is cached with ``st.cache_data`` to avoid
    re-reading the CSV on each rerun.

    Returns:
        pandas.DataFrame: contents of 'cancer_prediction_data (2).csv',
        resolved relative to the current working directory.
    """
    return pd.read_csv('cancer_prediction_data (2).csv')
18
+
19
+ # Data Preprocessing
20
def preprocess_data(df):
    """Split the dataset and build the (unfitted) feature preprocessor.

    Args:
        df: DataFrame containing the target column 'Cancer_Present' and the
            feature columns named below.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, preprocess)`` where the
        first four items come from an 80/20 ``train_test_split`` with
        ``random_state=23`` and ``preprocess`` is a ``ColumnTransformer``
        that has not been fitted yet.
    """
    numeric_cols = ['Age', 'Tumor_Size']
    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

    def _mode_impute_then(encoder):
        # Categorical branches share the same imputer and differ only in
        # the encoder that follows it.
        return Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', encoder),
        ])

    # Numeric features: fill gaps with the column mean, then standardize.
    numeric_branch = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
    # Categories unseen at fit time are encoded as -1 instead of raising.
    ordinal_branch = _mode_impute_then(
        OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
    )
    # Categories unseen at fit time are ignored by the one-hot encoder.
    nominal_branch = _mode_impute_then(
        OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    )

    # Columns not listed in any branch are passed through untouched.
    preprocess = ColumnTransformer(
        [
            ('num', numeric_branch, numeric_cols),
            ('ord', ordinal_branch, ordinal_cols),
            ('nom', nominal_branch, nominal_cols),
        ],
        remainder='passthrough',
    )

    target = df['Cancer_Present']
    features = df.drop(columns='Cancer_Present')
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=23
    )
    return X_train, X_test, y_train, y_test, preprocess
50
+
51
+ # Train Models
52
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen model.

    Args:
        X_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's 'preprocessor' step.
        model_name: One of 'Decision Tree', 'SVM', 'Logistic Regression'
            or 'KNN'.

    Returns:
        Pipeline: the fitted pipeline with steps 'preprocessor' and
        'classifier'.

    Raises:
        KeyError: if ``model_name`` is not one of the supported names.
    """
    # Map display names to estimator classes; each is built with defaults.
    estimator_types = {
        'Decision Tree': DecisionTreeClassifier,
        'SVM': SVC,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
    }
    classifier = estimator_types[model_name]()

    steps = [('preprocessor', preprocess), ('classifier', classifier)]
    fitted_pipeline = Pipeline(steps)
    fitted_pipeline.fit(X_train, y_train)
    return fitted_pipeline
67
+
68
+ # Streamlit UI
69
# Page flow: load and split the data, optionally train a model on demand,
# then collect one patient's features and predict with the trained model.
st.title("Cancer Prediction Using Machine Learning")
df = load_data()
X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

model_name = st.selectbox("Select Model", ['Decision Tree', 'SVM', 'Logistic Regression', 'KNN'])

if st.button("Train Model"):
    model = train_model(X_train, y_train, preprocess, model_name)
    accuracy = model.score(X_test, y_test)
    st.write(f"Model Accuracy: {accuracy:.2f}")
    # The script reruns on every interaction; session_state keeps the
    # fitted pipeline available for the prediction button below.
    st.session_state['trained_model'] = model
    st.success("Model trained successfully!")

# Prediction Section
st.header("Make a Prediction")
age = st.number_input("Age", min_value=18, max_value=100, value=30)
tumor_size = st.number_input("Tumor Size", min_value=1.0, max_value=10.0, value=5.0)
tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
smoking_history = st.selectbox("Smoking History", [0, 1, 2])
alcohol_consumption = st.selectbox("Alcohol Consumption", [0, 1, 2, 3])
exercise_frequency = st.selectbox("Exercise Frequency", [0, 1, 2, 3])
gender = st.selectbox("Gender", [0, 1])
family_history = st.selectbox("Family History", [0, 1])

# Key every widget value by the dataset column it represents, so the row
# does not depend on the CSV's column order matching a hard-coded list.
input_row = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        # Feature columns in the dataset that the form above does not provide.
        missing = [col for col in X_train.columns if col not in input_row]
        if missing:
            st.error(f"Cannot predict: no input provided for columns {missing}")
        else:
            # Passing columns= makes pandas pick values by name and emit
            # them in the same order the model was trained on.
            input_df = pd.DataFrame([input_row], columns=X_train.columns)

            # The pipeline applies its fitted preprocessor before classifying.
            prediction = model.predict(input_df)

            st.write("Cancer Prediction:", "Positive" if prediction[0] == 1 else "Negative")
    else:
        st.error("Please train a model first!")
cancer_prediction_data (2).csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ pandas
4
+ numpy