Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.svm import SVC | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.neighbors import KNeighborsClassifier | |
| # Load dataset | |
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer-prediction dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the file name this app
            was originally hard-wired to, so a bare ``load_data()`` call
            behaves exactly as before.

    Returns:
        pandas.DataFrame: The file contents as parsed by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If no file exists at ``path``.
    """
    return pd.read_csv(path)
| # Data Preprocessing | |
def preprocess_data(df):
    """Split *df* into train/test sets and build an unfitted preprocessor.

    Args:
        df: DataFrame holding the feature columns named below plus the
            ``Cancer_Present`` target column.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, preprocess)`` where the
        first four items come from an 80/20 ``train_test_split`` seeded with
        ``random_state=23`` and ``preprocess`` is a ``ColumnTransformer``
        that has not been fitted yet.
    """
    target = 'Cancer_Present'
    numeric_cols = ['Age', 'Tumor_Size']
    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

    def impute_then_encode(encoder):
        # Both categorical branches fill gaps with the most frequent value
        # before encoding; only the encoder differs.
        return Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', encoder),
        ])

    # Numeric features: mean-impute, then standardise.
    numeric_branch = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
    # Categories unseen at fit time are encoded as -1 instead of raising.
    ordinal_branch = impute_then_encode(
        OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
    )
    # Categories unseen at fit time are ignored by the one-hot encoder.
    nominal_branch = impute_then_encode(
        OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    )

    # Columns not listed above are forwarded unchanged (remainder='passthrough').
    preprocess = ColumnTransformer(
        transformers=[
            ('num', numeric_branch, numeric_cols),
            ('ord', ordinal_branch, ordinal_cols),
            ('nom', nominal_branch, nominal_cols),
        ],
        remainder='passthrough',
    )

    features = df.drop(columns=target)
    labels = df[target]
    split = train_test_split(features, labels, test_size=0.2, random_state=23)
    return (*split, preprocess)
| # Train Models | |
def train_model(X_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the chosen algorithm.

    Args:
        X_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's ``'preprocessor'`` step.
        model_name: One of ``'Decision Tree'``, ``'SVM'``,
            ``'Logistic Regression'`` or ``'KNN'``.

    Returns:
        Pipeline: The fitted pipeline with steps ``'preprocessor'`` and
        ``'classifier'``.

    Raises:
        KeyError: If ``model_name`` is not one of the labels listed above.
    """
    # Map each UI label to its estimator class; every estimator is created
    # with default hyper-parameters.
    estimator_classes = {
        'Decision Tree': DecisionTreeClassifier,
        'SVM': SVC,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
    }
    classifier = estimator_classes[model_name]()
    steps = [
        ('preprocessor', preprocess),
        ('classifier', classifier),
    ]
    # Pipeline.fit returns the pipeline itself, so it can be returned directly.
    return Pipeline(steps=steps).fit(X_train, y_train)
| # Streamlit UI | |
st.title("Cancer Prediction Using Machine Learning")

# Load the dataset and prepare the train/test split plus the (unfitted)
# preprocessing transformer used by every model choice.
df = load_data()
X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

available_models = ['Decision Tree', 'SVM', 'Logistic Regression', 'KNN']
model_name = st.selectbox("Select Model", available_models)

train_clicked = st.button("Train Model")
if train_clicked:
    model = train_model(X_train, y_train, preprocess, model_name)
    # Accuracy on the held-out test split.
    accuracy = model.score(X_test, y_test)
    st.write(f"Model Accuracy: {accuracy:.2f}")
    # Keep the fitted pipeline in session state so the prediction section
    # can look it up under the same key.
    st.session_state['trained_model'] = model
    st.success("Model trained successfully!")
| # Prediction Section | |
st.header("Make a Prediction")

# NOTE(review): the option lists below assume the CSV stores these features
# as the integer codes shown here -- confirm against the dataset.
age = st.number_input("Age", min_value=18, max_value=100, value=30)
tumor_size = st.number_input("Tumor Size", min_value=1.0, max_value=10.0, value=5.0)
tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
smoking_history = st.selectbox("Smoking History", [0, 1, 2])
alcohol_consumption = st.selectbox("Alcohol Consumption", [0, 1, 2, 3])
exercise_frequency = st.selectbox("Exercise Frequency", [0, 1, 2, 3])
gender = st.selectbox("Gender", [0, 1])
family_history = st.selectbox("Family History", [0, 1])

# Key every form value by the column name the preprocessor refers to, so the
# row no longer depends on the CSV happening to list its columns in one
# particular order.
input_values = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    # Training columns for which the form collects no value.
    missing_columns = [col for col in X_train.columns if col not in input_values]
    if 'trained_model' not in st.session_state:
        st.error("Please train a model first!")
    elif missing_columns:
        st.error(
            "Cannot predict: no input is collected for column(s): "
            + ", ".join(str(col) for col in missing_columns)
        )
    else:
        model = st.session_state['trained_model']
        # Single-row frame, reordered to match the training feature layout.
        input_df = pd.DataFrame([input_values])[list(X_train.columns)]
        # The pipeline applies its fitted preprocessor before the classifier.
        prediction = model.predict(input_df)
        st.write("Cancer Prediction:", "Positive" if prediction[0] == 1 else "Negative")