Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import warnings | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.svm import SVC | |
| from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
# Suppress all warnings process-wide (no category or module filter is given)
warnings.filterwarnings(action="ignore")

# Page heading followed by the attribution line
st.title(body="Cancer Prediction App")
st.markdown(body="### Powered by Innomatics Research Lab")
# Load dataset
@st.cache_data
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer prediction dataset from a CSV file.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_data`` keeps the parsed frame so the file is read and parsed
    only once per distinct ``path`` instead of on every rerun.

    Args:
        path: Location of the CSV file. Defaults to the file name the app
            has always loaded; a relative path is resolved against the
            current working directory.

    Returns:
        pandas.DataFrame: The parsed file contents.
    """
    return pd.read_csv(path)
# Load the dataset used for training and evaluation
data = load_data()

# The label column must be present; otherwise report it and halt this run
target_col = 'Cancer_Present'
if target_col not in data.columns:
    st.error(f"Target column '{target_col}' not found in data!")
    st.stop()

# Separate the label from the predictor columns
y = data[target_col]
X = data.drop(target_col, axis=1)

# Columns handled by the numeric and the categorical preprocessing branches
numerical_features = ['Age', 'Tumor_Size']
categorical_features = [
    'Gender',
    'Tumor_Grade',
    'Symptoms_Severity',
    'Family_History',
    'Smoking_History',
    'Alcohol_Consumption',
    'Exercise_Frequency',
]
# Preprocessing pipeline
def create_preprocessing_pipeline():
    """Build the column-wise preprocessing transformer.

    Columns in ``numerical_features`` are mean-imputed and then
    standardised. Columns in ``categorical_features`` are imputed with the
    most frequent value and one-hot encoded; categories not seen during
    fitting are ignored at transform time.

    Returns:
        ColumnTransformer: An unfitted transformer combining both branches.
    """
    numeric_steps = [
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
    categorical_steps = [
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ]
    branches = [
        ('num', Pipeline(numeric_steps), numerical_features),
        ('cat', Pipeline(categorical_steps), categorical_features),
    ]
    return ColumnTransformer(branches)
preprocess = create_preprocessing_pipeline()

# Sidebar - Select Algorithm
st.sidebar.header("Model Selection")
algorithm = st.sidebar.radio("Choose an Algorithm", ["SVM", "Random Forest", "Gradient Boosting"])

# Candidate (unfitted) classifiers keyed by their sidebar label
model_dict = {
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier()
}

# The fixed seed keeps the train/test partition identical on every rerun
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


@st.cache_resource
def _fit_pipeline(algorithm_name):
    """Fit and score the full pipeline for one algorithm.

    Streamlit re-executes the script on every widget interaction, which
    previously retrained the model each time a slider moved or the predict
    button was clicked. ``st.cache_resource`` stores the fitted pipeline per
    ``algorithm_name`` so each model is trained at most once per server
    process, and the reported accuracy no longer changes between reruns for
    the unseeded ensemble models.

    The cache key is the algorithm name only: if the underlying CSV changes,
    clear the Streamlit cache or restart the app to retrain.

    Args:
        algorithm_name: A key of ``model_dict``.

    Returns:
        tuple: ``(fitted_pipeline, accuracy)`` where ``accuracy`` is the
        score of the fitted pipeline on the held-out test split.
    """
    fitted = Pipeline([
        ('preprocessing', preprocess),
        ('classifier', model_dict[algorithm_name])
    ])
    fitted.fit(X_train, y_train)
    return fitted, fitted.score(X_test, y_test)


# Train model (or reuse the cached fit for the selected algorithm)
pipeline, accuracy = _fit_pipeline(algorithm)
st.sidebar.write(f"**{algorithm} Accuracy:** {accuracy * 100:.2f}%")
# Sidebar - User input
def user_input_features():
    """Collect one patient's feature values from sidebar widgets.

    Two sliders (age, tumor size) are rendered first, followed by one
    select box per categorical feature, in the order listed below.

    Returns:
        pandas.DataFrame: A single-row frame whose columns are named after
        the model's feature columns and hold the selected values.
    """
    sidebar = st.sidebar

    # Numeric inputs: (label, min, max, default)
    record = {
        'Age': sidebar.slider("Age", 0, 120, 50),
        'Tumor_Size': sidebar.slider("Tumor Size", 0.0, 100.0, 5.0),
    }

    # Categorical inputs: (column name, widget label, options)
    choice_fields = (
        ('Gender', "Gender", ["Male", "Female"]),
        ('Tumor_Grade', "Tumor Grade", ["Low", "Medium", "High"]),
        ('Symptoms_Severity', "Symptoms Severity", ["Mild", "Moderate", "Severe"]),
        ('Family_History', "Family History", ["Yes", "No"]),
        ('Smoking_History', "Smoking History", ["Current Smoker", "Non-Smoker"]),
        ('Alcohol_Consumption', "Alcohol Consumption", ["Low", "Moderate", "High"]),
        ('Exercise_Frequency', "Exercise Frequency", ["Never", "Rarely", "Occasionally", "Often"]),
    )
    for column, label, options in choice_fields:
        record[column] = sidebar.selectbox(label, options)

    # Wrap every value in a one-element list so the frame has exactly one row
    return pd.DataFrame({column: [value] for column, value in record.items()})
# Heading goes first so it is rendered above the input widgets in the sidebar
st.sidebar.markdown("### Patient Data Input")
input_df = user_input_features()

# Echo the collected values so they can be checked before predicting
st.subheader("User Input Data")
st.write(input_df)

# Prediction
if st.button("Predict Cancer Presence"):
    prediction = pipeline.predict(input_df)
    # NOTE(review): assumes the positive class in 'Cancer_Present' is encoded
    # as 1 - confirm against the dataset.
    result = "Cancer Detected" if prediction[0] == 1 else "No Cancer Detected"
    # st.subheader already renders a heading, so the Markdown '###' prefix
    # was redundant and has been dropped.
    st.subheader(f"Prediction: {result}")