import streamlit as st
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Load dataset
def load_data(path='cancer_prediction_data (2).csv'):
    """Read the cancer-prediction dataset from a CSV file.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
            Defaults to the file name the app has always used, so existing
            ``load_data()`` calls keep working unchanged.

    Returns:
        A ``pandas.DataFrame`` holding the contents of the CSV.
    """
    return pd.read_csv(path)

# Data Preprocessing
def preprocess_data(df):
    """Split the dataset and build the (unfitted) feature transformer.

    Args:
        df: DataFrame containing the feature columns named below plus the
            ``Cancer_Present`` target column.

    Returns:
        A 2-tuple ``(split, transformer)`` where ``split`` is the
        ``[x_train, x_test, y_train, y_test]`` result of an 80/20
        ``train_test_split`` with ``random_state=23`` and ``transformer`` is
        a ``ColumnTransformer`` that has not been fitted yet.
    """
    target_column = 'Cancer_Present'
    numeric_columns = ['Age', 'Tumor_Size']
    ordinal_columns = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
    nominal_columns = ['Gender', 'Family_History', 'Smoking_History']

    # Numeric features: fill gaps with the column mean, then standardise.
    numeric_steps = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])

    # Ordinal features: fill gaps with the mode, then map categories to
    # integers; categories unseen at fit time are encoded as -1.
    # NOTE(review): no explicit `categories` is passed, so the integer order
    # is whatever the encoder derives from the data, not a domain ordering
    # such as Low < Medium < High -- confirm this is intended.
    ordinal_steps = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
    ])

    # Nominal features: fill gaps with the mode, then one-hot encode;
    # categories unseen at fit time are ignored rather than raising.
    nominal_steps = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ])

    # Columns not listed above are handed to the model untouched.
    transformer = ColumnTransformer(
        [
            ('num', numeric_steps, numeric_columns),
            ('ord', ordinal_steps, ordinal_columns),
            ('nom', nominal_steps, nominal_columns),
        ],
        remainder='passthrough',
    )

    features = df.drop(target_column, axis=1)
    labels = df[target_column]
    split = train_test_split(features, labels, test_size=0.2, random_state=23)
    return split, transformer

# Train Model
def train_model(x_train, y_train, preprocess, model_name):
    """Fit a preprocessing + classifier pipeline for the selected model.

    Args:
        x_train: Training features.
        y_train: Training labels.
        preprocess: Transformer used as the pipeline's ``'preprocessor'`` step.
        model_name: One of ``'Decision Tree'``, ``'Logistic Regression'``,
            ``'KNN'``, ``'Random Forest'`` or ``'XGBoost'``.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map display names to estimator classes; only the requested one is built.
    estimator_classes = {
        'Decision Tree': DecisionTreeClassifier,
        'Logistic Regression': LogisticRegression,
        'KNN': KNeighborsClassifier,
        'Random Forest': RandomForestClassifier,
        'XGBoost': XGBClassifier,
    }
    classifier = estimator_classes[model_name]()

    fitted = Pipeline([
        ('preprocessor', preprocess),
        ('classifier', classifier),
    ])
    fitted.fit(x_train, y_train)
    return fitted

# Streamlit UI
st.set_page_config(page_title='Cancer Prediction App', layout='wide')

# Sidebar: choose a classifier and train it on demand. The fitted pipeline and
# the training features are kept in session state so they survive the script
# re-runs Streamlit performs on every widget interaction.
with st.sidebar:
    st.markdown("### Select Machine Learning Model")
    model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
    if st.button("Train Model"):
        try:
            df = load_data()
        except FileNotFoundError:
            # Show a readable message instead of a raw traceback when the
            # dataset has not been placed next to the app.
            st.error("Training data file not found. Place the dataset CSV in the app's working directory and try again.")
        else:
            (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
            model = train_model(x_train, y_train, preprocess, model_name)
            accuracy = model.score(x_test, y_test)
            st.session_state['trained_model'] = model
            st.session_state['x_train'] = x_train
            st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")

st.title("🎗️ Cancer Prediction")

st.markdown("""<style>.big-font {font-size:20px !important;}</style>
<p class="big-font">Provide patient details below to predict cancer presence:</p>""", unsafe_allow_html=True)

# Patient form, laid out in two columns.
col1, col2 = st.columns(2)
with col1:
    age = st.slider("Age", 18, 100, 30)
    tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0)
    tumor_grade = st.selectbox("Tumor Grade", ['High', 'Low', 'Medium'])
    symptoms_severity = st.selectbox("Symptoms Severity", ['Mild', 'Moderate', 'Severe'])

with col2:
    smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
    gender = st.selectbox("Gender", [0, 1])
    family_history = st.selectbox("Family History", ["No", "Yes"])

# Key every widget value by the training column it belongs to. Relying on the
# positional order of the CSV columns would silently feed values into the
# wrong features whenever the file's column order differs from the form's.
patient_record = {
    'Age': age,
    'Tumor_Size': tumor_size,
    'Tumor_Grade': tumor_grade,
    'Symptoms_Severity': symptoms_severity,
    'Smoking_History': smoking_history,
    'Alcohol_Consumption': alcohol_consumption,
    'Exercise_Frequency': exercise_frequency,
    'Gender': gender,
    'Family_History': family_history,
}

if st.button("Predict Cancer Presence"):
    if 'trained_model' in st.session_state:
        model = st.session_state['trained_model']
        x_train = st.session_state['x_train']

        # Training columns the form cannot supply (e.g. extra CSV columns).
        uncovered = [col for col in x_train.columns if col not in patient_record]
        if uncovered:
            st.error("The form has no input for training column(s): " + ", ".join(map(str, uncovered)))
        else:
            # One-row frame with columns in the order the pipeline was fitted on.
            input_df = pd.DataFrame([patient_record])[list(x_train.columns)]

            # The fitted pipeline applies preprocessing and then the classifier.
            prediction = model.predict(input_df)

            if prediction[0] == 1:
                st.markdown("<h3 style='color: red;'>Cancer Prediction: Positive 🟥</h3>", unsafe_allow_html=True)
                st.write("Unfortunately, the model predicts the presence of cancer. Please consult a doctor for further advice.")
            else:
                st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
                st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")
    else:
        st.error("Please train a model first!")