# Source: Hugging Face Space "zaid002/multiple-disease-prediction"
# (Space status when captured: Sleeping; file size: 31,539 bytes)

import os
import tempfile

# Fix for Hugging Face Spaces permission issues: point every Streamlit
# data/config/cache directory variable at the system temp directory.
# These assignments sit ahead of the `import streamlit` line below.
_writable_dir = tempfile.gettempdir()
for _env_name in ('STREAMLIT_APP_DATA', 'STREAMLIT_CONFIG_DIR', 'STREAMLIT_CACHE_DIR'):
    os.environ[_env_name] = _writable_dir

import streamlit as st
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Page-level settings (browser tab title/icon, wide layout, open sidebar).
# This call is kept ahead of every other st.* call in the script.
_PAGE_SETTINGS = {
    "page_title": "Multiple Disease Prediction System",
    "page_icon": "🏥",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Stylesheet for the header, the info cards and the positive/negative
# prediction banners referenced by the HTML snippets in the page functions.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.5rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .disease-card {
        background-color: #f0f2f6;
        padding: 1.5rem;
        border-radius: 10px;
        border-left: 4px solid #1f77b4;
        margin-bottom: 1rem;
    }
    .prediction-positive {
        background-color: #ff6b6b;
        padding: 1rem;
        border-radius: 5px;
        color: white;
        text-align: center;
    }
    .prediction-negative {
        background-color: #51cf66;
        padding: 1rem;
        border-radius: 5px;
        color: white;
        text-align: center;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

class DiseasePredictor:
    """Train and serve demo classifiers for Parkinson's, kidney and liver disease.

    The models are random forests fitted on synthetic data produced by
    :meth:`generate_sample_data`; nothing is loaded from disk.

    Attributes:
        models: disease key -> fitted ``RandomForestClassifier``.
        scalers: disease key -> ``StandardScaler`` fitted on the training split.
        label_encoders: name -> fitted ``LabelEncoder`` (``'liver_gender'``
            after training).
        features: disease key -> ordered feature names. The order is the
            order in which values must be passed to the ``predict_*`` methods.

    In every model, class ``1`` means "disease present".
    """

    # Label column of each synthetic dataset built by generate_sample_data().
    _TARGET_COLUMNS = {
        'parkinsons': 'status',
        'kidney': 'classification',
        'liver': 'Dataset',
    }

    def __init__(self):
        self.models = {}
        self.scalers = {}
        # Initialised here so the attribute exists even before training.
        self.label_encoders = {}
        self.features = {
            'parkinsons': ['MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
                          'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
                          'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
                          'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'RPDE', 'DFA',
                          'spread1', 'spread2', 'D2', 'PPE'],
            'kidney': ['age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr',
                      'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm',
                      'cad', 'appet', 'pe', 'ane'],
            'liver': ['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
                     'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
                     'Aspartate_Aminotransferase', 'Total_Proteins', 'Albumin',
                     'Albumin_and_Globulin_Ratio']
        }

    def generate_sample_data(self):
        """Generate synthetic demonstration data.

        Reseeds NumPy's global random generator with 42 on every call, so
        repeated calls return identical data.

        Returns:
            dict mapping ``'parkinsons'``, ``'kidney'`` and ``'liver'`` to a
            100-row ``DataFrame`` holding the feature columns followed by the
            label column (``status``, ``classification`` and ``Dataset``).
        """
        np.random.seed(42)
        n_rows = 100

        # Parkinson's sample data (status: 1 = Parkinson's, 0 = healthy)
        parkinsons_data = {
            'MDVP:Fo(Hz)': np.random.uniform(100, 250, n_rows),
            'MDVP:Fhi(Hz)': np.random.uniform(150, 300, n_rows),
            'MDVP:Flo(Hz)': np.random.uniform(50, 200, n_rows),
            'MDVP:Jitter(%)': np.random.uniform(0.001, 0.05, n_rows),
            'MDVP:Jitter(Abs)': np.random.uniform(0.00001, 0.0003, n_rows),
            'MDVP:RAP': np.random.uniform(0.001, 0.03, n_rows),
            'MDVP:PPQ': np.random.uniform(0.001, 0.03, n_rows),
            'Jitter:DDP': np.random.uniform(0.003, 0.09, n_rows),
            'MDVP:Shimmer': np.random.uniform(0.01, 0.15, n_rows),
            'MDVP:Shimmer(dB)': np.random.uniform(0.1, 1.5, n_rows),
            'Shimmer:APQ3': np.random.uniform(0.005, 0.06, n_rows),
            'Shimmer:APQ5': np.random.uniform(0.005, 0.08, n_rows),
            'MDVP:APQ': np.random.uniform(0.01, 0.1, n_rows),
            'Shimmer:DDA': np.random.uniform(0.015, 0.18, n_rows),
            'NHR': np.random.uniform(0.001, 0.1, n_rows),
            'HNR': np.random.uniform(10, 30, n_rows),
            'RPDE': np.random.uniform(0.2, 0.8, n_rows),
            'DFA': np.random.uniform(0.5, 0.9, n_rows),
            'spread1': np.random.uniform(-10, -5, n_rows),
            'spread2': np.random.uniform(0.05, 0.3, n_rows),
            'D2': np.random.uniform(1.5, 3.0, n_rows),
            'PPE': np.random.uniform(0.05, 0.3, n_rows),
            'status': np.random.choice([0, 1], n_rows, p=[0.3, 0.7])
        }

        # Kidney disease sample data (classification: 1 = CKD, 0 = healthy)
        kidney_data = {
            'age': np.random.randint(20, 80, n_rows),
            'bp': np.random.randint(50, 180, n_rows),
            'sg': np.random.uniform(1.005, 1.025, n_rows),
            'al': np.random.randint(0, 5, n_rows),
            'su': np.random.randint(0, 5, n_rows),
            'rbc': np.random.choice([0, 1], n_rows),
            'pc': np.random.choice([0, 1], n_rows),
            'pcc': np.random.choice([0, 1], n_rows),
            'ba': np.random.choice([0, 1], n_rows),
            'bgr': np.random.randint(70, 200, n_rows),
            'bu': np.random.randint(10, 100, n_rows),
            'sc': np.random.uniform(0.5, 8.0, n_rows),
            'sod': np.random.randint(120, 150, n_rows),
            'pot': np.random.uniform(3.0, 7.0, n_rows),
            'hemo': np.random.uniform(3.0, 17.0, n_rows),
            'pcv': np.random.randint(20, 50, n_rows),
            'wc': np.random.randint(4000, 12000, n_rows),
            'rc': np.random.uniform(3.0, 7.0, n_rows),
            'htn': np.random.choice([0, 1], n_rows),
            'dm': np.random.choice([0, 1], n_rows),
            'cad': np.random.choice([0, 1], n_rows),
            'appet': np.random.choice([0, 1], n_rows),
            'pe': np.random.choice([0, 1], n_rows),
            'ane': np.random.choice([0, 1], n_rows),
            'classification': np.random.choice([0, 1], n_rows, p=[0.4, 0.6])
        }

        # Liver disease sample data (Dataset: 1 = disease, 2 = healthy)
        liver_data = {
            'Age': np.random.randint(20, 70, n_rows),
            'Gender': np.random.choice(['Male', 'Female'], n_rows),
            'Total_Bilirubin': np.random.uniform(0.2, 8.0, n_rows),
            'Direct_Bilirubin': np.random.uniform(0.1, 4.0, n_rows),
            'Alkaline_Phosphotase': np.random.randint(60, 400, n_rows),
            'Alamine_Aminotransferase': np.random.randint(10, 150, n_rows),
            'Aspartate_Aminotransferase': np.random.randint(10, 150, n_rows),
            'Total_Proteins': np.random.uniform(4.0, 8.0, n_rows),
            'Albumin': np.random.uniform(2.0, 5.0, n_rows),
            'Albumin_and_Globulin_Ratio': np.random.uniform(0.5, 2.5, n_rows),
            'Dataset': np.random.choice([1, 2], n_rows, p=[0.6, 0.4])
        }

        return {
            'parkinsons': pd.DataFrame(parkinsons_data),
            'kidney': pd.DataFrame(kidney_data),
            'liver': pd.DataFrame(liver_data)
        }

    def _extract_target(self, disease, data):
        """Return the label column of ``data`` with 1 meaning "disease present".

        The Parkinson's and kidney labels are already 0/1. The liver
        ``Dataset`` column uses 1 for disease and 2 for healthy, so it is
        converted to 1 (disease) / 0 (healthy) to match how the prediction
        pages interpret a prediction of 1.
        """
        target = data[self._TARGET_COLUMNS[disease]]
        if disease == 'liver':
            return (target == 1).astype(int)
        return target

    def train_models(self):
        """Train one random forest per disease on the synthetic data.

        Populates ``self.models``, ``self.scalers`` and ``self.label_encoders``
        and reports each model's hold-out accuracy in the Streamlit sidebar.
        """
        sample_data = self.generate_sample_data()

        for disease, data in sample_data.items():
            y = self._extract_target(disease, data)
            # Select by the declared feature list so the training column order
            # is exactly the order the predict_* methods receive values in.
            X = data[self.features[disease]].copy()

            if disease == 'liver':
                # Encode gender and keep the encoder for use at prediction time.
                gender_encoder = LabelEncoder()
                X['Gender'] = gender_encoder.fit_transform(X['Gender'])
                self.label_encoders['liver_gender'] = gender_encoder

            # Work on plain arrays: predictions are made from plain lists, so
            # the scaler must not be fitted with DataFrame feature names.
            X_train, X_test, y_train, y_test = train_test_split(
                X.to_numpy(dtype=float), y.to_numpy(), test_size=0.2, random_state=42
            )

            # Scale features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            # Train model
            model = RandomForestClassifier(n_estimators=100, random_state=42)
            model.fit(X_train_scaled, y_train)

            # Store model and scaler
            self.models[disease] = model
            self.scalers[disease] = scaler

            # Calculate accuracy on the held-out 20%
            accuracy = accuracy_score(y_test, model.predict(X_test_scaled))

            st.sidebar.success(f"✅ {disease.title()} Model Trained (Accuracy: {accuracy:.2f})")

    def _predict(self, disease, input_data):
        """Scale one sample and classify it with the model stored for ``disease``.

        Args:
            disease: key into ``self.models`` / ``self.scalers``.
            input_data: feature values in ``self.features[disease]`` order.

        Returns:
            ``(prediction, probability)`` where ``probability`` is the
            per-class probability row, or the string ``"Model not trained"``
            when no model is stored for ``disease`` (legacy sentinel kept for
            backward compatibility; callers must check for it before
            unpacking).
        """
        if disease not in self.models:
            return "Model not trained"

        model = self.models[disease]
        input_scaled = self.scalers[disease].transform([input_data])

        prediction = model.predict(input_scaled)[0]
        probability = model.predict_proba(input_scaled)[0]
        return prediction, probability

    def predict_parkinsons(self, input_data):
        """Predict Parkinson's disease; see :meth:`_predict` for the return value."""
        return self._predict('parkinsons', input_data)

    def predict_kidney(self, input_data):
        """Predict Kidney disease; see :meth:`_predict` for the return value."""
        return self._predict('kidney', input_data)

    def predict_liver(self, input_data):
        """Predict Liver disease; see :meth:`_predict` for the return value."""
        return self._predict('liver', input_data)

def main():
    """Render the app: title, one-time model training, sidebar navigation.

    The trained predictor is cached in ``st.session_state`` so models are
    trained once per session rather than on every rerun.
    """
    st.markdown('<h1 class="main-header">🏥 Multiple Disease Prediction System</h1>', unsafe_allow_html=True)

    # Train first, cache second: if training raises, nothing is stored, so the
    # next rerun retries instead of reusing a partially trained predictor.
    if 'predictor' not in st.session_state:
        fresh_predictor = DiseasePredictor()
        with st.spinner("Training machine learning models..."):
            fresh_predictor.train_models()
        st.session_state.predictor = fresh_predictor

    predictor = st.session_state.predictor

    # Sidebar label -> function that renders that page.
    pages = {
        "🏠 Home": show_home_page,
        "🧠 Parkinson's Disease": show_parkinsons_page,
        "🫀 Kidney Disease": show_kidney_page,
        "🫁 Liver Disease": show_liver_page,
        "📊 Data Analysis": show_analysis_page,
    }

    # Sidebar navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.selectbox("Choose a Disease:", list(pages))

    pages[page](predictor)

def show_home_page(predictor):
    """Display the home page: system description, per-disease cards, overview.

    Args:
        predictor: the ``DiseasePredictor`` whose ``features`` and ``models``
            dictionaries supply the counts shown under "System Overview".
    """
    st.header("Welcome to the Multiple Disease Prediction System")

    st.markdown("""
    <div class="disease-card">
    <h3>🔬 About This System</h3>
    <p>This AI-powered system helps in early detection of multiple diseases using machine learning algorithms. 
    The system can predict:</p>
    <ul>
        <li>🧠 <b>Parkinson's Disease</b> - Based on voice measurements and patterns</li>
        <li>🫀 <b>Kidney Disease</b> - Based on clinical test results and patient history</li>
        <li>🫁 <b>Liver Disease</b> - Based on liver function tests and patient demographics</li>
    </ul>
    </div>
    """, unsafe_allow_html=True)

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("""
        <div class="disease-card">
        <h4>🧠 Parkinson's Prediction</h4>
        <p>Uses voice measurement parameters to detect Parkinson's disease with high accuracy.</p>
        <b>Key Features:</b>
        <ul>
            <li>MDVP Frequency Parameters</li>
            <li>Jitter Measurements</li>
            <li>Shimmer Measurements</li>
            <li>Non-linear Features</li>
        </ul>
        </div>
        """, unsafe_allow_html=True)

    with col2:
        st.markdown("""
        <div class="disease-card">
        <h4>🫀 Kidney Disease Prediction</h4>
        <p>Analyzes blood tests, urine tests, and patient history to predict chronic kidney disease.</p>
        <b>Key Features:</b>
        <ul>
            <li>Blood Pressure</li>
            <li>Blood Glucose</li>
            <li>Serum Creatinine</li>
            <li>Hemoglobin Levels</li>
        </ul>
        </div>
        """, unsafe_allow_html=True)

    with col3:
        st.markdown("""
        <div class="disease-card">
        <h4>🫁 Liver Disease Prediction</h4>
        <p>Uses liver function tests and patient demographics to detect liver disorders.</p>
        <b>Key Features:</b>
        <ul>
            <li>Bilirubin Levels</li>
            <li>Liver Enzymes</li>
            <li>Protein Levels</li>
            <li>Patient Age & Gender</li>
        </ul>
        </div>
        """, unsafe_allow_html=True)

    st.markdown("---")

    # Quick stats, derived from the predictor so they cannot drift from the
    # feature lists and models actually in use.
    st.subheader("📈 System Overview")
    total_features = sum(len(names) for names in predictor.features.values())
    overview = (
        ("Diseases Covered", str(len(predictor.features))),
        ("ML Models", str(len(predictor.models))),
        ("Total Features", str(total_features)),
        # NOTE(review): static text, not computed from the trained models.
        ("Prediction Accuracy", "85-95%"),
    )

    for column, (label, value) in zip(st.columns(4), overview):
        with column:
            st.metric(label, value)

# Input widgets for the Parkinson's page, listed in model feature order.
# Structure: rows -> two columns per row -> widget specs of the form
# (label, min_value, max_value, default, step, format).
# ``None`` for step/format keeps Streamlit's float defaults (step 0.01,
# "%0.2f"); inputs whose range lies below 0.01 get an explicit finer step and
# format, otherwise they display as 0.00 and one step overshoots the range.
_PARKINSONS_INPUT_ROWS = (
    (
        (
            ("MDVP:Fo(Hz) - Average vocal fundamental frequency", 80.0, 300.0, 150.0, None, None),
            ("MDVP:Fhi(Hz) - Maximum vocal fundamental frequency", 100.0, 400.0, 200.0, None, None),
            ("MDVP:Flo(Hz) - Minimum vocal fundamental frequency", 50.0, 250.0, 120.0, None, None),
            ("MDVP:Jitter(%) - Jitter in percentage", 0.001, 0.1, 0.005, 0.001, "%.3f"),
            ("MDVP:Jitter(Abs) - Absolute jitter", 0.00001, 0.001, 0.00005, 0.00001, "%.5f"),
        ),
        (
            ("MDVP:RAP - Relative amplitude perturbation", 0.001, 0.05, 0.005, 0.001, "%.3f"),
            ("MDVP:PPQ - Five-point period perturbation quotient", 0.001, 0.05, 0.005, 0.001, "%.3f"),
            ("Jitter:DDP - Average absolute difference of differences", 0.001, 0.1, 0.015, 0.001, "%.3f"),
            ("MDVP:Shimmer - Shimmer", 0.01, 0.2, 0.05, None, None),
            ("MDVP:Shimmer(dB) - Shimmer in decibels", 0.1, 2.0, 0.5, None, None),
        ),
    ),
    (
        (
            ("Shimmer:APQ3 - Three-point amplitude perturbation quotient", 0.005, 0.1, 0.02, 0.001, "%.3f"),
            ("Shimmer:APQ5 - Five-point amplitude perturbation quotient", 0.005, 0.1, 0.03, 0.001, "%.3f"),
            ("MDVP:APQ - Amplitude perturbation quotient", 0.01, 0.15, 0.05, None, None),
            ("Shimmer:DDA - Average absolute differences between consecutive differences", 0.01, 0.2, 0.06, None, None),
        ),
        (
            ("NHR - Noise-to-harmonics ratio", 0.001, 0.2, 0.02, 0.001, "%.3f"),
            ("HNR - Harmonics-to-noise ratio", 5.0, 40.0, 20.0, None, None),
            ("RPDE - Recurrence period density entropy", 0.1, 1.0, 0.5, None, None),
            ("DFA - Detrended fluctuation analysis", 0.4, 1.0, 0.7, None, None),
        ),
    ),
    (
        (
            ("spread1 - Nonlinear measure of fundamental frequency variation", -15.0, 0.0, -7.0, None, None),
            ("spread2 - Nonlinear measure of fundamental frequency variation", 0.01, 0.5, 0.2, None, None),
        ),
        (
            ("D2 - Correlation dimension", 1.0, 4.0, 2.5, None, None),
            ("PPE - Pitch period entropy", 0.05, 0.5, 0.2, None, None),
        ),
    ),
)


def show_parkinsons_page(predictor):
    """Display Parkinson's disease prediction page.

    Renders the 22 voice-measurement inputs in three two-column rows and, when
    the button is pressed, shows the predicted class and class probabilities.

    Args:
        predictor: object exposing ``predict_parkinsons(values)``, which
            returns ``(prediction, probabilities)`` or the string
            ``"Model not trained"``.
    """
    st.header("🧠 Parkinson's Disease Prediction")

    st.markdown("""
    Enter the voice measurement parameters to predict the likelihood of Parkinson's disease.
    All measurements should be from standardized voice recordings.
    """)

    # Values are collected row by row, column by column, which is the same
    # order as the model's feature list.
    input_data = []
    for row in _PARKINSONS_INPUT_ROWS:
        for column, specs in zip(st.columns(2), row):
            with column:
                for label, low, high, default, step, fmt in specs:
                    input_data.append(
                        st.number_input(label, min_value=low, max_value=high,
                                        value=default, step=step, format=fmt)
                    )

    if st.button("🔍 Predict Parkinson's Disease", type="primary"):
        # Make prediction
        result = predictor.predict_parkinsons(input_data)
        if isinstance(result, str):
            # The predictor signals a missing model with a message string;
            # show it instead of failing on tuple unpacking.
            st.error(result)
            return
        prediction, probabilities = result

        # Display results
        st.markdown("---")
        st.subheader("🎯 Prediction Results")

        # NOTE(review): the opening and closing div are emitted by separate
        # st.markdown calls, so they presumably do not wrap the alert between
        # them - confirm the rendered output.
        if prediction == 1:
            st.markdown('<div class="prediction-positive">', unsafe_allow_html=True)
            st.error("🚨 HIGH PROBABILITY OF PARKINSON'S DISEASE")
            st.markdown('</div>', unsafe_allow_html=True)
        else:
            st.markdown('<div class="prediction-negative">', unsafe_allow_html=True)
            st.success("✅ LOW PROBABILITY OF PARKINSON'S DISEASE")
            st.markdown('</div>', unsafe_allow_html=True)

        # Show probabilities
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Probability of Parkinson's", f"{probabilities[1]:.2%}")
        with col2:
            st.metric("Probability of Healthy", f"{probabilities[0]:.2%}")

        # Show feature importance (simulated: fixed values, not read from the model)
        st.subheader("📊 Key Contributing Factors")
        important_features = [
            ("PPE (Pitch Period Entropy)", 0.85),
            ("Spread1", 0.78),
            ("MDVP:Fo(Hz)", 0.72),
            ("HNR (Harmonics-to-Noise)", 0.65)
        ]

        for feature, importance in important_features:
            st.write(f"**{feature}**: {importance:.2f}")

def show_kidney_page(predictor):
    """Display Kidney disease prediction page.

    Renders the 24 clinical inputs and, when the button is pressed, shows the
    predicted class, the class probabilities and a threshold-based summary of
    four of the entered values.

    Args:
        predictor: object exposing ``predict_kidney(values)``, which returns
            ``(prediction, probabilities)`` or the string
            ``"Model not trained"``.
    """
    st.header("🫀 Kidney Disease Prediction")

    st.markdown("""
    Enter the patient's clinical test results and medical history to predict chronic kidney disease.
    """)

    col1, col2 = st.columns(2)

    with col1:
        age = st.number_input("Age", min_value=1, max_value=100, value=45)
        bp = st.number_input("Blood Pressure (mm/Hg)", min_value=50, max_value=200, value=120)
        # The whole range spans 0.02, so Streamlit's float defaults (step 0.01,
        # two decimals) are too coarse; use three decimals and a 0.005 step.
        sg = st.number_input("Specific Gravity", min_value=1.005, max_value=1.025, value=1.015,
                             step=0.005, format="%.3f")
        al = st.selectbox("Albumin (0-5)", [0, 1, 2, 3, 4, 5])
        su = st.selectbox("Sugar (0-5)", [0, 1, 2, 3, 4, 5])
        rbc = st.selectbox("Red Blood Cells (0:normal, 1:abnormal)", [0, 1])
        pc = st.selectbox("Pus Cells (0:normal, 1:abnormal)", [0, 1])
        pcc = st.selectbox("Pus Cell Clumps (0:not present, 1:present)", [0, 1])
        ba = st.selectbox("Bacteria (0:not present, 1:present)", [0, 1])

    with col2:
        bgr = st.number_input("Blood Glucose Random (mg/dL)", min_value=50, max_value=300, value=120)
        bu = st.number_input("Blood Urea (mg/dL)", min_value=10, max_value=200, value=40)
        sc = st.number_input("Serum Creatinine (mg/dL)", min_value=0.5, max_value=15.0, value=1.2)
        sod = st.number_input("Sodium (mEq/L)", min_value=100, max_value=160, value=140)
        pot = st.number_input("Potassium (mEq/L)", min_value=2.0, max_value=8.0, value=4.5)
        hemo = st.number_input("Hemoglobin (g/dL)", min_value=3.0, max_value=20.0, value=12.5)
        pcv = st.number_input("Packed Cell Volume", min_value=10, max_value=60, value=40)
        wc = st.number_input("White Blood Cell Count (cells/cumm)", min_value=2000, max_value=20000, value=8000)
        rc = st.number_input("Red Blood Cell Count (millions/cmm)", min_value=2.0, max_value=8.0, value=4.5)

    col3, col4 = st.columns(2)

    with col3:
        htn = st.selectbox("Hypertension (0:no, 1:yes)", [0, 1])
        dm = st.selectbox("Diabetes Mellitus (0:no, 1:yes)", [0, 1])
        cad = st.selectbox("Coronary Artery Disease (0:no, 1:yes)", [0, 1])

    with col4:
        appet = st.selectbox("Appetite (0:good, 1:poor)", [0, 1])
        pe = st.selectbox("Pedal Edema (0:no, 1:yes)", [0, 1])
        ane = st.selectbox("Anemia (0:no, 1:yes)", [0, 1])

    if st.button("🔍 Predict Kidney Disease", type="primary"):
        # Prepare input data in the model's feature order
        input_data = [
            age, bp, sg, al, su, rbc, pc, pcc, ba, bgr,
            bu, sc, sod, pot, hemo, pcv, wc, rc, htn, dm,
            cad, appet, pe, ane
        ]

        # Make prediction
        result = predictor.predict_kidney(input_data)
        if isinstance(result, str):
            # The predictor signals a missing model with a message string;
            # show it instead of failing on tuple unpacking.
            st.error(result)
            return
        prediction, probabilities = result

        # Display results
        st.markdown("---")
        st.subheader("🎯 Prediction Results")

        # NOTE(review): the opening and closing div are emitted by separate
        # st.markdown calls, so they presumably do not wrap the alert between
        # them - confirm the rendered output.
        if prediction == 1:
            st.markdown('<div class="prediction-positive">', unsafe_allow_html=True)
            st.error("🚨 HIGH PROBABILITY OF CHRONIC KIDNEY DISEASE")
            st.markdown('</div>', unsafe_allow_html=True)
        else:
            st.markdown('<div class="prediction-negative">', unsafe_allow_html=True)
            st.success("✅ LOW PROBABILITY OF CHRONIC KIDNEY DISEASE")
            st.markdown('</div>', unsafe_allow_html=True)

        # Show probabilities
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Probability of Kidney Disease", f"{probabilities[1]:.2%}")
        with col2:
            st.metric("Probability of Healthy", f"{probabilities[0]:.2%}")

        # Show important factors: fixed-threshold checks on the entered
        # values, independent of the model's prediction.
        st.subheader("📊 Key Risk Factors")
        risk_factors = [
            ("Serum Creatinine Level", "High" if sc > 1.4 else "Normal"),
            ("Blood Urea Level", "High" if bu > 40 else "Normal"),
            ("Hemoglobin Level", "Low" if hemo < 12 else "Normal"),
            ("Blood Pressure", "High" if bp > 140 else "Normal")
        ]

        for factor, status in risk_factors:
            st.write(f"**{factor}**: {status}")

def show_liver_page(predictor):
    """Display Liver disease prediction page.

    Renders the 10 liver-panel inputs and, when the button is pressed, shows
    the predicted class, the class probabilities and a reference-range check
    of five of the entered values.

    Args:
        predictor: object exposing ``predict_liver(values)``, which returns
            ``(prediction, probabilities)`` or the string
            ``"Model not trained"``. If it also has a
            ``label_encoders['liver_gender']`` encoder, that encoder is used
            to encode the selected gender.
    """
    st.header("🫁 Liver Disease Prediction")

    st.markdown("""
    Enter the patient's liver function test results and demographic information.
    """)

    col1, col2 = st.columns(2)

    with col1:
        age = st.number_input("Age", min_value=1, max_value=100, value=45, key="liver_age")
        gender = st.selectbox("Gender", ["Male", "Female"])
        total_bilirubin = st.number_input("Total Bilirubin (mg/dL)", min_value=0.1, max_value=10.0, value=0.8)
        direct_bilirubin = st.number_input("Direct Bilirubin (mg/dL)", min_value=0.1, max_value=5.0, value=0.2)
        alkaline_phosphotase = st.number_input("Alkaline Phosphotase (IU/L)", min_value=50, max_value=500, value=150)

    with col2:
        alamine_aminotransferase = st.number_input("Alamine Aminotransferase (SGPT) (IU/L)",
                                                 min_value=10, max_value=200, value=30)
        aspartate_aminotransferase = st.number_input("Aspartate Aminotransferase (SGOT) (IU/L)",
                                                   min_value=10, max_value=200, value=32)
        total_proteins = st.number_input("Total Proteins (g/dL)", min_value=4.0, max_value=9.0, value=6.5)
        albumin = st.number_input("Albumin (g/dL)", min_value=2.0, max_value=5.5, value=4.0)
        ag_ratio = st.number_input("Albumin and Globulin Ratio", min_value=0.5, max_value=3.0, value=1.2)

    if st.button("🔍 Predict Liver Disease", type="primary"):
        # Encode gender with the encoder fitted during training when one is
        # available, so the code always matches what the model was trained on.
        # The fixed rule below is the fallback for when no encoder is stored.
        gender_encoder = getattr(predictor, 'label_encoders', {}).get('liver_gender')
        if gender_encoder is not None:
            gender_encoded = int(gender_encoder.transform([gender])[0])
        else:
            gender_encoded = 1 if gender == "Male" else 0

        # Prepare input data in the model's feature order
        input_data = [
            age, gender_encoded, total_bilirubin, direct_bilirubin, alkaline_phosphotase,
            alamine_aminotransferase, aspartate_aminotransferase, total_proteins,
            albumin, ag_ratio
        ]

        # Make prediction
        result = predictor.predict_liver(input_data)
        if isinstance(result, str):
            # The predictor signals a missing model with a message string;
            # show it instead of failing on tuple unpacking.
            st.error(result)
            return
        prediction, probabilities = result

        # Display results
        st.markdown("---")
        st.subheader("🎯 Prediction Results")

        # NOTE(review): the opening and closing div are emitted by separate
        # st.markdown calls, so they presumably do not wrap the alert between
        # them - confirm the rendered output.
        if prediction == 1:
            st.markdown('<div class="prediction-positive">', unsafe_allow_html=True)
            st.error("🚨 HIGH PROBABILITY OF LIVER DISEASE")
            st.markdown('</div>', unsafe_allow_html=True)
        else:
            st.markdown('<div class="prediction-negative">', unsafe_allow_html=True)
            st.success("✅ LOW PROBABILITY OF LIVER DISEASE")
            st.markdown('</div>', unsafe_allow_html=True)

        # Show probabilities
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Probability of Liver Disease", f"{probabilities[1]:.2%}")
        with col2:
            st.metric("Probability of Healthy", f"{probabilities[0]:.2%}")

        # Show liver function analysis: (test name, value, low, high, unit);
        # a value inside [low, high] is reported as normal.
        st.subheader("📊 Liver Function Analysis")
        analysis_points = [
            ("Total Bilirubin", total_bilirubin, 0.3, 1.2, "mg/dL"),
            ("Direct Bilirubin", direct_bilirubin, 0.1, 0.3, "mg/dL"),
            ("Alkaline Phosphatase", alkaline_phosphotase, 44, 147, "IU/L"),
            ("ALT (SGPT)", alamine_aminotransferase, 7, 56, "IU/L"),
            ("AST (SGOT)", aspartate_aminotransferase, 10, 40, "IU/L")
        ]

        for test, value, low, high, unit in analysis_points:
            status = "🟢 Normal" if low <= value <= high else "🔴 Abnormal"
            st.write(f"**{test}**: {value} {unit} - {status}")

def _draw_class_pie(axis, class_column, class_names):
    """Draw a pie of class frequencies, labelling each slice by its class value.

    ``value_counts()`` orders its result by frequency, so labels are looked up
    from the class value of each slice rather than assigned by position.
    """
    counts = class_column.value_counts().sort_index()
    slice_labels = [class_names[value] for value in counts.index]
    axis.pie(counts.values, labels=slice_labels, autopct='%1.1f%%')
    axis.set_title('Disease Distribution')


def show_analysis_page(predictor):
    """Display data analysis page.

    Shows fixed performance figures, class and feature distributions of the
    synthetic data from ``predictor.generate_sample_data()``, and fixed
    feature-importance lists.

    Args:
        predictor: object exposing ``generate_sample_data()`` returning the
            ``'parkinsons'``, ``'kidney'`` and ``'liver'`` DataFrames.
    """
    st.header("📊 Data Analysis & Model Performance")

    # Generate sample data for visualization
    sample_data = predictor.generate_sample_data()

    # Model performance metrics (simulated: fixed values, not measured here)
    st.subheader("📈 Model Performance Metrics")

    performance = (
        ("Parkinson's Model Accuracy", "92%", "89%", "94%"),
        ("Kidney Disease Model Accuracy", "87%", "85%", "88%"),
        ("Liver Disease Model Accuracy", "84%", "82%", "85%"),
    )
    for column, (accuracy_label, accuracy, precision, recall) in zip(st.columns(3), performance):
        with column:
            st.metric(accuracy_label, accuracy)
            st.metric("Precision", precision)
            st.metric("Recall", recall)

    st.markdown("---")

    # Data distributions
    st.subheader("📋 Data Distributions")

    tab1, tab2, tab3 = st.tabs(["Parkinson's", "Kidney Disease", "Liver Disease"])

    with tab1:
        st.write("**Parkinson's Disease Data Distribution**")
        fig, ax = plt.subplots(1, 2, figsize=(12, 4))

        # Status distribution (0 = healthy, 1 = Parkinson's)
        _draw_class_pie(ax[0], sample_data['parkinsons']['status'],
                        {0: 'Healthy', 1: 'Parkinson\'s'})

        # Feature distribution
        sample_data['parkinsons']['MDVP:Fo(Hz)'].hist(ax=ax[1], bins=20)
        ax[1].set_title('MDVP:Fo(Hz) Distribution')
        ax[1].set_xlabel('Frequency (Hz)')
        ax[1].set_ylabel('Count')

        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps it alive across reruns.
        plt.close(fig)

    with tab2:
        st.write("**Kidney Disease Data Distribution**")
        fig, ax = plt.subplots(1, 2, figsize=(12, 4))

        # Classification distribution (0 = healthy, 1 = CKD)
        _draw_class_pie(ax[0], sample_data['kidney']['classification'],
                        {0: 'Healthy', 1: 'CKD'})

        # Age distribution
        sample_data['kidney']['age'].hist(ax=ax[1], bins=20)
        ax[1].set_title('Age Distribution')
        ax[1].set_xlabel('Age')
        ax[1].set_ylabel('Count')

        st.pyplot(fig)
        plt.close(fig)

    with tab3:
        st.write("**Liver Disease Data Distribution**")
        fig, ax = plt.subplots(1, 2, figsize=(12, 4))

        # Dataset distribution (1 = disease, 2 = healthy)
        _draw_class_pie(ax[0], sample_data['liver']['Dataset'],
                        {1: 'Disease', 2: 'Healthy'})

        # Gender distribution
        gender_counts = sample_data['liver']['Gender'].value_counts()
        ax[1].bar(gender_counts.index, gender_counts.values)
        ax[1].set_title('Gender Distribution')
        ax[1].set_xlabel('Gender')
        ax[1].set_ylabel('Count')

        st.pyplot(fig)
        plt.close(fig)

    st.markdown("---")

    # Feature importance (fixed illustrative values, not read from the models)
    st.subheader("🔍 Feature Importance (Top 5 per Disease)")

    importance_tables = (
        ("**Parkinson's Disease**", [
            ("PPE", 0.85),
            ("Spread1", 0.78),
            ("MDVP:Fo(Hz)", 0.72),
            ("HNR", 0.65),
            ("RPDE", 0.58)
        ]),
        ("**Kidney Disease**", [
            ("Serum Creatinine", 0.82),
            ("Blood Urea", 0.76),
            ("Hemoglobin", 0.71),
            ("Blood Pressure", 0.65),
            ("Age", 0.58)
        ]),
        ("**Liver Disease**", [
            ("Total Bilirubin", 0.79),
            ("Direct Bilirubin", 0.74),
            ("Albumin", 0.68),
            ("Age", 0.61),
            ("Alkaline Phosphatase", 0.55)
        ]),
    )
    for column, (title, ranked_features) in zip(st.columns(3), importance_tables):
        with column:
            st.write(title)
            for feature, importance in ranked_features:
                st.write(f"• {feature}: {importance:.2f}")

# Entry point: build and render the app when this file is run as a script.
if __name__ == "__main__":
    main()