"""Streamlit app that predicts 'Disease Risk' from diet-related inputs.

The script loads ``data.csv``, label-encodes every object-typed column,
trains a random-forest classifier on the remaining columns, reports its
hold-out accuracy and then exposes a sidebar form for single predictions.
"""

import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

TARGET_COLUMN = 'Disease Risk'

# ``st.cache`` is deprecated in favour of ``st.cache_data``; fall back to the
# old decorator only on Streamlit versions that predate the new API.
_cache_data = getattr(st, 'cache_data', None) or st.cache


# Load the data
@_cache_data
def load_data():
    """Read the training data from ``data.csv`` (cached across reruns)."""
    return pd.read_csv('data.csv')


# Work on a copy: the encoding below writes into the frame, and mutating the
# object held by the cache would corrupt it for later reruns.
data = load_data().copy()

# Check for missing values (optional, but good practice)
st.write("Checking for missing values...")
missing_values = data.isnull().sum()
st.write(missing_values)

# Encode categorical variables. One encoder per column is kept so the same
# mapping can be applied to user input (and reversed for the target).
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split the data into features and target
X = data.drop(columns=[TARGET_COLUMN])
y = data[TARGET_COLUMN]
feature_columns = list(X.columns)

# Split the data into training and testing sets *before* scaling so that the
# scaler statistics are computed from the training rows only (no leakage of
# test-set information into the evaluation).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model (optional to train here, but recommended to show steps)
# NOTE: Streamlit re-executes the script on every interaction, so the model
# is retrained each time; consider caching the fitted model if this is slow.
st.write("Training the model...")
model.fit(X_train, y_train)

# Make predictions (optional for initial run, but necessary for GUI)
y_pred = model.predict(X_test)

# Evaluate the model (optional, but good for understanding performance)
accuracy = accuracy_score(y_test, y_pred)
st.write(f'Model Accuracy: {accuracy}')
# The report is a fixed-width text table; st.text keeps its layout intact,
# whereas st.write would render it as Markdown and collapse the columns.
st.text(classification_report(y_test, y_pred))


# Function to get predictions
def predict_disease_risk(input_data):
    """Predict the disease risk for a single set of user inputs.

    Args:
        input_data: Mapping of feature column name to raw (un-encoded)
            value. It must provide every column the model was trained on;
            extra keys are ignored.

    Returns:
        The predicted value of the 'Disease Risk' column. If that column
        was label-encoded during training, the original label is returned
        rather than its integer code.

    Raises:
        ValueError: If a training feature is missing from ``input_data`` or
            a categorical value was not seen during training.
    """
    input_df = pd.DataFrame([input_data])

    missing = [name for name in feature_columns if name not in input_df.columns]
    if missing:
        raise ValueError(f'Missing input features: {missing}')

    # Same columns, same order as the frame the scaler was fitted on.
    input_df = input_df[feature_columns].copy()

    # Encode only feature columns; the target's encoder must not be applied
    # here because the target is not part of the input.
    for column in feature_columns:
        encoder = label_encoders.get(column)
        if encoder is not None:
            input_df[column] = encoder.transform(input_df[column])

    input_scaled = scaler.transform(input_df)
    prediction = model.predict(input_scaled)

    target_encoder = label_encoders.get(TARGET_COLUMN)
    if target_encoder is not None:
        return target_encoder.inverse_transform(prediction)[0]
    return prediction[0]


# Streamlit GUI
st.title('Health Risk Prediction Based on Diet')

# User inputs (simplified for demonstration, customize as needed)
st.sidebar.title('User Input')
age = st.sidebar.slider('Age', min_value=18, max_value=100, value=30)
gender = st.sidebar.radio('Gender', ['Male', 'Female'])
meals_per_day = st.sidebar.slider('Meals per Day', min_value=1, max_value=10, value=3)
diet = st.sidebar.selectbox(
    'Diet Type',
    ['Pollotarian', 'Vegetarian', 'Pescatarian', 'Non-Vegetarian', 'Eggetarian'],
)
# Add more inputs based on your specific dataset columns

input_data = {
    'Age': age,
    'Gender': gender,
    'Meals per Day': meals_per_day,
    'Diet Type': diet,
    # Add more keys based on your specific dataset columns
}

# Prediction button
if st.button('Predict Disease Risk'):
    try:
        prediction = predict_disease_risk(input_data)
    except ValueError as exc:
        # Missing features or unseen categories: show a message instead of
        # crashing the app with a traceback.
        st.error(f'Could not compute a prediction: {exc}')
    else:
        st.write(f'Predicted Disease Risk: {prediction}')