Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder, StandardScaler | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import accuracy_score, classification_report | |
| import streamlit as st | |
| # Load the data | |
def load_data(path: str = 'data.csv') -> pd.DataFrame:
    """Read the dataset from a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``'data.csv'`` (resolved
            relative to the working directory), so existing zero-argument
            calls keep their behavior.

    Returns:
        The parsed CSV contents as returned by ``pandas.read_csv``.
    """
    return pd.read_csv(path)
# Load the dataset for this script run.
data = load_data()

# Report the number of missing values per column (informational only; the
# rows are not dropped or imputed here).
st.write("Checking for missing values...")
missing_values = data.isnull().sum()
st.write(missing_values)

# Encode categorical variables: every object-dtype column is replaced in place
# by integer codes, and its fitted encoder is kept so the same mapping can be
# applied to user input later.
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split the data into features and target.
X = data.drop(columns=['Disease Risk'])
y = data['Disease Risk']

# Split into training and testing sets BEFORE fitting the scaler, so that the
# scaling statistics are learned from the training rows only and nothing from
# the held-out rows leaks into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features using training-set statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Scaled copy of the full feature matrix, kept under its original name.
X_scaled = scaler.transform(X)

# Initialize the model.
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model.
st.write("Training the model...")
model.fit(X_train, y_train)

# Predict on the held-out rows.
y_pred = model.predict(X_test)

# Evaluate the model.
accuracy = accuracy_score(y_test, y_pred)
st.write(f'Model Accuracy: {accuracy}')
# The report is a whitespace-aligned plain-text table; st.text keeps that
# layout, whereas st.write would interpret the string as Markdown.
st.text(classification_report(y_test, y_pred))
| # Function to get predictions | |
def predict_disease_risk(input_data):
    """Predict the disease risk for a single user record.

    Args:
        input_data: Mapping of feature column name to the raw (un-encoded)
            value for one record, e.g. ``{'Age': 30, 'Gender': 'Male'}``.

    Returns:
        The predicted class for the record. If the target column was
        label-encoded (an encoder for ``'Disease Risk'`` exists in
        ``label_encoders``), the prediction is decoded back to its original
        label; otherwise the model's raw prediction is returned.

    Raises:
        ValueError: If the fitted scaler recorded its feature names and
            ``input_data`` does not provide all of them.
    """
    target_column = 'Disease Risk'
    input_df = pd.DataFrame([input_data])

    # label_encoders has an entry for every object-dtype dataset column, which
    # can include the target; only encode columns actually present in the
    # input instead of failing with KeyError on the absent ones.
    for column, le in label_encoders.items():
        if column in input_df.columns:
            input_df[column] = le.transform(input_df[column])

    # When the scaler knows its training column names, present the input in
    # exactly that order and fail early with a readable message if any
    # feature is missing.
    expected = getattr(scaler, 'feature_names_in_', None)
    if expected is not None:
        missing = [name for name in expected if name not in input_df.columns]
        if missing:
            raise ValueError(
                f'input_data is missing required feature(s): {missing}'
            )
        input_df = input_df[list(expected)]

    input_scaled = scaler.transform(input_df)
    prediction = model.predict(input_scaled)[0]

    # Map the integer class code back to the original label when possible.
    target_encoder = label_encoders.get(target_column)
    if target_encoder is not None:
        return target_encoder.inverse_transform([prediction])[0]
    return prediction
# Streamlit GUI: main-page title, sidebar input widgets, and a predict button.
st.title('Health Risk Prediction Based on Diet')

# Sidebar widgets collecting the user's profile (a simplified subset; extend
# to match the dataset's columns as needed).
with st.sidebar:
    st.title('User Input')
    age = st.slider('Age', 18, 100, 30)
    gender = st.radio('Gender', options=['Male', 'Female'])
    meals_per_day = st.slider('Meals per Day', 1, 10, 3)
    diet = st.selectbox(
        'Diet Type',
        options=[
            'Pollotarian',
            'Vegetarian',
            'Pescatarian',
            'Non-Vegetarian',
            'Eggetarian',
        ],
    )

# Record passed to the predictor, keyed by dataset column name.
# Add more entries here when more input widgets are added above.
input_data = dict([
    ('Age', age),
    ('Gender', gender),
    ('Meals per Day', meals_per_day),
    ('Diet Type', diet),
])

# Run the prediction only on the script run in which the button was clicked.
predict_clicked = st.button('Predict Disease Risk')
if predict_clicked:
    prediction = predict_disease_risk(input_data)
    st.write(f'Predicted Disease Risk: {prediction}')