# manma / model.py
# ayush-goud's picture
# Upload 4 files
# ed26e7d verified
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import streamlit as st
# Load the data
# ``st.cache`` is deprecated in favour of ``st.cache_data`` (Streamlit 1.18+).
# Prefer the new decorator when it exists and fall back to the legacy one so
# the script still starts on older Streamlit installs.
_cache_data = getattr(st, "cache_data", None) or st.cache


@_cache_data
def load_data():
    """Read the training dataset from ``data.csv``.

    The result is cached by Streamlit so the file is not re-read on every
    script rerun. Under ``st.cache_data`` each call receives its own copy of
    the cached frame, so callers may modify the result without corrupting
    the cache.

    Returns:
        pandas.DataFrame: The contents of ``data.csv``.

    Raises:
        FileNotFoundError: If ``data.csv`` is not in the working directory.
    """
    return pd.read_csv('data.csv')
# Work on a private copy. The label encoding below overwrites columns in
# place; with the legacy ``st.cache`` the loader hands back the cached object
# itself, so mutating it would leave no text columns (and therefore no fitted
# encoders) on the next Streamlit rerun.
data = load_data().copy()

# Check for missing values (optional, but good practice)
st.write("Checking for missing values...")
missing_values = data.isnull().sum()
st.write(missing_values)

# Encode categorical variables. Each fitted encoder is kept so the same
# string -> integer mapping can be applied to user input at prediction time.
# NOTE: every object-dtype column is encoded, which includes the target
# column when it is stored as text.
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split the data into features and target
X = data.drop(columns=['Disease Risk'])
y = data['Disease Risk']

# Split into training and testing sets BEFORE scaling, so the scaler's
# mean/variance are learned from the training rows only and no test-set
# statistics leak into the fitted preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features (fit on train, apply to test)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Scaled view of the full feature matrix, kept so the name stays available.
X_scaled = scaler.transform(X)

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model (optional to train here, but recommended to show steps)
st.write("Training the model...")
model.fit(X_train, y_train)

# Make predictions (optional for initial run, but necessary for GUI)
y_pred = model.predict(X_test)

# Evaluate the model (optional, but good for understanding performance)
accuracy = accuracy_score(y_test, y_pred)
st.write(f'Model Accuracy: {accuracy}')
# The report is a whitespace-aligned plain-text table; ``st.write`` would
# render it as Markdown and collapse the alignment, so show it verbatim.
st.text(classification_report(y_test, y_pred))
# Function to get predictions
def predict_disease_risk(input_data):
    """Predict the disease risk for one set of raw user inputs.

    Args:
        input_data: Mapping of feature column name to its raw (un-encoded)
            value, e.g. ``{'Age': 30, 'Gender': 'Male'}``.

    Returns:
        The predicted ``'Disease Risk'`` value. If the target column was
        label-encoded during training, the original label is returned;
        otherwise the model's prediction is returned unchanged.

    Raises:
        ValueError: If a feature the scaler was fitted on is missing from
            ``input_data``, or if an encoder's ``transform`` rejects a
            categorical value it was not fitted on.
    """
    input_df = pd.DataFrame([input_data])

    # ``label_encoders`` holds one encoder per text column of the training
    # data, which can include the target. Only encode columns that were
    # actually supplied, instead of failing with KeyError on the others.
    for column, le in label_encoders.items():
        if column in input_df.columns:
            input_df[column] = le.transform(input_df[column])

    # Present the features in the order the scaler was fitted with; the
    # order of keys in ``input_data`` is not guaranteed to match it.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        missing = [name for name in feature_names if name not in input_df.columns]
        if missing:
            raise ValueError(f'Missing input features: {missing}')
        input_df = input_df[list(feature_names)]

    input_scaled = scaler.transform(input_df)
    prediction = model.predict(input_scaled)[0]

    # Map an encoded class index back to the label used in the dataset.
    target_encoder = label_encoders.get('Disease Risk')
    if target_encoder is not None:
        return target_encoder.inverse_transform([prediction])[0]
    return prediction
# ----- Streamlit front end -----
st.title('Health Risk Prediction Based on Diet')

# Sidebar widgets collecting the raw feature values. This is a simplified
# demonstration set; extend it to match the columns of your dataset.
sidebar = st.sidebar
sidebar.title('User Input')
age = sidebar.slider('Age', min_value=18, max_value=100, value=30)
gender = sidebar.radio('Gender', ['Male', 'Female'])
meals_per_day = sidebar.slider('Meals per Day', min_value=1, max_value=10, value=3)
diet = sidebar.selectbox(
    'Diet Type',
    ['Pollotarian', 'Vegetarian', 'Pescatarian', 'Non-Vegetarian', 'Eggetarian'],
)

# Feature name -> value mapping handed to predict_disease_risk.
# Add more entries here when more widgets are added above.
input_data = dict(
    zip(
        ('Age', 'Gender', 'Meals per Day', 'Diet Type'),
        (age, gender, meals_per_day, diet),
    )
)

# Run the model only when the user asks for a prediction.
predict_clicked = st.button('Predict Disease Risk')
if predict_clicked:
    prediction = predict_disease_risk(input_data)
    st.write(f'Predicted Disease Risk: {prediction}')