# Hemg's picture
# app.py
# 03c774c verified
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.linear_model import LinearRegression
from huggingface_hub import HfApi, login
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
# Hugging Face Hub repository and the artifact file names stored in it.
REPO_ID = "Hemg/heightweightprediction"
MoDEL_FILENAME = "hdpred.joblib"
SCALER_FILENAME = "scalerpred.joblib"

# Download each artifact from the Hub and deserialize it with joblib.
# The model is fetched and loaded first, then the scaler.
_model_path = hf_hub_download(repo_id=REPO_ID, filename=MoDEL_FILENAME)
model = joblib.load(_model_path)
_scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
scaler = joblib.load(_scaler_path)
def handle_missing_values(df):
    """Fill missing values in ``df`` in place and return it.

    Object-dtype columns are filled with the first mode of the column.
    ``float64``/``int64`` columns are filled with the median when the
    column's skew is negative, and with the mean otherwise (positive, zero,
    or undefined skew).

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    categorical_columns = df.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        modes = df[col].mode()
        # A column with no non-missing values has an empty mode; indexing it
        # would raise, so leave such a column untouched instead of crashing.
        if modes.empty:
            continue
        df[col] = df[col].fillna(modes.iloc[0])

    numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns
    for feature in numerical_columns:
        column = df[feature]
        # Comparing a NaN skew with 0 is False, so an undefined skew falls
        # through to the mean, exactly like zero or positive skew.
        fill_value = column.median() if column.skew() < 0 else column.mean()
        df[feature] = column.fillna(fill_value)
    return df
def encode_categorical_columns(df):
    """Integer-encode every object-dtype column of ``df``.

    Each object column is overwritten in place with the codes produced by
    ``LabelEncoder.fit_transform``. The frame is then passed through
    ``pd.get_dummies`` for any object columns that were not label-encoded.

    Args:
        df: DataFrame to encode. Its object columns are modified in place.

    Returns:
        The DataFrame returned by ``pd.get_dummies``.
    """
    encoder = LabelEncoder()
    object_cols = df.select_dtypes(include=['object']).columns
    for name in object_cols:
        # The same encoder instance is refit for every column.
        df[name] = encoder.fit_transform(df[name])

    # Every object column was label-encoded above, so this difference is
    # empty and in practice no column reaches the one-hot step.
    still_object = df.select_dtypes(include=['object']).columns
    one_hot_cols = still_object.difference(object_cols)
    return pd.get_dummies(df, columns=one_hot_cols, drop_first=True)
def linear_regression_model():
    """Create and return a new ``LinearRegression`` estimator."""
    estimator = LinearRegression()
    return estimator
def preprocess_and_train(csv_file_path, dependent_var_name, independent_variables):
    """Train a linear regression from a CSV file and save model and scaler.

    The CSV is loaded, missing values are filled, object columns are
    encoded, and the data is split 80/20 (``random_state=42``). A
    ``StandardScaler`` is fitted on the training features, the model is
    fitted on the scaled training data, and the test-set mean squared error
    is printed.

    Args:
        csv_file_path: Path of the CSV file to read.
        dependent_var_name: Name of the target column.
        independent_variables: Names of the feature columns.

    Side effects:
        Prints the test-set MSE and writes ``hdpred.joblib`` (model) and
        ``scalerpred.joblib`` (scaler) to the current working directory.
    """
    df = pd.read_csv(csv_file_path)
    df = handle_missing_values(df)
    df = encode_categorical_columns(df)

    X = df[independent_variables]
    y = df[dependent_var_name]

    # Split before scaling so the scaler's statistics come from the training
    # rows only; fitting it on all rows would leak test data into training.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # The model must be fitted before it can predict; predicting first
    # raises NotFittedError.
    model = linear_regression_model()
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error on Test Set: {mse}")

    # Persist both artifacts so inference can apply the same scaling.
    joblib.dump(model, 'hdpred.joblib')
    joblib.dump(scaler, 'scalerpred.joblib')
def predict_weight(Gender, Height):
    """Predict a weight from gender and height with the loaded model.

    Args:
        Gender: Either "Male" or "Female" (the options offered by the UI).
        Height: Height value from the UI slider (labelled in inches).

    Returns:
        A string of the form ``"Predicted weight is: <value> pounds"``.

    Raises:
        gr.Error: If ``Gender`` is not a known label or ``Height`` is
            missing, so the UI shows a readable message.
    """
    # LabelEncoder numbers classes in sorted order, so refitting it on a
    # single-row frame always produces code 0 and the gender is ignored.
    # Use the fixed codes instead.
    # NOTE(review): assumes the training data contained exactly the labels
    # "Female" and "Male" - confirm against the training CSV.
    gender_codes = {"Female": 0, "Male": 1}
    if Gender not in gender_codes:
        raise gr.Error("Please select a gender.")
    if Height is None:
        raise gr.Error("Please provide a height.")

    feature_names = ["Gender", "Height"]
    input_df = pd.DataFrame(
        [[gender_codes[Gender], Height]], columns=feature_names
    )
    # Scale the input data using the loaded scaler
    scaled_input = scaler.transform(input_df)
    # Make predictions using the loaded model
    prediction = model.predict(scaled_input)[0]
    return f"Predicted weight is: {prediction:,.2f} pounds"
# Build the input widgets first, then wire them into the Gradio interface.
_gender_input = gr.Radio(
    ["Male", "Female"],
    label="Gender",
    info="What's your gender?",
)
_height_input = gr.Slider(
    minimum=50,
    maximum=75,
    step=1,
    label="Height",
    info="What's your height (inches)?",
)

# The interface calls predict_weight with the two widget values and shows
# the returned string as text.
iface = gr.Interface(
    fn=predict_weight,
    inputs=[_gender_input, _height_input],
    outputs="text",
    title="Weight Prediction",
    description="Predict Weight based on Gender and Height",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(inline=False)