Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import os | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import mean_squared_error | |
| import joblib | |
| from sklearn.linear_model import LinearRegression | |
| from huggingface_hub import HfApi, login | |
| import gradio as gr | |
| import numpy as np | |
| from huggingface_hub import hf_hub_download | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| import joblib | |
# Hugging Face Hub coordinates of the published artifacts.
REPO_ID = "Hemg/heightweightprediction"  # hugging face repo ID
MoDEL_FILENAME = "hdpred.joblib"  # model file name
SCALER_FILENAME = "scalerpred.joblib"  # scaler file name

# Fetch each artifact from the Hub repo and deserialize it, model first and
# scaler second.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only point REPO_ID at a repository you trust.
model, scaler = (
    joblib.load(hf_hub_download(repo_id=REPO_ID, filename=artifact_name))
    for artifact_name in (MoDEL_FILENAME, SCALER_FILENAME)
)
def handle_missing_values(df):
    """Fill missing values in ``df`` column by column and return it.

    Object-dtype columns are filled with their most frequent value. A column
    with no non-missing value at all has no mode and is left untouched instead
    of raising.

    ``float64``/``int64`` columns are filled with the median when the column's
    skewness is negative and with the mean otherwise (positive, zero or
    undefined skewness).

    Args:
        df: The DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object, with missing values filled where possible.
    """
    categorical_columns = df.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        modes = df[col].mode()
        # mode() is empty when every value in the column is missing.
        if not modes.empty:
            df[col] = df[col].fillna(modes.iloc[0])

    numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns
    for feature in numerical_columns:
        column = df[feature]
        # A NaN skewness compares False here, so it falls through to the mean.
        fill_value = column.median() if column.skew() < 0 else column.mean()
        df[feature] = column.fillna(fill_value)

    return df
def encode_categorical_columns(df):
    """Label-encode every object-dtype column of ``df`` and return it.

    Each column is encoded with its own ``LabelEncoder`` fitted on the values
    present in that column of the frame passed in. The integer codes therefore
    depend on the data of this call: a frame with a single distinct value in a
    column always encodes that value as 0.

    No one-hot encoding is applied. Every object column is label-encoded, so
    there is never a remaining object column to expand into dummy variables.

    Args:
        df: The DataFrame to encode. It is modified in place.

    Returns:
        The same DataFrame, with object columns replaced by integer codes.
    """
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col])
    return df
def linear_regression_model():
    """Build and return a new, unfitted ``LinearRegression`` estimator."""
    estimator = LinearRegression()
    return estimator
def preprocess_and_train(csv_file_path, dependent_var_name, independent_variables):
    """Train a linear regression on a CSV file and save the model and scaler.

    The CSV is loaded, missing values are filled, object columns are
    label-encoded, the features are standard-scaled, and a linear regression
    is fitted on an 80/20 train/test split (``random_state=42``).

    Args:
        csv_file_path: Path of the CSV file, passed to ``pd.read_csv``.
        dependent_var_name: Label of the target column.
        independent_variables: Column label(s) selecting the feature columns.

    Side effects:
        Prints the mean squared error on the test split and writes the fitted
        model to ``hdpred.joblib`` and the fitted scaler to
        ``scalerpred.joblib`` (both relative paths).
    """
    df = pd.read_csv(csv_file_path)
    df = handle_missing_values(df)
    df = encode_categorical_columns(df)

    X = df[independent_variables]
    y = df[dependent_var_name]

    # NOTE(review): the scaler is fitted on all rows before the split, so the
    # held-out rows contribute to the scaling statistics.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42
    )

    # The model must be fitted before it is evaluated: calling predict() on an
    # unfitted estimator raises NotFittedError.
    model = linear_regression_model()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error on Test Set: {mse}")

    # Persist both artifacts; prediction needs the scaler as well as the model.
    joblib.dump(model, 'hdpred.joblib')
    joblib.dump(scaler, 'scalerpred.joblib')
def predict_weight(Gender, Height):
    """Predict a weight from gender and height with the loaded model.

    Args:
        Gender: ``"Male"`` or ``"Female"`` (the choices offered by the UI).
        Height: Height in inches.

    Returns:
        A message containing the predicted weight in pounds, formatted with
        two decimals.

    Raises:
        ValueError: If ``Gender`` is not a known label or ``Height`` is missing.
    """
    # Fitting a LabelEncoder on this single-row input would encode whichever
    # gender was chosen as 0, so the gender would never influence the result.
    # Use a fixed mapping instead; LabelEncoder assigns codes in sorted label
    # order, hence Female -> 0 and Male -> 1.
    # NOTE(review): assumes the training data's Gender column held exactly
    # these two labels - confirm against the training CSV.
    gender_codes = {"Female": 0, "Male": 1}
    if Gender not in gender_codes:
        raise ValueError(
            f"Gender must be one of {sorted(gender_codes)}, got {Gender!r}"
        )
    if Height is None:
        raise ValueError("Height is required")

    input_df = pd.DataFrame(
        [[gender_codes[Gender], Height]], columns=["Gender", "Height"]
    )

    # Scale the input data using the loaded scaler
    scaled_input = scaler.transform(input_df)

    # Make predictions using the loaded model
    prediction = model.predict(scaled_input)[0]
    return f"Predicted weight is: {prediction:,.2f} pounds"
# Gradio front end: a radio button for gender and a slider for height, wired
# to predict_weight, with the result shown as plain text.
_gender_input = gr.Radio(
    ["Male", "Female"], label="Gender", info="What's your gender?"
)
_height_input = gr.Slider(
    minimum=50, maximum=75, step=1, label="Height",
    info="What's your height (inches)?",
)

iface = gr.Interface(
    fn=predict_weight,
    inputs=[_gender_input, _height_input],
    outputs="text",
    title="Weight Prediction",
    description="Predict Weight based on Gender and Height",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(inline=False)