# HuggingFace Space: body-measurement prediction demo (status-page residue removed)
| import pandas as pd | |
| import numpy as np | |
| from sklearn.model_selection import train_test_split, cross_val_score, KFold | |
| from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures | |
| from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor | |
| from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error | |
| from sklearn.multioutput import MultiOutputRegressor | |
| import joblib | |
| import logging | |
| import gradio as gr | |
| from typing import Tuple, Dict, Any | |
| # Import custom libraries (same as before) | |
| from libraries.fits.shirts_lib import get_fit as get_shirt_fit | |
| from libraries.sizes.shirts_lib import get_best_size as get_shirt_size | |
| # Setup logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
class EnhancedBodyMeasurementPredictor:
    """Predict a full set of body measurements from height (and optional weight).

    Pipeline: derive BMI and body-proportion ratio features -> degree-2
    polynomial expansion -> standard scaling -> one gradient-boosting
    regressor per target column (via ``MultiOutputRegressor``).

    The polynomial expander, scaler, label encoder and target-column order
    are fitted once in :meth:`train_model` and reused by :meth:`predict`.
    """

    def __init__(self):
        self.model = None            # fitted MultiOutputRegressor (set in train_model)
        self.scaler = None           # StandardScaler fitted on training features
        self.poly_features = None    # PolynomialFeatures fitted on training features
        self.label_encoder = None    # LabelEncoder for the categorical 'Size' target
        self.y_columns = None        # ordered target column names (Index)
        self.feature_columns = None  # input feature names fed to the model
        self.model_metrics = {}      # per-target hold-out metrics, filled by _evaluate_model

    def create_polynomial_features(self, X: pd.DataFrame) -> np.ndarray:
        """Expand *X* with degree-2 polynomial terms.

        Fits the expander on first use (training) and only transforms on
        subsequent calls (inference), mirroring the scaler's fit/transform split.
        """
        if self.poly_features is None:
            self.poly_features = PolynomialFeatures(degree=2, include_bias=False)
            return self.poly_features.fit_transform(X)
        return self.poly_features.transform(X)

    def preprocess_data(self, data: pd.DataFrame) -> Tuple[np.ndarray, pd.DataFrame]:
        """Engineer features and split *data* into (scaled X, raw targets).

        Works on a copy so the caller's DataFrame is not mutated (the
        previous implementation added derived columns in place).
        Raises KeyError if 'Weight', 'TotalHeight', 'ChestWidth' or 'Waist'
        are missing.
        """
        data = data.copy()
        # Derived features: BMI (kg / m^2) plus body-proportion ratios.
        data['BMI'] = data['Weight'] / ((data['TotalHeight'] / 100) ** 2)
        data['Chest_Height_Ratio'] = data['ChestWidth'] / data['TotalHeight']
        data['Waist_Height_Ratio'] = data['Waist'] / data['TotalHeight']
        self.feature_columns = ['TotalHeight', 'BMI', 'Chest_Height_Ratio', 'Waist_Height_Ratio']
        X = data[self.feature_columns]
        X_poly = self.create_polynomial_features(X)
        # Fit the scaler on first use, transform-only afterwards.
        if self.scaler is None:
            self.scaler = StandardScaler()
            X_scaled = self.scaler.fit_transform(X_poly)
        else:
            X_scaled = self.scaler.transform(X_poly)
        # Everything that is not an input feature is a prediction target
        # (this keeps 'Weight', 'Size' and all raw measurement columns).
        # Note: the original drop list repeated 'BMI'; it is already in
        # feature_columns, so dropping feature_columns alone is equivalent.
        y = data.drop(columns=self.feature_columns)
        return X_scaled, y

    def train_model(self, data: pd.DataFrame) -> None:
        """Fit the full pipeline on *data* and log hold-out metrics."""
        logger.info("Starting model training...")
        X_scaled, y = self.preprocess_data(data)
        y = y.copy()  # keep the Size encoding local to this method
        self.y_columns = y.columns
        # 'Size' is categorical; regress on its integer-encoded form and
        # decode back to labels at prediction time.
        self.label_encoder = LabelEncoder()
        y['Size'] = self.label_encoder.fit_transform(y['Size'])
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42
        )
        # One gradient-boosting regressor per target column.  (The original
        # also constructed an unused RandomForestRegressor in a "base_models"
        # list; that dead code has been removed — behavior is unchanged.)
        self.model = MultiOutputRegressor(
            GradientBoostingRegressor(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=5,
                random_state=42,
            )
        )
        self.model.fit(X_train, y_train)
        self._evaluate_model(X_test, y_test)
        logger.info("Model training completed")

    def _evaluate_model(self, X_test: np.ndarray, y_test: pd.DataFrame) -> None:
        """Compute per-target R^2 / MSE / MAE on the hold-out split."""
        y_pred = self.model.predict(X_test)
        for i, col in enumerate(self.y_columns):
            actual = y_test.iloc[:, i]
            predicted = y_pred[:, i]
            self.model_metrics[col] = {
                'r2': r2_score(actual, predicted),
                'mse': mean_squared_error(actual, predicted),
                'mae': mean_absolute_error(actual, predicted),
            }
        logger.info("Model Evaluation Results:")
        for col, metrics in self.model_metrics.items():
            # Lazy %-style args: formatting is skipped if INFO is disabled.
            logger.info("%s: R² = %.4f, MAE = %.4f", col, metrics['r2'], metrics['mae'])

    def predict(self, total_height: float, weight: float = None) -> Dict[str, Any]:
        """Predict all target measurements for a single person.

        *weight* is optional; when omitted, BMI is set to 0, which biases
        the model toward the low-BMI region of the training data (kept for
        backward compatibility — callers may omit weight).
        """
        input_data = pd.DataFrame({
            'TotalHeight': [total_height],
            'Weight': [weight if weight is not None else 0],  # placeholder for BMI calc
        })
        input_data['BMI'] = (
            input_data['Weight'] / ((input_data['TotalHeight'] / 100) ** 2)
            if weight is not None else 0
        )
        # Chest/waist ratios are unknown at inference time; zeros match how
        # the model was queried in the original implementation.
        input_data['Chest_Height_Ratio'] = 0
        input_data['Waist_Height_Ratio'] = 0
        X_poly = self.create_polynomial_features(input_data[self.feature_columns])
        X_scaled = self.scaler.transform(X_poly)
        prediction = self.model.predict(X_scaled)
        pred_dict = {col: float(val) for col, val in zip(self.y_columns, prediction[0])}
        # Decode the regressed Size back to its label, clipping to the valid
        # encoder range so extreme inputs cannot crash inverse_transform
        # with an out-of-range class index.
        size_code = int(np.clip(round(pred_dict['Size']), 0, len(self.label_encoder.classes_) - 1))
        pred_dict['Size'] = self.label_encoder.inverse_transform([size_code])[0]
        return pred_dict
| # Initialize predictor as a global variable | |
| predictor = EnhancedBodyMeasurementPredictor() | |
def gradio_predict(total_height: float, weight: float = None):
    """Gradio adapter: forward the form inputs to the shared predictor."""
    return predictor.predict(total_height, weight)
def gradio_predict_important(total_height: float, weight: float = None, fit_type_input: str = None):
    """Predict measurements, then derive a recommended shirt size and fit.

    Returns a dict with brand, recommended size, size details, fit and the
    raw predicted measurements, or an ``{"error": ...}`` dict if any of the
    required measurements are missing/invalid.

    Bug fix: a blank Gradio Textbox submits ``""`` (not ``None``), so the
    original ``is not None`` check never fell back to the computed fit.
    A truthiness check treats both ``None`` and ``""`` as "not provided".
    """
    prediction = predictor.predict(total_height, weight)
    try:
        brand = "Zara"  # Default brand
        # .get() may return None for a missing key; float(None) raises
        # TypeError, which the handler below turns into an error payload.
        chest = float(prediction.get("ChestWidth"))
        waist = float(prediction.get("Waist"))
        shoulder = float(prediction.get("ShoulderWidth"))
        recommended_size, size_details = get_shirt_size(
            brand, int(round(chest)), int(round(waist)), int(round(shoulder))
        )
        # User-supplied fit wins only when non-empty; otherwise compute it.
        computed_fit = (
            fit_type_input if fit_type_input
            else get_shirt_fit(shoulder, chest, waist)
        )
        return {
            "Brand": brand,
            "RecommendedSize": recommended_size,
            "SizeDetails": size_details,
            "Fit": computed_fit,
            "PredictedMeasurements": prediction,
        }
    except (TypeError, ValueError) as e:
        return {"error": f"Error in size/fit calculation: {str(e)}"}
# Train the shared predictor at import time so the UI is usable immediately.
try:
    data = pd.read_csv("./data/bdm.csv").dropna()
    predictor.train_model(data)
    logger.info("Model initialization completed successfully")
except Exception as e:
    # Log for the Space console, then re-raise so a broken model
    # fails loudly instead of serving garbage predictions.
    logger.error(f"Error during model initialization: {str(e)}")
    raise
# UI wiring: two tabs, one per handler, both returning JSON payloads.
predict_interface = gr.Interface(
    fn=gradio_predict,
    inputs=[gr.Number(label="Total Height (cm)"), gr.Number(label="Weight (kg)")],
    outputs="json",
    title="Body Measurement Prediction",
)

predict_important_interface = gr.Interface(
    fn=gradio_predict_important,
    inputs=[
        gr.Number(label="Total Height (cm)"),
        gr.Number(label="Weight (kg)"),
        gr.Textbox(label="Fit Type"),
    ],
    outputs="json",
    title="Important Body Measurement Prediction",
)

# Bundle the two interfaces into a tabbed app and start the server.
demo = gr.TabbedInterface(
    [predict_interface, predict_important_interface],
    ["Predict", "Predict Important"],
)
demo.launch()