"""Train a random-forest mask-style recommender and save it with its encoders.

The script builds a small synthetic dataset of face attributes, label-encodes
the categorical columns, trains a ``RandomForestClassifier`` to predict the
mask style, prints evaluation metrics, and writes the model plus the encoders
(including a class-index -> mask-image mapping) under ``model/``.
"""

import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# 1. Dataset Preparation
print("=== Preparing Dataset ===")
data = {
    'face_shape': ['Oval', 'Round', 'Square'] * 50,
    'skin_tone': ['Fair', 'Medium', 'Dark'] * 50,
    'face_size': ['Small', 'Medium', 'Large'] * 50,
    'mask_style': ['Glitter', 'Animal', 'Floral'] * 50,
    'mask_image': [
        'mask_images/glitter.png',
        'mask_images/animal.png',
        'mask_images/floral.png',
    ] * 50,
}
df = pd.DataFrame(data)
print(f"Dataset created with {len(df)} samples")

# 2. Initialize Encoders with Image Mappings
print("\n=== Initializing Encoders ===")
encoders = {
    'face_shape': LabelEncoder().fit(df['face_shape'].unique()),
    'skin_tone': LabelEncoder().fit(df['skin_tone'].unique()),
    'face_size': LabelEncoder().fit(df['face_size'].unique()),
    'mask_style': LabelEncoder().fit(df['mask_style'].unique()),
}

# LabelEncoder stores its classes in sorted order, so the encoded index of a
# mask style is NOT the order in which the styles were written above
# (Animal=0, Floral=1, Glitter=2). Derive the index -> image mapping from the
# dataset and the fitted encoder instead of hard-coding indices, so a predicted
# class index always resolves to the image that belongs to that style.
style_image_pairs = df[['mask_style', 'mask_image']].drop_duplicates()
if style_image_pairs['mask_style'].duplicated().any():
    raise ValueError("Each mask_style must map to exactly one mask_image")
style_to_image = dict(
    zip(style_image_pairs['mask_style'], style_image_pairs['mask_image'])
)
encoders['mask_images'] = {
    class_idx: style_to_image[style]
    for class_idx, style in enumerate(encoders['mask_style'].classes_)
}

# 3. Feature Engineering
print("\n=== Encoding Features ===")
X = pd.DataFrame({
    'face_shape': encoders['face_shape'].transform(df['face_shape']),
    'skin_tone': encoders['skin_tone'].transform(df['skin_tone']),
    'face_size': encoders['face_size'].transform(df['face_size']),
})
y = encoders['mask_style'].transform(df['mask_style'])

# 4. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")

# 5. Model Training
print("\n=== Training Model ===")
model = RandomForestClassifier(
    n_estimators=150,  # Increased for better performance
    max_depth=7,
    min_samples_split=5,
    class_weight='balanced',
    random_state=42,
)
model.fit(X_train, y_train)

# 6. Enhanced Evaluation
print("\n=== Model Evaluation ===")
print(f"Training Accuracy: {model.score(X_train, y_train):.2f}")
print(f"Test Accuracy: {model.score(X_test, y_test):.2f}")

# Feature Importance
importances = model.feature_importances_
print("\nFeature Importances:")
for col, imp in zip(X.columns, importances):
    print(f"- {col}: {imp:.3f}")

# Classification Report
print("\nDetailed Classification Report:")
print(classification_report(y_test, model.predict(X_test)))

# 7. Saving with Verification
print("\n=== Saving Assets ===")
os.makedirs('model', exist_ok=True)

# Verify mask images exist (a missing file is reported, not treated as fatal)
print("\nMask Image Verification:")
for class_idx, path in encoders['mask_images'].items():
    if os.path.exists(path):
        print(f"✓ {encoders['mask_style'].classes_[class_idx]}: {path}")
    else:
        print(f"✗ Missing: {path}")

# Save models
joblib.dump(model, 'model/random_forest.pkl')
joblib.dump(encoders, 'model/label_encoders.pkl')

print("\n=== Saved Assets ===")
print("Model saved: model/random_forest.pkl")
print("Encoders saved: model/label_encoders.pkl")
print("\nClass Mappings:")
print("- Face Shapes:", list(encoders['face_shape'].classes_))
print("- Mask Styles:", list(encoders['mask_style'].classes_))
print("- Mask Images:", encoders['mask_images'])
print("\nTraining complete! Ready for deployment.")