Spaces:
Configuration error
Configuration error
| import pandas as pd | |
| import numpy as np | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import classification_report | |
| import joblib | |
| import os | |
# 1. Dataset Preparation
# Synthetic data: three fixed profiles (one per mask style), each repeated
# 50 times, giving 150 rows whose columns all cycle in lockstep.
print("=== Preparing Dataset ===")
data = {
    column: values * 50
    for column, values in (
        ('face_shape', ['Oval', 'Round', 'Square']),
        ('skin_tone', ['Fair', 'Medium', 'Dark']),
        ('face_size', ['Small', 'Medium', 'Large']),
        ('mask_style', ['Glitter', 'Animal', 'Floral']),
        ('mask_image', [
            'mask_images/glitter.png',
            'mask_images/animal.png',
            'mask_images/floral.png',
        ]),
    )
}
df = pd.DataFrame(data)
print(f"Dataset created with {len(df)} samples")
# 2. Initialize Encoders with Image Mappings
# One LabelEncoder per categorical column, plus a lookup from the encoded
# mask-style index to the image file for that style.
print("\n=== Initializing Encoders ===")
encoders = {
    'face_shape': LabelEncoder().fit(df['face_shape'].unique()),
    'skin_tone': LabelEncoder().fit(df['skin_tone'].unique()),
    'face_size': LabelEncoder().fit(df['face_size'].unique()),
    'mask_style': LabelEncoder().fit(df['mask_style'].unique()),
}

# LabelEncoder stores its classes in sorted order (Animal, Floral, Glitter),
# NOT in order of first appearance in the data. A hand-written
# {0: glitter, 1: animal, 2: floral} table would therefore pair every
# predicted class with the wrong image. Derive the index -> image mapping
# from the fitted encoder and the style/image pairs in the dataset instead.
# If a style ever appears with more than one image, the last pairing wins.
_style_to_image = dict(zip(df['mask_style'], df['mask_image']))
encoders['mask_images'] = {
    class_idx: _style_to_image[style]
    for class_idx, style in enumerate(encoders['mask_style'].classes_)
}
# 3. Feature Engineering
# Turn each categorical input column into integer codes with its own
# encoder; the target is the encoded mask style.
print("\n=== Encoding Features ===")
X = pd.DataFrame({
    column: encoders[column].transform(df[column])
    for column in ('face_shape', 'skin_tone', 'face_size')
})
y = encoders['mask_style'].transform(df['mask_style'])

# 4. Train-Test Split
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")
# 5. Model Training
# Hyperparameters are collected in one place and then unpacked into the
# classifier; random_state pins the forest so repeated runs match.
print("\n=== Training Model ===")
forest_params = {
    'n_estimators': 150,  # Increased for better performance
    'max_depth': 7,
    'min_samples_split': 5,
    'class_weight': 'balanced',
    'random_state': 42,
}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)
# 6. Enhanced Evaluation
# Report accuracy on both splits, then per-feature importances, then the
# per-class precision/recall report on the held-out rows.
print("\n=== Model Evaluation ===")
train_accuracy = model.score(X_train, y_train)
print(f"Training Accuracy: {train_accuracy:.2f}")
test_accuracy = model.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Feature Importance
importances = model.feature_importances_
print("\nFeature Importances:")
for feature_name, importance in zip(X.columns, importances):
    print(f"- {feature_name}: {importance:.3f}")

# Classification Report
print("\nDetailed Classification Report:")
test_predictions = model.predict(X_test)
print(classification_report(y_test, test_predictions))
# 7. Saving with Verification
# Create the output directory, report which mask images are present on
# disk, then persist the classifier and the encoder bundle with joblib.
print("\n=== Saving Assets ===")
os.makedirs('model', exist_ok=True)

# Verify mask images exist (report only; a missing file does not abort).
print("\nMask Image Verification:")
style_names = encoders['mask_style'].classes_
for class_idx, path in encoders['mask_images'].items():
    if not os.path.exists(path):
        print(f"✗ Missing: {path}")
        continue
    print(f"✓ {style_names[class_idx]}: {path}")

# Save models
joblib.dump(model, 'model/random_forest.pkl')
joblib.dump(encoders, 'model/label_encoders.pkl')

print("\n=== Saved Assets ===")
print("Model saved: model/random_forest.pkl")
print("Encoders saved: model/label_encoders.pkl")

# Echo the class orderings so the index -> label mapping is visible in logs.
print("\nClass Mappings:")
face_shape_classes = list(encoders['face_shape'].classes_)
mask_style_classes = list(encoders['mask_style'].classes_)
print("- Face Shapes:", face_shape_classes)
print("- Mask Styles:", mask_style_classes)
print("- Mask Images:", encoders['mask_images'])
print("\nTraining complete! Ready for deployment.")