File size: 3,247 Bytes
b9b5f50
3ee9576
 
b9b5f50
 
3ee9576
642e4b0
3ee9576
 
b9b5f50
 
3ee9576
642e4b0
 
 
b9b5f50
 
3ee9576
 
b9b5f50
3ee9576
b9b5f50
 
642e4b0
b9b5f50
 
 
 
 
 
 
 
 
642e4b0
3ee9576
b9b5f50
 
 
 
 
 
 
 
 
 
 
642e4b0
b9b5f50
 
 
 
 
 
 
 
 
 
e911fa0
b9b5f50
 
 
 
 
 
3ee9576
b9b5f50
 
 
 
 
 
 
 
 
 
 
3ee9576
b9b5f50
 
 
 
 
 
 
 
 
 
 
642e4b0
 
3ee9576
b9b5f50
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# train_model.py
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import pandas as pd
import os

# 1. Dataset Preparation
print("=== Preparing Dataset ===")
data = {
    'face_shape': ['Oval', 'Round', 'Square'] * 100,
    'skin_tone': ['Fair', 'Medium', 'Dark'] * 100,
    'face_size': ['Small', 'Medium', 'Large'] * 100,
    'mask_style': ['Glitter', 'Animal', 'Floral'] * 100,
    'mask_image': ['masks/glitter.png', 'masks/animal.png', 'masks/floral.png'] * 100
}
df = pd.DataFrame(data)
print(f"Dataset created with {len(df)} samples")

# 2. Initialize Encoders with Image Mappings
print("\n=== Initializing Encoders ===")
encoders = {
    'face_shape': LabelEncoder().fit(df['face_shape']),
    'skin_tone': LabelEncoder().fit(df['skin_tone']),
    'face_size': LabelEncoder().fit(df['face_size']),
    'mask_style': LabelEncoder().fit(df['mask_style']),
    'mask_images': {
        0: 'masks/glitter.png',
        1: 'masks/animal.png', 
        2: 'masks/floral.png'
    }
}

# 3. Feature Engineering
print("\n=== Encoding Features ===")
X = pd.DataFrame({
    col: encoders[col].transform(df[col])
    for col in ['face_shape', 'skin_tone', 'face_size']
})
y = encoders['mask_style'].transform(df['mask_style'])

# 4. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")

# 5. Model Training (Enhanced Parameters)
print("\n=== Training Model ===")
model = RandomForestClassifier(
    n_estimators=150,        # Increased from 50
    max_depth=7,             # Increased from 5
    min_samples_split=5,     # New parameter
    class_weight='balanced', # Handle imbalanced data
    random_state=42
)
model.fit(X_train, y_train)

# 6. Enhanced Evaluation
print("\n=== Model Evaluation ===")
print(f"Training Accuracy: {model.score(X_train, y_train):.2f}")
print(f"Test Accuracy: {model.score(X_test, y_test):.2f}")

# Feature Importance
print("\nFeature Importances:")
for col, imp in zip(X.columns, model.feature_importances_):
    print(f"- {col}: {imp:.3f}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, model.predict(X_test)))

# 7. Save Model with Verification
print("\n=== Saving Assets ===")
os.makedirs('model', exist_ok=True)
os.makedirs('masks', exist_ok=True)

# Verify mask images exist
print("\nMask Image Verification:")
for class_idx, path in encoders['mask_images'].items():
    if os.path.exists(path):
        print(f"✓ {encoders['mask_style'].classes_[class_idx]}: {path}")
    else:
        print(f"✗ Missing: {path}")

# Save files
joblib.dump(model, 'model/random_forest.pkl', protocol=4)
joblib.dump(encoders, 'model/label_encoders.pkl', protocol=4)

print("\n=== Saved Files ===")
print("Model: model/random_forest.pkl")
print("Encoders: model/label_encoders.pkl")
print("\nClass Mappings:")
print("- Face Shapes:", list(encoders['face_shape'].classes_))
print("- Mask Styles:", list(encoders['mask_style'].classes_))
print("- Mask Images:", encoders['mask_images'])