File size: 4,145 Bytes
a5a6a2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import cv2
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Configuration
# NOTE(review): absolute, machine-specific paths — consider making these
# overridable via environment variables before deploying elsewhere.
DATASET_DIR = "/home/codernotme/Projects/Github/katariaoptics/dataset"  # root dir with one sub-folder per face-shape class
MODEL_PATH = "/home/codernotme/Projects/Github/katariaoptics/ai_service/face_shape_model.pkl"  # where the trained classifier is pickled
IMG_SIZE = (64, 64)  # Resize detected faces to this size

# Load Haar Cascade for face detection.
# cv2.data.haarcascades is the directory of cascade XMLs bundled with OpenCV.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

def load_dataset():
    """Scan DATASET_DIR and build flattened grayscale face features.

    For each expected class sub-folder, every readable image is searched
    with the module-level Haar cascade; the largest detected face is
    cropped, resized to IMG_SIZE, and flattened into a 1-D feature vector.
    Images with no detected face (or that fail to decode) are skipped.

    Returns:
        tuple[np.ndarray, np.ndarray]: (features, labels) where features
        has shape (n_samples, IMG_SIZE[0] * IMG_SIZE[1]) and labels holds
        the matching class-name strings. Both are empty if nothing loaded.
    """
    images = []
    labels = []

    # Expected classes: one sub-folder per face shape.
    classes = ["heart", "oblong", "oval", "round", "square"]

    print(f"Loading dataset from {DATASET_DIR}...")

    for label in classes:
        folder_path = os.path.join(DATASET_DIR, label)
        if not os.path.isdir(folder_path):
            print(f"Warning: Folder {folder_path} not found.")
            continue

        print(f"Processing class: {label}")
        count = 0
        # List the directory ONCE and sort it. The original called
        # os.listdir twice (count + iteration), which could disagree if
        # the folder changed between calls, and the unsorted order made
        # runs non-reproducible across filesystems.
        filenames = sorted(os.listdir(folder_path))
        total_files = len(filenames)

        for i, filename in enumerate(filenames):
            if i % 50 == 0:
                print(f"  Processed {i}/{total_files} images...")

            img_path = os.path.join(folder_path, filename)
            try:
                # Read image; imread returns None for non-images/corrupt files.
                img = cv2.imread(img_path)
                if img is None:
                    continue

                # Downscale very wide images to speed up face detection.
                h, w = img.shape[:2]
                if w > 1000:
                    scale = 1000 / w
                    img = cv2.resize(img, (1000, int(h * scale)))

                # Haar cascades operate on grayscale input.
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                # scaleFactor=1.1, minNeighbors=4 (OpenCV positional args).
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)

                # If any face was detected, crop and use the largest one.
                if len(faces) > 0:
                    (x, y, w, h) = max(faces, key=lambda f: f[2] * f[3])
                    face_roi = gray[y:y+h, x:x+w]

                    # Normalize size, then flatten to a 1-D feature vector.
                    resized = cv2.resize(face_roi, IMG_SIZE)
                    flat_features = resized.flatten()

                    images.append(flat_features)
                    labels.append(label)
                    count += 1
            except Exception as e:
                # Best-effort loader: report the bad file and keep going.
                print(f"Error processing {img_path}: {e}")

        print(f"  Loaded {count} images for {label}")

    return np.array(images), np.array(labels)

def train_model():
    """Train a Random Forest face-shape classifier and persist it to disk.

    Loads features via load_dataset(), label-encodes the class names,
    saves the class list next to MODEL_PATH (suffix ``_classes.pkl``),
    trains on a stratified 80/20 split, prints accuracy and a
    classification report, then pickles the model to MODEL_PATH.

    Returns:
        None. Exits early (with a message) if no images were loaded.
    """
    X, y = load_dataset()

    if len(X) == 0:
        print("Error: No images loaded. Dataset might be empty or paths incorrect.")
        return

    print(f"Total dataset size: {len(X)} samples")

    # Encode string labels to contiguous integer ids.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Persist the class order so inference can map ids back to names.
    with open(MODEL_PATH.replace(".pkl", "_classes.pkl"), "wb") as f:
        pickle.dump(le.classes_, f)
    print(f"Saved class labels: {le.classes_}")

    # Stratified split: keeps each class's proportion identical in the
    # train and test sets, so no face shape is under-represented in
    # evaluation (the original plain random split did not guarantee this).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Train Random Forest (fixed seed for reproducibility).
    print("Training Random Forest Classifier...")
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    # Save model for the inference service.
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(clf, f)
    print(f"Model saved to {MODEL_PATH}")

if __name__ == "__main__":
    # Script entry point: train and persist the model when run directly.
    train_model()