"""Train an SVM face-shape classifier from a directory of labeled images.

Expected layout: DATASET_DIR/<ShapeName>/<image files>. Each image is run
through the landmark detector and geometric feature extractor; the resulting
vectors train a scaled RBF-SVM, which is persisted to MODEL_PATH.
"""

import os
import cv2
import glob
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

from landmarks import get_landmarks
from geometry import extract_features

DATASET_DIR = "../dataset"
MODEL_PATH = "face_shape_model.pkl"


def get_feature_vector(features):
    """Converts feature dictionary to a list for the model.

    Order matters: the trained model expects exactly this ordering at
    prediction time, so keep it in sync with any consumer of MODEL_PATH.
    """
    return [
        features["lw_ratio"],
        features["jaw_ratio"],
        features["forehead_ratio"],
        # Add more if geometry.py calculates them
    ]


def train():
    """Build the dataset, fit the pipeline, report metrics, save the model.

    Prints a classification report on a held-out 20% split and writes the
    fitted pipeline to MODEL_PATH with joblib. Returns None in all cases;
    missing dataset or zero usable images is reported and aborts early.
    """
    print("Starting training...")
    X = []
    y = []

    # Iterate through each subdirectory in the dataset
    # Assumes structure: dataset/ShapeName/image.jpg
    if not os.path.exists(DATASET_DIR):
        print(f"Dataset directory not found: {DATASET_DIR}")
        return

    classes = [d for d in os.listdir(DATASET_DIR)
               if os.path.isdir(os.path.join(DATASET_DIR, d))]
    print(f"Found classes: {classes}")

    for label in classes:
        class_dir = os.path.join(DATASET_DIR, label)
        image_files = glob.glob(os.path.join(class_dir, "*"))

        # Normalize label to capitalized format (e.g., "oval" -> "Oval")
        # to match recommendation engine keys
        normalized_label = label.capitalize()

        print(f"Processing {label} (normalized to {normalized_label}): "
              f"{len(image_files)} images")

        skipped = 0
        for img_path in image_files:
            try:
                landmarks = get_landmarks(img_path)
                feats = extract_features(landmarks)
                X.append(get_feature_vector(feats))
                y.append(normalized_label)
            except Exception:
                # Best-effort: one unreadable image or a failed face
                # detection must not abort training, but we count the
                # failures instead of swallowing them silently so data
                # problems stay visible.
                skipped += 1
        if skipped:
            print(f"  Skipped {skipped} image(s) in {label} "
                  f"(unreadable or no landmarks detected)")

    if len(X) == 0:
        print("No valid data found. Check dataset and landmarks extraction.")
        return

    X = np.array(X)
    y = np.array(y)

    print(f"Training on {len(X)} samples...")

    # Train/Test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Pipeline: Scale features -> SVM Classifier
    # probability=True enables predict_proba for downstream consumers.
    model = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True))
    model.fit(X_train, y_train)

    # Evaluate on the held-out split
    print("Evaluating model...")
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))

    # Save the whole pipeline (scaler + SVM) so prediction-time inputs
    # are scaled identically to training.
    joblib.dump(model, MODEL_PATH)
    print(f"Model saved to {MODEL_PATH}")


if __name__ == "__main__":
    train()