# train.py — trains a face-shape classifier (SVM on facial-landmark geometry features).
"""Train an SVM face-shape classifier from landmark-derived geometry features.

Expects a dataset laid out as ``DATASET_DIR/<ShapeName>/<image>`` and saves the
fitted scikit-learn pipeline to ``MODEL_PATH``.
"""

import glob
import os

import cv2  # NOTE(review): unused here; presumably needed by landmarks/geometry — confirm
import joblib
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler  # LabelEncoder currently unused
from sklearn.svm import SVC

from geometry import extract_features
from landmarks import get_landmarks

# Root of the training data; one subdirectory per face-shape class.
DATASET_DIR = "../dataset"
# Where the fitted pipeline is persisted with joblib.
MODEL_PATH = "face_shape_model.pkl"
# Feature keys expected from geometry.extract_features, in fixed model-input order.
FEATURE_KEYS = ("lw_ratio", "jaw_ratio", "forehead_ratio")


def get_feature_vector(features):
    """Convert a feature dictionary to an ordered list for the model.

    Args:
        features: Mapping produced by ``extract_features`` containing at
            least the keys listed in ``FEATURE_KEYS``.

    Returns:
        list: Feature values in ``FEATURE_KEYS`` order.

    Raises:
        KeyError: If any expected key is missing from *features*.
    """
    # Keeping the order in one tuple avoids the list drifting out of sync
    # if geometry.py grows more features — extend FEATURE_KEYS instead.
    return [features[key] for key in FEATURE_KEYS]
def _collect_dataset():
    """Gather (feature-vector, label) pairs from DATASET_DIR.

    Assumes the layout ``DATASET_DIR/<ShapeName>/<image>``. Labels are
    capitalized (e.g. ``"oval"`` -> ``"Oval"``) to match the keys used by
    the recommendation engine.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a list of feature vectors and
        ``y`` the matching list of normalized labels. Both are empty when
        the dataset directory is missing or no image could be processed.
    """
    X, y = [], []
    if not os.path.exists(DATASET_DIR):
        print(f"Dataset directory not found: {DATASET_DIR}")
        return X, y
    classes = [
        d for d in os.listdir(DATASET_DIR)
        if os.path.isdir(os.path.join(DATASET_DIR, d))
    ]
    print(f"Found classes: {classes}")
    for label in classes:
        class_dir = os.path.join(DATASET_DIR, label)
        image_files = glob.glob(os.path.join(class_dir, "*"))
        # Normalize label to capitalized form to match recommendation engine keys.
        normalized_label = label.capitalize()
        print(f"Processing {label} (normalized to {normalized_label}): {len(image_files)} images")
        skipped = 0
        for img_path in image_files:
            try:
                landmarks = get_landmarks(img_path)
                feats = extract_features(landmarks)
                X.append(get_feature_vector(feats))
                y.append(normalized_label)
            except Exception as e:
                # Best-effort: unreadable images / failed landmark detection are
                # skipped, but logged and counted so data loss is not silent.
                skipped += 1
                print(f"Skipping {img_path}: {e}")
        if skipped:
            print(f"Skipped {skipped}/{len(image_files)} images in class {label}")
    return X, y


def train():
    """Train and persist the face-shape SVM classifier.

    Extracts geometry features from every dataset image, fits a
    StandardScaler + RBF-SVC pipeline on an 80/20 split, prints a
    classification report on the held-out 20%, and saves the fitted
    pipeline to MODEL_PATH.
    """
    print("Starting training...")
    X, y = _collect_dataset()
    if not X:
        print("No valid data found. Check dataset and landmarks extraction.")
        return
    X = np.array(X)
    y = np.array(y)
    print(f"Training on {len(X)} samples...")
    # Fixed seed so the train/test split (and the report) is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    # Pipeline: scale features, then RBF SVM. probability=True enables
    # predict_proba for downstream confidence scores (slower to fit).
    model = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True))
    model.fit(X_train, y_train)
    print("Evaluating model...")
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))
    joblib.dump(model, MODEL_PATH)
    print(f"Model saved to {MODEL_PATH}")
| if __name__ == "__main__": | |
| train() | |