import os
import cv2
import glob
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from landmarks import get_landmarks
from geometry import extract_features
# Root of the training images, expected layout: dataset/<ShapeName>/<image files>
DATASET_DIR = "../dataset"
# Output path for the trained scikit-learn pipeline (joblib format)
MODEL_PATH = "face_shape_model.pkl"
def get_feature_vector(features):
    """Flatten the feature dictionary into an ordered list for the model."""
    # Keep this ordering stable: the trained model depends on it.
    # Add more keys if geometry.py calculates them.
    keys = ("lw_ratio", "jaw_ratio", "forehead_ratio")
    return [features[key] for key in keys]
def train():
    """Train an SVM face-shape classifier from images in DATASET_DIR.

    Expects the layout dataset/<ShapeName>/<image files>. For each image it
    extracts landmarks, computes geometric features, fits a
    StandardScaler -> RBF SVC pipeline, prints a classification report on a
    held-out split, and saves the fitted pipeline to MODEL_PATH.
    """
    print("Starting training...")
    X = []
    y = []
    # Iterate through each subdirectory in the dataset
    # Assumes structure: dataset/ShapeName/image.jpg
    if not os.path.exists(DATASET_DIR):
        print(f"Dataset directory not found: {DATASET_DIR}")
        return
    classes = [d for d in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, d))]
    print(f"Found classes: {classes}")
    for label in classes:
        class_dir = os.path.join(DATASET_DIR, label)
        image_files = glob.glob(os.path.join(class_dir, "*"))
        # Normalize label to capitalized format (e.g., "oval" -> "Oval")
        # to match recommendation engine keys
        normalized_label = label.capitalize()
        print(f"Processing {label} (normalized to {normalized_label}): {len(image_files)} images")
        skipped = 0
        for img_path in image_files:
            try:
                landmarks = get_landmarks(img_path)
                feats = extract_features(landmarks)
                vector = get_feature_vector(feats)
            except Exception:
                # Best-effort: a single unreadable image or failed landmark
                # detection should not abort training — but count the drops
                # instead of hiding them, so systematic failures are visible.
                skipped += 1
                continue
            X.append(vector)
            y.append(normalized_label)
        if skipped:
            print(f"  Skipped {skipped} of {len(image_files)} images in {label}")
    if not X:
        print("No valid data found. Check dataset and landmarks extraction.")
        return
    X = np.array(X)
    y = np.array(y)
    print(f"Training on {len(X)} samples...")
    # Stratified split preserves per-class proportions in train/test —
    # important for a small, possibly imbalanced face-shape dataset.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    # Pipeline: Scale features -> SVM Classifier
    model = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True))
    model.fit(X_train, y_train)
    # Evaluate on the held-out split
    print("Evaluating model...")
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))
    # Persist the whole pipeline (scaler + classifier) together
    joblib.dump(model, MODEL_PATH)
    print(f"Model saved to {MODEL_PATH}")
# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    train()