# kataria_opticals_api / train_model.py
# Author: codernotme — commit a5a6a2e (verified)
import os
import cv2
import glob
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from landmarks import get_landmarks
from geometry import extract_features
# Root of the training images, laid out as <DATASET_DIR>/<ShapeName>/<image file>.
DATASET_DIR = "../dataset"
# Output path for the serialized scaler+SVM pipeline (joblib format).
MODEL_PATH = "face_shape_model.pkl"
def get_feature_vector(features):
    """Flatten the geometry feature dict into an ordered list for the model.

    The key order here is the model's input order — it must stay identical
    between training and inference.
    """
    # Extend this tuple if geometry.py starts computing more measurements.
    feature_keys = ("lw_ratio", "jaw_ratio", "forehead_ratio")
    return [features[key] for key in feature_keys]
def train():
    """Train an SVM face-shape classifier from images under DATASET_DIR.

    Expects the layout ``dataset/ShapeName/image.jpg``. For each image it
    extracts facial landmarks, converts them to geometric features, fits a
    StandardScaler -> RBF-SVC pipeline, prints a held-out classification
    report, and saves the fitted pipeline to MODEL_PATH.

    Prints diagnostics and returns early (None) when the dataset directory is
    missing or no image yields usable features.
    """
    print("Starting training...")
    if not os.path.exists(DATASET_DIR):
        print(f"Dataset directory not found: {DATASET_DIR}")
        return

    # One class per subdirectory: dataset/ShapeName/*.jpg
    classes = [d for d in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, d))]
    print(f"Found classes: {classes}")

    X = []
    y = []
    skipped = 0  # images where landmark/feature extraction failed
    for label in classes:
        class_dir = os.path.join(DATASET_DIR, label)
        # Keep only regular files: the bare "*" wildcard would otherwise also
        # match stray subdirectories inside a class folder.
        image_files = [p for p in glob.glob(os.path.join(class_dir, "*")) if os.path.isfile(p)]
        # Normalize label to capitalized format (e.g., "oval" -> "Oval") to
        # match recommendation engine keys.
        normalized_label = label.capitalize()
        print(f"Processing {label} (normalized to {normalized_label}): {len(image_files)} images")
        for img_path in image_files:
            try:
                landmarks = get_landmarks(img_path)
                feats = extract_features(landmarks)
                vector = get_feature_vector(feats)
            except Exception:
                # Best-effort: skip unreadable images / failed detections,
                # but count them so silent data loss stays visible.
                skipped += 1
                continue
            X.append(vector)
            y.append(normalized_label)

    if skipped:
        print(f"Warning: skipped {skipped} image(s) where extraction failed.")
    if not X:
        print("No valid data found. Check dataset and landmarks extraction.")
        return

    X = np.array(X)
    y = np.array(y)
    print(f"Training on {len(X)} samples...")

    # Stratify the split so class proportions are preserved in train/test,
    # but only when every class has >= 2 samples (sklearn raises otherwise).
    _, counts = np.unique(y, return_counts=True)
    stratify = y if counts.min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify
    )

    # Pipeline: scale features -> RBF SVM. probability=True enables
    # predict_proba for downstream confidence scores (slower to fit).
    model = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True))
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    print("Evaluating model...")
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))

    # Persist the whole pipeline (scaler + classifier) in one artifact.
    joblib.dump(model, MODEL_PATH)
    print(f"Model saved to {MODEL_PATH}")
# Script entry point: python train_model.py
if __name__ == "__main__":
    train()