# kataria_opticals_api / train_model.py
# Author: codernotme — commit a5a6a2e (verified)
import os
import cv2
import glob
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from landmarks import get_landmarks
from geometry import extract_features
# Root of the training images, laid out as <DATASET_DIR>/<ShapeName>/<image file>.
DATASET_DIR = "../dataset"
# Output path for the serialized scaler+SVM pipeline (joblib format).
MODEL_PATH = "face_shape_model.pkl"
def get_feature_vector(features):
    """Flatten the geometry feature dict into an ordered list for the model.

    The key order here is the model's input order — it must stay identical
    between training and inference.
    """
    # Extend this tuple if geometry.py starts computing more measurements.
    feature_keys = ("lw_ratio", "jaw_ratio", "forehead_ratio")
    return [features[key] for key in feature_keys]
def train():
    """Train an SVM face-shape classifier from images under DATASET_DIR.

    Expects the layout ``dataset/ShapeName/image.jpg``. For each image it
    extracts facial landmarks, converts them to geometric features, fits a
    StandardScaler -> RBF-SVC pipeline, prints a held-out classification
    report, and saves the fitted pipeline to MODEL_PATH.

    Prints diagnostics and returns early (None) when the dataset directory is
    missing or no image yields usable features.
    """
    print("Starting training...")
    if not os.path.exists(DATASET_DIR):
        print(f"Dataset directory not found: {DATASET_DIR}")
        return

    # One class per subdirectory: dataset/ShapeName/*.jpg
    classes = [d for d in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, d))]
    print(f"Found classes: {classes}")

    X = []
    y = []
    skipped = 0  # images where landmark/feature extraction failed
    for label in classes:
        class_dir = os.path.join(DATASET_DIR, label)
        # Keep only regular files: the bare "*" wildcard would otherwise also
        # match stray subdirectories inside a class folder.
        image_files = [p for p in glob.glob(os.path.join(class_dir, "*")) if os.path.isfile(p)]
        # Normalize label to capitalized format (e.g., "oval" -> "Oval") to
        # match recommendation engine keys.
        normalized_label = label.capitalize()
        print(f"Processing {label} (normalized to {normalized_label}): {len(image_files)} images")
        for img_path in image_files:
            try:
                landmarks = get_landmarks(img_path)
                feats = extract_features(landmarks)
                vector = get_feature_vector(feats)
            except Exception:
                # Best-effort: skip unreadable images / failed detections,
                # but count them so silent data loss stays visible.
                skipped += 1
                continue
            X.append(vector)
            y.append(normalized_label)

    if skipped:
        print(f"Warning: skipped {skipped} image(s) where extraction failed.")
    if not X:
        print("No valid data found. Check dataset and landmarks extraction.")
        return

    X = np.array(X)
    y = np.array(y)
    print(f"Training on {len(X)} samples...")

    # Stratify the split so class proportions are preserved in train/test,
    # but only when every class has >= 2 samples (sklearn raises otherwise).
    _, counts = np.unique(y, return_counts=True)
    stratify = y if counts.min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify
    )

    # Pipeline: scale features -> RBF SVM. probability=True enables
    # predict_proba for downstream confidence scores (slower to fit).
    model = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True))
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    print("Evaluating model...")
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))

    # Persist the whole pipeline (scaler + classifier) in one artifact.
    joblib.dump(model, MODEL_PATH)
    print(f"Model saved to {MODEL_PATH}")
# Script entry point: python train_model.py
if __name__ == "__main__":
    train()