# Source: codernotme — commit a5a6a2e (verified)
import os
import cv2
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
# Configuration
# NOTE(review): absolute, user-specific paths — consider making these
# configurable (env vars / CLI args) before deploying elsewhere.
DATASET_DIR = "/home/codernotme/Projects/Github/katariaoptics/dataset"
MODEL_PATH = "/home/codernotme/Projects/Github/katariaoptics/ai_service/face_shape_model.pkl"
IMG_SIZE = (64, 64) # Resize detected faces to this size
# Load the Haar cascade for frontal-face detection (ships with the OpenCV install).
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
def load_dataset(dataset_dir=None, classes=None):
    """Load face images and labels for face-shape classification.

    Walks one sub-folder per class under *dataset_dir*, detects the largest
    face in each image with the module-level Haar cascade, and collects each
    cropped face as a flattened IMG_SIZE grayscale feature vector.

    Args:
        dataset_dir: Root folder containing one sub-folder per class.
            Defaults to the module-level DATASET_DIR.
        classes: Class (folder) names to load. Defaults to the five
            face-shape labels used by this project.

    Returns:
        (X, y): X is an ndarray of flattened grayscale face crops, y an
        ndarray of string labels; both are empty when nothing was loaded.
    """
    if dataset_dir is None:
        dataset_dir = DATASET_DIR
    if classes is None:
        classes = ["heart", "oblong", "oval", "round", "square"]

    images = []
    labels = []
    print(f"Loading dataset from {dataset_dir}...")
    for label in classes:
        folder_path = os.path.join(dataset_dir, label)
        if not os.path.isdir(folder_path):
            print(f"Warning: Folder {folder_path} not found.")
            continue
        print(f"Processing class: {label}")
        count = 0
        # List the folder once and sort so runs are deterministic
        # (os.listdir order is filesystem-dependent).
        filenames = sorted(os.listdir(folder_path))
        total_files = len(filenames)
        for i, filename in enumerate(filenames):
            if i % 50 == 0:
                print(f" Processed {i}/{total_files} images...")
            img_path = os.path.join(folder_path, filename)
            try:
                img = cv2.imread(img_path)
                if img is None:
                    # Unreadable or non-image file; skip it.
                    continue
                # Downscale very wide images to speed up face detection.
                h, w = img.shape[:2]
                if w > 1000:
                    scale = 1000 / w
                    img = cv2.resize(img, (1000, int(h * scale)))
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)
                if len(faces) > 0:
                    # Keep only the largest detected face (by pixel area).
                    (x, y, w, h) = max(faces, key=lambda f: f[2] * f[3])
                    face_roi = gray[y:y+h, x:x+w]
                    # Normalize the crop size, then flatten to a 1-D vector.
                    resized = cv2.resize(face_roi, IMG_SIZE)
                    images.append(resized.flatten())
                    labels.append(label)
                    count += 1
            except Exception as e:
                # Best-effort loading: report the bad file and keep going.
                print(f"Error processing {img_path}: {e}")
        print(f" Loaded {count} images for {label}")
    return np.array(images), np.array(labels)
def train_model():
    """Train and persist a Random Forest face-shape classifier.

    Loads the dataset, encodes the string labels to integers, holds out 20%
    for evaluation, fits the model, prints accuracy and a per-class report,
    and pickles both the label-encoder classes ("..._classes.pkl") and the
    fitted classifier (MODEL_PATH).
    """
    X, y = load_dataset()
    if len(X) == 0:
        print("Error: No images loaded. Dataset might be empty or paths incorrect.")
        return

    print(f"Total dataset size: {len(X)} samples")

    # Encode string labels to integer ids.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Make sure the output directory exists before writing any artifacts;
    # open(..., "wb") raises FileNotFoundError on a missing directory.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

    # Save the label-encoder classes so inference can map ids back to names.
    with open(MODEL_PATH.replace(".pkl", "_classes.pkl"), "wb") as f:
        pickle.dump(le.classes_, f)
    print(f"Saved class labels: {le.classes_}")

    # Stratify the split so each face-shape class keeps its proportion in
    # train and test — the per-class image counts are not guaranteed equal.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    print("Training Random Forest Classifier...")
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    # Persist the fitted model.
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(clf, f)
    print(f"Model saved to {MODEL_PATH}")
# Script entry point: train and save the model when run directly
# (not when imported as a module).
if __name__ == "__main__":
    train_model()