File size: 4,145 Bytes
a5a6a2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import cv2
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Configuration
# NOTE(review): absolute, machine-specific paths — consider making these
# overridable via environment variables before deploying elsewhere.
DATASET_DIR = "/home/codernotme/Projects/Github/katariaoptics/dataset"  # root dir with one sub-folder per face-shape class
MODEL_PATH = "/home/codernotme/Projects/Github/katariaoptics/ai_service/face_shape_model.pkl"  # where the trained classifier is pickled
IMG_SIZE = (64, 64)  # Resize detected faces to this size

# Load Haar Cascade for face detection.
# cv2.data.haarcascades is the directory of cascade XMLs bundled with OpenCV.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

def load_dataset():
    """Scan DATASET_DIR and build flattened grayscale face features.

    For each expected class sub-folder, every readable image is searched
    with the module-level Haar cascade; the largest detected face is
    cropped, resized to IMG_SIZE, and flattened into a 1-D feature vector.
    Images with no detected face (or that fail to decode) are skipped.

    Returns:
        tuple[np.ndarray, np.ndarray]: (features, labels) where features
        has shape (n_samples, IMG_SIZE[0] * IMG_SIZE[1]) and labels holds
        the matching class-name strings. Both are empty if nothing loaded.
    """
    images = []
    labels = []

    # Expected classes: one sub-folder per face shape.
    classes = ["heart", "oblong", "oval", "round", "square"]

    print(f"Loading dataset from {DATASET_DIR}...")

    for label in classes:
        folder_path = os.path.join(DATASET_DIR, label)
        if not os.path.isdir(folder_path):
            print(f"Warning: Folder {folder_path} not found.")
            continue

        print(f"Processing class: {label}")
        count = 0
        # List the directory ONCE and sort it. The original called
        # os.listdir twice (count + iteration), which could disagree if
        # the folder changed between calls, and the unsorted order made
        # runs non-reproducible across filesystems.
        filenames = sorted(os.listdir(folder_path))
        total_files = len(filenames)

        for i, filename in enumerate(filenames):
            if i % 50 == 0:
                print(f"  Processed {i}/{total_files} images...")

            img_path = os.path.join(folder_path, filename)
            try:
                # Read image; imread returns None for non-images/corrupt files.
                img = cv2.imread(img_path)
                if img is None:
                    continue

                # Downscale very wide images to speed up face detection.
                h, w = img.shape[:2]
                if w > 1000:
                    scale = 1000 / w
                    img = cv2.resize(img, (1000, int(h * scale)))

                # Haar cascades operate on grayscale input.
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                # scaleFactor=1.1, minNeighbors=4 (OpenCV positional args).
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)

                # If any face was detected, crop and use the largest one.
                if len(faces) > 0:
                    (x, y, w, h) = max(faces, key=lambda f: f[2] * f[3])
                    face_roi = gray[y:y+h, x:x+w]

                    # Normalize size, then flatten to a 1-D feature vector.
                    resized = cv2.resize(face_roi, IMG_SIZE)
                    flat_features = resized.flatten()

                    images.append(flat_features)
                    labels.append(label)
                    count += 1
            except Exception as e:
                # Best-effort loader: report the bad file and keep going.
                print(f"Error processing {img_path}: {e}")

        print(f"  Loaded {count} images for {label}")

    return np.array(images), np.array(labels)

def train_model():
    """Train a Random Forest face-shape classifier and persist it to disk.

    Loads features via load_dataset(), label-encodes the class names,
    saves the class list next to MODEL_PATH (suffix ``_classes.pkl``),
    trains on a stratified 80/20 split, prints accuracy and a
    classification report, then pickles the model to MODEL_PATH.

    Returns:
        None. Exits early (with a message) if no images were loaded.
    """
    X, y = load_dataset()

    if len(X) == 0:
        print("Error: No images loaded. Dataset might be empty or paths incorrect.")
        return

    print(f"Total dataset size: {len(X)} samples")

    # Encode string labels to contiguous integer ids.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Persist the class order so inference can map ids back to names.
    with open(MODEL_PATH.replace(".pkl", "_classes.pkl"), "wb") as f:
        pickle.dump(le.classes_, f)
    print(f"Saved class labels: {le.classes_}")

    # Stratified split: keeps each class's proportion identical in the
    # train and test sets, so no face shape is under-represented in
    # evaluation (the original plain random split did not guarantee this).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Train Random Forest (fixed seed for reproducibility).
    print("Training Random Forest Classifier...")
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    # Save model for the inference service.
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(clf, f)
    print(f"Model saved to {MODEL_PATH}")

if __name__ == "__main__":
    # Script entry point: train and persist the model when run directly.
    train_model()