# Face-shape model training script: Haar-cascade face detection + Random Forest classifier.
| import os | |
| import cv2 | |
| import numpy as np | |
| import pickle | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import accuracy_score, classification_report | |
| from sklearn.preprocessing import LabelEncoder | |
# --- Configuration ---
# Absolute paths to the training dataset root and the serialized model artifact.
DATASET_DIR = "/home/codernotme/Projects/Github/katariaoptics/dataset"
MODEL_PATH = "/home/codernotme/Projects/Github/katariaoptics/ai_service/face_shape_model.pkl"
IMG_SIZE = (64, 64)  # Detected face crops are resized to this (width, height) before flattening

# Load the frontal-face Haar cascade bundled with OpenCV.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
if face_cascade.empty():
    # CascadeClassifier does not raise on a bad path; it silently detects
    # nothing, which would make the whole dataset load zero images. Fail loudly.
    raise RuntimeError(f"Failed to load Haar cascade from {cascade_path}")
def load_dataset():
    """Load face images from DATASET_DIR and build flattened grayscale features.

    For every image in each expected class folder, the largest detected face
    is cropped, converted to grayscale, resized to IMG_SIZE and flattened
    into a 1-D feature vector. Images with no detectable face are skipped.

    Returns:
        (X, y): X is an ndarray of shape (n_samples, IMG_SIZE[0] * IMG_SIZE[1])
        holding uint8 pixel features; y is an ndarray of class-name strings.
    """
    images = []
    labels = []
    # Expected classes (folder names directly under DATASET_DIR).
    classes = ["heart", "oblong", "oval", "round", "square"]
    print(f"Loading dataset from {DATASET_DIR}...")
    for label in classes:
        folder_path = os.path.join(DATASET_DIR, label)
        if not os.path.isdir(folder_path):
            print(f"Warning: Folder {folder_path} not found.")
            continue
        print(f"Processing class: {label}")
        count = 0
        # List the directory once and sort for a deterministic processing
        # order (the original called os.listdir twice, so the progress total
        # could disagree with the iterated listing if the folder changed).
        filenames = sorted(os.listdir(folder_path))
        total_files = len(filenames)
        for i, filename in enumerate(filenames):
            if i % 50 == 0:
                print(f"  Processed {i}/{total_files} images...")
            img_path = os.path.join(folder_path, filename)
            try:
                img = cv2.imread(img_path)
                if img is None:
                    # Unreadable or non-image file; skip it.
                    continue
                # Downscale very wide images to speed up face detection.
                h, w = img.shape[:2]
                if w > 1000:
                    scale = 1000 / w
                    img = cv2.resize(img, (1000, int(h * scale)))
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                # scaleFactor=1.1, minNeighbors=4 — OpenCV's common defaults
                # for frontal-face detection.
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)
                if len(faces) > 0:
                    # Keep only the largest face by bounding-box area.
                    (x, y, w, h) = max(faces, key=lambda f: f[2] * f[3])
                    face_roi = gray[y:y + h, x:x + w]
                    resized = cv2.resize(face_roi, IMG_SIZE)
                    images.append(resized.flatten())
                    labels.append(label)
                    count += 1
            except Exception as e:
                # Best-effort batch processing: report and continue with the
                # next file rather than aborting the whole dataset load.
                print(f"Error processing {img_path}: {e}")
        print(f"  Loaded {count} images for {label}")
    return np.array(images), np.array(labels)
def train_model():
    """Train a Random Forest face-shape classifier and persist it to MODEL_PATH.

    Loads the dataset via load_dataset(), integer-encodes the labels, trains
    on an 80/20 stratified split, prints evaluation metrics, and pickles both
    the fitted model (MODEL_PATH) and the class labels ("<model>_classes.pkl").
    Returns None; aborts early with a message if no images were loaded.
    """
    X, y = load_dataset()
    if len(X) == 0:
        print("Error: No images loaded. Dataset might be empty or paths incorrect.")
        return
    print(f"Total dataset size: {len(X)} samples")

    # Encode string class names to integer ids.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Make sure the output directory exists before writing any artifacts.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

    # Save label encoder classes so inference can map ids back to names.
    with open(MODEL_PATH.replace(".pkl", "_classes.pkl"), "wb") as f:
        pickle.dump(le.classes_, f)
    print(f"Saved class labels: {le.classes_}")

    # Stratify so each class keeps its proportion in the test split —
    # an unstratified split can badly skew metrics on imbalanced data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # n_jobs=-1 uses all cores; identical model for a fixed random_state.
    print("Training Random Forest Classifier...")
    clf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    # Persist the trained model.
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(clf, f)
    print(f"Model saved to {MODEL_PATH}")
if __name__ == "__main__":
    # Script entry point: train and persist the face-shape model.
    train_model()