Spaces:
Running
Running
| # File: src/EmotionRecognition/components/data_preparation.py | |
| import os | |
| import pandas as pd | |
| import numpy as np | |
| from PIL import Image | |
| from tqdm import tqdm | |
| import shutil | |
| from EmotionRecognition import logger | |
| from EmotionRecognition.entity.config_entity import DataPreparationConfig | |
| from pathlib import Path | |
| import glob | |
| class DataPreparation: | |
| def __init__(self, config: DataPreparationConfig, params: dict): | |
| self.config = config | |
| self.params = params.DATA_PARAMS | |
| def _process_and_save(self, best_emotion_name, usage, index, pixels, dataset_prefix): | |
| """Helper function to handle the merging logic and save images.""" | |
| # --- MERGING LOGIC --- | |
| # If the emotion is 'contempt', we re-label it as 'disgust'. | |
| if best_emotion_name == 'contempt': | |
| final_emotion_name = 'disgust' | |
| else: | |
| final_emotion_name = best_emotion_name | |
| # --- END MERGING LOGIC --- | |
| # Check if this emotion is one of our final target classes | |
| if final_emotion_name in self.params.CLASSES: | |
| if usage == 'Training': | |
| output_dir = self.config.combined_train_dir | |
| elif usage == 'PublicTest': | |
| output_dir = self.config.ferplus_test_dir | |
| else: | |
| return # Skip other usages like PrivateTest | |
| image = Image.fromarray(pixels) | |
| emotion_folder = Path(output_dir) / final_emotion_name | |
| emotion_folder.mkdir(parents=True, exist_ok=True) | |
| image.save(emotion_folder / f"{dataset_prefix}_{index}.png") | |
| def _prepare_ferplus(self): | |
| logger.info("Starting preparation of FER+ dataset...") | |
| pixels_df = pd.read_csv(self.config.ferplus_pixels_csv) | |
| labels_df = pd.read_csv(self.config.ferplus_labels_csv) | |
| ferplus_emotion_columns = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt'] | |
| for index, row in tqdm(pixels_df.iterrows(), total=len(pixels_df), desc="Processing FER+ Images"): | |
| label_votes = labels_df.iloc[index][ferplus_emotion_columns].values | |
| source_emotion_name = ferplus_emotion_columns[np.argmax(label_votes)] | |
| # --- STANDARDIZE THE NAME --- | |
| # Default to the source name | |
| our_emotion_name = source_emotion_name | |
| if source_emotion_name == 'happiness': our_emotion_name = 'happy' | |
| if source_emotion_name == 'sadness': our_emotion_name = 'sad' | |
| if source_emotion_name == 'anger': our_emotion_name = 'angry' | |
| if source_emotion_name == 'contempt': our_emotion_name = 'disgust' # MERGE | |
| if our_emotion_name in self.params.CLASSES: | |
| usage = row['Usage'] | |
| if usage == 'Training': output_dir = self.config.combined_train_dir | |
| elif usage == 'PublicTest': output_dir = self.config.ferplus_test_dir | |
| else: continue | |
| pixels = np.array(row['pixels'].split(), 'uint8').reshape((48, 48)) | |
| image = Image.fromarray(pixels) | |
| emotion_folder = Path(output_dir) / our_emotion_name | |
| emotion_folder.mkdir(parents=True, exist_ok=True) | |
| image.save(emotion_folder / f"ferplus_{index}.png") | |
| logger.info("FER+ dataset preparation complete.") | |
| def _prepare_ckplus(self): | |
| logger.info("Starting preparation of CK+ dataset...") | |
| for ckplus_folder_name in tqdm(os.listdir(self.config.ckplus_dir), desc="Processing CK+ Folders"): | |
| source_emotion_dir = Path(self.config.ckplus_dir) / ckplus_folder_name | |
| # --- STANDARDIZE THE NAME --- | |
| our_emotion_name = ckplus_folder_name # Default | |
| if ckplus_folder_name == 'contempt': our_emotion_name = 'disgust' # MERGE | |
| if our_emotion_name in self.params.CLASSES and source_emotion_dir.is_dir(): | |
| dest_emotion_dir = Path(self.config.combined_train_dir) / our_emotion_name | |
| dest_emotion_dir.mkdir(parents=True, exist_ok=True) | |
| for img_file in os.listdir(source_emotion_dir): | |
| shutil.copy(source_emotion_dir / img_file, dest_emotion_dir / f"ckplus_{img_file}") | |
| logger.info("CK+ dataset preparation complete.") | |
| def _log_dataset_statistics(self): | |
| logger.info("--- Final Dataset Statistics ---") | |
| logger.info("Training Set:") | |
| for emotion in sorted(self.params.CLASSES): | |
| count = len(glob.glob(str(self.config.combined_train_dir / emotion / '*.png'))) | |
| logger.info(f"- {emotion}: {count} images") | |
| logger.info("\nTest Set:") | |
| for emotion in sorted(self.params.CLASSES): | |
| count = len(glob.glob(str(self.config.ferplus_test_dir / emotion / '*.png'))) | |
| logger.info(f"- {emotion}: {count} images") | |
| logger.info("---------------------------------") | |
| def combine_and_prepare_data(self): | |
| logger.info("--- Starting Data Preparation Stage ---") | |
| if os.path.exists(self.config.combined_train_dir): shutil.rmtree(self.config.combined_train_dir) | |
| if os.path.exists(self.config.ferplus_test_dir): shutil.rmtree(self.config.ferplus_test_dir) | |
| os.makedirs(self.config.combined_train_dir, exist_ok=True) | |
| os.makedirs(self.config.ferplus_test_dir, exist_ok=True) | |
| self._prepare_ferplus() | |
| self._prepare_ckplus() | |
| self._log_dataset_statistics() | |
| logger.info("--- Data Preparation Stage Complete ---") |