# research/evaluation.py
import torch
import pandas as pd
import json
from pathlib import Path
import sys
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from transformers import AutoImageProcessor

# Add the project's src directory to the Python path
# This allows us to import our custom modules
src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))
sys.path.append(src_path)

# Now we can import our custom classes
from cnnClassifier.components.multi_task_model_trainer import MultiTaskEfficientNet, FairFaceDataset
from cnnClassifier.utils.common import read_yaml
# ==============================================================================
# CONFIGURATION
# ==============================================================================
# Define paths directly. We are not using the config manager.
MODEL_PATH = Path("artifacts/multi_task_model_trainer/facial_demographics_model")
DATA_PATH = Path("artifacts/data_preparation/fairface_cleaned.csv")
PARAMS_PATH = Path("params.yaml")
EVALUATION_OUTPUT_DIR = Path("artifacts/manual_evaluation")
EVALUATION_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Load parameters
params = read_yaml(PARAMS_PATH)
IMAGE_SIZE = params.IMAGE_SIZE
BATCH_SIZE = params.BATCH_SIZE
TEST_SPLIT_SIZE = params.TEST_SPLIT_SIZE
RANDOM_STATE = params.RANDOM_STATE

# ==============================================================================
# MAIN EVALUATION LOGIC
# ==============================================================================
def evaluate_model():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"--- Running evaluation on device: {device} ---")

    # 1. Load data and prepare the test split
    print("Loading and preparing test data...")
    df = pd.read_csv(DATA_PATH)

    label_maps = {}
    for task in ['age', 'gender', 'race']:
        labels = sorted(df[task].unique())
        label_maps[f'{task}_label2id'] = {label: i for i, label in enumerate(labels)}
        label_maps[f'{task}_id2label'] = {i: label for i, label in enumerate(labels)}
        df[f'{task}_id'] = df[task].map(label_maps[f'{task}_label2id'])

    # Use the same random_state to ensure we get the identical test split as in training
    _, test_df = train_test_split(
        df,
        test_size=TEST_SPLIT_SIZE,
        random_state=RANDOM_STATE,
        stratify=df['age']
    )

    # 2. Create the PyTorch DataLoader
    model_config = read_yaml(Path("config/config.yaml"))
    base_model_name = model_config.multi_task_model_trainer.model_name
    processor = AutoImageProcessor.from_pretrained(base_model_name)

    _transforms = Compose([
        Resize((IMAGE_SIZE, IMAGE_SIZE)),
        ToTensor(),
        Normalize(mean=processor.image_mean, std=processor.image_std)
    ])

    test_dataset = FairFaceDataset(dataframe=test_df, processor=processor, transforms=_transforms)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # 3. Load the trained model
    print(f"Loading model from: {MODEL_PATH}")
    model = MultiTaskEfficientNet(
        model_name=str(MODEL_PATH),  # Pass the path as the model name
        num_labels_age=len(label_maps['age_id2label']),
        num_labels_gender=len(label_maps['gender_id2label']),
        num_labels_race=len(label_maps['race_id2label']),
    ).to(device)

    # Load the trained weights
    model.load_state_dict(torch.load(MODEL_PATH / 'pytorch_model.bin', map_location=device))
    model.eval()

    # 4. Run predictions on the test set
    print("Running predictions on the test set...")
    all_preds = {'age': [], 'gender': [], 'race': []}
    all_labels = {'age': [], 'gender': [], 'race': []}

    for batch in tqdm(test_dataloader, desc="Evaluating"):
        pixel_values = batch['pixel_values'].to(device)
        labels = batch['labels']

        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)

        all_preds['age'].extend(outputs['age_logits'].argmax(1).cpu().numpy())
        all_preds['gender'].extend(outputs['gender_logits'].argmax(1).cpu().numpy())
        all_preds['race'].extend(outputs['race_logits'].argmax(1).cpu().numpy())
        all_labels['age'].extend(labels['age'].cpu().numpy())
        all_labels['gender'].extend(labels['gender'].cpu().numpy())
        all_labels['race'].extend(labels['race'].cpu().numpy())
    # 5. Calculate metrics, generate reports, and save artifacts
    print("--- Evaluation Results ---")
    metrics = {}

    for task in ['age', 'gender', 'race']:
        accuracy = accuracy_score(all_labels[task], all_preds[task])
        print(f"\n--- {task.capitalize()} ---")
        print(f"Accuracy: {accuracy:.4f}")

        report_str = classification_report(
            all_labels[task],
            all_preds[task],
            target_names=list(label_maps[f'{task}_id2label'].values())
        )
        print("Classification Report:")
        print(report_str)

        # Cast to a plain Python float so json.dump can serialize it
        metrics[f'{task}_accuracy'] = float(accuracy)

        # Confusion Matrix
        cm = confusion_matrix(all_labels[task], all_preds[task])
        plt.figure(figsize=(12, 10))
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            xticklabels=list(label_maps[f'{task}_id2label'].values()),
            yticklabels=list(label_maps[f'{task}_id2label'].values()),
            cmap='Blues'
        )
        plt.title(f'Confusion Matrix for {task.capitalize()}', fontsize=16)
        plt.ylabel('Actual', fontsize=12)
        plt.xlabel('Predicted', fontsize=12)
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)

        cm_path = EVALUATION_OUTPUT_DIR / f'{task}_confusion_matrix.png'
        plt.savefig(cm_path, bbox_inches='tight')
        plt.close()  # Close the figure so it is not displayed
        print(f"Saved {task} confusion matrix to {cm_path}")

    # Save metrics to a JSON file
    metrics_path = EVALUATION_OUTPUT_DIR / "metrics.json"
    with open(metrics_path, 'w') as f:
        json.dump(metrics, f, indent=4)
    print(f"\nSaved final metrics to {metrics_path}")

    # Save the label maps used for this evaluation run
    label_maps_path = EVALUATION_OUTPUT_DIR / "label_maps.json"
    with open(label_maps_path, 'w') as f:
        json.dump(label_maps, f, indent=4)
    print(f"Saved label maps to {label_maps_path}")
if __name__ == '__main__':
    evaluate_model()
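
# ------------------------------------------------------------------------------
# Usage note (assumes the training pipeline has already produced the artifacts
# referenced above and that params.yaml defines IMAGE_SIZE, BATCH_SIZE,
# TEST_SPLIT_SIZE, and RANDOM_STATE). Run from the project root so the relative
# paths to config/config.yaml and the artifacts/ directory resolve:
#
#   python research/evaluation.py
#
# Outputs land in artifacts/manual_evaluation/: one confusion-matrix PNG per
# task, metrics.json, and label_maps.json. When reloading label_maps.json,
# remember that json.dump converts the integer keys of the *_id2label maps
# to strings, so cast them back to int if you need id-based lookups.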