# research/evaluation.py
import torch
import pandas as pd
import json
from pathlib import Path
import sys
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tqdm import tqdm
# Add the project's src directory to the Python path
# This allows us to import our custom modules
src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))
sys.path.append(src_path)
# Now we can import our custom classes
from cnnClassifier.components.multi_task_model_trainer import MultiTaskEfficientNet, FairFaceDataset
from cnnClassifier.utils.common import read_yaml
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from transformers import AutoImageProcessor
# ==============================================================================
# CONFIGURATION
# ==============================================================================
# Define paths directly. We are not using the config manager.
MODEL_PATH = Path("artifacts/multi_task_model_trainer/facial_demographics_model")
DATA_PATH = Path("artifacts/data_preparation/fairface_cleaned.csv")
PARAMS_PATH = Path("params.yaml")
EVALUATION_OUTPUT_DIR = Path("artifacts/manual_evaluation")
EVALUATION_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Load parameters
params = read_yaml(PARAMS_PATH)
IMAGE_SIZE = params.IMAGE_SIZE
BATCH_SIZE = params.BATCH_SIZE
TEST_SPLIT_SIZE = params.TEST_SPLIT_SIZE
RANDOM_STATE = params.RANDOM_STATE
# ==============================================================================
# MAIN EVALUATION LOGIC
# ==============================================================================
def evaluate_model():
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"--- Running evaluation on device: {device} ---")
# 1. Load data and prepare the test split
print("Loading and preparing test data...")
df = pd.read_csv(DATA_PATH)
label_maps = {}
for task in ['age', 'gender', 'race']:
labels = sorted(df[task].unique())
label_maps[f'{task}_label2id'] = {label: i for i, label in enumerate(labels)}
label_maps[f'{task}_id2label'] = {i: label for i, label in enumerate(labels)}
df[f'{task}_id'] = df[task].map(label_maps[f'{task}_label2id'])
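
    # Optional sanity check: pandas .map() silently yields NaN for any value
    # missing from the mapping. The maps are built from this same column, so
    # this should always pass; it is kept as a cheap invariant check.
    for task in ['age', 'gender', 'race']:
        assert df[f'{task}_id'].notna().all(), f"Unmapped '{task}' labels found"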
    # Use the same random_state to ensure we get the identical test split as in training
    _, test_df = train_test_split(
        df,
        test_size=TEST_SPLIT_SIZE,
        random_state=RANDOM_STATE,
        stratify=df['age']
    )
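    # The counts below should match what was logged during training whenever
    # TEST_SPLIT_SIZE and RANDOM_STATE are identical.
    print(f"Test set: {len(test_df)} of {len(df)} rows "
          f"(test_size={TEST_SPLIT_SIZE}, random_state={RANDOM_STATE})")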
    # 2. Create the PyTorch DataLoader
    model_config = read_yaml(Path("config/config.yaml"))
    base_model_name = model_config.multi_task_model_trainer.model_name
    processor = AutoImageProcessor.from_pretrained(base_model_name)
    _transforms = Compose([
        Resize((IMAGE_SIZE, IMAGE_SIZE)),
        ToTensor(),
        Normalize(mean=processor.image_mean, std=processor.image_std)
    ])
    test_dataset = FairFaceDataset(dataframe=test_df, processor=processor, transforms=_transforms)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
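    # Note: on a multi-core machine, passing num_workers > 0 and pin_memory=True
    # to the DataLoader can speed up evaluation; the defaults are kept here for
    # portability.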
    # 3. Load the trained model
    print(f"Loading model from: {MODEL_PATH}")
    model = MultiTaskEfficientNet(
        model_name=str(MODEL_PATH),  # Pass the path as the model name
        num_labels_age=len(label_maps['age_id2label']),
        num_labels_gender=len(label_maps['gender_id2label']),
        num_labels_race=len(label_maps['race_id2label']),
    ).to(device)

    # Load the trained weights
    model.load_state_dict(torch.load(MODEL_PATH / 'pytorch_model.bin', map_location=device))
    model.eval()
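    # load_state_dict defaults to strict=True, so any mismatch between the
    # checkpoint and the heads constructed above raises immediately -- a useful
    # guard that the architecture matches what was trained.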
    # 4. Run predictions on the test set
    print("Running predictions on the test set...")
    all_preds = {'age': [], 'gender': [], 'race': []}
    all_labels = {'age': [], 'gender': [], 'race': []}
    for batch in tqdm(test_dataloader, desc="Evaluating"):
        pixel_values = batch['pixel_values'].to(device)
        labels = batch['labels']
        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)
        all_preds['age'].extend(outputs['age_logits'].argmax(1).cpu().numpy())
        all_preds['gender'].extend(outputs['gender_logits'].argmax(1).cpu().numpy())
        all_preds['race'].extend(outputs['race_logits'].argmax(1).cpu().numpy())
        all_labels['age'].extend(labels['age'].cpu().numpy())
        all_labels['gender'].extend(labels['gender'].cpu().numpy())
        all_labels['race'].extend(labels['race'].cpu().numpy())
    # 5. Calculate metrics, generate reports, and save artifacts
    print("--- Evaluation Results ---")
    metrics = {}
    for task in ['age', 'gender', 'race']:
        class_names = list(label_maps[f'{task}_id2label'].values())

        accuracy = accuracy_score(all_labels[task], all_preds[task])
        print(f"\n--- {task.capitalize()} ---")
        print(f"Accuracy: {accuracy:.4f}")
        report_str = classification_report(
            all_labels[task],
            all_preds[task],
            target_names=class_names
        )
        print("Classification Report:")
        print(report_str)
        # Cast to a plain float so json.dump below can serialize it
        # (accuracy_score may return a numpy scalar)
        metrics[f'{task}_accuracy'] = float(accuracy)

        # Confusion Matrix
        cm = confusion_matrix(all_labels[task], all_preds[task])
        plt.figure(figsize=(12, 10))
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            xticklabels=class_names,
            yticklabels=class_names,
            cmap='Blues'
        )
        plt.title(f'Confusion Matrix for {task.capitalize()}', fontsize=16)
        plt.ylabel('Actual', fontsize=12)
        plt.xlabel('Predicted', fontsize=12)
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)
        cm_path = EVALUATION_OUTPUT_DIR / f'{task}_confusion_matrix.png'
        plt.savefig(cm_path, bbox_inches='tight')
        plt.close()  # Close the figure so it is not kept in memory across iterations
        print(f"Saved {task} confusion matrix to {cm_path}")
    # Save metrics to a JSON file
    metrics_path = EVALUATION_OUTPUT_DIR / "metrics.json"
    with open(metrics_path, 'w') as f:
        json.dump(metrics, f, indent=4)
    print(f"\nSaved final metrics to {metrics_path}")

    # Save the label maps used for this evaluation run
    label_maps_path = EVALUATION_OUTPUT_DIR / "label_maps.json"
    with open(label_maps_path, 'w') as f:
        json.dump(label_maps, f, indent=4)
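    # Note: JSON object keys are always strings, so the integer keys in the
    # *_id2label maps are serialized as "0", "1", ...; cast them back with
    # int() when reloading this file.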
print(f"Saved label maps to {label_maps_path}")
if __name__ == '__main__':
    evaluate_model()