# src/vitClassifier/components/model_evaluation.py
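"""Model evaluation component.

Loads the fine-tuned ViT model produced by the training stage, runs it over
the pre-processed test split, computes accuracy and macro-averaged F1,
precision, and recall, writes the scores to a JSON file, and logs the
parameters, metrics, and saved model directory to MLflow.
"""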
import mlflow
import torch
import json
from pathlib import Path
from datasets import load_from_disk
from transformers import (ViTForImageClassification, Trainer, TrainingArguments, DefaultDataCollator)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from vitClassifier.entity.config_entity import EvaluationConfig
from vitClassifier import logger
class ModelEvaluation:
    def __init__(self, config: EvaluationConfig):
        self.config = config

    def evaluate(self):
        # Determine the device to run inference on
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the best model from the training stage and move it to that device
        model_path = str(self.config.path_of_model)
        model = ViTForImageClassification.from_pretrained(model_path).to(device)

        # Load the pre-processed test dataset. No transform is set here
        # because the data was already processed in an earlier stage.
        test_data = load_from_disk(str(self.config.test_dataset_path))

        # The default collator handles the 'pixel_values' and 'label' columns
        data_collator = DefaultDataCollator()
        # Minimal Trainer used only to run batched predictions
        eval_args = TrainingArguments(
            output_dir="./eval_output",  # temporary directory
            per_device_eval_batch_size=self.config.batch_size,
            report_to="none"
        )
        trainer = Trainer(
            model=model,
            args=eval_args,
            data_collator=data_collator
        )
        # --- Run Predictions ---
        logger.info("Running final evaluation on the test set...")
        outputs = trainer.predict(test_data)
        y_true = outputs.label_ids
        y_pred = outputs.predictions.argmax(axis=1)

        # --- Calculate Metrics ---
        scores = {
            "accuracy": accuracy_score(y_true, y_pred),
            "f1_score": f1_score(y_true, y_pred, average='macro'),
            "precision": precision_score(y_true, y_pred, average='macro'),
            "recall": recall_score(y_true, y_pred, average='macro')
        }
        logger.info(f"Test Set Metrics: {scores}")
        # --- Save Metrics to a JSON file ---
        metrics_path = Path(self.config.metrics_file_name)
        metrics_path.parent.mkdir(parents=True, exist_ok=True)  # ensure the directory exists
        with open(metrics_path, 'w') as f:
            json.dump(scores, f, indent=4)
        logger.info(f"Metrics saved to {metrics_path}")
        # --- Log to MLflow ---
        mlflow.set_tracking_uri(self.config.mlflow_uri)
        mlflow.set_experiment("Pneumonia-ViT-Classification")
        with mlflow.start_run():
            logger.info("Logging parameters and metrics to MLflow...")
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(scores)

            # Log the directory where the Trainer already saved the model,
            # rather than the in-memory model object: `mlflow.log_artifact`
            # is a plain file upload and avoids model-registry errors.
            model_dir_path = str(self.config.path_of_model)
            mlflow.log_artifact(model_dir_path, artifact_path="model")
            logger.info("Successfully logged artifacts to MLflow.")