|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.metrics import roc_curve, auc |
|
|
import logging |
|
|
import os |
|
|
|
|
|
|
|
|
# Set up root logging at import time: INFO threshold, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
def plot_roc_curve(results_path, output_image_path):
    """
    Reads predictions CSV, calculates AUC, and plots ROC curve.

    The CSV is expected to contain a ``true_label`` column and a
    ``pneumothorax_score`` column. Rows whose ``true_label`` equals the
    string ``'Pneumothorax'`` form the positive class; every other row is
    treated as negative. Rows with a missing ``pneumothorax_score`` are
    dropped before the curve is computed.

    Args:
        results_path: Path of the predictions CSV to read.
        output_image_path: Destination passed to ``plt.savefig``. When it is
            a filesystem path, its parent directory is created if missing.

    Returns:
        None. Every failure (missing file, missing columns, no usable rows,
        single-class labels, or any unexpected exception) is logged and the
        function returns without raising.
    """
    if not os.path.exists(results_path):
        logger.error(f"Results file not found: {results_path}")
        return

    try:
        df = pd.read_csv(results_path)
        logger.info(f"Loaded {len(df)} predictions from {results_path}")

        # Report missing columns explicitly instead of letting a bare
        # KeyError surface through the blanket handler below.
        required_columns = ('true_label', 'pneumothorax_score')
        missing_columns = [c for c in required_columns if c not in df.columns]
        if missing_columns:
            logger.error(
                f"Results file is missing required column(s): {missing_columns}"
            )
            return

        # Rows without a score cannot take part in the ROC computation.
        df = df.dropna(subset=['pneumothorax_score'])
        if df.empty:
            logger.error("No valid predictions found.")
            return

        # Binarize the labels: 1 for 'Pneumothorax', 0 for anything else.
        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
        y_scores = df['pneumothorax_score']

        # A ROC curve needs both positives and negatives; with only one class
        # present the rates are undefined and the plot would be meaningless.
        if y_true.nunique() < 2:
            logger.error(
                "Cannot compute ROC curve: true_label contains only one class."
            )
            return

        fpr, tpr, _ = roc_curve(y_true, y_scores)
        roc_auc = auc(fpr, tpr)
        logger.info(f"Calculated AUC: {roc_auc:.4f}")

        # Make sure the destination directory exists. Skipped for non-path
        # targets (e.g. file-like objects), which savefig also accepts.
        if isinstance(output_image_path, (str, os.PathLike)):
            output_dir = os.path.dirname(output_image_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        fig = plt.figure(figsize=(8, 6))
        try:
            plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
            # Diagonal reference line (TPR == FPR).
            plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC Curve - Zero-Shot Pneumothorax Classification (Kaggle)')
            plt.legend(loc="lower right")
            plt.grid(True, alpha=0.3)

            plt.savefig(output_image_path)
            logger.info(f"ROC curve saved to {output_image_path}")
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

    except Exception as e:
        # Best-effort boundary: log (with traceback) instead of propagating.
        logger.exception(f"Failed to plot ROC curve: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the predictions CSV and write the ROC plot,
    # both at fixed relative paths under "results/".
    results_file, output_image = (
        "results/kaggle_predictions.csv",
        "results/kaggle_roc_curve.png",
    )
    plot_roc_curve(results_file, output_image)
|
|
|