import tempfile
from typing import Dict, List, Optional, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)


def create_confusion_matrix_plot(
    cm: np.ndarray,
    accuracy: float,
    labels: Sequence[str] = ('No', 'Yes'),
) -> str:
    """
    Create a confusion matrix heatmap and save it to a temporary PNG file.

    Args:
        cm: Confusion matrix array
        accuracy: Accuracy score (shown in the plot title)
        labels: Tick labels for both axes. Immutable tuple default avoids
            the shared-mutable-default pitfall of the original ``['No', 'Yes']``.

    Returns:
        Path to the saved plot file
    """
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=list(labels), yticklabels=list(labels))
    plt.title(f'Confusion Matrix (Accuracy: {accuracy:.1%})')
    plt.ylabel('Ground Truth')
    plt.xlabel('Model Prediction')
    # NamedTemporaryFile(delete=False) replaces the deprecated, race-prone
    # tempfile.mktemp: the file is created atomically and left on disk.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        temp_path = tmp.name
    plt.savefig(temp_path, dpi=150, bbox_inches='tight')
    plt.close()
    return temp_path


def _detect_binary_mapping(df: pd.DataFrame) -> Dict[str, int]:
    """
    Build a {lowercased value -> 0/1} mapping from both label columns.

    The alphabetically-first value maps to 0; every other distinct value maps
    to 1 (the positive class), so the mapping is deterministic regardless of
    the order values appear in the data.

    Args:
        df: DataFrame with 'Ground Truth' and 'Binary Output' columns

    Returns:
        Mapping from lowercased string value to 0 or 1

    Raises:
        ValueError: If fewer than 2 distinct non-empty values are present
    """
    # Convert to string first so .str operations work on mixed dtypes.
    ground_truth_values = df['Ground Truth'].dropna().astype(str).str.lower().unique()
    binary_output_values = df['Binary Output'].dropna().astype(str).str.lower().unique()
    all_values = [
        v for v in set(ground_truth_values) | set(binary_output_values)
        if v.strip()  # drop empty strings
    ]
    if len(all_values) < 2:
        raise ValueError("Need at least 2 different values for binary classification")
    sorted_values = sorted(all_values)
    # First value (alphabetically) -> 0; everything else -> 1 (positive class).
    return {v: (0 if v == sorted_values[0] else 1) for v in sorted_values}


def _original_case_labels(df: pd.DataFrame, value_mapping: Dict[str, int]) -> List[str]:
    """
    Recover display labels (original casing) for classes 0 and 1.

    Returns exactly two labels — one per mapped class — fixing the original
    behavior that could emit more than two tick labels when over two raw
    values were collapsed into the positive class. For each class the
    alphabetically-first lowercased key is looked up in the data to find its
    original casing; ``str.title()`` of the key is the fallback.

    Args:
        df: DataFrame (already filtered to rows with valid mapped values)
        value_mapping: Mapping from lowercased value to 0/1

    Returns:
        [label_for_class_0, label_for_class_1]
    """
    labels: List[str] = []
    for cls in (0, 1):
        keys = sorted(k for k, v in value_mapping.items() if v == cls)
        if not keys:
            # Should not happen (mapping always covers both classes), but keep
            # the original code's safe fallback.
            return ['Class 0', 'Class 1']
        lowered = keys[0]
        original_case: Optional[str] = None
        # Search both columns for a value whose lowercase form matches.
        for column in ('Ground Truth', 'Binary Output'):
            for val in df[column].dropna():
                if str(val).lower() == lowered:
                    original_case = str(val)
                    break
            if original_case is not None:
                break
        labels.append(original_case if original_case else lowered.title())
    return labels


def create_accuracy_table(df: pd.DataFrame) -> Tuple[pd.DataFrame, str, pd.DataFrame]:
    """
    Create accuracy metrics table and confusion matrix from results dataframe.

    Args:
        df: DataFrame with 'Ground Truth' and 'Binary Output' columns

    Returns:
        Tuple of (metrics_df, confusion_matrix_plot_path, confusion_matrix_values_df)

    Raises:
        ValueError: If insufficient data for binary classification
    """
    df_copy = df.copy()
    value_mapping = _detect_binary_mapping(df_copy)
    print(f"Detected binary mapping: {value_mapping}")

    # Apply mapping — values absent from the mapping become NaN and are dropped.
    df_copy['Ground Truth Binary'] = (
        df_copy['Ground Truth'].astype(str).str.lower().map(value_mapping)
    )
    df_copy['Binary Output Binary'] = (
        df_copy['Binary Output'].astype(str).str.lower().map(value_mapping)
    )
    df_copy = df_copy.dropna(subset=['Ground Truth Binary', 'Binary Output Binary'])
    if len(df_copy) == 0:
        raise ValueError(
            "No valid data for accuracy calculation after mapping. "
            "Check that Ground Truth and Binary Output contain valid binary values."
        )

    y_true = df_copy['Ground Truth Binary']
    y_pred = df_copy['Binary Output Binary']
    cm = confusion_matrix(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 keeps metrics defined when a class is never predicted.
    metrics_data = [
        ["Accuracy", f"{accuracy:.3f}"],
        ["Precision", f"{precision_score(y_true, y_pred, zero_division=0):.3f}"],
        ["Recall", f"{recall_score(y_true, y_pred, zero_division=0):.3f}"],
        ["F1 Score", f"{f1_score(y_true, y_pred, zero_division=0):.3f}"],
        ["Total Samples", f"{len(df_copy)}"],
    ]
    metrics_df = pd.DataFrame(metrics_data, columns=["Metric", "Value"])

    original_labels = _original_case_labels(df_copy, value_mapping)
    cm_plot_path = create_confusion_matrix_plot(cm, accuracy, original_labels)

    # Confusion matrix values table (labeled only for the 2x2 case).
    if cm.shape == (2, 2):
        tn, fp, fn, tp = cm.ravel()
        cm_values = pd.DataFrame(
            [[tn, fp], [fn, tp]],
            columns=[f"Predicted {original_labels[0]}", f"Predicted {original_labels[1]}"],
            index=[f"Actual {original_labels[0]}", f"Actual {original_labels[1]}"],
        )
    else:
        cm_values = pd.DataFrame(cm)

    return metrics_df, cm_plot_path, cm_values


def save_dataframe_to_csv(df: pd.DataFrame) -> Optional[str]:
    """
    Save dataframe to a temporary CSV file.

    Args:
        df: DataFrame to save

    Returns:
        Path to saved CSV file or None if df is None or empty
    """
    if df is None or df.empty:
        return None
    # NamedTemporaryFile(delete=False) replaces the deprecated tempfile.mktemp.
    with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmp:
        temp_path = tmp.name
    df.to_csv(temp_path, index=False)
    return temp_path