# Data Analysis for Coffee Leaf Rust Severity
# Author: [User's Name/Project Group]
# Description: Comparative analysis of segmentation methods
#   (ImageJ, pliman, DeepLabV3+, SAM_CLR, SAM3).

# ==============================================================================
# 1. Setup and Libraries
# ==============================================================================
library(readxl)
library(dplyr)
library(tidyverse)
library(purrr)
library(epiR)
library(cowplot)

# Set global plotting theme
theme_set(cowplot::theme_half_open(font_size = 12))

# ==============================================================================
# 2. Data Loading
# ==============================================================================

#' Read a CSV file, keeping text columns as character vectors
#'
#' No cleaning or type coercion happens here; callers convert columns
#' themselves where they need numeric values.
#'
#' @param path Path to the CSV file.
#' @return The data frame produced by `read.csv()`.
load_csv <- function(path) {
  read.csv(path, stringsAsFactors = FALSE)
}

# Load Gold Standard (Severity Index)
# Removed R1OLD, R2OLD, R1NEW, R2NEW and the old SAM3 column.
# Renamed SAM3_2 as SAM3 and SAM100 as SAM_CLR.
# Every column except `image` is coerced to numeric.
severity_final <- load_csv("severity_final.csv") %>%
  select(-R1OLD, -R2OLD, -R1NEW, -R2NEW, -SAM3) %>%
  rename(SAM3 = SAM3_2) %>%
  rename(SAM_CLR = SAM100) %>%
  mutate(across(-image, as.numeric))

# Load Method Results
deeplab <- load_csv("deeplab.csv")
imagej <- load_csv("ImageJ.csv")
pliman <- load_csv("pliman.csv")
SAM_CLR <- load_csv("SAM2.csv")
SAM3 <- load_csv("SAM3.csv")

# ==============================================================================
# 3. Helper Functions
# ==============================================================================

#' Extract CCC results from epiR object
#'
#' @param res A list shaped like the result of `epiR::epi.ccc()`: it must
#'   carry `rho.c` (with `est`, `lower`, `upper`) and `C.b`.
#' @return A data frame with columns `CCC`, `CCC.lwr95`, `CCC.upr95`,
#'   `r` (Pearson correlation) and `Cb` (bias correction factor).
extract_epi_ccc <- function(res) {
  ccc <- res[["rho.c"]]
  bias_factor <- res[["C.b"]]

  data.frame(
    CCC = ccc$est,
    CCC.lwr95 = ccc$lower,
    CCC.upr95 = ccc$upper,
    # epi.ccc() has no `rho` element, so `res$rho` would partial-match
    # `rho.c` and yield a data frame of CCC values. Pearson's r is recovered
    # from Lin's identity CCC = r * C.b instead.
    r = ccc$est / bias_factor,
    Cb = bias_factor
  )
}

#' Compute R-squared and RSE
#'
#' Fits the linear model `method ~ gs` and reports its fit statistics.
#'
#' @param gs Numeric vector of gold-standard values (predictor).
#' @param method Numeric vector of method estimates (response).
#' @return A one-row data frame with `R_squared` and `RSE` (residual
#'   standard error, i.e. `sigma` of the model summary).
compute_r2_rse <- function(gs, method) {
  fit_summary <- summary(lm(method ~ gs))

  data.frame(
    R_squared = fit_summary$r.squared,
    RSE = fit_summary$sigma
  )
}

#' Summarise segmentation metrics (IoU, Dice, etc.)
#'
#' Coerces the five metric columns to numeric (after deleting any commas)
#' and collapses each one to a single value with `fun`.
#'
#' NOTE(review): commas are deleted, not converted to a decimal point, so a
#' decimal-comma value such as "0,85" would become 85 - confirm the CSVs only
#' use "," as a thousands separator.
#'
#' @param df Data frame with columns `iou`, `dice`, `precision`, `recall`, `f1`.
#' @param method_name Label stored in the `Method` column of the result.
#' @param fun Summary function called as `fun(x, na.rm = TRUE)`.
#' @return A one-row data frame: `Method`, `IoU`, `Dice`, `Precision`,
#'   `Recall`, `F1`.
summarise_metrics <- function(df, method_name, fun = median) {
  metric_cols <- c("iou", "dice", "precision", "recall", "f1")

  numeric_df <- mutate(
    df,
    across(all_of(metric_cols), ~ as.numeric(gsub(",", "", .x)))
  )

  summarise(
    numeric_df,
    Method = method_name,
    IoU = fun(iou, na.rm = TRUE),
    Dice = fun(dice, na.rm = TRUE),
    Precision = fun(precision, na.rm = TRUE),
    Recall = fun(recall, na.rm = TRUE),
    F1 = fun(f1, na.rm = TRUE)
  )
}

#' Plot segmentation metrics vs gold standard severity
#'
#' Joins the per-image metrics to the gold-standard severity by `image_id`,
#' reshapes them to long format and draws one facet per metric with a loess
#' trend line.
#'
#' @param df_metrics Data frame with `image_id` and the five metric columns.
#' @param method_name Method label used in the plot title.
#' @param gs_data Data frame with `image_id` and `GS` columns.
#' @return A ggplot object.
plot_method_vs_severity <- function(df_metrics, method_name, gs_data) {
  metric_cols <- c("iou", "dice", "precision", "recall", "f1")
  severity_lookup <- select(gs_data, image_id, GS)

  # Attach the gold-standard severity to every image's metrics
  joined <- df_metrics %>%
    select(image_id, all_of(metric_cols)) %>%
    left_join(severity_lookup, by = "image_id")

  # Force numeric types (commas are deleted before conversion)
  numeric_metrics <- mutate(
    joined,
    GS = as.numeric(GS),
    across(all_of(metric_cols), ~ as.numeric(gsub(",", "", .x)))
  )

  # One row per image x metric, with display-ready metric labels
  df_long <- numeric_metrics %>%
    pivot_longer(
      cols = all_of(metric_cols),
      names_to = "metric",
      values_to = "value"
    ) %>%
    mutate(metric = if_else(metric == "iou", "IoU", toupper(metric))) %>%
    filter(!is.na(GS), !is.na(value))

  # "pliman" keeps its lower-case spelling; other names are upper-cased
  display_name <- ifelse(
    method_name == "pliman", "pliman", toupper(method_name)
  )
  plot_title <- paste("Segmentation performance vs severity —", display_name)

  ggplot(df_long, aes(x = GS, y = value)) +
    geom_point(alpha = 0.6, size = 1.5, aes(color = metric)) +
    geom_smooth(
      method = "loess", span = 0.75, se = FALSE,
      linewidth = 1, color = "black"
    ) +
    facet_wrap(~metric, ncol = 3) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_color_viridis_d() +
    labs(
      title = plot_title,
      x = "Gold standard severity",
      y = "Metric value"
    ) +
    theme(
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      legend.position = "none"
    )
}

# ==============================================================================
# 4.
# Analysis: Agreement (CCC)
# ==============================================================================
# Column of `severity_final` holding the gold-standard severity
gold_standard <- "GS"

# Ordered as requested: IMAGEJ, PLIMAN, DEEPLABV3, SAM_CLR AND SAM3
methods_cols <- c("ImageJ", "Pliman", "DeeplabV3", "SAM_CLR", "SAM3")

# Calculate CCC and Regression stats for each method
# (one row per method: CCC with 95% CI, r, Cb, R-squared and RSE)
final_results <- map_dfr(methods_cols, function(m) {
  res_ccc <- epi.ccc(
    x = severity_final[[gold_standard]],
    y = severity_final[[m]],
    ci = "z-transform",
    conf.level = 0.95
  )

  cbind(
    Method = m,
    extract_epi_ccc(res_ccc),
    compute_r2_rse(severity_final[[gold_standard]], severity_final[[m]])
  )
})

# Plot Agreement: point estimate with horizontal 95% CI bars, methods sorted
# by CCC, and a dashed reference line at CCC = 0.90.
plot_ccc <- final_results %>%
  arrange(CCC) %>%
  ggplot(aes(x = CCC, y = reorder(Method, CCC))) +
  geom_point(size = 3) +
  # geom_errorbar() sizes its caps with `width` (also when orientation = "y");
  # `height` is not one of its parameters.
  geom_errorbar(
    aes(xmin = CCC.lwr95, xmax = CCC.upr95),
    width = 0.2,
    orientation = "y"
  ) +
  geom_vline(xintercept = 0.90, linetype = "dashed") +
  # NOTE(review): points or interval ends below 0.3 are dropped by these
  # limits - confirm no method falls under that value.
  scale_x_continuous(limits = c(0.3, 1)) +
  labs(
    title = "Agreement with Gold Standard (CCC)",
    x = "Lin’s CCC",
    y = "Method"
  )

print(plot_ccc)
ggsave("ccc_agreement.png", plot_ccc, width = 8, height = 6, dpi = 300)

# ==============================================================================
# 5.
# CCC Scatterplots (All dots + CCC stats like publication figures)
# ==============================================================================

#' Scatterplot of one method against the reference with agreement statistics
#'
#' Pairs with a non-finite value in either column are dropped. The subtitle
#' reports Lin's CCC with its 95% interval, the bias correction factor Cb,
#' Pearson's r, the intercept and slope of `lm(y ~ x)`, the RMSE and the mean
#' difference (method minus reference).
#'
#' @param df Data frame holding both columns.
#' @param method_col Name of the column with the method's estimates.
#' @param gs_col Name of the reference column.
#' @param method_name Optional label for the title (upper-cased); when `NULL`
#'   the title is `method_col` unchanged.
#' @return A ggplot object.
plot_ccc_method <- function(df, method_col, gs_col = "GS", method_name = NULL) {
  x <- df[[gs_col]]
  y <- df[[method_col]]

  keep <- is.finite(x) & is.finite(y)
  x <- x[keep]
  y <- y[keep]

  # ---- CCC ----
  res <- epi.ccc(x, y, ci = "z-transform", conf.level = 0.95)

  rho_c <- res$rho.c$est
  lwr <- res$rho.c$lower
  upr <- res$rho.c$upper
  Cb <- res$C.b
  # epi.ccc() has no `rho` element; `res$rho` partial-matches `rho.c` and
  # returns a data frame, which sprintf("%.2f") cannot format. Pearson's r is
  # computed directly from the filtered pairs instead.
  r <- cor(x, y)

  # ---- Regression ----
  fit <- lm(y ~ x)
  slope <- coef(fit)[2]
  mu <- coef(fit)[1]

  rmse <- sqrt(mean((y - x)^2))
  bias <- mean(y - x)

  title <- if (is.null(method_name)) method_col else toupper(method_name)

  subtitle <- sprintf(
    "ρc = %.2f [%.2f–%.2f], Cb = %.2f, r = %.2f\nμ = %.2f, β = %.2f, RMSE = %.2f, Bias = %.2f",
    rho_c, lwr, upr, Cb, r, mu, slope, rmse, bias
  )

  ggplot(data.frame(x, y), aes(x, y)) +
    geom_point(color = "#F28E2B", size = 2, alpha = 0.8) +
    # Dashed identity line (perfect agreement) vs. solid fitted line
    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.8) +
    labs(
      title = title,
      subtitle = subtitle,
      x = "Reference Severity (%)",
      y = "Predicted Severity (%)"
    ) +
    coord_equal(xlim = c(0, 60), ylim = c(0, 60)) +
    theme_half_open(font_size = 12)
}

# ---- Create plots for all methods in the requested order ----
plots_ccc <- map(methods_cols, ~ plot_ccc_method(
  severity_final,
  method_col = .x,
  gs_col = gold_standard,
  method_name = .x
))

# ---- Combine into one figure ----
figure_ccc <- plot_grid(plotlist = plots_ccc, ncol = 3)

print(figure_ccc)
ggsave("CCC_scatter_models.png", figure_ccc, width = 10, height = 7, dpi = 300)

# ==============================================================================
# 6.
# Analysis: Metrics Summary
# ==============================================================================

# Per-method metric tables, held in a named list so they can be iterated
# directly instead of being looked up by name with get().
raw_methods <- list(
  deeplab = deeplab,
  imagej = imagej,
  pliman = pliman,
  SAM_CLR = SAM_CLR,
  SAM3 = SAM3
)
method_names <- names(raw_methods)

# Generate Median and Mean summary tables (one row per method)
metrics_median <- imap_dfr(
  raw_methods,
  \(df, nm) summarise_metrics(df, nm, fun = median)
)
metrics_mean <- imap_dfr(
  raw_methods,
  \(df, nm) summarise_metrics(df, nm, fun = mean)
)

cat("\n--- Median Metrics ---\n")
print(metrics_median)
cat("\n--- Mean Metrics ---\n")
print(metrics_mean)

# ==============================================================================
# 7. Analysis: Visualization
# ==============================================================================

# Clean Image IDs: add `image_id`, the `image` column without a trailing
# ".png" extension (case-insensitive)
clean_id <- function(df) mutate(df, image_id = sub("\\.[Pp][Nn][Gg]$", "", image))

severity_final <- clean_id(severity_final)
methods_list <- map(raw_methods, clean_id)

# Plot 1: Performance vs Severity for each method
plots_perf <- imap(methods_list, ~ plot_method_vs_severity(.x, .y, severity_final))

# Display all performance plots
walk(plots_perf, print)

# Plot 2: Density Distribution of Metrics
# Combine all data into one long format dataframe
# NOTE(review): commas are deleted before numeric conversion, so a
# decimal-comma value such as "0,85" would become 85 - confirm the CSV format.
metrics_long <- imap_dfr(methods_list, ~ .x %>% mutate(Method = .y)) %>%
  select(image_id, Method, iou, dice, precision, recall, f1) %>%
  left_join(select(severity_final, image_id, GS), by = "image_id") %>%
  mutate(across(c(iou, dice, precision, recall, f1), ~ as.numeric(gsub(",", "", .x)))) %>%
  pivot_longer(
    cols = c(iou, dice, precision, recall, f1),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    # "pliman" keeps its lower-case spelling; other labels are upper-cased
    Method = ifelse(Method == "pliman", "pliman", toupper(Method)),
    metric = case_when(
      metric == "iou" ~ "IoU",
      TRUE ~ toupper(metric)
    )
  ) %>%
  filter(!is.na(value))

# Calculate medians for the plot overlay
metrics_medians <- metrics_long %>%
  group_by(Method, metric) %>%
  summarise(grp_median = median(value, na.rm = TRUE), .groups = "drop")

# Create Density Plot
plot_density <- ggplot(metrics_long, aes(x = value, fill = metric)) +
  geom_density(alpha = 0.6, color = NA) +
  # Add dashed vertical line for the median
  geom_vline(
    data = metrics_medians,
    aes(xintercept = grp_median),
    linetype = "dashed",
    linewidth = 0.4
  ) +
  # Median annotation, parsed as a plotmath expression
  geom_text(
    data = metrics_medians,
    aes(x = 0, y = Inf, label = paste0("tilde(x):~", round(grp_median, 2))),
    parse = TRUE,
    hjust = -0.1,
    vjust = 5,
    size = 3,
    inherit.aes = FALSE
  ) +
  facet_grid(Method ~ metric) +
  scale_fill_viridis_d() +
  # Clean x-axis labels: as.character() already prints 0, 0.25, 0.5, 0.75, 1
  # without trailing zeros (the former ifelse() had two identical branches).
  scale_x_continuous(
    breaks = seq(0, 1, 0.25),
    labels = \(x) as.character(x)
  ) +
  labs(
    x = "Metric value",
    y = "Density",
    fill = "Metric",
    title = "Distribution of segmentation metrics by method"
  ) +
  theme(
    strip.background = element_rect(fill = "grey90", color = NA),
    strip.text = element_text(face = "bold"),
    legend.position = "bottom"
  )

print(plot_density)
ggsave("models.png", plot_density, bg = "white", width = 8, height = 6, dpi = 300)

# ============================================================
# gt + gtExtras: metrics in columns, with (median + mini-plot)
# Requires: metrics_long with columns Method, metric, value
# ============================================================
library(tidyr)
library(gt)
library(gtExtras)

# --- 0) (Optional) ensure expected order/names of metrics ---
# These must match the labels produced when metrics_long was built.
metrics <- c("IoU", "DICE", "PRECISION", "RECALL", "F1")
methods <- c("pliman", "IMAGEJ", "DEEPLAB", "SAM_CLR", "SAM3")

metrics_long2 <- metrics_long %>%
  filter(!is.na(value)) %>%
  mutate(
    metric = factor(metric, levels = metrics),
    Method = factor(Method, levels = methods)
  )

# --- 1) List-column with values (to become a mini-plot) ---
dist_wide <- metrics_long2 %>%
  group_by(Method, metric) %>%
  summarise(dist = list(as.numeric(value)), .groups = "drop") %>%
  mutate(metric = as.character(metric)) %>%
  pivot_wider(names_from = metric, values_from = dist)

# --- 2) Medians per metric (becomes numeric columns) ---
med_wide <- metrics_long2 %>%
  group_by(Method, metric) %>%
  summarise(med = median(as.numeric(value), na.rm = TRUE), .groups = "drop") %>%
  mutate(metric = as.character(metric)) %>%
  pivot_wider(names_from = metric, values_from = med, names_prefix = "med_")

# --- 3) Combine everything (1 row per Method) ---
tbl_wide <- dist_wide %>%
  left_join(med_wide, by = "Method") %>%
  mutate(Method = factor(Method, levels = methods)) %>%
  arrange(Method)

# --- 4) gt Table: mini-plots + spanners with unique id ---
tab <- tbl_wide %>%
  gt(rowname_col = "Method") %>%
  # format medians
  fmt_number(columns = all_of(paste0("med_", metrics)), decimals = 2) %>%
  # mini-plots (one per metric column with list-column)
  {
    purrr::reduce(metrics, .init = ., .f = \(gt_tbl, m) {
      gt_tbl %>%
        gt_plt_dist(column = all_of(m), fill = "steelblue", type = "density")
    })
  } %>%
  # labels
  cols_label(.list = c(
    setNames(rep("Median", length(metrics)), paste0("med_", metrics)),
    setNames(rep("Dist.", length(metrics)), metrics)
  )) %>%
  # spanners per metric (with unique id to avoid errors)
  {
    purrr::reduce(metrics, .init = ., .f = \(gt_tbl, m) {
      gt_tbl %>%
        tab_spanner(
          label = m,
          columns = c(paste0("med_", m), m),
          id = paste0("sp_", m)
        )
    })
  } %>%
  # column widths (fine tuning)
  cols_width(
    all_of(paste0("med_", metrics)) ~ px(70),
    all_of(metrics) ~ px(100)
  ) %>%
  cols_align(align = "center") %>%
  # font style, sizes and borders
  tab_options(
    table.font.names = "Arial",
    column_labels.font.size = px(20),
    column_labels.font.weight = "bold",
    table.font.size = px(16),
    data_row.padding = px(15),
    # Black borders (top and bottom only)
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    # Remove internal lines
    table_body.hlines.style = "none",
    column_labels.border.bottom.width = px(2), # keep a slightly thicker line below labels
    table_body.border.bottom.color = "black",
    stub.border.style = "none"
  ) %>%
  tab_style(
    style = cell_text(size = px(22), weight = "bold", align = "center"),
    locations = cells_column_spanners()
  )

tab