Spaces:

harpomaxx
/

goat-behavior

Running

App Files Files Community

harpomaxx commited on Jul 16, 2023

Commit

41592fb

1 Parent(s): 89e50b1

add SHAP values analysis

Browse files

Files changed (5) hide show

Dockerfile +3 -0
app.R +106 -1
calculate_shap.R +328 -0
plot_shap.R +299 -0
selected_features.tsv +16 -0

Dockerfile CHANGED Viewed

@@ -7,7 +7,10 @@ RUN install2.r --error \
     ggExtra \
     readr \
     caret \
     ggplot2 \
     shiny
 RUN install2.r --error \

     ggExtra \
     readr \
     caret \
+    fastshap \
     ggplot2 \
+    ggExtra \
+    forcats \
     shiny
 RUN install2.r --error \

app.R CHANGED Viewed

@@ -5,6 +5,9 @@ library(readr)
 library(catboost)
 library(ggplot2)
 # Load the pre-trained model
 model <- readRDS("goat_behavior_model_caret.rds")
@@ -149,7 +152,18 @@ ui <- fluidPage(
                  tableOutput("contents"),
                  verbatimTextOutput("confusionMatText"),
                  plotOutput("confusionMatPlot"),
-                 downloadButton("downloadData", "Download Predictions"))
       )
     )
   )
@@ -229,6 +243,97 @@ server <- function(input, output) {
       theme_minimal() +
       theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
   })
 }
 # Create a Shiny app object

 library(catboost)
 library(ggplot2)
+source("calculate_shap.R")
+source("plot_shap.R")
 # Load the pre-trained model
 model <- readRDS("goat_behavior_model_caret.rds")
                  tableOutput("contents"),
                  verbatimTextOutput("confusionMatText"),
                  plotOutput("confusionMatPlot"),
+                 downloadButton("downloadData", "Download Predictions")),
+         tabPanel("SHAP Summary",
+                 plotOutput("SHAPSummary")),
+        tabPanel("SHAP Summary per class",
+                 plotOutput("SHAPSummaryperclass")),
+        tabPanel("SHAP Dependency",
+                 plotOutput("SHAPDependency"))
       )
     )
   )
       theme_minimal() +
       theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
   })
+  # "SHAP Summary" tab: stacked mean(|SHAP|) bars per feature, coloured by
+  # activity class, computed on the uploaded file.
+  output$SHAPSummary <- renderPlot({
+    # Nothing to draw until a file has been uploaded.
+    if (is.null(input$file1))
+      return(NULL)
+    upload <- input$file1
+    dataset <- readr::read_delim(upload$datapath, delim = '\t')
+    predictions <- predict(model, dataset)
+    # Feature names to keep, read from the `variable` column of the TSV.
+    selected_variables <- readr::read_delim(
+      "selected_features.tsv",
+      col_types = cols(),
+      delim = '\t'
+    )
+    # Selected features plus Anim and Activity; cbind() then appends the
+    # predictions as a column named `predictions`, which calculate_shap() reads.
+    new_dataset <- select(dataset, selected_variables$variable, Anim, Activity)
+    new_dataset <- cbind(new_dataset, predictions)
+    shap_values <- calculate_shap(new_dataset, model, nsim = 30)
+    shap_summary_plot(shap_values) + xlim(0, 0.35)
+  })
+  # "SHAP Summary per class" tab: one mean(|SHAP|) bar chart per activity
+  # class (W, R, G, GM), arranged in a single grid.
+  output$SHAPSummaryperclass <- renderPlot({
+    # Nothing to draw until a file has been uploaded.
+    if (is.null(input$file1))
+      return(NULL)
+    inFile <- input$file1
+    dataset <- readr::read_delim(inFile$datapath,delim='\t')
+    predictions <- predict(model, dataset)
+    # Feature names to keep, read from the `variable` column of the TSV.
+    selected_variables <-
+      readr::read_delim(
+        "selected_features.tsv",
+        col_types = cols(),
+        delim = '\t'
+      )
+    new_dataset <-
+      dataset %>% select(selected_variables$variable, Anim, Activity)
+    # cbind() names the new column `predictions`; calculate_shap() reads it.
+    new_dataset <- cbind(new_dataset, predictions)
+    shap_values <- calculate_shap(new_dataset, model, nsim = 30)
+    # One panel per activity: same x-range everywhere so panels are comparable.
+    class_panel <- function(cls, colour) {
+      shap_summary_plot_perclass(shap_values, class = cls, color = colour) +
+        xlab(paste0("Activity ", cls)) +
+        xlim(0, 0.25)
+    }
+    pW <- class_panel("W", "#C77CFF")
+    pGM <- class_panel("GM", "#7CAE00")
+    pG <- class_panel("G", "#F8766D")
+    pR <- class_panel("R", "#00BFC4")
+    # Namespace-qualified because none of the visible scripts attaches
+    # gridExtra; a bare grid.arrange() only works if it is attached elsewhere.
+    gridExtra::grid.arrange(pW,pR,pG,pGM)
+  })
+  # "SHAP Dependency" tab: one row of per-activity dependency plots for each
+  # of a fixed set of features, stacked vertically.
+  output$SHAPDependency <- renderPlot({
+    # Nothing to draw until a file has been uploaded.
+    if (is.null(input$file1))
+      return(NULL)
+    inFile <- input$file1
+    dataset <- readr::read_delim(inFile$datapath,delim='\t')
+    predictions <- predict(model, dataset)
+    # Feature names to keep, read from the `variable` column of the TSV.
+    selected_variables <-
+      readr::read_delim(
+        "selected_features.tsv",
+        col_types = cols(),
+        delim = '\t'
+      )
+    new_dataset <-
+      dataset %>% select(selected_variables$variable, Anim, Activity)
+    # cbind() names the new column `predictions`; calculate_shap() and
+    # dependency_plot() both read it.
+    new_dataset <- cbind(new_dataset, predictions)
+    shap_values <- calculate_shap(new_dataset, model, nsim = 30)
+    # The feature is passed as a literal string at each call site on purpose:
+    # dependency_plot() embraces its `feature` argument, so the literal form is
+    # the one known to work.
+    # Other candidates that are currently not shown: prev_steps1,
+    # prev_headdown1, prev_Active1, prev_Standing1, X_Act, Y_Act, DBA123, DFA123.
+    li <- list(
+      dependency_plot("Steps", dataset = new_dataset, shap = shap_values),
+      dependency_plot("%HeadDown", dataset = new_dataset, shap = shap_values),
+      dependency_plot("Active", dataset = new_dataset, shap = shap_values),
+      dependency_plot("Standing", dataset = new_dataset, shap = shap_values)
+    )
+    # Namespace-qualified because none of the visible scripts attaches
+    # gridExtra; a bare grid.arrange only works if it is attached elsewhere.
+    do.call(gridExtra::grid.arrange, c(li, ncol = 1))
+  })
 }
 # Create a Shiny app object

calculate_shap.R ADDED Viewed

	@@ -0,0 +1,328 @@

+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(fastshap)) # for fast (approximate) Shapley values
+suppressPackageStartupMessages(library(caret))
+suppressPackageStartupMessages(library(doMC))
+registerDoMC(cores = 10)
+# Prediction wrappers handed to fastshap::explain() as `pred_wrapper`.
+# Each one asks caret for class probabilities (type = "prob") and returns the
+# column belonging to a single activity class.
+p_function_G <- function(object, newdata) {
+  class_probs <- caret::predict.train(object, newdata = newdata, type = "prob")
+  class_probs[, "G"]
+}
+p_function_GM <- function(object, newdata) {
+  class_probs <- caret::predict.train(object, newdata = newdata, type = "prob")
+  class_probs[, "GM"]
+}
+p_function_R <- function(object, newdata) {
+  class_probs <- caret::predict.train(object, newdata = newdata, type = "prob")
+  class_probs[, "R"]
+}
+p_function_W <- function(object, newdata) {
+  class_probs <- caret::predict.train(object, newdata = newdata, type = "prob")
+  class_probs[, "W"]
+}
+# DEPRECATED
+#
+# Computes approximate SHAP values per ground-truth class (G, GM, R, W) and
+# row-binds them in that class order; the original row order of `dataset` is
+# NOT restored.
+#
+# dataset: data frame with an `Activity` column; every other column is passed
+#          to the model as a feature.
+# model:   model object forwarded to fastshap::explain() and to the
+#          p_function_* wrappers.
+# nsim:    number of Monte Carlo repetitions used by fastshap::explain().
+#
+# Returns the row-bound fastshap::explain() results with an added `class`
+# column. NOTE(review): `$class <-` assumes explain() returns a
+# data-frame-like object -- confirm against the installed fastshap version.
+calculate_shap_deprecated <- function(dataset,model,nsim=10) {
+#  library(doParallel)
+#  registerDoParallel(8)
+  trainset <- dataset %>%  na.omit() %>%
+    as.data.frame()
+  # Take the labels from the NA-filtered rows. Filtering `Activity` on its own
+  # leaves the label vector longer than `trainset` (and shifted) whenever a
+  # row is dropped because of an NA in some other column.
+  trainset_y <- trainset$Activity
+  trainset <- trainset %>% select(-Activity)
+  # One probability wrapper per class; list order fixes the rbind order.
+  wrappers <- list(
+    G = p_function_G,
+    GM = p_function_GM,
+    R = p_function_R,
+    W = p_function_W
+  )
+  shap_by_class <- lapply(names(wrappers), function(cls) {
+    message(" - Calculating SHAP values for class ", cls)
+    # Approximate Shapley values with `nsim` Monte Carlo repetitions: the
+    # whole trainset is the background, only rows of class `cls` are explained.
+    shap_cls <-
+      fastshap::explain(
+        model,
+        X = trainset,
+        pred_wrapper = wrappers[[cls]],
+        nsim = nsim,
+        newdata = trainset[which(trainset_y == cls), , drop = FALSE],
+        .parallel = TRUE
+      )
+    shap_cls$class <- cls
+    shap_cls
+  })
+  do.call(rbind, shap_by_class)
+}
+#' Calculate SHAP values per ground-truth class, keeping the dataset order
+#'
+#' Rows containing any NA are dropped first. For each activity class
+#' (G, GM, R, W) the rows whose `Activity` equals that class are explained
+#' with the matching `p_function_*` probability wrapper, using all remaining
+#' rows as the background sample. The per-class results are combined and
+#' returned in the same row order as the NA-filtered dataset.
+#'
+#' SHAP values are calculated for the class given by `Activity` (ground
+#' truth), not for the predicted class. The `predictions` column is only
+#' carried along for filtering and analysis. Use `calculate_shap_class()` to
+#' explain a chosen class on new data.
+#'
+#' @param dataset a data frame with the model features plus the columns
+#'   `Activity`, `Anim` and `predictions`. It is used both as the background
+#'   sample for permutation and, filtered by class, as the data to explain.
+#'   Rows whose `Activity` is not one of G, GM, R, W are not explained and do
+#'   not appear in the result.
+#' @param model a model object passed to `fastshap::explain()` and to the
+#'   `p_function_*` wrappers.
+#' @param nsim number of Monte Carlo simulations.
+#'
+#' @return the row-bound `fastshap::explain()` results (one column per
+#'   feature) with added columns `class`, `Anim` and `predictions`.
+#'   NOTE(review): `$class <-` assumes `explain()` returns a data-frame-like
+#'   object -- confirm against the installed fastshap version.
+#' @export
+calculate_shap <- function(dataset,model,nsim=10) {
+  trainset <- dataset %>%  na.omit() %>%
+    as.data.frame()
+  # Labels and metadata are read from the NA-filtered rows so they stay
+  # aligned with `trainset`. Filtering `Activity` on its own shifts the labels
+  # whenever a row is dropped because of an NA in some other column.
+  trainset_y <- trainset$Activity
+  Anim <- trainset$Anim
+  predictions <- trainset$predictions
+  # Only the model features are handed to fastshap.
+  features <- trainset %>% select(-Activity, -Anim, -predictions)
+  # One probability wrapper per class; list order fixes the rbind order.
+  wrappers <- list(
+    G = p_function_G,
+    GM = p_function_GM,
+    R = p_function_R,
+    W = p_function_W
+  )
+  # Row positions (within `trainset`) of each class, used to restore order.
+  rows_by_class <- lapply(names(wrappers), function(cls) {
+    which(trainset_y == cls)
+  })
+  names(rows_by_class) <- names(wrappers)
+  shap_by_class <- lapply(names(wrappers), function(cls) {
+    message(" - Calculating SHAP values for class ", cls)
+    # Approximate Shapley values with `nsim` Monte Carlo repetitions.
+    shap_cls <-
+      fastshap::explain(
+        model,
+        X = features,
+        pred_wrapper = wrappers[[cls]],
+        nsim = nsim,
+        newdata = features[rows_by_class[[cls]], , drop = FALSE],
+        .parallel = TRUE
+      )
+    shap_cls$class <- cls
+    shap_cls
+  })
+  shap_values <- do.call(rbind, shap_by_class)
+  # `id` lists the original row position of every row of `shap_values`
+  # (class by class), so order(id) puts the rows back in dataset order.
+  id <- unlist(rows_by_class, use.names = FALSE)
+  shap_values <- shap_values %>% tibble::add_column(Anim = Anim[id])
+  shap_values <- shap_values %>%
+    tibble::add_column(predictions = predictions[id])
+  shap_values[order(id), ]
+}
+#' Calculate SHAP values for one class on new data
+#'
+#' Explains every row of `new_data` with the probability wrapper
+#' `function_class`, using the NA-filtered `dataset` as the background sample.
+#' Both inputs must contain the columns `Activity`, `predictions` and `Anim`;
+#' they are removed before the data reach the model.
+#'
+#' @param dataset the dataset used for permutation during SHAP calculation
+#' @param new_data the new data we want to calculate SHAP values for
+#' @param model  the model used for explanation
+#' @param nsim  the number of Monte Carlo simulations
+#' @param function_class a wrapper function returning the probability of a
+#'   single class (e.g. `p_function_G`)
+#' @param class_name the name of the class; only used in the progress message
+#'
+#' @return the `fastshap::explain()` result with added columns `class`
+#'   (filled with the `Activity` values of `new_data`, not with
+#'   `class_name`), `Anim` and `predictions`.
+#' @export
+#'
+#' @examples
+#'
+#' # Calculate the SHAP values for class G on new data
+#' shap_values_G <- calculate_shap_class(
+#' dataset,
+#' new_data = newdata,
+#' model = goat_model,
+#' nsim = 100,
+#' function_class = p_function_G,
+#' class_name = "G")
+#'
+#'
+calculate_shap_class <- function(dataset, new_data, model,nsim=10,
+                                 function_class, class_name = "G") {
+  # Background sample: complete rows only, model features only.
+  trainset <- dataset %>%  na.omit() %>%
+    as.data.frame()
+  trainset<- trainset %>%select (-Activity,-predictions,-Anim)
+  # Set the metadata columns aside, then strip them from the rows to explain.
+  Anim <- new_data$Anim
+  Activity <- new_data$Activity
+  predictions <- new_data$predictions
+  new_data_class <- new_data %>% select(-Anim, -Activity, -predictions)
+  # Compute fast (approximate) Shapley values using `nsim` Monte Carlo
+  # repetitions
+  message(" - Calculating SHAP values for class ",class_name)
+  shap_values_class <-
+    fastshap::explain(
+      model,
+      X = trainset,
+      pred_wrapper = function_class,
+      nsim = nsim,
+      newdata = new_data_class,
+      .parallel = TRUE
+    )
+  # NOTE(review): `class` holds the ground-truth Activity of each row, even
+  # though the SHAP values explain `class_name` -- confirm this is intended.
+  shap_values_class$class<-Activity
+  shap_values<-shap_values_class
+  shap_values <- shap_values %>% tibble::add_column(Anim)
+  shap_values <- shap_values %>% tibble::add_column(predictions)
+  shap_values
+}
+# Stacked bar chart of mean(|SHAP|) per feature, one fill colour per class,
+# drawn with the ggdark classic theme.
+# NOTE(review): plot_shap.R defines a function with the same name; whichever
+# file is sourced last wins.
+shap_summary_plot<-function(shap_values){
+  # Long format: one row per (observation, feature) pair.
+  long_shap <- reshape2::melt(shap_values)
+  mean_abs_shap <- long_shap %>%
+    group_by(class, variable) %>%
+    summarise(mean = mean(abs(value))) %>%
+    arrange(desc(mean))
+  bar_mapping <- aes(
+    y = variable,
+    x = mean,
+    group = class,
+    fill = class
+  )
+  ggplot(mean_abs_shap) +
+    ggdark::dark_theme_classic() +
+    geom_col(bar_mapping, position = "stack") +
+    xlab("Mean(|Shap Value|) Average impact on model output magnitude")
+}
+# Beeswarm-style (sina) plot of SHAP values per feature, faceted by class and
+# coloured by the feature value rescaled to [0, 1] with range01().
+#
+# shap_values: SHAP values with a `class` column (e.g. from calculate_shap()).
+# dataset:     the feature data with an `Activity` column.
+#
+# NOTE(review): the two melted tables are combined positionally, which assumes
+# `shap_values` and `dataset` have the same rows and feature columns in the
+# same order -- verify against the caller.
+# NOTE(review): plot_shap.R defines a function with the same name; whichever
+# file is sourced last wins.
+shap_beeswarm_plot<-function(shap_values,dataset){
+  shap_values <- shap_values %>% reshape2::melt()
+  dataset<-dataset %>% mutate(class=Activity) %>% select(-Activity) %>%
+    reshape2::melt() %>% group_by(variable) %>%
+    mutate(value_scale=range01(value))
+  beeswarm_plot<-cbind(shap_values, feature_value=dataset$value_scale) %>%
+    # filter(class=="GM") %>%
+    ggplot()+
+    facet_wrap(~class)+
+    #ggdark::dark_theme_bw()+
+    theme_classic()+
+    geom_hline(yintercept=0,
+               color = "red", size=0.5)+
+    ggforce::geom_sina(aes(x=variable,y=value,color=feature_value),size=0.5,bins=4,alpha=0.9,shape=15)+
+    # Only one colour scale can be active. The former yellow-to-red scale was
+    # always replaced by this one, so it has been removed.
+    scale_colour_gradient(low = "skyblue", high = "orange", na.value = NA)+
+    xlab("Feature")+ylab("SHAP value")+
+    theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
+  beeswarm_plot
+}

plot_shap.R ADDED Viewed

	@@ -0,0 +1,299 @@

+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(ggplot2))
+suppressPackageStartupMessages(library(ggExtra))
+suppressPackageStartupMessages(library(forcats))
+#' Rescale a numeric vector linearly to [0, 1]
+#'
+#' @param x a numeric vector.
+#' @param na.rm ignore `NA` values when computing the minimum and maximum?
+#'   With `TRUE` (default) missing values stay `NA` and the rest is rescaled;
+#'   with `FALSE` a single `NA` makes the whole result `NA`.
+#' @return a numeric vector of the same length as `x`. When all non-missing
+#'   values are equal the range is zero and the result is `NaN`.
+range01 <- function(x, na.rm = TRUE) {
+  lo <- min(x, na.rm = na.rm)
+  hi <- max(x, na.rm = na.rm)
+  (x - lo) / (hi - lo)
+}
+# Stacked bar chart of mean(|SHAP|) per feature, one fill colour per activity
+# class, on a classic (light) theme.
+shap_summary_plot<-function(shap_values){
+  # Long format: one row per (observation, feature) pair.
+  long_shap <- reshape2::melt(shap_values)
+  mean_abs_shap <- long_shap %>%
+    group_by(class, variable) %>%
+    summarise(mean = mean(abs(value))) %>%
+    arrange(desc(mean))
+  bar_mapping <- aes(
+    y = variable,
+    x = mean,
+    group = class,
+    fill = class
+  )
+  ggplot(mean_abs_shap) +
+    # ggdark::dark_theme_classic() +
+    theme_classic() +
+    geom_col(bar_mapping, position = "stack") +
+    ylab("Feature") +
+    xlab("Mean(|Shap Value|) Average impact on model output magnitude per activity.") +
+    guides(fill = guide_legend(title = "Activity"))
+}
+# Bar chart of mean(|SHAP|) per feature for a single activity class, with the
+# bars ordered by that mean and filled with one fixed colour.
+#
+# shap_values: SHAP values with a `class` column.
+# class:       the activity class to keep (compared against the `class` column).
+# color:       fill colour of the bars.
+shap_summary_plot_perclass<-function(shap_values, class="G",color="#F8766D"){
+  # Left-hand `class` is the data column, `{{class}}` the function argument.
+  class_rows <- shap_values %>%
+    as.data.frame() %>%
+    filter(class == {{class}} )
+  mean_abs_shap <- class_rows %>%
+    reshape2::melt() %>%
+    group_by(variable) %>%
+    summarise(mean = mean(abs(value)))
+  axis_title <- paste0("Mean(|Shap Value|) Average impact on model output magnitude for activity ", class)
+  ggplot(mean_abs_shap) +
+    theme_classic() +
+    geom_col(
+      aes(x = mean, y = fct_reorder(variable, mean)),
+      fill = color
+    ) +
+    ylab("Feature") +
+    xlab(axis_title) +
+    guides(fill = guide_legend(title = "Activity"))
+}
+# Beeswarm-style (sina) plot of SHAP values per feature, faceted by class.
+# Points are black-outlined squares filled by the feature value rescaled to
+# [0, 1] with range01().
+#
+# shap_values: SHAP values with a `class` column (e.g. from calculate_shap()).
+# dataset:     the feature data with an `Activity` column.
+#
+# NOTE(review): the two melted tables are combined positionally, which assumes
+# `shap_values` and `dataset` have the same rows and feature columns in the
+# same order -- verify against the caller.
+shap_beeswarm_plot<-function(shap_values,dataset){
+  shap_values <- shap_values %>% reshape2::melt()
+  dataset<-dataset %>% mutate(class=Activity) %>% select(-Activity) %>%
+    reshape2::melt() %>% group_by(variable) %>%
+    mutate(value_scale=range01(value))
+  beeswarm_plot<-cbind(shap_values, feature_value=dataset$value_scale) %>% # filter(class=="GM") %>%
+    ggplot()+
+    facet_wrap(~class)+
+    #ggdark::dark_theme_bw()+
+    theme_classic()+
+    geom_hline(yintercept=0,
+               color = "red", size=0.5)+
+    ggforce::geom_sina(aes(x=variable,y=value,fill=feature_value),color="black", size=2.4,bins=4,alpha=0.9,shape=22)+
+    # Only one fill scale can be active. The former yellow-to-red scale was
+    # always replaced by this one, so it has been removed.
+    scale_fill_gradient(low = "skyblue", high = "orange", na.value = NA)+
+    xlab("Feature")+ylab("SHAP value")+
+    theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
+  beeswarm_plot
+}
+#' Dependency plot for a particular feature, one panel per activity
+#'
+#' For every activity found in `dataset$Activity`, plots the SHAP value of
+#' `feature` against the feature value rescaled to [0, 1], with marginal
+#' histograms. Points are coloured by whether `Activity` equals `predictions`
+#' (labelled "TP") or not (labelled "FP").
+#'
+#' @param feature name of the feature column, as a string.
+#' @param dataset a dataset with goat information; must contain `feature`,
+#'   `Activity` and `predictions`.
+#' @param shap  a SHAP value dataset with one column per feature and a `class`
+#'   column (e.g. from `calculate_shap()`).
+#'
+#' @details NOTE(review): SHAP rows and dataset rows are paired by position
+#'   within each activity; this assumes `shap` was computed from exactly the
+#'   rows of `dataset` (no rows dropped for NA) -- verify against the caller.
+#'
+#' @return the value of `gridExtra::grid.arrange()` for the per-activity
+#'   panels (the panels are also drawn on the current device).
+#' @export
+#'
+#' @examples
+#'
+#' dataset <-
+#' readr::read_delim("data/split/seba-caprino_loocv.tsv",
+#'        delim = '\t')
+#' selected_variables <-
+#'  readr::read_delim(
+#'    "data/topnfeatures/seba-caprino_selected_features.tsv",
+#'    col_types = cols(),
+#'    delim = '\t'
+#'  )
+#' dataset <-
+#'  dataset %>% select(selected_variables$variable,
+#'  Anim,
+#'  Activity)
+#' goat_model <- readRDS("models/boost/seba-caprino_model.rds")
+#' shap_values <- calculate_shap(dataset,
+#'                model = goat_model,
+#'                nsim = 30)
+#' dependency_plot(feature = "Steps",
+#'                 dataset = dataset,
+#'                 shap = shap_values)
+dependency_plot <- function(feature, dataset, shap) {
+  stopifnot(
+    is.character(feature), length(feature) == 1,
+    feature %in% names(dataset),
+    feature %in% names(shap)
+  )
+  # `!!feature :=` names the column after the *value* of `feature`. Embracing
+  # it would capture the caller's expression instead, which breaks as soon as
+  # the name is passed through a variable.
+  newdata <- dataset %>% mutate(!!feature := range01(.data[[feature]]))
+  #activities <- c("G", "GM", "W", "R")
+  activities<-dataset %>% pull(Activity) %>% unique()
+  plots <- list()
+  for (activity in activities) {
+    # Select the feature by name rather than through a fixed column range, so
+    # the number of features in `shap` does not matter.
+    s <- shap[which(shap$class == activity), ]
+    x <- newdata[which(newdata$Activity == activity), ]
+    data <- data.frame(
+      shap = s[[feature]],
+      feature = x[[feature]],
+      tp = ifelse(x$Activity == x$predictions, "TP", "FP")
+    )
+    p <- ggplot(data, aes(x = feature)) +
+      geom_point(aes(y = shap, color = tp), alpha = 0.3, size = 0.8) +
+      geom_smooth(aes(y = shap),
+                  se = FALSE,
+                  size = 0.5,
+                  linetype = "dashed") +
+      geom_hline(
+        yintercept = 0,
+        color = 'red',
+        size = 0.5,
+        alpha = 0.5
+      ) +
+      xlab(feature) +
+      labs(title = paste0("Activity ", activity)) +
+      ylab("SHAP Value") +
+      ylim(-0.1, 0.4) +
+      xlim(0, 1) +
+      theme_light() +
+      theme(legend.position = 'none')
+    p1 <-
+      ggMarginal(
+        p,
+        type = "histogram",
+        fill = 'gray',
+        color = 'white',
+        size = 10,
+        xparams = list(bins = 25),
+        yparams = list(bins = 15)
+      ) #,margins='x')
+    plots[[activity]] <- p1
+  }
+  #plots
+  # Namespace-qualified: this file does not attach gridExtra.
+  do.call(gridExtra::grid.arrange, c(plots, ncol = 4))
+}
+#' Dependency plot for a particular feature on a particular animal
+#'
+#' Same as a per-activity dependency plot, restricted to the rows of one
+#' animal: for every activity that animal has, plots the SHAP value of
+#' `feature` against the feature value rescaled to [0, 1] (rescaling uses the
+#' whole dataset, not only this animal), with marginal histograms. Points are
+#' coloured by whether `Activity` equals `predictions` ("TP") or not ("FP").
+#'
+#' @param feature name of the feature column, as a string.
+#' @param dataset a dataset with goat information; must contain `feature`,
+#'   `Anim`, `Activity` and `predictions`.
+#' @param shap  a SHAP value dataset with one column per feature plus `class`
+#'   and `Anim` columns (e.g. from `calculate_shap()`).
+#' @param anim the id of the animal
+#'
+#' @details NOTE(review): SHAP rows and dataset rows are paired by position
+#'   within each activity; this assumes `shap` was computed from exactly the
+#'   rows of `dataset` (no rows dropped for NA) -- verify against the caller.
+#'
+#' @return the value of `gridExtra::grid.arrange()` for the per-activity
+#'   panels (the panels are also drawn on the current device).
+#' @export
+#'
+#' @examples
+#'
+#' dataset <-
+#' readr::read_delim("data/split/seba-caprino_loocv.tsv",
+#'        delim = '\t')
+#' selected_variables <-
+#'  readr::read_delim(
+#'    "data/topnfeatures/seba-caprino_selected_features.tsv",
+#'    col_types = cols(),
+#'    delim = '\t'
+#'  )
+#' dataset <-
+#'  dataset %>% select(selected_variables$variable,
+#'  Anim,
+#'  Activity)
+#' goat_model <- readRDS("models/boost/seba-caprino_model.rds")
+#' shap_values <- calculate_shap(dataset,
+#'                model = goat_model,
+#'                nsim = 30)
+#' dependency_plot_anim(feature = "Steps",
+#'                      dataset = dataset,
+#'                      shap = shap_values,
+#'                      anim = 'a13')
+dependency_plot_anim<- function(feature,dataset,shap,anim){
+  stopifnot(
+    is.character(feature), length(feature) == 1,
+    feature %in% names(dataset),
+    feature %in% names(shap)
+  )
+  # `!!feature :=` names the column after the *value* of `feature`. Embracing
+  # it would capture the caller's expression instead, which breaks as soon as
+  # the name is passed through a variable.
+  newdata <- dataset %>% mutate(!!feature := range01(.data[[feature]]))
+  plots<-list()
+  activities<-newdata %>% filter(Anim == anim) %>% pull(Activity) %>% unique()
+  for (activity in activities) {
+    # Select the feature by name rather than through a fixed column range, so
+    # the number of features in `shap` does not matter.
+    s <- shap[which(shap$class == activity &
+                      shap$Anim == anim
+    ), ]
+    x <- newdata[which(newdata$Activity == activity &
+                         newdata$Anim == anim
+    ),]
+    data <- data.frame(
+      shap = s[[feature]],
+      feature = x[[feature]],
+      tp = ifelse(x$Activity == x$predictions, "TP", "FP")
+    )
+    p <- ggplot(data, aes(x = feature)) +
+      geom_point(aes(y = shap, color = tp), alpha = 0.3, size = 1.8) +
+      geom_smooth(aes(y = shap),
+                  se = FALSE,
+                  size = 0.5,
+                  linetype = "dashed") +
+      geom_hline(
+        yintercept = 0,
+        color = 'red',
+        size = 0.5,
+        alpha = 0.5
+      ) +
+      xlab(feature) +
+      labs(title = paste0("Activity ", activity)) +
+      ylab("SHAP Value") +
+      ylim(-0.1, 0.4) +
+      xlim(0, 1) +
+      theme_light() +
+      theme(legend.position = 'none')
+    p1 <-
+      ggMarginal(
+        p,
+        type = "histogram",
+        fill = 'gray',
+        color = 'white',
+        size = 15,
+        xparams = list(bins = 25),
+        yparams = list(bins = 15)
+      ) #,margins='x')
+    plots[[activity]] <- p1
+  }
+  # Namespace-qualified: this file does not attach gridExtra.
+  do.call(gridExtra::grid.arrange, c(plots, ncol = length(activities)))
+}
+#' Contribution plot for SHAP values
+#'
+#' Horizontal bar chart of the SHAP value of each feature for the selected
+#' observation(s). When `num_row` selects several rows, the values are summed
+#' per feature.
+#'
+#' @param s SHAP values for a particular class, animal, etc. The first
+#'   `n_features` columns must be the numeric per-feature SHAP values.
+#' @param num_row the row number of the observation to show
+#' @param n_features number of leading columns of `s` holding feature SHAP
+#'   values (default 15).
+#'
+#' @return ggplot object
+#' @export
+#'
+#' @examples
+#'
+#' shap_values_G <- calculate_shap_class(
+#' dataset = dataset,
+#' new_data = newdata,
+#' model= model,
+#' nsim = 100,
+#' function_class = p_function_G,
+#' class_name ="G")
+#' p1 <- contribution_plot(shap_values_G,num_row = 1) +
+#' labs(title="Anim a13: class G (FN)", subtitle = "SHAP analysis for class G")
+#'
+contribution_plot <-function(s, num_row = 1, n_features = 15){
+  feature_cols <- seq_len(n_features)
+  selected <- s[num_row, feature_cols, drop = FALSE]
+  s <- data.frame(
+    Variable = names(selected),
+    Importance = colSums(selected)
+  )
+  # Only x, y and fill are mapped; the former third positional argument did
+  # not correspond to any aesthetic.
+  ggplot(s, aes(Variable, Importance, fill=Importance) )+
+    geom_col() +
+    coord_flip() +
+    xlab("") +
+    ylab("Shapley value")+
+    theme_classic()+
+    theme(legend.position = 'none')
+}
+#' Contribution plot for SHAP values, annotated with the feature values
+#'
+#' Horizontal bar chart of the SHAP value of each feature for the selected
+#' observation(s); bars are filled and labelled by the corresponding feature
+#' value taken from `f`. When `num_row` selects several rows, both SHAP and
+#' feature values are summed per feature.
+#'
+#' @param s SHAP values; the first `n_features` columns must be the numeric
+#'   per-feature SHAP values.
+#' @param f feature values; assumed to have the same features in the same
+#'   column order and the same row order as `s` -- TODO confirm with callers.
+#' @param num_row the row number of the observation to show
+#' @param n_features number of leading columns of `s` and `f` holding
+#'   features (default 15).
+#'
+#' @return ggplot object
+contribution_plot_w_feature <-function(s, f, num_row = 1, n_features = 15){
+  feature_cols <- seq_len(n_features)
+  shap_row <- s[num_row, feature_cols, drop = FALSE]
+  value_row <- f[num_row, feature_cols, drop = FALSE]
+  d <- data.frame(
+    variable = names(shap_row),
+    importance = colSums(shap_row),
+    value = colSums(value_row)
+  )
+  # Only x, y and fill are mapped; the former third positional argument did
+  # not correspond to any aesthetic.
+  ggplot(d, aes(variable, importance, fill=value) )+
+    geom_col() +
+    geom_text(aes(label=round(value,digits = 2),hjust = 1.0),size=2)+
+    coord_flip() +
+    xlab("") +
+    ylab("Shapley value")+
+    scale_fill_gradient(low = 'lightgray', high = 'skyblue')+
+    theme_classic()+
+    theme(legend.position = 'none')
+}

selected_features.tsv ADDED Viewed

	@@ -0,0 +1,16 @@

+variable
+Steps
+%HeadDown
+Standing
+Active
+MeanXY
+distance(m)
+prev_steps1
+X_Act
+prev_Active1
+prev_Standing1
+DFA123
+prev_headdown1
+Lying
+Y_Act
+DBA123