# richtext's picture
# Initial release - 0.1 Alpha
# 7e54fbe
# ==============================================================================
# Polyphenol Estimation Pipeline - Shiny Application
# Global Configuration and Database Loading
# ==============================================================================
#
# PIPELINE DEVELOPED BY:
# Stephanie M.G. Wilson
# University of California, Davis
# Contact: smgwilson@ucdavis.edu
# Repository: https://github.com/SWi1/polyphenol_pipeline/
# License: MIT
#
# SHINY APP DEVELOPED BY:
# Richard Stoker
# United States Department of Agriculture - Agricultural Research Service
# Contact: Richard.Stoker@usda.gov
# License: CC0 (Public Domain)
#
# DESCRIPTION:
# This file loads required packages, reference databases, and defines
# helper functions used throughout the Shiny application.
#
# VERSION: 0.1 Alpha
# DATE: November 2025
#
# ==============================================================================
# Load required packages
suppressPackageStartupMessages({
library(shiny)
library(bslib)
library(shinyWidgets)
library(DT)
library(plotly)
library(tidyverse)
library(readxl)
library(vroom)
library(rmarkdown)
library(zip)
})
# ------------------------------------------------------------------------------
# Application Constants
# ------------------------------------------------------------------------------
APP_TITLE <- "Polyphenol Estimation Pipeline"
APP_VERSION <- "0.1 Alpha"
PRIMARY_COLOR <- "#6f42c1"
SECONDARY_COLOR <- "#5c2d91"
# ------------------------------------------------------------------------------
# File Paths - Self-contained within shiny_app directory
# ------------------------------------------------------------------------------
DATA_DIR <- "data"
DEMO_DIR <- "demo_data"
# Reference database paths
FDD_FILE <- file.path(DATA_DIR, "FDA_FDD_All_Records_v_3.1.xlsx")
FOODB_CONTENT_FILE <- file.path(DATA_DIR, "FooDB_polyphenol_content_with_dbPUPsubstrates_Aug25.csv")
CLASS_TAX_FILE <- file.path(DATA_DIR, "FooDB_polyphenol_list_3072.csv")
MAPPING_FILE <- file.path(DATA_DIR, "FDA_FooDB_Mapping_Nov_2025.csv")
EUGENOL_FILE <- file.path(DATA_DIR, "FooDB_Eugenol_Content_Final.csv")
DII_SUBCLASSES_FILE <- file.path(DATA_DIR, "FooDB_DII_polyphenol_list.csv")
# Demo data
DEMO_DATA_FILE <- file.path(DEMO_DIR, "VVKAJ_Items.csv")
# ------------------------------------------------------------------------------
# Load Reference Databases
# These files are loaded once at startup for performance
# ------------------------------------------------------------------------------
message("Loading reference databases...")
# FDA Food Disaggregation Database V3.1
if (file.exists(FDD_FILE)) {
FDD_V3 <- read_xlsx(FDD_FILE) %>%
rename(
latest_survey = "Latest Survey",
wweia_food_code = "WWEIA Food Code",
wweia_food_description = "WWEIA Food Description",
fdd_ingredient = "Basic Ingredient Description",
ingredient_percent = "Ingredient Percent"
) %>%
select(wweia_food_code, wweia_food_description, fdd_ingredient, ingredient_percent) %>%
mutate(wweia_food_code = as.integer(wweia_food_code))
message(" - FDA FDD database loaded")
} else {
FDD_V3 <- NULL
warning("FDA FDD database not found at: ", FDD_FILE)
}
# FooDB polyphenol content
if (file.exists(FOODB_CONTENT_FILE)) {
FooDB_mg_100g <- vroom(FOODB_CONTENT_FILE, show_col_types = FALSE) %>%
distinct(food_id, compound_public_id, .keep_all = TRUE) %>%
select(-c(food_public_id, food_name)) %>%
relocate(orig_content_avg, .before = citation) %>%
filter(!is.na(orig_content_avg_RFadj))
message(" - FooDB polyphenol content loaded")
} else {
FooDB_mg_100g <- NULL
warning("FooDB polyphenol content file not found")
}
# FooDB polyphenol class taxonomy
if (file.exists(CLASS_TAX_FILE)) {
class_tax <- vroom(CLASS_TAX_FILE, show_col_types = FALSE) %>%
select(c(compound_public_id, class))
message(" - FooDB class taxonomy loaded")
} else {
class_tax <- NULL
warning("FooDB class taxonomy file not found")
}
# FDA-FooDB Mapping
if (file.exists(MAPPING_FILE)) {
fdd_foodb_mapping <- vroom(MAPPING_FILE, show_col_types = FALSE) %>%
select(-c(method, score))
message(" - FDA-FooDB mapping loaded")
} else {
fdd_foodb_mapping <- NULL
warning("FDA-FooDB mapping file not found")
}
# FooDB Eugenol Content (for DII calculation)
if (file.exists(EUGENOL_FILE)) {
FooDB_eugenol <- vroom(EUGENOL_FILE, show_col_types = FALSE)
message(" - FooDB eugenol content loaded")
} else {
FooDB_eugenol <- NULL
warning("FooDB eugenol content file not found")
}
# FooDB DII polyphenol subclasses
if (file.exists(DII_SUBCLASSES_FILE)) {
FooDB_DII_subclasses <- vroom(DII_SUBCLASSES_FILE, show_col_types = FALSE)
message(" - FooDB DII subclasses loaded")
} else {
FooDB_DII_subclasses <- NULL
warning("FooDB DII subclasses file not found")
}
message("Database loading complete.")
# ------------------------------------------------------------------------------
# Helper Functions
# ------------------------------------------------------------------------------
# Check if all required databases are loaded
databases_ready <- function() {
all(!is.null(FDD_V3),
!is.null(FooDB_mg_100g),
!is.null(class_tax),
!is.null(fdd_foodb_mapping))
}
# Validate ASA24 dietary input data
validate_asa24_data <- function(data) {
required_cols <- c("UserName", "FoodCode", "RecallNo", "FoodAmt")
missing <- setdiff(required_cols, names(data))
if (length(missing) > 0) {
return(list(valid = FALSE, message = paste("Missing required columns:", paste(missing, collapse = ", "))))
}
recalls_per_user <- data %>%
group_by(UserName) %>%
summarise(n_recalls = n_distinct(RecallNo), .groups = "drop")
if (max(recalls_per_user$n_recalls, na.rm = TRUE) < 2) {
return(list(valid = FALSE, message = "Data must contain multiple recalls per participant."))
}
return(list(valid = TRUE, message = "Data validation passed."))
}
# Validate NHANES dietary input data
validate_nhanes_data <- function(data) {
required_cols <- c("SEQN", "DRXIFDCD", "RecallNo")
missing <- setdiff(required_cols, names(data))
if (length(missing) > 0) {
return(list(valid = FALSE, message = paste("Missing required columns:", paste(missing, collapse = ", "))))
}
return(list(valid = TRUE, message = "Data validation passed."))
}
# Apply coffee/tea brewing adjustment
# This adjustment accounts for the brewing process where polyphenols from
# coffee or tea are extracted into water
apply_brewing_adjustment <- function(fdd_data) {
fdd_data %>%
group_by(wweia_food_code) %>%
mutate(
has_tea = any(str_detect(fdd_ingredient, regex("Tea", ignore_case = TRUE))),
has_coffee = any(str_detect(fdd_ingredient, regex("Coffee", ignore_case = TRUE))),
has_water = any(str_detect(fdd_ingredient, regex("Water", ignore_case = TRUE))),
brewing_adjustment_total = case_when(
has_tea & has_water ~ sum(
ingredient_percent[str_detect(fdd_ingredient, regex("Tea|Water", ignore_case = TRUE))],
na.rm = TRUE),
has_coffee & has_water ~ sum(
ingredient_percent[str_detect(fdd_ingredient, regex("Coffee|Water", ignore_case = TRUE))],
na.rm = TRUE),
TRUE ~ NA_real_),
brewing_adjustment_percentage = if_else(
str_detect(fdd_ingredient, regex("Coffee|Tea", ignore_case = TRUE)),
brewing_adjustment_total,
NA_real_)
) %>%
select(-c(has_tea, has_coffee, has_water, brewing_adjustment_total)) %>%
ungroup()
}
# Color palette for visualizations
get_viz_colors <- function(n) {
if (n <= 8) {
return(c("#6f42c1", "#28a745", "#fd7e14", "#17a2b8", "#dc3545",
"#ffc107", "#6610f2", "#e83e8c")[1:n])
}
colorRampPalette(c("#6f42c1", "#28a745", "#fd7e14", "#17a2b8"))(n)
}