Spaces:

cjerzak
/

ra

Running

App Files Files Community

cjerzak committed on Oct 15, 2025

Commit

a470aa8

verified ·

1 Parent(s): 27d3c78

Update app.R

Browse files

Files changed (1) hide show

app.R +63 -661

app.R CHANGED Viewed

@@ -1,665 +1,67 @@
-# setwd("~/Dropbox/ImageDeconfoundAid/BrokenExperiment/ShinyApp/"); Sys.setenv(RETICULATE_PYTHON = "/Users/cjerzak/miniconda3/bin/python")
-# app.R — Remote Audit: Design-Based Tests of Randomization with Satellite Imagery
 # ==============================================================================
-# Performs conditional randomization tests to audit experimental integrity
-# using pre-treatment satellite imagery features (NDVI, Nightlight)
 # ==============================================================================
-# For Hugging Face deployment, set secrets GEE_PROJECT, GEE_EMAIL, GEE_KEY (the service account key JSON as string)
-options(shiny.maxRequestSize = 50 * 1024^2)
-options(error = NULL)
-suppressPackageStartupMessages({
-  library(shiny)
-  library(bslib)
-  library(DT)
-  library(shinyWidgets)
-  library(reticulate)
-  library(dplyr)
-  library(xgboost)
-  library(future)
-  library(future.apply)
-  library(parallel)
-})
-# ============================================================================
-# Helper Functions
-# ============================================================================
-.clamp01 <- function(p, eps = 1e-6) pmin(pmax(p, eps), 1 - eps)
-.lik_improvement <- function(A, phat, a_bar) {
-  phat <- .clamp01(phat)
-  a_bar <- .clamp01(a_bar)
-  L <- sum(A * log(phat) + (1 - A) * log(1 - phat))
-  L0 <- sum(A * log(a_bar) + (1 - A) * log(1 - a_bar))
-  L - L0
-}
-.cf_xgboost_phat <- function(A, X, K = 5, folds = NULL, ntree = 300L, mtry = NULL) {
-  n <- length(A)
-  if (is.null(folds)) folds <- sample(rep(seq_len(K), length.out = n))
-  phat <- rep(NA_real_, n)
-  for (k in seq_len(K)) {
-    idx_te <- which(folds == k)
-    idx_tr <- which(folds != k)
-    A_tr <- A[idx_tr]; X_tr <- as.matrix(X[idx_tr, , drop = FALSE])
-    X_te <- as.matrix(X[idx_te, , drop = FALSE])
-    sdv <- apply(X_tr, 2, sd)
-    keep <- is.finite(sdv) & (sdv > 1e-12)
-    if (!any(keep)) {
-      phat[idx_te] <- mean(A_tr)
-      next
-    }
-    X_tr_k <- X_tr[, keep, drop = FALSE]
-    X_te_k <- X_te[, keep, drop = FALSE]
-    X_df <- as.data.frame(X_tr_k)
-    X_mat <- model.matrix(~ . -1, data = X_df)
-    y_num <- as.numeric(A_tr)
-    p <- ncol(X_mat)
-    mtry_use <- if (is.null(mtry)) max(1L, floor(sqrt(p))) else max(1L, min(as.integer(mtry), p))
-    colsample_frac <- min(1, as.numeric(mtry_use) / max(1, p))
-    params <- list(
-      objective = "binary:logistic",
-      eval_metric = "logloss",
-      eta = 0.1,
-      max_depth = 6,
-      subsample = 0.8,
-      colsample_bytree = colsample_frac,
-      nthread = parallel::detectCores()
-    )
-    dtrain <- xgboost::xgb.DMatrix(data = X_mat, label = y_num)
-    fit <- xgboost::xgb.train(
-      params = params,
-      data = dtrain,
-      nrounds = as.integer(ntree),
-      verbose = 0
-    )
-    phat[idx_te] <- predict(fit, X_te_k)
-  }
-  list(phat = .clamp01(phat), folds = folds)
-}
-.draw_assign_fixed_m <- function(n, m) {
-  A <- rep.int(0L, n)
-  A[sample.int(n, m)] <- 1L
-  A
-}
-remote_audit_crt <- function(A, X,
-                             K = 5,
-                             B = 1000,
-                             seed = 123,
-                             label = "",
-                             xgboost_ntree = 300L,
-                             xgboost_mtry = NULL) {
-  stopifnot(length(A) == nrow(X))
-  keep <- is.finite(A) & apply(as.matrix(X), 1, function(r) all(is.finite(r)))
-  A <- as.integer(A[keep])
-  X <- as.matrix(X[keep, , drop = FALSE])
-  n <- length(A)
-  a_bar <- .clamp01(mean(A))
-  m <- sum(A)
-  set.seed(seed)
-  folds <- sample(rep(seq_len(K), length.out = n))
-  obs <- .cf_xgboost_phat(A, X, K = K, folds = folds, ntree = xgboost_ntree, mtry = xgboost_mtry)
-  T_obs <- .lik_improvement(A, obs$phat, a_bar)
-  plan(multisession(workers = availableCores()))
-  T_null <- future_sapply(seq_len(B), future.seed = TRUE, FUN = function(b) {
-    A_b <- .draw_assign_fixed_m(n, m)
-    ph <- .cf_xgboost_phat(A_b, X, K = K, folds = folds, ntree = xgboost_ntree, mtry = xgboost_mtry)$phat
-    .lik_improvement(A_b, ph, a_bar)
-  })
-  plan(sequential)
-  pval <- (1 + sum(T_null >= T_obs)) / (B + 1)
-  list(
-    T_obs = T_obs,
-    T_null = T_null,
-    p_value = pval,
-    a_bar = a_bar,
-    n = n,
-    treated = m,
-    K = K,
-    B = B,
-    label = label,
-    learner = "xgboost"
-  )
-}
-# ============================================================================
-# UI
-# ============================================================================
-theme <- bs_theme(bootswatch = "flatly")
-ui <- page_sidebar(
-  tags$head(tags$title("Remote Audit")),
-  title = div(
-    span("Remote Audit", style = "font-weight:700;"),
-    span(" with Satellite Imagery", style = "color: #888;")
-  ),
-  theme = theme,
-  sidebar = sidebar(
-    width = 360,
-    h5("Data Input"),
-    radioButtons("data_source", NULL,
-                 choices = c("Upload CSV" = "upload",
-                             "Use Example (Begum et al. 2022)" = "example"),
-                 selected = "example"),
-    conditionalPanel(
-      "input.data_source == 'upload'",
-      fileInput("file_csv", "Upload CSV", accept = ".csv")
-    ),
-    h5("Audit Configuration"),
-    selectInput("audit_type", "Audit Type",
-                choices = c("Randomization" = "randomization",
-                            "Missingness" = "missingness"),
-                selected = "randomization"),
-    conditionalPanel(
-      "input.audit_type == 'randomization'",
-      selectInput("treat_col", "Treatment Column", choices = NULL),
-      numericInput("control_val", "Control Value", value = 1, step = 1),
-      numericInput("treat_val", "Treatment Value", value = 2, step = 1)
-    ),
-    conditionalPanel(
-      "input.audit_type == 'missingness'",
-      selectInput("missing_col", "Variable to Check", choices = NULL)
-    ),
-    selectInput("lat_col", "Latitude Column", choices = NULL),
-    selectInput("long_col", "Longitude Column", choices = NULL),
-    numericInput("start_year", "Start Year", value = 2010, min = 1990, max = 2026),
-    numericInput("end_year", "End Year", value = 2011, min = 1990, max = 2026),
-    checkboxGroupInput("features", "Features",
-                       choices = c("NDVI Median" = "ndvi_median",
-                                   "Nightlight Median" = "ntl_median"),
-                       selected = c("ndvi_median", "ntl_median")),
-    h5("Parameters"),
-    numericInput("K", "K-Folds", value = 5, min = 2, max = 10),
-    numericInput("B", "Resamples", value = 1000, min = 100, max = 5000, step = 100),
-    numericInput("seed", "Random Seed", value = 123),
-    numericInput("ntree", "Number of Trees", value = 300, min = 50, max = 1000),
-    actionButton("run_audit", "Run Audit",
-                 class = "btn-primary btn-lg",
-                 icon = icon("play"),
-                 style = "width: 100%;"),
-    tags$a(
-      href = "https://connorjerzak.com/linkorgs-summary/",
-      target = "_blank",
-      icon("circle-question"), " Technical Details"
-    )
-  ),
-  layout_columns(
-    col_widths = c(12),
-    card(
-      card_header("Data Preview"),
-      card_body(
-        DTOutput("data_preview")
-      )
-    ),
-    conditionalPanel(
-      "output.audit_complete",
-      card(
-        card_header("Audit Results"),
-        card_body(
-          uiOutput("results_summary"),
-          plotOutput("audit_plot", height = "400px"),
-          downloadButton("download_results", "Download Results",
-                         class = "btn-success")
-        )
-      )
-    )
-  )
 )
-# ============================================================================
-# Server
-# ============================================================================
-server <- function(input, output, session) {
-  audit_results <- reactiveVal(NULL)
-  data_loaded <- reactive({
-    if (input$data_source == "example") {
-      print(list.files())
-      if (file.exists("./Islam2019_WithGeocodesAndSatData.Rdata")) {
-        load("./Islam2019_WithGeocodesAndSatData.Rdata")
-        return(data)
-      } else {
-        showNotification("Example data file not found. Please upload your own CSV.",
-                         type = "error", duration = 10)
-        return(NULL)
-      }
-    } else {
-      req(input$file_csv)
-      tryCatch({
-        read.csv(input$file_csv$datapath, stringsAsFactors = FALSE)
-      }, error = function(e) {
-        showNotification(paste("Error reading CSV:", e$message),
-                         type = "error", duration = 10)
-        NULL
-      })
-    }
-  })
-  observe({
-    df <- data_loaded()
-    req(df)
-    cols <- names(df)
-    updateSelectInput(session, "treat_col", choices = cols,
-                      selected = if ("begum_treat" %in% cols) "begum_treat" else cols[1])
-    updateSelectInput(session, "missing_col", choices = cols,
-                      selected = cols[1])
-    updateSelectInput(session, "lat_col", choices = cols,
-                      selected = grep("lat", cols, value = TRUE, ignore.case = TRUE)[1] %||% NULL)
-    updateSelectInput(session, "long_col", choices = cols,
-                      selected = grep("lon|long", cols, value = TRUE, ignore.case = TRUE)[1] %||% NULL)
-  })
-  observeEvent(input$data_source, {
-    if (input$data_source == "upload" && is.null(input$file_csv)) {
-      updateSelectInput(session, "treat_col", choices = character(0))
-      updateSelectInput(session, "missing_col", choices = character(0))
-      updateSelectInput(session, "lat_col", choices = character(0))
-      updateSelectInput(session, "long_col", choices = character(0))
-    }
-  })
-  output$data_preview <- renderDT({
-    df <- data_loaded()
-    req(df)
-    datatable(
-      head(df, 100),
-      options = list(pageLength = 10, scrollX = TRUE, dom = 'tip'),
-      rownames = FALSE
-    )
-  })
-  observeEvent(input$run_audit, {
-    df <- data_loaded()
-    req(df)
-    missing_feats <- setdiff(input$features, names(df))
-    if (length(missing_feats) > 0) {
-      showNotification("Fetching satellite features from GEE...", type = "message")
-      req(input$lat_col %in% names(df), input$long_col %in% names(df))
-      if (input$start_year > input$end_year) {
-        showNotification("Start year must be <= end year", type = "error")
-        return()
-      }
-      gee_project <- Sys.getenv("GEE_PROJECT", unset = NULL)
-      gee_email <- Sys.getenv("GEE_EMAIL", unset = NULL)
-      gee_key <- Sys.getenv("GEE_KEY", unset = NULL)
-      py_run_string("
-import ee, pandas as pd, json
-def _ee_init(project, email=None, key_data=None):
-    if email is None and key_data is None:
-        try:
-            ee.Initialize(project=project)
-        except:
-            ee.Authenticate()
-            ee.Initialize(project=project)
-    else:
-        key_dict = json.loads(key_data)
-        credentials = ee.ServiceAccountCredentials(email, key_data=key_dict)
-        ee.Initialize(credentials=credentials, project=project)
-def satellite_stats(points, start, end, scale=250):
-    feats = [ee.Feature(ee.Geometry.Point([float(p['lon']), float(p['lat'])]),
-                        {'rowid': str(p['rowid'])}) for p in points]
-    fc = ee.FeatureCollection(feats)
-    def mask_modis(img):
-        qa = img.select('SummaryQA')
-        mask = qa.eq(0)
-        return img.updateMask(mask).select('NDVI').multiply(0.0001).copyProperties(img, img.propertyNames())
-    modis = (ee.ImageCollection('MODIS/061/MOD13Q1')
-             .filterDate(start, end)
-             .map(mask_modis))
-    ndvi_mean = modis.select('NDVI').mean().rename('ndvi_mean')
-    ndvi_median = modis.reduce(ee.Reducer.median()).rename('ndvi_median')
-    ndvi_max = modis.select('NDVI').max().rename('ndvi_max')
-    dmsp = ee.ImageCollection('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS').select('stable_lights')
-    viirs = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG').select('avg_rad')
-    dmsp_window = dmsp.filterDate(start, end)
-    viirs_window = viirs.filterDate(start, end)
-    overlap_start = ee.Date('2012-01-01')
-    overlap_end = ee.Date('2014-12-31')
-    dmsp_ov_img = dmsp.filterDate(overlap_start, overlap_end).mean()
-    viirs_ov_img = viirs.filterDate(overlap_start, overlap_end).mean()
-    def _buffer_feat(f):
-        f = ee.Feature(f)
-        return f.buffer(5000)
-    fc_buffer = fc.map(_buffer_feat)
-    region_geom = fc_buffer.geometry()
-    dmsp_ov_mean = ee.Number(dmsp_ov_img.reduceRegion(
-        reducer=ee.Reducer.mean(), geometry=region_geom, scale=5000, maxPixels=1e13
-    ).get('stable_lights'))
-    viirs_ov_mean = ee.Number(viirs_ov_img.reduceRegion(
-        reducer=ee.Reducer.mean(), geometry=region_geom, scale=5000, maxPixels=1e13
-    ).get('avg_rad'))
-    dmsp_global_mean = ee.Number(dmsp_ov_img.reduceRegion(
-        reducer=ee.Reducer.mean(), geometry=ee.Geometry.Rectangle([-180, -90, 180, 90]),
-        scale=50000, maxPixels=1e13
-    ).get('stable_lights'))
-    viirs_global_mean = ee.Number(viirs_ov_img.reduceRegion(
-        reducer=ee.Reducer.mean(), geometry=ee.Geometry.Rectangle([-180, -90, 180, 90]),
-        scale=50000, maxPixels=1e13
-    ).get('avg_rad'))
-    dmsp_use = ee.Algorithms.If(dmsp_ov_mean, dmsp_ov_mean, dmsp_global_mean)
-    viirs_use = ee.Algorithms.If(viirs_ov_mean, viirs_ov_mean, viirs_global_mean)
-    dmsp_use = ee.Number(dmsp_use)
-    viirs_use = ee.Number(viirs_use)
-    scale = ee.Algorithms.If(dmsp_use.gt(0), viirs_use.divide(dmsp_use), 1)
-    scale = ee.Number(scale)
-    def calib_img(img):
-        return img.multiply(scale).toFloat()
-    dmsp_equiv = dmsp_window.map(lambda img: calib_img(img.select('stable_lights').rename('ntl')))
-    viirs_prep = viirs_window.map(lambda img: img.select('avg_rad').rename('ntl').toFloat())
-    ntl_window = dmsp_equiv.merge(viirs_prep)
-    ntl_mean = ntl_window.mean().rename('ntl_mean')
-    ntl_median = ntl_window.reduce(ee.Reducer.median()).rename('ntl_median')
-    ntl_max = ntl_window.max().rename('ntl_max')
-    stacked = (ndvi_mean
-              .addBands([ndvi_median, ndvi_max,
-                         ntl_mean, ntl_median, ntl_max]))
-    samples = stacked.sampleRegions(collection=fc, properties=['rowid'], scale=scale)
-    info = samples.getInfo()
-    rows = []
-    for f in info.get('features', []):
-        p = f.get('properties', {}) or {}
-        rows.append({
-            'rowid': p.get('rowid'),
-            'ndvi_mean': p.get('ndvi_mean'),
-            'ndvi_median': p.get('ndvi_median'),
-            'ndvi_max': p.get('ndvi_max'),
-            'ntl_mean': p.get('ntl_mean'),
-            'ntl_median': p.get('ntl_median'),
-            'ntl_max': p.get('ntl_max')
-        })
-    return pd.DataFrame(rows)
-")
-      py$`_ee_init`(project = gee_project, email = gee_email, key_data = gee_key)
-      df$rowid <- seq_len(nrow(df))
-      pts_all <- df %>%
-        filter(is.finite(!!sym(input$lat_col)), is.finite(!!sym(input$long_col))) %>%
-        transmute(rowid = as.character(rowid),
-                  lon = !!sym(input$long_col),
-                  lat = !!sym(input$lat_col))
-      if (nrow(pts_all) == 0) {
-        showNotification("No valid geocoordinates found", type = "error")
-        return()
-      }
-      start <- sprintf("%d-01-01", input$start_year)
-      end <- sprintf("%d-01-01", input$end_year + 1)
-      batch_size <- 200L
-      idx <- split(seq_len(nrow(pts_all)), ceiling(seq_len(nrow(pts_all)) / batch_size))
-      sat_all <- list()
-      for (ii in idx) {
-        chunk <- pts_all[ii, , drop = FALSE]
-        points <- lapply(seq_len(nrow(chunk)), function(i) {
-          list(
-            rowid = chunk$rowid[i],
-            lon = chunk$lon[i],
-            lat = chunk$lat[i]
-          )
-        })
-        df_chunk <- py$satellite_stats(points, start, end, as.integer(250))
-        if (!is.null(df_chunk) && nrow(df_chunk) > 0) {
-          sat_all[[length(sat_all) + 1L]] <- df_chunk
-        }
-      }
-      if (length(sat_all) > 0) {
-        sat_df <- bind_rows(sat_all) %>% mutate(rowid = as.integer(rowid))
-        df <- left_join(df, sat_df, by = "rowid") %>% select(-rowid)
-      } else {
-        showNotification("Failed to fetch satellite data", type = "error")
-        return()
-      }
-      missing_feats <- setdiff(input$features, names(df))
-      if (length(missing_feats) > 0) {
-        showNotification(paste("Could not fetch:", paste(missing_feats, collapse = ", ")), type = "error")
-        return()
-      }
-    }
-    withProgress(message = "Running audit...", value = 0, {
-      incProgress(0.2, detail = "Preparing data...")
-      if (input$audit_type == "randomization") {
-        req(input$treat_col)
-        if (!(input$treat_col %in% names(df))) {
-          showNotification("Treatment column not found", type = "error")
-          return()
-        }
-        tt <- df[[input$treat_col]]
-        mask <- (tt %in% c(input$control_val, input$treat_val))
-        if (sum(mask) == 0) {
-          showNotification("No units match control/treatment values",
-                           type = "error")
-          return()
-        }
-        A <- ifelse(tt[mask] == input$treat_val, 1L, 0L)
-        X <- as.matrix(df[mask, input$features, drop = FALSE])
-        keep <- apply(X, 1, function(r) all(is.finite(r)))
-        A <- A[keep]
-        X <- X[keep, , drop = FALSE]
-        if (length(A) < 10) {
-          showNotification("Too few complete cases (need >= 10)",
-                           type = "error")
-          return()
-        }
-      } else {
-        req(input$missing_col)
-        if (!(input$missing_col %in% names(df))) {
-          showNotification("Missing column not found", type = "error")
-          return()
-        }
-        R <- as.integer(!is.na(df[[input$missing_col]]))
-        if (all(R == 1)) {
-          showNotification(
-            "No missingness detected in selected variable. Audit cannot proceed.",
-            type = "warning", duration = 10
-          )
-          return()
-        }
-        if (all(R == 0)) {
-          showNotification(
-            "All values are missing. Audit cannot proceed.",
-            type = "warning", duration = 10
-          )
-          return()
-        }
-        A <- R
-        X <- as.matrix(df[, input$features, drop = FALSE])
-        keep <- apply(X, 1, function(r) all(is.finite(r)))
-        A <- A[keep]
-        X <- X[keep, , drop = FALSE]
-      }
-      incProgress(0.4, detail = "Running conditional randomization test...")
-      results <- tryCatch({
-        remote_audit_crt(
-          A = A,
-          X = X,
-          K = input$K,
-          B = input$B,
-          seed = input$seed,
-          label = if (input$audit_type == "randomization") input$treat_col else input$missing_col,
-          xgboost_ntree = input$ntree
-        )
-      }, error = function(e) {
-        showNotification(paste("Audit failed:", e$message),
-                         type = "error", duration = 10)
-        NULL
-      })
-      incProgress(1.0, detail = "Complete!")
-      if (!is.null(results)) {
-        audit_results(results)
-        showNotification("Audit complete!", type = "message", duration = 3)
-      }
-    })
-  })
-  output$results_summary <- renderUI({
-    res <- audit_results()
-    req(res)
-    HTML(sprintf(
-      "<h4>%s Audit Results</h4>
-       <p><strong>Learner:</strong> %s</p>
-       <p><strong>Sample size:</strong> %d (Treated: %d, Control: %d)</p>
-       <p><strong>Test statistic (T):</strong> %.4f</p>
-       <p><strong>P-value:</strong> %.4f</p>
-       <p><strong>Interpretation:</strong> %s</p>",
-      tools::toTitleCase(input$audit_type),
-      toupper(res$learner),
-      res$n,
-      res$treated,
-      res$n - res$treated,
-      res$T_obs,
-      res$p_value,
-      if (res$p_value < 0.05) {
-        "⚠️ Assignment is MORE predictable from satellite features than expected under random assignment (p < 0.05). This suggests potential deviation from the stated randomization mechanism."
-      } else {
-        "✓ Assignment is NOT significantly more predictable from satellite features than expected under random assignment (p >= 0.05). No evidence of deviation detected."
-      }
-    ))
-  })
-  output$audit_plot <- renderPlot({
-    res <- audit_results()
-    req(res)
-    hist(res$T_null, breaks = 50,
-         main = sprintf("%s Audit: %s Learner",
-                        tools::toTitleCase(input$audit_type),
-                        toupper(res$learner)),
-         xlab = "Out-of-sample log-likelihood improvement (T)",
-         ylab = "Count",
-         col = "lightblue",
-         border = "white")
-    abline(v = res$T_obs, col = "red", lwd = 3, lty = 2)
-    legend("topright",
-           legend = c("Null distribution", "Observed"),
-           col = c("lightblue", "red"),
-           lwd = c(10, 3),
-           lty = c(1, 2))
-    mtext(sprintf("n=%d, treated=%d (%.1f%%), B=%d, p=%.4f",
-                  res$n, res$treated, 100 * res$a_bar, res$B, res$p_value),
-          side = 3, line = 0.5, cex = 0.9)
-  })
-  output$download_results <- downloadHandler(
-    filename = function() {
-      sprintf("remote_audit_results_%s.csv", format(Sys.time(), "%Y%m%d_%H%M%S"))
-    },
-    content = function(file) {
-      res <- audit_results()
-      req(res)
-      summary_df <- data.frame(
-        audit_type = input$audit_type,
-        learner = res$learner,
-        n = res$n,
-        treated = res$treated,
-        treatment_rate = res$a_bar,
-        K = res$K,
-        B = res$B,
-        T_observed = res$T_obs,
-        p_value = res$p_value,
-        seed = input$seed,
-        features = paste(input$features, collapse = ";")
-      )
-      write.csv(summary_df, file, row.names = FALSE)
-    }
-  )
-  output$audit_complete <- reactive({
-    !is.null(audit_results())
-  })
-  outputOptions(output, "audit_complete", suspendWhenHidden = FALSE)
 }
-shinyApp(ui, server)

+# syntax=docker/dockerfile:1
+FROM rocker/r2u:22.04
+WORKDIR /code
+ARG DEBIAN_FRONTEND=noninteractive
 # ==============================================================================
+# System dependencies
 # ==============================================================================
+RUN apt-get update -y && apt-get install -y --no-install-recommends \
+    wget bzip2 git unzip ca-certificates locales tzdata \
+    build-essential gfortran \
+    libcurl4-openssl-dev libssl-dev libxml2-dev libgit2-dev \
+    libopenblas-dev liblapack-dev \
+    python3 python3-pip \
+  && rm -rf /var/lib/apt/lists/*
+ENV LC_ALL=C.UTF-8 LANG=C.UTF-8 HF_HUB_DISABLE_TELEMETRY=1
+# ==============================================================================
+# Python packages
+# ==============================================================================
+RUN pip3 install --no-cache-dir pandas earthengine-api
+# ==============================================================================
+# Install R packages (prioritize apt when available for speed)
+# ==============================================================================
+ARG APT_R_PKGS="\
+  r-cran-shiny r-cran-dplyr r-cran-dt \
+  r-cran-data.table r-cran-foreach r-cran-doparallel"
+RUN set -eux; \
+    apt-get update -y; \
+    for pkg in $APT_R_PKGS; do \
+      if apt-cache show "$pkg" >/dev/null 2>&1; then \
+        echo "Installing $pkg via apt ..."; \
+        apt-get install -y --no-install-recommends "$pkg" || true; \
+      fi; \
+    done; \
+    rm -rf /var/lib/apt/lists/*
+# ==============================================================================
+# R packages via CRAN (fallback for those not in apt)
+# ==============================================================================
+RUN Rscript - <<'RSCRIPT'
+options(Ncpus = parallel::detectCores())
+cran <- "https://cloud.r-project.org"
+req <- c(
+  "shiny", "dplyr", "DT", "data.table",
+  "bslib", "shinyWidgets", "xgboost",
+  "reticulate", "future", "future.apply"
 )
+installed <- rownames(installed.packages())
+need <- setdiff(req, installed)
+if (length(need)) {
+  if (!requireNamespace("pak", quietly = TRUE)) {
+    install.packages("pak", repos = "https://r-lib.github.io/p/pak/stable")
+  }
+  ok <- tryCatch({
+    pak::pak(need)
+    TRUE
+  }, error = function(e) FALSE)
+  if (!ok) install.packages(need, repos = cran)
 }
+RSCRIPT
+# ==============================================================================
+# Copy application files
+# ==============================================================================
+COPY . /code/
+# ==============================================================================
+# Shiny entrypoint
+# ==============================================================================
+EXPOSE 7860
+CMD ["R", "--quiet", "-e", "port <- as.integer(Sys.getenv('PORT', '7860')); shiny::runApp('/code', host='0.0.0.0', port=port)"]