edgarodriguez committed on
Commit
9edeb97
·
verified ·
1 Parent(s): 2f87354

Upload 3 files

Browse files
Files changed (3) hide show
  1. 10.11_draw_query_indicators_v10_HF.R +92 -22
  2. Dockerfile +1 -1
  3. helpers.R +5 -0
10.11_draw_query_indicators_v10_HF.R CHANGED
@@ -65,8 +65,8 @@ COL_COUNT <- "#374151"
65
  COL_RED <- "#b91c2e"
66
  COL_YEL <- "#DEB406"
67
  COL_AMBER <- "#405162"
68
- COL_PURPLE <- COL_RISK
69
- COL_GREEN <- COL_INFRA
70
  COL_BLUE <- COL_RISK
71
  COL_LIGHT_BLUE <- "#009BC1"
72
 
@@ -337,9 +337,14 @@ web_sql_qbreaks <- function(tbl, col, n_colors, filter_zero = TRUE) {
337
 
338
  # Dynamic: unified per-nivel grid join (mirrors load_grid_unified). niv 4..8.
339
  # grid_crosswalk has codigo_n4..codigo_n8, so the equi-join always applies.
340
- web_sql_grid_unified <- function(niv, has_fosas, limit = NULL) {
 
 
341
  col_n <- paste0("codigo_n", niv)
342
  limit_clause <- if (!is.null(limit)) paste0("\nLIMIT ", limit) else ""
 
 
 
343
  fosas_cols <- if (has_fosas)
344
  ",\n COALESCE(CAST(f.fosas_total AS INTEGER), 0) AS fosas_total,\n COALESCE(CAST(f.cuerpos_total AS INTEGER), 0) AS cuerpos_total"
345
  else
@@ -387,7 +392,7 @@ web_sql_grid_unified <- function(niv, has_fosas, limit = NULL) {
387
  ) all_pts
388
  GROUP BY {col_n}
389
  )
390
- SELECT g.codigo, g.CVEGEO, g.wkt,
391
  CAST(c.homicidio AS DOUBLE) AS homicidio,
392
  CAST(c.robo AS DOUBLE) AS robo,
393
  CAST(c.secuestro AS DOUBLE) AS secuestro,
@@ -410,6 +415,50 @@ web_sql_grid_unified <- function(niv, has_fosas, limit = NULL) {
410
  )
411
  }
412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  #######################################################################################
414
  # WEB_SPEC -- declarative layer / accordion / panel / palette config.
415
  # Single source consumed by the Shiny UI AND the docs converter so the two
@@ -1408,8 +1457,9 @@ server <- function(input, output, session) {
1408
  temporal_filter_wkt = NULL
1409
  )
1410
 
1411
- # Mutable quantile-break cache; populated once in renderMaplibre()
1412
- qb <- list()
 
1413
 
1414
  # Compute n_colors-1 quantile thresholds from a DuckDB table column
1415
  compute_qbreaks <- function(tbl, col, n_colors, filter_zero = TRUE) {
@@ -1761,27 +1811,27 @@ server <- function(input, output, session) {
1761
  output$map <- renderMaplibre({
1762
  req(con)
1763
 
1764
- # Compute quantile breaks from actual data (populate qb cache)
1765
- qb$crime <- lapply(CRIME_COLS, function(info) {
1766
- compute_qbreaks("crime_mun", info$col, length(CRIME_STEP_PAL))
1767
- })
1768
- qb$desap <- compute_qbreaks("dim_desap_mun", "final_desap_nl_per_100k",
1769
- length(DESAP_PAL))
1770
- qb$pam <- compute_qbreaks("dim_pam_mun", "pam_n", length(PAM_PAL))
 
 
1771
 
1772
  g <- function(col) list("get", col)
1773
  ts <- function(col) list("to-string", list("get", col))
1774
 
1775
- crime_sf <- tryCatch({
1776
- DBI::dbGetQuery(con, web_sql("src_crime")) |>
1777
- sf::st_as_sf(wkt = "wkt", crs = 4326) |>
1778
- dplyr::select(-wkt)
1779
- }, error = function(e) { message("crime_mun: ", e$message); NULL })
1780
 
1781
  load_grid_unified <- function(niv, limit = NULL) {
 
1782
  tryCatch({
1783
  DBI::dbGetQuery(con,
1784
- web_sql_grid_unified(niv, has_fosas_tbl, limit)) |>
1785
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
1786
  dplyr::select(-wkt)
1787
  }, error = function(e) { message("grid_nivel", niv, ": ", e$message); NULL })
@@ -1976,7 +2026,7 @@ server <- function(input, output, session) {
1976
  observeEvent(input$grid_nivel, {
1977
  req(input$grid_nivel == "7", !grid7_loaded(), !is.null(con))
1978
  grid7_sf <<- tryCatch({
1979
- DBI::dbGetQuery(con, web_sql_grid_unified(7, has_fosas_tbl)) |>
1980
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
1981
  dplyr::select(-wkt)
1982
  }, error = function(e) { message("N7 lazy load: ", e$message); NULL })
@@ -1990,7 +2040,7 @@ server <- function(input, output, session) {
1990
  lazy_add_fill <- function(key, sql_name, id, make_layer_fn) {
1991
  if (isTRUE(lazy_added[[key]]) || is.null(con)) return()
1992
  sf_data <- tryCatch({
1993
- DBI::dbGetQuery(con, web_sql(sql_name)) |>
1994
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
1995
  dplyr::select(-wkt)
1996
  }, error = function(e) { message("lazy ", id, ": ", e$message); NULL })
@@ -2018,8 +2068,15 @@ server <- function(input, output, session) {
2018
  lazy_add_grid <- function(nv_chr) {
2019
  key <- paste0("grid", nv_chr)
2020
  if (isTRUE(lazy_added[[key]]) || is.null(con)) return()
 
 
 
 
 
 
 
2021
  sf_data <- tryCatch({
2022
- DBI::dbGetQuery(con, web_sql_grid_unified(as.integer(nv_chr), has_fosas_tbl)) |>
2023
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
2024
  dplyr::select(-wkt)
2025
  }, error = function(e) { message("lazy grid n", nv_chr, ": ", e$message); NULL })
@@ -2124,6 +2181,19 @@ server <- function(input, output, session) {
2124
  )
2125
  updateSelectInput(session, "state_select",
2126
  choices = c("Choose a state..." = "", state_ch))
 
 
 
 
 
 
 
 
 
 
 
 
 
2127
  }, once = TRUE)
2128
 
2129
  observeEvent(input$grid_icon_click, {
 
65
  COL_RED <- "#b91c2e"
66
  COL_YEL <- "#DEB406"
67
  COL_AMBER <- "#405162"
68
+ COL_PURPLE <- "#94579D"
69
+ COL_GREEN <- "#337F5E"
70
  COL_BLUE <- COL_RISK
71
  COL_LIGHT_BLUE <- "#009BC1"
72
 
 
337
 
338
  # Dynamic: unified per-nivel grid join (mirrors load_grid_unified). niv 4..8.
339
  # grid_crosswalk has codigo_n4..codigo_n8, so the equi-join always applies.
340
+ # simplify_tol: degrees of simplification applied to g.wkt at query time
341
+ # (0 = no simplification; default 0.005 deg ~= 500 m, suitable for N4-N6).
342
+ web_sql_grid_unified <- function(niv, has_fosas, limit = NULL, simplify_tol = 0.005) {
343
  col_n <- paste0("codigo_n", niv)
344
  limit_clause <- if (!is.null(limit)) paste0("\nLIMIT ", limit) else ""
345
+ wkt_expr <- if (simplify_tol > 0)
346
+ paste0("ST_AsText(ST_SimplifyPreserveTopology(ST_GeomFromText(g.wkt), ", simplify_tol, "))")
347
+ else "g.wkt"
348
  fosas_cols <- if (has_fosas)
349
  ",\n COALESCE(CAST(f.fosas_total AS INTEGER), 0) AS fosas_total,\n COALESCE(CAST(f.cuerpos_total AS INTEGER), 0) AS cuerpos_total"
350
  else
 
392
  ) all_pts
393
  GROUP BY {col_n}
394
  )
395
+ SELECT g.codigo, g.CVEGEO, {wkt_expr} AS wkt,
396
  CAST(c.homicidio AS DOUBLE) AS homicidio,
397
  CAST(c.robo AS DOUBLE) AS robo,
398
  CAST(c.secuestro AS DOUBLE) AS secuestro,
 
415
  )
416
  }
417
 
418
# HF-only: wrap any WEB_SQL template so its wkt column is simplified at query
# time. WEB_SQL_TEMPLATES itself is left untouched because it is shared with the
# docs converter, which reads pre-simplified Parquet from HuggingFace;
# simplifying twice would degrade quality.
#
# name: template name, forwarded to web_sql().
# tol:  simplification tolerance in degrees (0.008 deg ~= 800 m -- invisible at
#       municipality choropleth zoom). tol <= 0 returns the template's SQL
#       unwrapped, matching web_sql_grid_unified(simplify_tol = 0).
# ...:  further arguments forwarded to web_sql().
# Returns the SQL text to execute.
web_sql_simplified <- function(name, tol = 0.008, ...) {
  # tol is pasted into the SQL text, so accept only one finite number.
  stopifnot(is.numeric(tol), length(tol) == 1L, is.finite(tol))
  inner <- web_sql(name, ...)
  if (tol <= 0) {
    # Nothing to simplify: skip the geometry round-trip entirely.
    return(inner)
  }
  paste0(
    "SELECT * REPLACE (",
    "ST_AsText(ST_SimplifyPreserveTopology(ST_GeomFromText(wkt), ", tol, ")) AS wkt",
    ") FROM (", inner, ") __q__"
  )
}
430
+
431
# App-level cache: crime_sf and quantile breaks loaded ONCE at startup and
# shared across all sessions. Safe because the DuckDB is read_only and baked
# into the Docker image -- the data never changes between sessions.
#
# Returns list(crime_sf = <sf object or NULL>, qb = list(crime, desap, pam)).
# If the connection cannot be opened the result is
# list(crime_sf = NULL, qb = list()).
.load_app_cache <- function() {
  con <- tryCatch(
    ddb_connect(DB_PATH, read_only = TRUE),
    error = function(e) {
      message("[cache] connect: ", conditionMessage(e))
      NULL
    }
  )
  if (is.null(con)) {
    return(list(crime_sf = NULL, qb = list()))
  }
  # add = TRUE so this cleanup survives any later on.exit() registration.
  on.exit(
    try(DBI::dbDisconnect(con, shutdown = TRUE), silent = TRUE),
    add = TRUE
  )

  # Quantile thresholds for one table column. On failure the error is logged
  # (previously it was swallowed silently) and the placeholder breaks
  # 1..(n - 1) are returned so callers still receive a numeric vector.
  qbq <- function(tbl, col, n) {
    tryCatch(
      as.numeric(DBI::dbGetQuery(con, web_sql_qbreaks(tbl, col, n))[1L, ]),
      error = function(e) {
        message("[cache] qbreaks ", tbl, ".", col, ": ", conditionMessage(e))
        seq_len(n - 1L)
      }
    )
  }

  # Crime polygons: simplified in SQL, parsed from WKT, raw wkt column dropped.
  crime_sf <- tryCatch(
    DBI::dbGetQuery(con, web_sql_simplified("src_crime", tol = 0.005)) |>
      sf::st_as_sf(wkt = "wkt", crs = 4326) |>
      dplyr::select(-wkt),
    error = function(e) {
      message("[cache] crime_sf: ", conditionMessage(e))
      NULL
    }
  )

  list(
    crime_sf = crime_sf,
    qb = list(
      crime = lapply(CRIME_COLS, function(info) {
        qbq("crime_mun", info$col, length(CRIME_STEP_PAL))
      }),
      desap = qbq("dim_desap_mun", "final_desap_nl_per_100k", length(DESAP_PAL)),
      pam = qbq("dim_pam_mun", "pam_n", length(PAM_PAL))
    )
  )
}
460
+ .APP_CACHE <- .load_app_cache()
461
+
462
  #######################################################################################
463
  # WEB_SPEC -- declarative layer / accordion / panel / palette config.
464
  # Single source consumed by the Shiny UI AND the docs converter so the two
 
1457
  temporal_filter_wkt = NULL
1458
  )
1459
 
1460
+ # Quantile-break cache: pre-populated from the app-level cache (loaded at startup).
1461
+ # Falls back to an empty list if the cache failed; compute_qbreaks() fills gaps lazily.
1462
+ qb <- .APP_CACHE$qb
1463
 
1464
  # Compute n_colors-1 quantile thresholds from a DuckDB table column
1465
  compute_qbreaks <- function(tbl, col, n_colors, filter_zero = TRUE) {
 
1811
  output$map <- renderMaplibre({
1812
  req(con)
1813
 
1814
+ # qb already populated from .APP_CACHE at server startup -- no DB queries here.
1815
+ # Fill any gaps (e.g. if the cache failed at startup) via compute_qbreaks().
1816
+ if (length(qb$crime) == 0)
1817
+ qb$crime <- lapply(CRIME_COLS, function(info)
1818
+ compute_qbreaks("crime_mun", info$col, length(CRIME_STEP_PAL)))
1819
+ if (is.null(qb$desap))
1820
+ qb$desap <- compute_qbreaks("dim_desap_mun", "final_desap_nl_per_100k", length(DESAP_PAL))
1821
+ if (is.null(qb$pam))
1822
+ qb$pam <- compute_qbreaks("dim_pam_mun", "pam_n", length(PAM_PAL))
1823
 
1824
  g <- function(col) list("get", col)
1825
  ts <- function(col) list("to-string", list("get", col))
1826
 
1827
+ # crime_sf from app-level cache (loaded once at startup, shared across sessions).
1828
+ crime_sf <- .APP_CACHE$crime_sf
 
 
 
1829
 
1830
  load_grid_unified <- function(niv, limit = NULL) {
1831
+ nv_tol <- if (niv >= 7L) 0.002 else 0.005
1832
  tryCatch({
1833
  DBI::dbGetQuery(con,
1834
+ web_sql_grid_unified(niv, has_fosas_tbl, limit, simplify_tol = nv_tol)) |>
1835
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
1836
  dplyr::select(-wkt)
1837
  }, error = function(e) { message("grid_nivel", niv, ": ", e$message); NULL })
 
2026
  observeEvent(input$grid_nivel, {
2027
  req(input$grid_nivel == "7", !grid7_loaded(), !is.null(con))
2028
  grid7_sf <<- tryCatch({
2029
+ DBI::dbGetQuery(con, web_sql_grid_unified(7, has_fosas_tbl, simplify_tol = 0.002)) |>
2030
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
2031
  dplyr::select(-wkt)
2032
  }, error = function(e) { message("N7 lazy load: ", e$message); NULL })
 
2040
  lazy_add_fill <- function(key, sql_name, id, make_layer_fn) {
2041
  if (isTRUE(lazy_added[[key]]) || is.null(con)) return()
2042
  sf_data <- tryCatch({
2043
+ DBI::dbGetQuery(con, web_sql_simplified(sql_name)) |>
2044
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
2045
  dplyr::select(-wkt)
2046
  }, error = function(e) { message("lazy ", id, ": ", e$message); NULL })
 
2068
  lazy_add_grid <- function(nv_chr) {
2069
  key <- paste0("grid", nv_chr)
2070
  if (isTRUE(lazy_added[[key]]) || is.null(con)) return()
2071
+ # Use pre-materialised TEMP TABLE if ready; fall back to full CTE otherwise.
2072
+ pre_tbl <- paste0("grid_pre_n", nv_chr)
2073
+ has_pre <- tryCatch(DBI::dbExistsTable(con, pre_tbl), error = function(e) FALSE)
2074
+ nv_tol <- if (nv_chr == "7") 0.002 else 0.005
2075
+ sql <- if (has_pre) paste("SELECT * FROM", pre_tbl)
2076
+ else web_sql_grid_unified(as.integer(nv_chr), has_fosas_tbl,
2077
+ simplify_tol = nv_tol)
2078
  sf_data <- tryCatch({
2079
+ DBI::dbGetQuery(con, sql) |>
2080
  sf::st_as_sf(wkt = "wkt", crs = 4326) |>
2081
  dplyr::select(-wkt)
2082
  }, error = function(e) { message("lazy grid n", nv_chr, ": ", e$message); NULL })
 
2181
  )
2182
  updateSelectInput(session, "state_select",
2183
  choices = c("Choose a state..." = "", state_ch))
2184
+ # Pre-materialise grid joins for N4/N5/N6 into session TEMP TABLEs.
2185
+ # Runs after national totals so the sidebar populates first.
2186
+ # lazy_add_grid() will use these tables instead of re-running the full CTE.
2187
+ for (.nv in c("4", "5", "6")) {
2188
+ nv_tol <- 0.005
2189
+ tryCatch(
2190
+ DBI::dbExecute(con, paste0(
2191
+ "CREATE TEMP TABLE IF NOT EXISTS grid_pre_n", .nv, " AS ",
2192
+ web_sql_grid_unified(as.integer(.nv), has_fosas_tbl, simplify_tol = nv_tol)
2193
+ )),
2194
+ error = function(e) message("grid_pre_n", .nv, ": ", e$message)
2195
+ )
2196
+ }
2197
  }, once = TRUE)
2198
 
2199
  observeEvent(input$grid_icon_click, {
Dockerfile CHANGED
@@ -9,7 +9,7 @@ FROM rocker/geospatial:4.4
9
  # $(cat /run/secrets/hf_token)" -fL "${DDB_URL}" -o /app/depth_mexico.duckdb
10
  ARG DDB_URL=https://huggingface.co/datasets/edgarodriguez/depth_alpha/resolve/main/depth_mexico.duckdb
11
 
12
- RUN install2.r --error --skipinstalled \
13
  shiny bslib DBI duckdb dplyr jsonlite htmltools glue writexl ggplot2 stringr \
14
  && R -e "install.packages('mapgl', repos = c('https://walkerke.r-universe.dev', 'https://cloud.r-project.org'))" \
15
  && R -e "library(mapgl); library(duckdb); library(sf); library(shiny)"
 
9
  # $(cat /run/secrets/hf_token)" -fL "${DDB_URL}" -o /app/depth_mexico.duckdb
10
  ARG DDB_URL=https://huggingface.co/datasets/edgarodriguez/depth_alpha/resolve/main/depth_mexico.duckdb
11
 
12
+ RUN install2.r --error --skipinstalled --no-docs --no-test --ncpus 4 \
13
  shiny bslib DBI duckdb dplyr jsonlite htmltools glue writexl ggplot2 stringr \
14
  && R -e "install.packages('mapgl', repos = c('https://walkerke.r-universe.dev', 'https://cloud.r-project.org'))" \
15
  && R -e "library(mapgl); library(duckdb); library(sf); library(shiny)"
helpers.R CHANGED
@@ -46,6 +46,11 @@ ddb_connect <- function(path = ":memory:", remote = FALSE, read_only = FALSE) {
46
  DBI::dbExecute(con, "LOAD spatial")
47
  }
48
  )
 
 
 
 
 
49
  if (remote) {
50
  tryCatch(
51
  DBI::dbExecute(con, "LOAD httpfs"),
 
46
  DBI::dbExecute(con, "LOAD spatial")
47
  }
48
  )
49
+ # Allow DuckDB to use multiple cores and up to 4 GB RAM per connection.
50
+ tryCatch({
51
+ DBI::dbExecute(con, "SET threads = 4")
52
+ DBI::dbExecute(con, "SET memory_limit = '4GB'")
53
+ }, error = function(e) invisible(NULL))
54
  if (remote) {
55
  tryCatch(
56
  DBI::dbExecute(con, "LOAD httpfs"),