cjerzak committed on
Commit
a59174a
·
1 Parent(s): 5a732e8
Files changed (4) hide show
  1. Dockerfile +6 -1
  2. R/asa_api_helpers.R +105 -3
  3. README.md +50 -12
  4. Tests/example_API_call.R +1 -1
Dockerfile CHANGED
@@ -3,6 +3,9 @@ FROM rocker/r2u:24.04
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  TZ=Etc/UTC \
5
  PORT=7860 \
 
 
 
6
  RETICULATE_MINICONDA_PATH=/opt/conda \
7
  RETICULATE_CONDA=/opt/conda/bin/conda \
8
  RETICULATE_PYTHON=/opt/conda/envs/asa_env/bin/python \
@@ -15,6 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
15
  curl \
16
  wget \
17
  git \
 
18
  bzip2 \
19
  build-essential \
20
  libcurl4-openssl-dev \
@@ -71,7 +75,8 @@ RUN git clone --depth 1 --branch "${ASA_SOFTWARE_REF}" "${ASA_SOFTWARE_REPO}" /o
71
 
72
  WORKDIR /app
73
  COPY . /app
 
74
 
75
  EXPOSE 7860
76
 
77
- CMD ["Rscript", "-e", "pr <- plumber::plumb('R/plumber.R'); pr$run(host='0.0.0.0', port=as.integer(Sys.getenv('PORT', '7860')))" ]
 
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  TZ=Etc/UTC \
5
  PORT=7860 \
6
+ ASA_PROXY=socks5h://127.0.0.1:9050 \
7
+ TOR_CONTROL_PORT=9051 \
8
+ ASA_TOR_CONTROL_COOKIE=/tmp/tor/control.authcookie \
9
  RETICULATE_MINICONDA_PATH=/opt/conda \
10
  RETICULATE_CONDA=/opt/conda/bin/conda \
11
  RETICULATE_PYTHON=/opt/conda/envs/asa_env/bin/python \
 
18
  curl \
19
  wget \
20
  git \
21
+ tor \
22
  bzip2 \
23
  build-essential \
24
  libcurl4-openssl-dev \
 
75
 
76
  WORKDIR /app
77
  COPY . /app
78
+ RUN chmod +x /app/scripts/start-with-tor.sh
79
 
80
  EXPOSE 7860
81
 
82
+ CMD ["/app/scripts/start-with-tor.sh"]
R/asa_api_helpers.R CHANGED
@@ -48,6 +48,18 @@ asa_api_scalar_num <- function(value, default = NA_real_) {
48
  }
49
  }
50
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  asa_api_named_list <- function(value) {
52
  if (is.null(value)) {
53
  return(list())
@@ -119,6 +131,94 @@ asa_api_apply_env_defaults <- function() {
119
  }
120
  }
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  asa_api_bootstrap <- function() {
123
  asa_api_apply_env_defaults()
124
  if (!requireNamespace("asa", quietly = TRUE)) {
@@ -362,9 +462,11 @@ asa_api_run_batch <- function(payload) {
362
  asa_api_health_payload <- function(boot_error = NULL) {
363
  asa_installed <- requireNamespace("asa", quietly = TRUE)
364
  has_boot_error <- is.character(boot_error) && nzchar(trimws(boot_error))
 
 
365
 
366
- list(
367
- status = if (asa_installed && !has_boot_error) "ok" else "degraded",
368
  service = "asa-api",
369
  time_utc = format(Sys.time(), tz = "UTC", usetz = TRUE),
370
  asa_installed = asa_installed,
@@ -375,5 +477,5 @@ asa_api_health_payload <- function(boot_error = NULL) {
375
  conda_env = getOption("asa.default_conda_env", "asa_env"),
376
  use_browser = asa_api_to_bool(Sys.getenv("ASA_USE_BROWSER_DEFAULT", unset = "false"), default = FALSE)
377
  )
378
- )
379
  }
 
48
  }
49
  }
50
 
51
# Coerce the first element of `value` to a single integer.
#
# Arguments:
#   value   Any R object; only its first element (`value[[1]]`) is inspected.
#   default Value returned when no usable integer can be extracted.
#
# Returns `default` when `value` is NULL or empty, when the first element
# cannot be coerced by `as.integer()`, when coercion yields NA, or when the
# first element is itself non-scalar (for example a nested list), so the
# result is never a vector of length other than one taken from `value`.
asa_api_scalar_int <- function(value, default = NA_integer_) {
  if (is.null(value) || length(value) == 0L) {
    return(default)
  }

  # as.integer() warns on unparseable strings and errors on objects such as
  # closures; neither should escape from a defensive scalar accessor.
  number <- tryCatch(
    suppressWarnings(as.integer(value[[1]])),
    error = function(e) NA_integer_
  )

  # A nested first element (e.g. list(1, 2) or list()) coerces to a vector of
  # length != 1, which would make a bare `if (is.na(number))` fail.
  if (length(number) != 1L || is.na(number)) {
    return(default)
  }
  number
}
62
+
63
  asa_api_named_list <- function(value) {
64
  if (is.null(value)) {
65
  return(list())
 
131
  }
132
  }
133
 
134
# Split a proxy URL of the exact shape "scheme://host:port" into its parts.
#
# The input is first normalised with asa_api_scalar_chr() and trimws().
# Returns NULL when the result is empty or does not match the pattern below
# (anything after the port, such as a path, query or fragment, is rejected).
# Otherwise returns a list with `scheme`, `host` (both character) and `port`
# (integer, via asa_api_scalar_int()).
asa_api_parse_proxy_url <- function(proxy_url) {
  url <- trimws(asa_api_scalar_chr(proxy_url, default = ""))
  if (!nzchar(url)) {
    return(NULL)
  }

  # Capture groups: 1 = scheme, 2 = host, 3 = port digits.
  pattern <- "^([A-Za-z][A-Za-z0-9+.-]*)://([^/:?#]+):(\\d+)$"
  pieces <- regmatches(url, regexec(pattern, url))[[1]]

  # A successful match yields the full match plus the three groups.
  if (length(pieces) == 4L) {
    list(
      scheme = pieces[[2]],
      host = pieces[[3]],
      port = asa_api_scalar_int(pieces[[4]], default = NA_integer_)
    )
  } else {
    NULL
  }
}
152
+
153
# Report whether a TCP connection to `host:port` can be opened.
#
# Arguments:
#   host    Host name or address; normalised with asa_api_scalar_chr().
#   port    Port number; normalised with asa_api_scalar_int().
#   timeout Connection timeout in seconds, passed to socketConnection().
#
# Returns TRUE when the socket opens, FALSE when the host is empty, the port
# is missing or outside 1..65535, or the connection attempt fails. The probe
# connection is closed again before returning.
asa_api_port_open <- function(host, port, timeout = 1) {
  host <- trimws(asa_api_scalar_chr(host, default = ""))
  port <- asa_api_scalar_int(port, default = NA_integer_)
  if (!nzchar(host) || is.na(port) || port <= 0L || port > 65535L) {
    return(FALSE)
  }

  tryCatch(
    {
      # socketConnection() raises a warning in addition to the error when the
      # port is closed; a closed port is an expected outcome of a probe, so
      # the warning is suppressed instead of leaking to the caller's log.
      con <- suppressWarnings(
        socketConnection(
          host = host,
          port = port,
          open = "r+b",
          blocking = TRUE,
          timeout = as.numeric(timeout)
        )
      )
      # Best-effort close: a failure to close must not flip the result.
      try(close(con), silent = TRUE)
      TRUE
    },
    error = function(e) FALSE
  )
}
180
+
181
# Collect Tor readiness information from the environment.
#
# Reads ASA_PROXY, TOR_CONTROL_PORT and ASA_TOR_CONTROL_COOKIE, probes the
# proxy port (on the parsed proxy host) and the control port (on 127.0.0.1)
# with asa_api_port_open(), and checks that the cookie file exists and is
# readable. Returns a named list of `tor_*` fields; fields whose source value
# is missing are NULL. `tor_ready` is TRUE only when a proxy is configured,
# both ports are open and the cookie is readable.
asa_api_tor_health <- function() {
  proxy_url <- trimws(Sys.getenv("ASA_PROXY", unset = ""))
  proxy_info <- asa_api_parse_proxy_url(proxy_url)
  control_port <- asa_api_scalar_int(
    Sys.getenv("TOR_CONTROL_PORT", unset = ""),
    default = NA_integer_
  )
  cookie_path <- trimws(Sys.getenv("ASA_TOR_CONTROL_COOKIE", unset = ""))

  # Name each precondition once; they gate both the probes and the output.
  tor_enabled <- nzchar(proxy_url)
  has_proxy_info <- !is.null(proxy_info)
  has_control_port <- !is.na(control_port) && control_port > 0L
  has_cookie_path <- nzchar(cookie_path)

  proxy_port_open <- FALSE
  if (has_proxy_info) {
    proxy_port_open <- asa_api_port_open(proxy_info$host, proxy_info$port)
  }

  control_port_open <- FALSE
  if (has_control_port) {
    control_port_open <- asa_api_port_open("127.0.0.1", control_port)
  }

  cookie_present <- has_cookie_path && file.exists(cookie_path)
  # file.access() returns 0 on success; mode 4 tests read permission.
  cookie_readable <- cookie_present && file.access(cookie_path, 4L) == 0

  list(
    tor_enabled = tor_enabled,
    tor_ready = tor_enabled &&
      isTRUE(proxy_port_open) &&
      isTRUE(control_port_open) &&
      isTRUE(cookie_readable),
    tor_proxy = if (tor_enabled) proxy_url else NULL,
    tor_proxy_host = if (has_proxy_info) proxy_info$host else NULL,
    tor_proxy_port = if (has_proxy_info) proxy_info$port else NULL,
    tor_proxy_port_open = proxy_port_open,
    tor_control_port = if (has_control_port) control_port else NULL,
    tor_control_port_open = control_port_open,
    tor_cookie_path = if (has_cookie_path) cookie_path else NULL,
    tor_cookie_present = cookie_present,
    tor_cookie_readable = cookie_readable
  )
}
221
+
222
  asa_api_bootstrap <- function() {
223
  asa_api_apply_env_defaults()
224
  if (!requireNamespace("asa", quietly = TRUE)) {
 
462
  asa_api_health_payload <- function(boot_error = NULL) {
463
  asa_installed <- requireNamespace("asa", quietly = TRUE)
464
  has_boot_error <- is.character(boot_error) && nzchar(trimws(boot_error))
465
+ tor_health <- asa_api_tor_health()
466
+ healthy <- asa_installed && !has_boot_error && (!isTRUE(tor_health$tor_enabled) || isTRUE(tor_health$tor_ready))
467
 
468
+ c(list(
469
+ status = if (healthy) "ok" else "degraded",
470
  service = "asa-api",
471
  time_utc = format(Sys.time(), tz = "UTC", usetz = TRUE),
472
  asa_installed = asa_installed,
 
477
  conda_env = getOption("asa.default_conda_env", "asa_env"),
478
  use_browser = asa_api_to_bool(Sys.getenv("ASA_USE_BROWSER_DEFAULT", unset = "false"), default = FALSE)
479
  )
480
+ ), tor_health)
481
  }
README.md CHANGED
@@ -8,16 +8,17 @@ pinned: false
8
  license: mit
9
  ---
10
 
11
- # asa-api (Hugging Face Space)
12
 
13
- `asa-api` is a Docker-based API + GUI wrapper around [`asa::run_task()`](https://github.com/cjerzak/asa-software) designed for deployment as a Hugging Face Space.
14
 
15
  It uses:
16
  - `R` + `plumber` for HTTP endpoints
17
  - The `asa` package for orchestration (`run_task`, `run_task_batch`)
18
  - A lightweight password-protected browser GUI at `/`
 
19
 
20
- ## What This Space Exposes
21
 
22
  1. `GET /healthz`
23
  2. `POST /v1/run` for a single prompt
@@ -35,9 +36,9 @@ It uses:
35
  - GUI auth is password-based:
36
  - `/gui/query` checks `GUI_PASSWORD`
37
 
38
- ## Required / Recommended HF Space Secrets
39
 
40
- Set these in the Hugging Face Space (`Settings` -> `Variables and secrets`):
41
 
42
  - `GOOGLE_API_KEY` (or the provider key you use)
43
  - `GUI_PASSWORD`
@@ -48,7 +49,7 @@ Optional secrets / vars:
48
  - `ASA_DEFAULT_BACKEND` (defaults to `gemini` if unset; examples: `openai`, `groq`, `anthropic`, `gemini`, `openrouter`)
49
  - `ASA_DEFAULT_MODEL` (example: `gemini-2.5-flash`)
50
  - `ASA_CONDA_ENV` (default: `asa_env`)
51
- - `ASA_USE_BROWSER_DEFAULT` (default: `false`, recommended for Space stability)
52
  - `CORS_ALLOW_ORIGIN` (default: `*`)
53
 
54
  Provider-specific keys supported by `asa` include:
@@ -65,13 +66,13 @@ Provider-specific keys supported by `asa` include:
65
  ### 1) Health check
66
 
67
  ```bash
68
- curl -s https://<your-space>.hf.space/healthz
69
  ```
70
 
71
  ### 2) Single query
72
 
73
  ```bash
74
- curl -s https://<your-space>.hf.space/v1/run \
75
  -H "Content-Type: application/json" \
76
  -H "Authorization: Bearer $ASA_API_KEY" \
77
  -d '{
@@ -90,7 +91,7 @@ curl -s https://<your-space>.hf.space/v1/run \
90
  ### 3) Structured JSON output
91
 
92
  ```bash
93
- curl -s https://<your-space>.hf.space/v1/run \
94
  -H "Content-Type: application/json" \
95
  -d '{
96
  "prompt": "Find Marie Curie birth year and nationality. Return JSON.",
@@ -103,7 +104,7 @@ curl -s https://<your-space>.hf.space/v1/run \
103
  ### 4) Batch query
104
 
105
  ```bash
106
- curl -s https://<your-space>.hf.space/v1/batch \
107
  -H "Content-Type: application/json" \
108
  -d '{
109
  "prompts": [
@@ -179,6 +180,7 @@ R dependency strategy in this image:
179
  - Core R runtime packages (`plumber`, `jsonlite`, `reticulate`, `remotes`) are installed as binary apt packages (`r-cran-*`), not compiled from CRAN source.
180
  - This avoids source-compile failures such as `sodium` -> `plumber` install breaks.
181
  - Runtime linker guardrails are set so `reticulate` prefers conda environment libraries (`/opt/conda/envs/asa_env/lib` and `/opt/conda/lib`) to avoid C++ ABI loader mismatches.
 
182
 
183
  Build args:
184
 
@@ -205,8 +207,44 @@ Then open:
205
  - `http://localhost:7860/healthz`
206
  - `http://localhost:7860/`
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  ## Notes
209
 
210
- - Browser/Selenium tier is disabled by default (`use_browser = FALSE`) for better reliability in Space containers.
211
  - If you want browser tier, set `config.use_browser = true` explicitly per request and ensure supporting binaries are installed.
212
- - If startup fails with Python import/linker errors (for example `CXXABI_1.3.15 not found`), check Space startup logs and verify `GET /healthz` for `boot_error` details.
 
8
  license: mit
9
  ---
10
 
11
+ # asa-api (Self-Hosted Docker + Tor)
12
 
13
+ `asa-api` is a self-hosted Docker API + GUI wrapper around [`asa::run_task()`](https://github.com/cjerzak/asa-software).
14
 
15
  It uses:
16
  - `R` + `plumber` for HTTP endpoints
17
  - The `asa` package for orchestration (`run_task`, `run_task_batch`)
18
  - A lightweight password-protected browser GUI at `/`
19
+ - A local Tor daemon for search/web egress inside the container
20
 
21
+ ## What This Service Exposes
22
 
23
  1. `GET /healthz`
24
  2. `POST /v1/run` for a single prompt
 
36
  - GUI auth is password-based:
37
  - `/gui/query` checks `GUI_PASSWORD`
38
 
39
+ ## Required Environment Variables
40
 
41
+ Set these when running the container:
42
 
43
  - `GOOGLE_API_KEY` (or the provider key you use)
44
  - `GUI_PASSWORD`
 
49
  - `ASA_DEFAULT_BACKEND` (defaults to `gemini` if unset; examples: `openai`, `groq`, `anthropic`, `gemini`, `openrouter`)
50
  - `ASA_DEFAULT_MODEL` (example: `gemini-2.5-flash`)
51
  - `ASA_CONDA_ENV` (default: `asa_env`)
52
+ - `ASA_USE_BROWSER_DEFAULT` (default: `false`, recommended for container stability)
53
  - `CORS_ALLOW_ORIGIN` (default: `*`)
54
 
55
  Provider-specific keys supported by `asa` include:
 
66
  ### 1) Health check
67
 
68
  ```bash
69
+ curl -s http://localhost:7860/healthz
70
  ```
71
 
72
  ### 2) Single query
73
 
74
  ```bash
75
+ curl -s http://localhost:7860/v1/run \
76
  -H "Content-Type: application/json" \
77
  -H "Authorization: Bearer $ASA_API_KEY" \
78
  -d '{
 
91
  ### 3) Structured JSON output
92
 
93
  ```bash
94
+ curl -s http://localhost:7860/v1/run \
95
  -H "Content-Type: application/json" \
96
  -d '{
97
  "prompt": "Find Marie Curie birth year and nationality. Return JSON.",
 
104
  ### 4) Batch query
105
 
106
  ```bash
107
+ curl -s http://localhost:7860/v1/batch \
108
  -H "Content-Type: application/json" \
109
  -d '{
110
  "prompts": [
 
180
  - Core R runtime packages (`plumber`, `jsonlite`, `reticulate`, `remotes`) are installed as binary apt packages (`r-cran-*`), not compiled from CRAN source.
181
  - This avoids source-compile failures such as `sodium` -> `plumber` install breaks.
182
  - Runtime linker guardrails are set so `reticulate` prefers conda environment libraries (`/opt/conda/envs/asa_env/lib` and `/opt/conda/lib`) to avoid C++ ABI loader mismatches.
183
+ - Tor is installed in the image and started before the API. If Tor does not become ready, the container exits instead of serving direct search traffic.
184
 
185
  Build args:
186
 
 
207
  - `http://localhost:7860/healthz`
208
  - `http://localhost:7860/`
209
 
210
+ ## Tor Deployment Defaults
211
+
212
+ The container deploys Tor locally and exports these runtime defaults before starting `plumber`:
213
+
214
+ - `ASA_PROXY=socks5h://127.0.0.1:9050`
215
+ - `TOR_CONTROL_PORT=9051`
216
+ - `ASA_TOR_CONTROL_COOKIE=/tmp/tor/control.authcookie`
217
+
218
+ Tor scope is intentionally limited:
219
+
220
+ - Search and webpage retrieval traffic uses Tor.
221
+ - LLM provider API traffic remains direct, matching upstream `asa` behavior.
222
+ - Browser/Selenium search remains disabled by default.
223
+
224
+ Startup behavior is fail-closed:
225
+
226
+ - Tor must answer a probe to `https://check.torproject.org/api/ip` with `IsTor=true`.
227
+ - If the SOCKS proxy, ControlPort, or cookie setup never becomes ready, the container exits non-zero.
228
+
229
+ `GET /healthz` now includes Tor readiness fields, among them:
230
+
231
+ - `tor_enabled`
232
+ - `tor_ready`
233
+ - `tor_proxy`
234
+ - `tor_control_port`
235
+ - `tor_cookie_present`
236
+ - `tor_cookie_readable`
237
+
238
+ You can override the local proxy defaults if needed:
239
+
240
+ - `ASA_PROXY`
241
+ - `TOR_CONTROL_PORT`
242
+ - `ASA_TOR_CONTROL_COOKIE`
243
+ - `ASA_TOR_PROBE_URL`
244
+ - `ASA_TOR_STARTUP_TIMEOUT_SEC`
245
+
246
  ## Notes
247
 
248
+ - Browser/Selenium tier is disabled by default (`use_browser = FALSE`) for better reliability in minimal containers.
249
  - If you want browser tier, set `config.use_browser = true` explicitly per request and ensure supporting binaries are installed.
250
+ - If startup fails with Python import/linker errors (for example `CXXABI_1.3.15 not found`), inspect container logs and verify `GET /healthz` for `boot_error` details once the service is up.
Tests/example_API_call.R CHANGED
@@ -2,7 +2,7 @@
2
  library(httr2)
3
  library(jsonlite)
4
 
5
- base_url <- "https://cjerzak-asa-api.hf.space"
6
 
7
  resp <- request(paste0(base_url, "/v1/run")) |>
8
  req_headers(`Content-Type` = "application/json") |>
 
2
  library(httr2)
3
  library(jsonlite)
4
 
5
+ base_url <- "http://localhost:7860"
6
 
7
  resp <- request(paste0(base_url, "/v1/run")) |>
8
  req_headers(`Content-Type` = "application/json") |>