File size: 11,144 Bytes
611bfd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
from __future__ import annotations

import sys
from types import UnionType
from pathlib import Path
from typing import Any, Union, get_args, get_origin

# Directory one level above the folder that contains this file (symlinks
# resolved). It is put at the front of sys.path, once, before the
# `pots_shutdown_tracker` import that follows -- presumably the package lives
# under this directory; confirm against the repository layout.
BACKEND_ROOT = Path(__file__).resolve().parents[1]
if str(BACKEND_ROOT) not in sys.path:
    sys.path.insert(0, str(BACKEND_ROOT))

from pots_shutdown_tracker.config import Settings


# Environment variable names that `_render_sensitivity` labels "secret";
# every other variable is labelled "non-secret".
# NOTE(review): `openai_api_key` has a description override in this file but no
# matching entry here -- confirm whether its environment variable is emitted
# by this script and, if so, whether it belongs in this set.
SECRET_ENV_VARS = {
    "POTS_TRACKER_ADMIN_API_KEY",
    "POTS_TRACKER_DB_URL",
    "POTS_TRACKER_FCC_ECFS_API_KEY",
    "POTS_TRACKER_HF_STORAGE_TOKEN",
}

# Hand-written descriptions keyed by `Settings` field name (not by environment
# variable name). `_render_description` returns these verbatim and only falls
# back to its suffix-based templates for fields that are not listed here.
FIELD_DESCRIPTION_OVERRIDES = {
    "active_window_post_target_grace_days": "Days a notice can remain active after its target date.",
    "admin_api_key": "Shared secret required to authorize admin endpoints.",
    "admin_api_key_header": "Header name expected on admin requests.",
    "api_prefix": "Base URL prefix mounted in FastAPI.",
    "area_risk_airport_promotes_to_direct": (
        "When true, `<city> Airport` notices promote to a direct match for searches on `<city>`. "
        "Set false to surface them in the nearby-municipality section instead."
    ),
    "app_name": "Human-readable application name.",
    "auto_create_schema": "Create the database schema automatically at startup.",
    "bulk_lookup_concurrent_workers": "Maximum number of background bulk lookup jobs processed concurrently.",
    "bulk_lookup_file_size_mb": "Maximum uploaded bulk lookup workbook size in megabytes.",
    "bulk_lookup_max_rows": "Maximum data rows accepted in a bulk lookup workbook.",
    "bulk_lookup_retention_days": "Days to retain bulk lookup input and output blobs before cleanup.",
    "cors_allow_origins": "Comma-separated list of allowed browser origins.",
    "db_max_overflow": "Extra SQLAlchemy pool connections allowed above the base size.",
    "db_pool_pre_ping": "Ping pooled database connections before use.",
    "db_pool_recycle_seconds": "Lifetime of pooled database connections before recycle.",
    "db_pool_size": "Base SQLAlchemy database pool size.",
    "db_pool_timeout_seconds": "Seconds to wait for a pooled database connection.",
    "db_url": "Database connection string.",
    "enable_ai": "Enable AI-backed parsing, summarization, and search helpers.",
    "enable_weekly_jobs": "Enable the weekly APScheduler job set.",
    "fcc_ecfs_api_key": "Optional ECFS API key reserved for FCC watch discovery.",
    "fcc_ecfs_base_url": "Base URL for the FCC ECFS public API.",
    "fcc_watch_lookback_months": "Historical lookback window for FCC watch backfill phases.",
    "fcc_watch_proceedings": "Comma-separated ECFS proceeding numbers for targeted FCC watch scans.",
    "fetch_max_content_length_mb": "Maximum fetched body size in megabytes.",
    "fetch_read_timeout_seconds": "Read timeout for fetched responses.",
    "fetch_timeout_seconds": "Overall fetch timeout in seconds.",
    "frontend_dist_path": "Path to the built frontend bundle served by the app.",
    "hf_storage_path_prefix": "Path prefix used for stored blobs in the dataset repo.",
    "hf_storage_require_private": "Refuse to use a public Hugging Face dataset repo.",
    "hf_storage_repo_id": "Hugging Face dataset repo that stores crawler blobs.",
    "hf_storage_revision": "Revision used for Hugging Face dataset uploads and downloads.",
    "hf_storage_token": "Write token for the Hugging Face dataset repo.",
    "lookback_months": "Active corpus lookback window in months.",
    "log_level": "Application log level.",
    "openai_api_key": "OpenAI API key used by AI features.",
    "query_embedding_cache_enabled": "Enable caching for query embeddings.",
    "query_embedding_cache_size": "Maximum number of cached query embeddings.",
    "request_timing_enabled": "Emit request timing logs.",
    "run_migrations_on_startup": "Run Alembic migrations during app startup.",
    "scheduler_enabled_instances": "Number of instances allowed to run scheduler jobs.",
    "search_candidate_limit": "Maximum number of candidate notices considered during search.",
    "search_refinement_limit": "Maximum number of notices retained after search refinement.",
    "search_prewarm_enabled": "Enable search prewarm at startup.",
    "search_prewarm_queries": "Comma-separated search queries used for prewarming.",
    "search_result_cache_enabled": "Enable caching for search responses.",
    "search_result_cache_size": "Maximum number of cached search responses.",
    "search_result_cache_ttl_seconds": "TTL for cached search responses in seconds.",
    "search_trace_enabled": "Emit detailed search trace logs.",
    "search_vector_candidate_limit": "Maximum number of vector candidates considered during search.",
    "slow_request_threshold_ms": "Threshold for logging slow requests in milliseconds.",
    "source_coverage_matrix_file": "Path to the source coverage matrix JSON file.",
    "startup_db_wait_seconds": "Maximum time to wait for the database during startup.",
    "storage_backend": "Select the storage backend (`filesystem` or `huggingface_dataset`).",
    "storage_path": "Local storage and cache root.",
    "serve_frontend": "Serve the frontend bundle from FastAPI.",
    "ops_presets_file": "Path to the curated ops preset list.",
    "timezone": "Application timezone.",
    "trust_matrix_empty_threshold": "Maximum empty-matrix ratio before queryability is disabled.",
    "trust_stale_days": "Days after which the corpus is considered stale.",
    "user_agent": "User-Agent string used for outbound HTTP requests.",
    "weekly_schedule": "Cron expression for the weekly job schedule.",
}

# Scope label per environment variable name, consumed by `_render_scope`.
# Variables that are not listed here are reported with the scope "both".
SPECIAL_SCOPE_OVERRIDES = {
    "POTS_TRACKER_ADMIN_API_KEY": "hosted",
    "POTS_TRACKER_CORS_ALLOW_ORIGINS": "hosted",
    "POTS_TRACKER_FCC_ECFS_API_KEY": "hosted",
    "POTS_TRACKER_FCC_ECFS_BASE_URL": "hosted",
    "POTS_TRACKER_FCC_WATCH_LOOKBACK_MONTHS": "hosted",
    "POTS_TRACKER_FCC_WATCH_PROCEEDINGS": "hosted",
    "POTS_TRACKER_FRONTEND_DIST_PATH": "hosted",
    "POTS_TRACKER_HF_STORAGE_PATH_PREFIX": "hosted",
    "POTS_TRACKER_HF_STORAGE_REPO_ID": "hosted",
    "POTS_TRACKER_HF_STORAGE_REQUIRE_PRIVATE": "hosted",
    "POTS_TRACKER_HF_STORAGE_REVISION": "hosted",
    "POTS_TRACKER_HF_STORAGE_TOKEN": "hosted",
    "POTS_TRACKER_RUN_MIGRATIONS_ON_STARTUP": "hosted",
    "POTS_TRACKER_SERVE_FRONTEND": "hosted",
}

# Exact display text for individual lowercase name tokens. `_humanize_token`
# checks this table first, so acronyms keep their casing (for example "API")
# and a few tokens stay lowercase (for example "lbs") instead of being
# capitalized like ordinary words.
TOKEN_DISPLAY = {
    "ai": "AI",
    "api": "API",
    "att": "AT&T",
    "clli": "CLLI",
    "db": "DB",
    "fcc": "FCC",
    "hf": "HF",
    "id": "ID",
    "ip": "IP",
    "json": "JSON",
    "lbs": "lbs",
    "ops": "ops",
    "p90": "p90",
    "png": "PNG",
    "sql": "SQL",
    "ttl": "TTL",
    "ui": "UI",
    "url": "URL",
    "urls": "URLs",
}


def _humanize_token(token: str) -> str:
    """Return the display form of one underscore-delimited name token.

    A token listed in ``TOKEN_DISPLAY`` uses that entry. A token that is all
    digits or already uppercase is returned unchanged. Anything else has its
    hyphens turned into spaces and is capitalized with ``str.capitalize``.
    """
    known = TOKEN_DISPLAY.get(token)
    if known is not None:
        return known
    if token.isdigit() or token.isupper():
        return token
    spaced = token.replace("-", " ")
    return spaced.capitalize()


def _humanize_name(name: str) -> str:
    """Turn a snake_case name into space-separated display words.

    Empty segments (from leading, trailing, or doubled underscores) are
    dropped; each remaining segment is rendered by ``_humanize_token``.
    """
    segments = filter(None, name.split("_"))
    return " ".join(map(_humanize_token, segments))


def _render_description(field_name: str, env_name: str) -> str:
    """Return the description text for one settings field.

    Args:
        field_name: Name of the field on ``Settings``.
        env_name: Environment variable name of the field. It is accepted for
            signature compatibility and is not read here.

    Returns:
        The entry from ``FIELD_DESCRIPTION_OVERRIDES`` when one exists;
        otherwise a sentence chosen by the field-name suffix; otherwise the
        humanized field name followed by ``" setting."``.
    """
    explicit = FIELD_DESCRIPTION_OVERRIDES.get(field_name)
    if explicit is not None:
        return explicit

    # Order matters: longer, more specific suffixes must be tried before the
    # shorter suffixes they end with (e.g. "_document_urls" before "_urls").
    suffix_leads = (
        ("_index_urls", "Comma-separated index URLs for"),
        ("_tracker_urls", "Comma-separated tracker URLs for"),
        ("_seed_documents_file", "Path to the seed document URL file for"),
        ("_document_urls_file", "Path to extra document URLs for"),
        ("_document_urls", "Comma-separated explicit document URLs for"),
        ("_urls_file", "Path to an additional URL file for"),
        ("_urls", "Comma-separated URLs for"),
    )
    for suffix, lead in suffix_leads:
        if not field_name.endswith(suffix):
            continue
        subject = _humanize_name(field_name.removesuffix(suffix))
        return f"{lead} {subject}."

    readable = _humanize_name(field_name)
    return f"{readable} setting."


def _render_scope(env_name: str) -> str:
    """Return the scope label for *env_name*, defaulting to ``"both"``."""
    if env_name in SPECIAL_SCOPE_OVERRIDES:
        return SPECIAL_SCOPE_OVERRIDES[env_name]
    return "both"


def _render_sensitivity(env_name: str) -> str:
    """Return ``"secret"`` for names in ``SECRET_ENV_VARS``, else ``"non-secret"``."""
    if env_name in SECRET_ENV_VARS:
        return "secret"
    return "non-secret"


def _render_type(annotation: Any) -> str:
    if annotation is Any:
        return "Any"
    if annotation is type(None):
        return "None"
    origin = get_origin(annotation)
    if origin in {Union, UnionType}:
        return " | ".join(_render_type(arg) for arg in get_args(annotation))
    if isinstance(annotation, type):
        return annotation.__name__
    text = str(annotation).replace("typing.", "").replace("pathlib.", "")
    return text.replace("<class '", "").replace("'>", "")


def _render_default(value: Any) -> str:
    if value is None:
        return "`unset`"
    if isinstance(value, bool):
        return f"`{str(value).lower()}`"
    if isinstance(value, Path):
        text = value.as_posix()
        if not value.is_absolute() and not text.startswith("./"):
            text = f"./{text}"
        return f"`{text}`"
    if isinstance(value, str):
        return '`""`' if value == "" else f"`{value}`"
    return f"`{value}`"


def iter_reference_rows() -> list[dict[str, str]]:
    """Build one reference row per ``POTS_TRACKER_``-prefixed settings field.

    The environment variable name of a field is its alias, or the field name
    when no alias is set; fields whose name does not start with
    ``POTS_TRACKER_`` are left out.

    Returns:
        Rows in ``Settings.model_fields`` order, each with the keys ``name``,
        ``type``, ``default``, ``description``, ``scope`` and ``sensitivity``.
    """
    candidates = (
        (attr_name, info, info.alias or attr_name)
        for attr_name, info in Settings.model_fields.items()
    )
    return [
        {
            "name": env_var,
            "type": _render_type(info.annotation),
            "default": _render_default(info.default),
            "description": _render_description(attr_name, env_var),
            "scope": _render_scope(env_var),
            "sensitivity": _render_sensitivity(env_var),
        }
        for attr_name, info, env_var in candidates
        if env_var.startswith("POTS_TRACKER_")
    ]


def _escape_table_cell(text: str) -> str:
    """Backslash-escape pipes so *text* stays inside one Markdown table cell."""
    return text.replace("|", "\\|")


def render_env_reference() -> str:
    """Render the environment reference as a Markdown document.

    Returns:
        The document text: a heading, a regeneration note, and a six-column
        table with one row per entry from ``iter_reference_rows()``. The text
        ends with a newline.
    """
    lines = [
        "# Environment Reference",
        "",
        "Generated from `backend/app/pots_shutdown_tracker/config.py`. Regenerate this file with `python backend/scripts/dump_env_reference.py > docs/ENV.md` after changing `Settings`.",
        "",
        "| Name | Type | Default | Description | Scope | Sensitivity |",
        "| --- | --- | --- | --- | --- | --- |",
    ]
    columns = ("type", "default", "description", "scope", "sensitivity")
    for row in iter_reference_rows():
        # Union types render as e.g. "str | None"; an unescaped pipe would be
        # read as a cell separator, shifting the remaining columns and
        # dropping the last one.
        cells = " | ".join(_escape_table_cell(row[column]) for column in columns)
        lines.append(f"| `{row['name']}` | {cells} |")
    # A trailing empty entry makes the joined document end with a newline.
    lines.append("")
    return "\n".join(lines)


def main() -> None:
    """Write the generated environment reference to standard output."""
    document = render_env_reference()
    sys.stdout.write(document)


# Emit the reference only when this file is executed as a script, so that
# importing it has no output side effect.
if __name__ == "__main__":
    main()