File size: 11,144 Bytes
611bfd9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | from __future__ import annotations
import sys
from types import UnionType
from pathlib import Path
from typing import Any, Union, get_args, get_origin
# Directory two levels above this script (the parent of the script's own
# directory). It is prepended to ``sys.path`` when missing, presumably so the
# project package import that follows resolves when the script is run directly.
BACKEND_ROOT = Path(__file__).resolve().parents[1]
if all(entry != str(BACKEND_ROOT) for entry in sys.path):
    sys.path.insert(0, str(BACKEND_ROOT))
from pots_shutdown_tracker.config import Settings
# Environment variable names that `_render_sensitivity` reports as "secret";
# every name not listed here is reported as "non-secret".
SECRET_ENV_VARS = {
    "POTS_TRACKER_ADMIN_API_KEY",
    "POTS_TRACKER_DB_URL",
    "POTS_TRACKER_FCC_ECFS_API_KEY",
    "POTS_TRACKER_HF_STORAGE_TOKEN",
}
# Hand-written descriptions keyed by `Settings` field name (not by env var
# name). `_render_description` returns these verbatim and only falls back to
# its suffix-based / generic wording for fields that are not listed here.
FIELD_DESCRIPTION_OVERRIDES = {
    "active_window_post_target_grace_days": "Days a notice can remain active after its target date.",
    "admin_api_key": "Shared secret required to authorize admin endpoints.",
    "admin_api_key_header": "Header name expected on admin requests.",
    "api_prefix": "Base URL prefix mounted in FastAPI.",
    "area_risk_airport_promotes_to_direct": (
        "When true, `<city> Airport` notices promote to a direct match for searches on `<city>`. "
        "Set false to surface them in the nearby-municipality section instead."
    ),
    "app_name": "Human-readable application name.",
    "auto_create_schema": "Create the database schema automatically at startup.",
    "bulk_lookup_concurrent_workers": "Maximum number of background bulk lookup jobs processed concurrently.",
    "bulk_lookup_file_size_mb": "Maximum uploaded bulk lookup workbook size in megabytes.",
    "bulk_lookup_max_rows": "Maximum data rows accepted in a bulk lookup workbook.",
    "bulk_lookup_retention_days": "Days to retain bulk lookup input and output blobs before cleanup.",
    "cors_allow_origins": "Comma-separated list of allowed browser origins.",
    "db_max_overflow": "Extra SQLAlchemy pool connections allowed above the base size.",
    "db_pool_pre_ping": "Ping pooled database connections before use.",
    "db_pool_recycle_seconds": "Lifetime of pooled database connections before recycle.",
    "db_pool_size": "Base SQLAlchemy database pool size.",
    "db_pool_timeout_seconds": "Seconds to wait for a pooled database connection.",
    "db_url": "Database connection string.",
    "enable_ai": "Enable AI-backed parsing, summarization, and search helpers.",
    "enable_weekly_jobs": "Enable the weekly APScheduler job set.",
    "fcc_ecfs_api_key": "Optional ECFS API key reserved for FCC watch discovery.",
    "fcc_ecfs_base_url": "Base URL for the FCC ECFS public API.",
    "fcc_watch_lookback_months": "Historical lookback window for FCC watch backfill phases.",
    "fcc_watch_proceedings": "Comma-separated ECFS proceeding numbers for targeted FCC watch scans.",
    "fetch_max_content_length_mb": "Maximum fetched body size in megabytes.",
    "fetch_read_timeout_seconds": "Read timeout for fetched responses.",
    "fetch_timeout_seconds": "Overall fetch timeout in seconds.",
    "frontend_dist_path": "Path to the built frontend bundle served by the app.",
    "hf_storage_path_prefix": "Path prefix used for stored blobs in the dataset repo.",
    "hf_storage_require_private": "Refuse to use a public Hugging Face dataset repo.",
    "hf_storage_repo_id": "Hugging Face dataset repo that stores crawler blobs.",
    "hf_storage_revision": "Revision used for Hugging Face dataset uploads and downloads.",
    "hf_storage_token": "Write token for the Hugging Face dataset repo.",
    "lookback_months": "Active corpus lookback window in months.",
    "log_level": "Application log level.",
    "openai_api_key": "OpenAI API key used by AI features.",
    "query_embedding_cache_enabled": "Enable caching for query embeddings.",
    "query_embedding_cache_size": "Maximum number of cached query embeddings.",
    "request_timing_enabled": "Emit request timing logs.",
    "run_migrations_on_startup": "Run Alembic migrations during app startup.",
    "scheduler_enabled_instances": "Number of instances allowed to run scheduler jobs.",
    "search_candidate_limit": "Maximum number of candidate notices considered during search.",
    "search_refinement_limit": "Maximum number of notices retained after search refinement.",
    "search_prewarm_enabled": "Enable search prewarm at startup.",
    "search_prewarm_queries": "Comma-separated search queries used for prewarming.",
    "search_result_cache_enabled": "Enable caching for search responses.",
    "search_result_cache_size": "Maximum number of cached search responses.",
    "search_result_cache_ttl_seconds": "TTL for cached search responses in seconds.",
    "search_trace_enabled": "Emit detailed search trace logs.",
    "search_vector_candidate_limit": "Maximum number of vector candidates considered during search.",
    "slow_request_threshold_ms": "Threshold for logging slow requests in milliseconds.",
    "source_coverage_matrix_file": "Path to the source coverage matrix JSON file.",
    "startup_db_wait_seconds": "Maximum time to wait for the database during startup.",
    "storage_backend": "Select the storage backend (`filesystem` or `huggingface_dataset`).",
    "storage_path": "Local storage and cache root.",
    "serve_frontend": "Serve the frontend bundle from FastAPI.",
    "ops_presets_file": "Path to the curated ops preset list.",
    "timezone": "Application timezone.",
    "trust_matrix_empty_threshold": "Maximum empty-matrix ratio before queryability is disabled.",
    "trust_stale_days": "Days after which the corpus is considered stale.",
    "user_agent": "User-Agent string used for outbound HTTP requests.",
    "weekly_schedule": "Cron expression for the weekly job schedule.",
}
# Scope column value per environment variable name. `_render_scope` looks the
# name up here and falls back to "both" for any variable that is not listed.
SPECIAL_SCOPE_OVERRIDES = {
    "POTS_TRACKER_ADMIN_API_KEY": "hosted",
    "POTS_TRACKER_CORS_ALLOW_ORIGINS": "hosted",
    "POTS_TRACKER_FCC_ECFS_API_KEY": "hosted",
    "POTS_TRACKER_FCC_ECFS_BASE_URL": "hosted",
    "POTS_TRACKER_FCC_WATCH_LOOKBACK_MONTHS": "hosted",
    "POTS_TRACKER_FCC_WATCH_PROCEEDINGS": "hosted",
    "POTS_TRACKER_FRONTEND_DIST_PATH": "hosted",
    "POTS_TRACKER_HF_STORAGE_PATH_PREFIX": "hosted",
    "POTS_TRACKER_HF_STORAGE_REPO_ID": "hosted",
    "POTS_TRACKER_HF_STORAGE_REQUIRE_PRIVATE": "hosted",
    "POTS_TRACKER_HF_STORAGE_REVISION": "hosted",
    "POTS_TRACKER_HF_STORAGE_TOKEN": "hosted",
    "POTS_TRACKER_RUN_MIGRATIONS_ON_STARTUP": "hosted",
    "POTS_TRACKER_SERVE_FRONTEND": "hosted",
}
# Exact display spelling for individual underscore-separated name tokens.
# `_humanize_token` returns the mapped value as-is (so entries such as "lbs",
# "ops" and "p90" deliberately stay lowercase) instead of capitalizing the token.
TOKEN_DISPLAY = {
    "ai": "AI",
    "api": "API",
    "att": "AT&T",
    "clli": "CLLI",
    "db": "DB",
    "fcc": "FCC",
    "hf": "HF",
    "id": "ID",
    "ip": "IP",
    "json": "JSON",
    "lbs": "lbs",
    "ops": "ops",
    "p90": "p90",
    "png": "PNG",
    "sql": "SQL",
    "ttl": "TTL",
    "ui": "UI",
    "url": "URL",
    "urls": "URLs",
}
def _humanize_token(token: str) -> str:
    """Return the display form of a single name token.

    A token listed in ``TOKEN_DISPLAY`` uses its mapped spelling. All-digit and
    all-uppercase tokens are returned unchanged. Anything else has hyphens
    replaced by spaces and is passed through ``str.capitalize``.
    """
    try:
        return TOKEN_DISPLAY[token]
    except KeyError:
        pass
    keep_verbatim = token.isdigit() or token.isupper()
    return token if keep_verbatim else token.replace("-", " ").capitalize()
def _humanize_name(name: str) -> str:
    """Humanize an underscore-separated name token by token.

    Empty tokens (from leading, trailing, or doubled underscores) are skipped;
    the remaining tokens are humanized individually and joined with spaces.
    """
    words = [_humanize_token(piece) for piece in name.split("_") if piece != ""]
    return " ".join(words)
def _render_description(field_name: str, env_name: str) -> str:
    """Return the Description column text for a settings field.

    Resolution order:
      1. An explicit entry in ``FIELD_DESCRIPTION_OVERRIDES``.
      2. The first matching suffix rule, where the text before the suffix is
         humanized and used as the carrier name.
      3. A generic "<Humanized name> setting." fallback.

    ``env_name`` is accepted for signature compatibility but is not consulted.
    """
    explicit = FIELD_DESCRIPTION_OVERRIDES.get(field_name)
    if explicit is not None:
        return explicit

    # Ordered most-specific first: several suffixes are tails of others (for
    # example "_document_urls_file" also ends with "_urls_file"), so the first
    # match must win.
    suffix_templates = (
        ("_index_urls", "Comma-separated index URLs for {carrier}."),
        ("_tracker_urls", "Comma-separated tracker URLs for {carrier}."),
        ("_seed_documents_file", "Path to the seed document URL file for {carrier}."),
        ("_document_urls_file", "Path to extra document URLs for {carrier}."),
        ("_document_urls", "Comma-separated explicit document URLs for {carrier}."),
        ("_urls_file", "Path to an additional URL file for {carrier}."),
        ("_urls", "Comma-separated URLs for {carrier}."),
    )
    for suffix, template in suffix_templates:
        if field_name.endswith(suffix):
            carrier = _humanize_name(field_name.removesuffix(suffix))
            return template.format(carrier=carrier)

    return f"{_humanize_name(field_name)} setting."
def _render_scope(env_name: str) -> str:
    """Return the Scope column value: the listed override, otherwise ``both``."""
    if env_name in SPECIAL_SCOPE_OVERRIDES:
        return SPECIAL_SCOPE_OVERRIDES[env_name]
    return "both"
def _render_sensitivity(env_name: str) -> str:
    """Return ``secret`` for names in ``SECRET_ENV_VARS``, else ``non-secret``."""
    if env_name not in SECRET_ENV_VARS:
        return "non-secret"
    return "secret"
def _render_type(annotation: Any) -> str:
if annotation is Any:
return "Any"
if annotation is type(None):
return "None"
origin = get_origin(annotation)
if origin in {Union, UnionType}:
return " | ".join(_render_type(arg) for arg in get_args(annotation))
if isinstance(annotation, type):
return annotation.__name__
text = str(annotation).replace("typing.", "").replace("pathlib.", "")
return text.replace("<class '", "").replace("'>", "")
def _render_default(value: Any) -> str:
if value is None:
return "`unset`"
if isinstance(value, bool):
return f"`{str(value).lower()}`"
if isinstance(value, Path):
text = value.as_posix()
if not value.is_absolute() and not text.startswith("./"):
text = f"./{text}"
return f"`{text}`"
if isinstance(value, str):
return '`""`' if value == "" else f"`{value}`"
return f"`{value}`"
def iter_reference_rows() -> list[dict[str, str]]:
    """Build one reference-table row per ``POTS_TRACKER_``-prefixed setting.

    Walks ``Settings.model_fields`` in declaration order. The environment name
    of a field is its alias when one is set, otherwise the field name itself;
    fields whose environment name lacks the ``POTS_TRACKER_`` prefix are
    skipped.

    Returns:
        A list of dicts with the keys ``name``, ``type``, ``default``,
        ``description``, ``scope`` and ``sensitivity``.
    """
    # NOTE(review): ``field.default`` is rendered as-is; confirm no included
    # field relies on a default factory or is required, since those would
    # render whatever placeholder the model stores there.
    named_fields = (
        (field.alias or field_name, field_name, field)
        for field_name, field in Settings.model_fields.items()
    )
    return [
        {
            "name": env_name,
            "type": _render_type(field.annotation),
            "default": _render_default(field.default),
            "description": _render_description(field_name, env_name),
            "scope": _render_scope(env_name),
            "sensitivity": _render_sensitivity(env_name),
        }
        for env_name, field_name, field in named_fields
        if env_name.startswith("POTS_TRACKER_")
    ]
def render_env_reference() -> str:
rows = iter_reference_rows()
lines = [
"# Environment Reference",
"",
"Generated from `backend/app/pots_shutdown_tracker/config.py`. Regenerate this file with `python backend/scripts/dump_env_reference.py > docs/ENV.md` after changing `Settings`.",
"",
"| Name | Type | Default | Description | Scope | Sensitivity |",
"| --- | --- | --- | --- | --- | --- |",
]
for row in rows:
lines.append(
f"| `{row['name']}` | {row['type']} | {row['default']} | {row['description']} | {row['scope']} | {row['sensitivity']} |"
)
lines.append("")
return "\n".join(lines)
def main() -> None:
    """Write the rendered environment reference to standard output."""
    document = render_env_reference()
    sys.stdout.write(document)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|