# pots-shutdown-tracker/backend/scripts/dump_env_reference.py
# github-actions
# Deploy e9638c4ddc3ed29a18779b38f43922aa3139b311
# 611bfd9
from __future__ import annotations
import sys
from types import UnionType
from pathlib import Path
from typing import Any, Union, get_args, get_origin
# Grandparent directory of this script: parents[0] is the directory that
# contains the file, parents[1] is the directory above that.
BACKEND_ROOT = Path(__file__).resolve().parents[1]
# Prepend BACKEND_ROOT to sys.path (only if it is not already listed),
# presumably so the project import that follows resolves when this file is
# executed directly as a script.
# NOTE(review): the generated document header cites
# `backend/app/pots_shutdown_tracker/config.py`; confirm that BACKEND_ROOT
# (rather than a subdirectory of it) is the right import root.
if str(BACKEND_ROOT) not in sys.path:
    sys.path.insert(0, str(BACKEND_ROOT))
from pots_shutdown_tracker.config import Settings
# Environment variable names reported as "secret" in the Sensitivity column;
# every other variable is reported as "non-secret" (see _render_sensitivity).
# NOTE(review): `openai_api_key` has a description override but no entry
# here. If its env name carries the POTS_TRACKER_ prefix it would be listed
# as non-secret -- confirm against Settings.
SECRET_ENV_VARS = {
    "POTS_TRACKER_ADMIN_API_KEY",
    "POTS_TRACKER_DB_URL",
    "POTS_TRACKER_FCC_ECFS_API_KEY",
    "POTS_TRACKER_HF_STORAGE_TOKEN",
}
# Hand-written Description column text, keyed by Settings field name (not by
# environment variable name). _render_description consults this mapping first
# and only falls back to its suffix-based templates for fields missing here.
FIELD_DESCRIPTION_OVERRIDES = {
    "active_window_post_target_grace_days": "Days a notice can remain active after its target date.",
    "admin_api_key": "Shared secret required to authorize admin endpoints.",
    "admin_api_key_header": "Header name expected on admin requests.",
    "api_prefix": "Base URL prefix mounted in FastAPI.",
    "area_risk_airport_promotes_to_direct": (
        "When true, `<city> Airport` notices promote to a direct match for searches on `<city>`. "
        "Set false to surface them in the nearby-municipality section instead."
    ),
    "app_name": "Human-readable application name.",
    "auto_create_schema": "Create the database schema automatically at startup.",
    "bulk_lookup_concurrent_workers": "Maximum number of background bulk lookup jobs processed concurrently.",
    "bulk_lookup_file_size_mb": "Maximum uploaded bulk lookup workbook size in megabytes.",
    "bulk_lookup_max_rows": "Maximum data rows accepted in a bulk lookup workbook.",
    "bulk_lookup_retention_days": "Days to retain bulk lookup input and output blobs before cleanup.",
    "cors_allow_origins": "Comma-separated list of allowed browser origins.",
    "db_max_overflow": "Extra SQLAlchemy pool connections allowed above the base size.",
    "db_pool_pre_ping": "Ping pooled database connections before use.",
    "db_pool_recycle_seconds": "Lifetime of pooled database connections before recycle.",
    "db_pool_size": "Base SQLAlchemy database pool size.",
    "db_pool_timeout_seconds": "Seconds to wait for a pooled database connection.",
    "db_url": "Database connection string.",
    "enable_ai": "Enable AI-backed parsing, summarization, and search helpers.",
    "enable_weekly_jobs": "Enable the weekly APScheduler job set.",
    "fcc_ecfs_api_key": "Optional ECFS API key reserved for FCC watch discovery.",
    "fcc_ecfs_base_url": "Base URL for the FCC ECFS public API.",
    "fcc_watch_lookback_months": "Historical lookback window for FCC watch backfill phases.",
    "fcc_watch_proceedings": "Comma-separated ECFS proceeding numbers for targeted FCC watch scans.",
    "fetch_max_content_length_mb": "Maximum fetched body size in megabytes.",
    "fetch_read_timeout_seconds": "Read timeout for fetched responses.",
    "fetch_timeout_seconds": "Overall fetch timeout in seconds.",
    "frontend_dist_path": "Path to the built frontend bundle served by the app.",
    "hf_storage_path_prefix": "Path prefix used for stored blobs in the dataset repo.",
    "hf_storage_require_private": "Refuse to use a public Hugging Face dataset repo.",
    "hf_storage_repo_id": "Hugging Face dataset repo that stores crawler blobs.",
    "hf_storage_revision": "Revision used for Hugging Face dataset uploads and downloads.",
    "hf_storage_token": "Write token for the Hugging Face dataset repo.",
    "lookback_months": "Active corpus lookback window in months.",
    "log_level": "Application log level.",
    "openai_api_key": "OpenAI API key used by AI features.",
    "query_embedding_cache_enabled": "Enable caching for query embeddings.",
    "query_embedding_cache_size": "Maximum number of cached query embeddings.",
    "request_timing_enabled": "Emit request timing logs.",
    "run_migrations_on_startup": "Run Alembic migrations during app startup.",
    "scheduler_enabled_instances": "Number of instances allowed to run scheduler jobs.",
    "search_candidate_limit": "Maximum number of candidate notices considered during search.",
    "search_refinement_limit": "Maximum number of notices retained after search refinement.",
    "search_prewarm_enabled": "Enable search prewarm at startup.",
    "search_prewarm_queries": "Comma-separated search queries used for prewarming.",
    "search_result_cache_enabled": "Enable caching for search responses.",
    "search_result_cache_size": "Maximum number of cached search responses.",
    "search_result_cache_ttl_seconds": "TTL for cached search responses in seconds.",
    "search_trace_enabled": "Emit detailed search trace logs.",
    "search_vector_candidate_limit": "Maximum number of vector candidates considered during search.",
    "slow_request_threshold_ms": "Threshold for logging slow requests in milliseconds.",
    "source_coverage_matrix_file": "Path to the source coverage matrix JSON file.",
    "startup_db_wait_seconds": "Maximum time to wait for the database during startup.",
    "storage_backend": "Select the storage backend (`filesystem` or `huggingface_dataset`).",
    "storage_path": "Local storage and cache root.",
    "serve_frontend": "Serve the frontend bundle from FastAPI.",
    "ops_presets_file": "Path to the curated ops preset list.",
    "timezone": "Application timezone.",
    "trust_matrix_empty_threshold": "Maximum empty-matrix ratio before queryability is disabled.",
    "trust_stale_days": "Days after which the corpus is considered stale.",
    "user_agent": "User-Agent string used for outbound HTTP requests.",
    "weekly_schedule": "Cron expression for the weekly job schedule.",
}
# Scope column overrides keyed by environment variable name. Variables not
# listed here are reported with the scope "both" (see _render_scope).
SPECIAL_SCOPE_OVERRIDES = {
    "POTS_TRACKER_ADMIN_API_KEY": "hosted",
    "POTS_TRACKER_CORS_ALLOW_ORIGINS": "hosted",
    "POTS_TRACKER_FCC_ECFS_API_KEY": "hosted",
    "POTS_TRACKER_FCC_ECFS_BASE_URL": "hosted",
    "POTS_TRACKER_FCC_WATCH_LOOKBACK_MONTHS": "hosted",
    "POTS_TRACKER_FCC_WATCH_PROCEEDINGS": "hosted",
    "POTS_TRACKER_FRONTEND_DIST_PATH": "hosted",
    "POTS_TRACKER_HF_STORAGE_PATH_PREFIX": "hosted",
    "POTS_TRACKER_HF_STORAGE_REPO_ID": "hosted",
    "POTS_TRACKER_HF_STORAGE_REQUIRE_PRIVATE": "hosted",
    "POTS_TRACKER_HF_STORAGE_REVISION": "hosted",
    "POTS_TRACKER_HF_STORAGE_TOKEN": "hosted",
    "POTS_TRACKER_RUN_MIGRATIONS_ON_STARTUP": "hosted",
    "POTS_TRACKER_SERVE_FRONTEND": "hosted",
}
# Exact display text for individual name tokens (the pieces of a field name
# between underscores). _humanize_token checks this mapping before applying
# its generic capitalization, so entries here can force acronyms to upper
# case ("api" -> "API") or keep a token lower case ("lbs", "ops", "p90").
TOKEN_DISPLAY = {
    "ai": "AI",
    "api": "API",
    "att": "AT&T",
    "clli": "CLLI",
    "db": "DB",
    "fcc": "FCC",
    "hf": "HF",
    "id": "ID",
    "ip": "IP",
    "json": "JSON",
    "lbs": "lbs",
    "ops": "ops",
    "p90": "p90",
    "png": "PNG",
    "sql": "SQL",
    "ttl": "TTL",
    "ui": "UI",
    "url": "URL",
    "urls": "URLs",
}
def _humanize_token(token: str) -> str:
    """Return the display form of a single name token.

    Tokens listed in ``TOKEN_DISPLAY`` use the mapped text. All-digit and
    all-uppercase tokens are returned unchanged. Anything else has hyphens
    turned into spaces and is capitalized (first character upper-cased, the
    rest lower-cased).
    """
    mapped = TOKEN_DISPLAY.get(token)
    if mapped is not None:
        return mapped
    if token.isdigit() or token.isupper():
        return token
    spaced = token.replace("-", " ")
    return spaced.capitalize()
def _humanize_name(name: str) -> str:
    """Turn an underscore-separated name into space-separated display words.

    Empty pieces (from leading, trailing, or doubled underscores) are
    dropped; each remaining piece is passed through ``_humanize_token``.
    """
    pieces = filter(None, name.split("_"))
    return " ".join(map(_humanize_token, pieces))
def _render_description(field_name: str, env_name: str) -> str:
    """Return the Description cell text for a settings field.

    Lookup order:
      1. An explicit entry in ``FIELD_DESCRIPTION_OVERRIDES``.
      2. The first matching suffix template below, with the part of the
         field name before the suffix humanized as the carrier label.
      3. A generic "<Humanized name> setting." fallback.

    ``env_name`` is accepted for signature symmetry with the other
    ``_render_*`` helpers; this function does not read it.
    """
    override = FIELD_DESCRIPTION_OVERRIDES.get(field_name)
    if override is not None:
        return override

    # Order matters: longer, more specific suffixes must be tried before the
    # shorter suffixes they end with (e.g. "_document_urls" before "_urls").
    suffix_templates = (
        ("_index_urls", "Comma-separated index URLs for {carrier}."),
        ("_tracker_urls", "Comma-separated tracker URLs for {carrier}."),
        ("_seed_documents_file", "Path to the seed document URL file for {carrier}."),
        ("_document_urls_file", "Path to extra document URLs for {carrier}."),
        ("_document_urls", "Comma-separated explicit document URLs for {carrier}."),
        ("_urls_file", "Path to an additional URL file for {carrier}."),
        ("_urls", "Comma-separated URLs for {carrier}."),
    )
    for suffix, template in suffix_templates:
        if field_name.endswith(suffix):
            carrier = _humanize_name(field_name.removesuffix(suffix))
            return template.format(carrier=carrier)

    return f"{_humanize_name(field_name)} setting."
def _render_scope(env_name: str) -> str:
    """Return the Scope cell: the override for *env_name*, else ``"both"``."""
    if env_name in SPECIAL_SCOPE_OVERRIDES:
        return SPECIAL_SCOPE_OVERRIDES[env_name]
    return "both"
def _render_sensitivity(env_name: str) -> str:
    """Return ``"secret"`` for names in ``SECRET_ENV_VARS``, else ``"non-secret"``."""
    if env_name in SECRET_ENV_VARS:
        return "secret"
    return "non-secret"
def _render_type(annotation: Any) -> str:
if annotation is Any:
return "Any"
if annotation is type(None):
return "None"
origin = get_origin(annotation)
if origin in {Union, UnionType}:
return " | ".join(_render_type(arg) for arg in get_args(annotation))
if isinstance(annotation, type):
return annotation.__name__
text = str(annotation).replace("typing.", "").replace("pathlib.", "")
return text.replace("<class '", "").replace("'>", "")
def _render_default(value: Any) -> str:
if value is None:
return "`unset`"
if isinstance(value, bool):
return f"`{str(value).lower()}`"
if isinstance(value, Path):
text = value.as_posix()
if not value.is_absolute() and not text.startswith("./"):
text = f"./{text}"
return f"`{text}`"
if isinstance(value, str):
return '`""`' if value == "" else f"`{value}`"
return f"`{value}`"
def iter_reference_rows() -> list[dict[str, str]]:
    """Build one table row per ``POTS_TRACKER_``-prefixed settings field.

    Iterates ``Settings.model_fields`` in declaration order. The environment
    variable name is the field's alias when set, otherwise the field name;
    fields whose resulting name lacks the ``POTS_TRACKER_`` prefix are
    skipped.

    Returns:
        A list of dicts with the keys ``name``, ``type``, ``default``,
        ``description``, ``scope`` and ``sensitivity``, all already rendered
        to strings.

    NOTE(review): ``field.default`` is rendered as-is; confirm how fields
    without a plain default (required, or using a default factory) should
    appear in the table.
    """
    candidates = (
        (field.alias or field_name, field_name, field)
        for field_name, field in Settings.model_fields.items()
    )
    return [
        {
            "name": env_name,
            "type": _render_type(field.annotation),
            "default": _render_default(field.default),
            "description": _render_description(field_name, env_name),
            "scope": _render_scope(env_name),
            "sensitivity": _render_sensitivity(env_name),
        }
        for env_name, field_name, field in candidates
        if env_name.startswith("POTS_TRACKER_")
    ]
def _escape_table_cell(text: str) -> str:
    """Escape pipe characters so *text* stays inside a single table cell."""
    return text.replace("|", "\\|")


def render_env_reference() -> str:
    """Render the full environment reference as a Markdown document.

    The document consists of a title, a short note on how to regenerate it,
    and a six-column table with one row per entry from
    ``iter_reference_rows()``.

    Returns:
        The Markdown text, ending with a trailing newline.
    """
    lines = [
        "# Environment Reference",
        "",
        "Generated from `backend/app/pots_shutdown_tracker/config.py`. Regenerate this file with `python backend/scripts/dump_env_reference.py > docs/ENV.md` after changing `Settings`.",
        "",
        "| Name | Type | Default | Description | Scope | Sensitivity |",
        "| --- | --- | --- | --- | --- | --- |",
    ]
    for row in iter_reference_rows():
        cells = (
            f"`{row['name']}`",
            row["type"],
            row["default"],
            row["description"],
            row["scope"],
            row["sensitivity"],
        )
        # A literal "|" in a cell (e.g. the type "str | None") would be read
        # as a column separator and shift every later column, so escape it.
        # Table syntax requires the escape even inside backtick code spans.
        escaped = " | ".join(_escape_table_cell(cell) for cell in cells)
        lines.append(f"| {escaped} |")
    # The empty last element makes the joined text end with a newline.
    lines.append("")
    return "\n".join(lines)
def main() -> None:
    """Write the rendered environment reference to standard output."""
    document = render_env_reference()
    sys.stdout.write(document)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()