"""Helpers and lookup tables for rendering the environment-variable reference."""

from __future__ import annotations

import sys
from types import UnionType
from pathlib import Path
from typing import Any, Union, get_args, get_origin

# Make the backend package importable when this file is run as a script:
# the directory two levels above this file is put at the front of sys.path.
BACKEND_ROOT = Path(__file__).resolve().parents[1]
if str(BACKEND_ROOT) not in sys.path:
    sys.path.insert(0, str(BACKEND_ROOT))

from pots_shutdown_tracker.config import Settings  # noqa: E402

# Environment variables reported with "secret" sensitivity.
SECRET_ENV_VARS = {
    "POTS_TRACKER_ADMIN_API_KEY",
    "POTS_TRACKER_DB_URL",
    "POTS_TRACKER_FCC_ECFS_API_KEY",
    "POTS_TRACKER_HF_STORAGE_TOKEN",
}

# Hand-written descriptions keyed by Settings field name; these win over the
# suffix-derived descriptions produced by _render_description.
FIELD_DESCRIPTION_OVERRIDES = {
    "active_window_post_target_grace_days": "Days a notice can remain active after its target date.",
    "admin_api_key": "Shared secret required to authorize admin endpoints.",
    "admin_api_key_header": "Header name expected on admin requests.",
    "api_prefix": "Base URL prefix mounted in FastAPI.",
    # NOTE(review): the backticked placeholders below look incomplete
    # (" Airport" and an empty code span) -- confirm the intended wording.
    "area_risk_airport_promotes_to_direct": (
        "When true, ` Airport` notices promote to a direct match for searches on ``. "
        "Set false to surface them in the nearby-municipality section instead."
    ),
    "app_name": "Human-readable application name.",
    "auto_create_schema": "Create the database schema automatically at startup.",
    "bulk_lookup_concurrent_workers": "Maximum number of background bulk lookup jobs processed concurrently.",
    "bulk_lookup_file_size_mb": "Maximum uploaded bulk lookup workbook size in megabytes.",
    "bulk_lookup_max_rows": "Maximum data rows accepted in a bulk lookup workbook.",
    "bulk_lookup_retention_days": "Days to retain bulk lookup input and output blobs before cleanup.",
    "cors_allow_origins": "Comma-separated list of allowed browser origins.",
    "db_max_overflow": "Extra SQLAlchemy pool connections allowed above the base size.",
    "db_pool_pre_ping": "Ping pooled database connections before use.",
    "db_pool_recycle_seconds": "Lifetime of pooled database connections before recycle.",
    "db_pool_size": "Base SQLAlchemy database pool size.",
    "db_pool_timeout_seconds": "Seconds to wait for a pooled database connection.",
    "db_url": "Database connection string.",
    "enable_ai": "Enable AI-backed parsing, summarization, and search helpers.",
    "enable_weekly_jobs": "Enable the weekly APScheduler job set.",
    "fcc_ecfs_api_key": "Optional ECFS API key reserved for FCC watch discovery.",
    "fcc_ecfs_base_url": "Base URL for the FCC ECFS public API.",
    "fcc_watch_lookback_months": "Historical lookback window for FCC watch backfill phases.",
    "fcc_watch_proceedings": "Comma-separated ECFS proceeding numbers for targeted FCC watch scans.",
    "fetch_max_content_length_mb": "Maximum fetched body size in megabytes.",
    "fetch_read_timeout_seconds": "Read timeout for fetched responses.",
    "fetch_timeout_seconds": "Overall fetch timeout in seconds.",
    "frontend_dist_path": "Path to the built frontend bundle served by the app.",
    "hf_storage_path_prefix": "Path prefix used for stored blobs in the dataset repo.",
    "hf_storage_require_private": "Refuse to use a public Hugging Face dataset repo.",
    "hf_storage_repo_id": "Hugging Face dataset repo that stores crawler blobs.",
    "hf_storage_revision": "Revision used for Hugging Face dataset uploads and downloads.",
    "hf_storage_token": "Write token for the Hugging Face dataset repo.",
    "lookback_months": "Active corpus lookback window in months.",
    "log_level": "Application log level.",
    "openai_api_key": "OpenAI API key used by AI features.",
    "query_embedding_cache_enabled": "Enable caching for query embeddings.",
    "query_embedding_cache_size": "Maximum number of cached query embeddings.",
    "request_timing_enabled": "Emit request timing logs.",
    "run_migrations_on_startup": "Run Alembic migrations during app startup.",
    "scheduler_enabled_instances": "Number of instances allowed to run scheduler jobs.",
    "search_candidate_limit": "Maximum number of candidate notices considered during search.",
    "search_refinement_limit": "Maximum number of notices retained after search refinement.",
    "search_prewarm_enabled": "Enable search prewarm at startup.",
    "search_prewarm_queries": "Comma-separated search queries used for prewarming.",
    "search_result_cache_enabled": "Enable caching for search responses.",
    "search_result_cache_size": "Maximum number of cached search responses.",
    "search_result_cache_ttl_seconds": "TTL for cached search responses in seconds.",
    "search_trace_enabled": "Emit detailed search trace logs.",
    "search_vector_candidate_limit": "Maximum number of vector candidates considered during search.",
    "slow_request_threshold_ms": "Threshold for logging slow requests in milliseconds.",
    "source_coverage_matrix_file": "Path to the source coverage matrix JSON file.",
    "startup_db_wait_seconds": "Maximum time to wait for the database during startup.",
    "storage_backend": "Select the storage backend (`filesystem` or `huggingface_dataset`).",
    "storage_path": "Local storage and cache root.",
    "serve_frontend": "Serve the frontend bundle from FastAPI.",
    "ops_presets_file": "Path to the curated ops preset list.",
    "timezone": "Application timezone.",
    "trust_matrix_empty_threshold": "Maximum empty-matrix ratio before queryability is disabled.",
    "trust_stale_days": "Days after which the corpus is considered stale.",
    "user_agent": "User-Agent string used for outbound HTTP requests.",
    "weekly_schedule": "Cron expression for the weekly job schedule.",
}

# Scope labels keyed by environment variable name; names not listed here are
# reported with the default scope.
SPECIAL_SCOPE_OVERRIDES = {
    "POTS_TRACKER_ADMIN_API_KEY": "hosted",
    "POTS_TRACKER_CORS_ALLOW_ORIGINS": "hosted",
    "POTS_TRACKER_FCC_ECFS_API_KEY": "hosted",
    "POTS_TRACKER_FCC_ECFS_BASE_URL": "hosted",
    "POTS_TRACKER_FCC_WATCH_LOOKBACK_MONTHS": "hosted",
    "POTS_TRACKER_FCC_WATCH_PROCEEDINGS": "hosted",
    "POTS_TRACKER_FRONTEND_DIST_PATH": "hosted",
    "POTS_TRACKER_HF_STORAGE_PATH_PREFIX": "hosted",
    "POTS_TRACKER_HF_STORAGE_REPO_ID": "hosted",
    "POTS_TRACKER_HF_STORAGE_REQUIRE_PRIVATE": "hosted",
    "POTS_TRACKER_HF_STORAGE_REVISION": "hosted",
    "POTS_TRACKER_HF_STORAGE_TOKEN": "hosted",
    "POTS_TRACKER_RUN_MIGRATIONS_ON_STARTUP": "hosted",
    "POTS_TRACKER_SERVE_FRONTEND": "hosted",
}

# Name tokens with a fixed display form (acronyms, brand names, units) that
# must not go through plain capitalization.
TOKEN_DISPLAY = {
    "ai": "AI",
    "api": "API",
    "att": "AT&T",
    "clli": "CLLI",
    "db": "DB",
    "fcc": "FCC",
    "hf": "HF",
    "id": "ID",
    "ip": "IP",
    "json": "JSON",
    "lbs": "lbs",
    "ops": "ops",
    "p90": "p90",
    "png": "PNG",
    "sql": "SQL",
    "ttl": "TTL",
    "ui": "UI",
    "url": "URL",
    "urls": "URLs",
}


def _humanize_token(token: str) -> str:
    """Return the display form of a single underscore-delimited name token.

    Tokens listed in ``TOKEN_DISPLAY`` use their mapped spelling; all-digit
    and all-uppercase tokens are returned unchanged; anything else has its
    hyphens turned into spaces and is capitalized.
    """
    fixed_form = TOKEN_DISPLAY.get(token)
    if fixed_form is not None:
        return fixed_form
    if token.isdigit() or token.isupper():
        return token
    return token.replace("-", " ").capitalize()


def _humanize_name(name: str) -> str:
    """Turn a snake_case name into space-separated display words.

    Empty pieces (from leading, trailing, or doubled underscores) are dropped.
    """
    words = [_humanize_token(piece) for piece in name.split("_") if piece]
    return " ".join(words)


def _render_description(field_name: str, env_name: str) -> str:
    """Return the description text for a settings field.

    Args:
        field_name: Settings field name; drives both the override lookup and
            the suffix-based fallback.
        env_name: Environment variable name. Not read by this implementation.

    Returns:
        The entry from ``FIELD_DESCRIPTION_OVERRIDES`` when one exists,
        otherwise a sentence derived from the first matching name suffix,
        otherwise ``"<Humanized name> setting."``.
    """
    explicit = FIELD_DESCRIPTION_OVERRIDES.get(field_name)
    if explicit is not None:
        return explicit

    # Order matters: longer, more specific suffixes must be tried before the
    # shorter suffixes they end with (e.g. "_document_urls" before "_urls").
    suffix_templates = (
        ("_index_urls", "Comma-separated index URLs for {carrier}."),
        ("_tracker_urls", "Comma-separated tracker URLs for {carrier}."),
        ("_seed_documents_file", "Path to the seed document URL file for {carrier}."),
        ("_document_urls_file", "Path to extra document URLs for {carrier}."),
        ("_document_urls", "Comma-separated explicit document URLs for {carrier}."),
        ("_urls_file", "Path to an additional URL file for {carrier}."),
        ("_urls", "Comma-separated URLs for {carrier}."),
    )
    for suffix, template in suffix_templates:
        if field_name.endswith(suffix):
            carrier = _humanize_name(field_name.removesuffix(suffix))
            return template.format(carrier=carrier)

    return f"{_humanize_name(field_name)} setting."
def _render_scope(env_name: str) -> str: return SPECIAL_SCOPE_OVERRIDES.get(env_name, "both") def _render_sensitivity(env_name: str) -> str: return "secret" if env_name in SECRET_ENV_VARS else "non-secret" def _render_type(annotation: Any) -> str: if annotation is Any: return "Any" if annotation is type(None): return "None" origin = get_origin(annotation) if origin in {Union, UnionType}: return " | ".join(_render_type(arg) for arg in get_args(annotation)) if isinstance(annotation, type): return annotation.__name__ text = str(annotation).replace("typing.", "").replace("pathlib.", "") return text.replace("", "") def _render_default(value: Any) -> str: if value is None: return "`unset`" if isinstance(value, bool): return f"`{str(value).lower()}`" if isinstance(value, Path): text = value.as_posix() if not value.is_absolute() and not text.startswith("./"): text = f"./{text}" return f"`{text}`" if isinstance(value, str): return '`""`' if value == "" else f"`{value}`" return f"`{value}`" def iter_reference_rows() -> list[dict[str, str]]: rows: list[dict[str, str]] = [] for field_name, field in Settings.model_fields.items(): env_name = field.alias or field_name if not env_name.startswith("POTS_TRACKER_"): continue rows.append( { "name": env_name, "type": _render_type(field.annotation), "default": _render_default(field.default), "description": _render_description(field_name, env_name), "scope": _render_scope(env_name), "sensitivity": _render_sensitivity(env_name), } ) return rows def render_env_reference() -> str: rows = iter_reference_rows() lines = [ "# Environment Reference", "", "Generated from `backend/app/pots_shutdown_tracker/config.py`. 
Regenerate this file with `python backend/scripts/dump_env_reference.py > docs/ENV.md` after changing `Settings`.", "", "| Name | Type | Default | Description | Scope | Sensitivity |", "| --- | --- | --- | --- | --- | --- |", ] for row in rows: lines.append( f"| `{row['name']}` | {row['type']} | {row['default']} | {row['description']} | {row['scope']} | {row['sensitivity']} |" ) lines.append("") return "\n".join(lines) def main() -> None: sys.stdout.write(render_env_reference()) if __name__ == "__main__": main()