| from __future__ import annotations |
|
|
| import sys |
| from types import UnionType |
| from pathlib import Path |
| from typing import Any, Union, get_args, get_origin |
|
|
# BACKEND_ROOT is the parent of the directory that holds this file. It is put
# at the front of sys.path (only if not already listed) before the project
# import that follows, presumably so the script also runs when the package is
# not installed.
# NOTE(review): the generated header text cites
# `backend/app/pots_shutdown_tracker/config.py`; confirm that this root (and
# not `<root>/app`) is where `pots_shutdown_tracker` is importable from.
BACKEND_ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(BACKEND_ROOT)) == 0:
    sys.path.insert(0, str(BACKEND_ROOT))
|
|
| from pots_shutdown_tracker.config import Settings |
|
|
|
|
# Environment variable names that `_render_sensitivity` reports as "secret";
# every other name is reported as "non-secret".
# NOTE(review): `openai_api_key` has a description override in
# FIELD_DESCRIPTION_OVERRIDES but no entry here. If its environment name
# carries the POTS_TRACKER_ prefix it will be listed as non-secret -- confirm.
SECRET_ENV_VARS: set[str] = {
    "POTS_TRACKER_ADMIN_API_KEY",
    "POTS_TRACKER_DB_URL",
    "POTS_TRACKER_FCC_ECFS_API_KEY",
    "POTS_TRACKER_HF_STORAGE_TOKEN",
}
|
|
# Hand-written descriptions keyed by `Settings` field name (not by environment
# variable name). `_render_description` returns these verbatim and only falls
# back to suffix-based or generic wording for fields that are not listed here.
FIELD_DESCRIPTION_OVERRIDES: dict[str, str] = {
    "active_window_post_target_grace_days": "Days a notice can remain active after its target date.",
    "admin_api_key": "Shared secret required to authorize admin endpoints.",
    "admin_api_key_header": "Header name expected on admin requests.",
    "api_prefix": "Base URL prefix mounted in FastAPI.",
    "area_risk_airport_promotes_to_direct": (
        "When true, `<city> Airport` notices promote to a direct match for searches on `<city>`. "
        "Set false to surface them in the nearby-municipality section instead."
    ),
    "app_name": "Human-readable application name.",
    "auto_create_schema": "Create the database schema automatically at startup.",
    "bulk_lookup_concurrent_workers": "Maximum number of background bulk lookup jobs processed concurrently.",
    "bulk_lookup_file_size_mb": "Maximum uploaded bulk lookup workbook size in megabytes.",
    "bulk_lookup_max_rows": "Maximum data rows accepted in a bulk lookup workbook.",
    "bulk_lookup_retention_days": "Days to retain bulk lookup input and output blobs before cleanup.",
    "cors_allow_origins": "Comma-separated list of allowed browser origins.",
    "db_max_overflow": "Extra SQLAlchemy pool connections allowed above the base size.",
    "db_pool_pre_ping": "Ping pooled database connections before use.",
    "db_pool_recycle_seconds": "Lifetime of pooled database connections before recycle.",
    "db_pool_size": "Base SQLAlchemy database pool size.",
    "db_pool_timeout_seconds": "Seconds to wait for a pooled database connection.",
    "db_url": "Database connection string.",
    "enable_ai": "Enable AI-backed parsing, summarization, and search helpers.",
    "enable_weekly_jobs": "Enable the weekly APScheduler job set.",
    "fcc_ecfs_api_key": "Optional ECFS API key reserved for FCC watch discovery.",
    "fcc_ecfs_base_url": "Base URL for the FCC ECFS public API.",
    "fcc_watch_lookback_months": "Historical lookback window for FCC watch backfill phases.",
    "fcc_watch_proceedings": "Comma-separated ECFS proceeding numbers for targeted FCC watch scans.",
    "fetch_max_content_length_mb": "Maximum fetched body size in megabytes.",
    "fetch_read_timeout_seconds": "Read timeout for fetched responses.",
    "fetch_timeout_seconds": "Overall fetch timeout in seconds.",
    "frontend_dist_path": "Path to the built frontend bundle served by the app.",
    "hf_storage_path_prefix": "Path prefix used for stored blobs in the dataset repo.",
    "hf_storage_require_private": "Refuse to use a public Hugging Face dataset repo.",
    "hf_storage_repo_id": "Hugging Face dataset repo that stores crawler blobs.",
    "hf_storage_revision": "Revision used for Hugging Face dataset uploads and downloads.",
    "hf_storage_token": "Write token for the Hugging Face dataset repo.",
    "lookback_months": "Active corpus lookback window in months.",
    "log_level": "Application log level.",
    "openai_api_key": "OpenAI API key used by AI features.",
    "query_embedding_cache_enabled": "Enable caching for query embeddings.",
    "query_embedding_cache_size": "Maximum number of cached query embeddings.",
    "request_timing_enabled": "Emit request timing logs.",
    "run_migrations_on_startup": "Run Alembic migrations during app startup.",
    "scheduler_enabled_instances": "Number of instances allowed to run scheduler jobs.",
    "search_candidate_limit": "Maximum number of candidate notices considered during search.",
    "search_refinement_limit": "Maximum number of notices retained after search refinement.",
    "search_prewarm_enabled": "Enable search prewarm at startup.",
    "search_prewarm_queries": "Comma-separated search queries used for prewarming.",
    "search_result_cache_enabled": "Enable caching for search responses.",
    "search_result_cache_size": "Maximum number of cached search responses.",
    "search_result_cache_ttl_seconds": "TTL for cached search responses in seconds.",
    "search_trace_enabled": "Emit detailed search trace logs.",
    "search_vector_candidate_limit": "Maximum number of vector candidates considered during search.",
    "slow_request_threshold_ms": "Threshold for logging slow requests in milliseconds.",
    "source_coverage_matrix_file": "Path to the source coverage matrix JSON file.",
    "startup_db_wait_seconds": "Maximum time to wait for the database during startup.",
    "storage_backend": "Select the storage backend (`filesystem` or `huggingface_dataset`).",
    "storage_path": "Local storage and cache root.",
    "serve_frontend": "Serve the frontend bundle from FastAPI.",
    "ops_presets_file": "Path to the curated ops preset list.",
    "timezone": "Application timezone.",
    "trust_matrix_empty_threshold": "Maximum empty-matrix ratio before queryability is disabled.",
    "trust_stale_days": "Days after which the corpus is considered stale.",
    "user_agent": "User-Agent string used for outbound HTTP requests.",
    "weekly_schedule": "Cron expression for the weekly job schedule.",
}
|
|
# Scope column values keyed by environment variable name. `_render_scope`
# reports "both" for any name that is not listed here.
SPECIAL_SCOPE_OVERRIDES: dict[str, str] = {
    "POTS_TRACKER_ADMIN_API_KEY": "hosted",
    "POTS_TRACKER_CORS_ALLOW_ORIGINS": "hosted",
    "POTS_TRACKER_FCC_ECFS_API_KEY": "hosted",
    "POTS_TRACKER_FCC_ECFS_BASE_URL": "hosted",
    "POTS_TRACKER_FCC_WATCH_LOOKBACK_MONTHS": "hosted",
    "POTS_TRACKER_FCC_WATCH_PROCEEDINGS": "hosted",
    "POTS_TRACKER_FRONTEND_DIST_PATH": "hosted",
    "POTS_TRACKER_HF_STORAGE_PATH_PREFIX": "hosted",
    "POTS_TRACKER_HF_STORAGE_REPO_ID": "hosted",
    "POTS_TRACKER_HF_STORAGE_REQUIRE_PRIVATE": "hosted",
    "POTS_TRACKER_HF_STORAGE_REVISION": "hosted",
    "POTS_TRACKER_HF_STORAGE_TOKEN": "hosted",
    "POTS_TRACKER_RUN_MIGRATIONS_ON_STARTUP": "hosted",
    "POTS_TRACKER_SERVE_FRONTEND": "hosted",
}
|
|
# Exact display text for individual underscore-separated name tokens.
# `_humanize_token` consults this table first, so entries here bypass the
# default capitalization (which is why lowercase forms such as "lbs", "ops"
# and "p90" are listed explicitly).
TOKEN_DISPLAY: dict[str, str] = {
    "ai": "AI",
    "api": "API",
    "att": "AT&T",
    "clli": "CLLI",
    "db": "DB",
    "fcc": "FCC",
    "hf": "HF",
    "id": "ID",
    "ip": "IP",
    "json": "JSON",
    "lbs": "lbs",
    "ops": "ops",
    "p90": "p90",
    "png": "PNG",
    "sql": "SQL",
    "ttl": "TTL",
    "ui": "UI",
    "url": "URL",
    "urls": "URLs",
}
|
|
|
|
def _humanize_token(token: str) -> str:
    """Return the display form of one name token.

    A token listed in ``TOKEN_DISPLAY`` maps to its fixed display text.
    All-digit and all-uppercase tokens are returned unchanged. Anything else
    has hyphens replaced by spaces and is capitalized (first character upper
    case, the rest lower case).
    """
    fixed_form = TOKEN_DISPLAY.get(token)
    if fixed_form is not None:
        return fixed_form
    # Numbers and tokens that are already upper case are shown verbatim.
    if token.isdigit() or token.isupper():
        return token
    spaced = token.replace("-", " ")
    return spaced.capitalize()
|
|
|
|
def _humanize_name(name: str) -> str:
    """Turn a snake_case *name* into space-separated display words.

    The name is split on underscores, empty pieces (from leading, trailing or
    doubled underscores) are dropped, and each remaining piece is passed
    through ``_humanize_token``.
    """
    words = [_humanize_token(piece) for piece in name.split("_") if piece]
    return " ".join(words)
|
|
|
|
def _render_description(field_name: str, env_name: str) -> str:
    """Return the description text for a settings field.

    Resolution order:

    1. An explicit entry in ``FIELD_DESCRIPTION_OVERRIDES``.
    2. A templated sentence chosen by the field name's suffix, where the part
       before the suffix is humanized and used as the carrier name.
    3. A generic ``"<Humanized name> setting."`` sentence.

    Args:
        field_name: Name of the field on ``Settings``.
        env_name: Environment variable name. Accepted for call-site symmetry
            with the other ``_render_*`` helpers; this function does not read
            it.
    """
    explicit = FIELD_DESCRIPTION_OVERRIDES.get(field_name)
    if explicit is not None:
        return explicit

    # Checked top to bottom, so longer suffixes must precede the shorter
    # suffixes they end with (e.g. "_document_urls_file" before "_urls_file",
    # and every "*_urls" variant before the bare "_urls").
    suffix_templates = (
        ("_index_urls", "Comma-separated index URLs for {carrier}."),
        ("_tracker_urls", "Comma-separated tracker URLs for {carrier}."),
        ("_seed_documents_file", "Path to the seed document URL file for {carrier}."),
        ("_document_urls_file", "Path to extra document URLs for {carrier}."),
        ("_document_urls", "Comma-separated explicit document URLs for {carrier}."),
        ("_urls_file", "Path to an additional URL file for {carrier}."),
        ("_urls", "Comma-separated URLs for {carrier}."),
    )
    for suffix, template in suffix_templates:
        if field_name.endswith(suffix):
            carrier = _humanize_name(field_name.removesuffix(suffix))
            return template.format(carrier=carrier)

    return f"{_humanize_name(field_name)} setting."
|
|
|
|
def _render_scope(env_name: str) -> str:
    """Return the scope label for *env_name*.

    Names listed in ``SPECIAL_SCOPE_OVERRIDES`` use the value recorded there;
    every other name is labelled ``"both"``.
    """
    if env_name in SPECIAL_SCOPE_OVERRIDES:
        return SPECIAL_SCOPE_OVERRIDES[env_name]
    return "both"
|
|
|
|
def _render_sensitivity(env_name: str) -> str:
    """Return ``"secret"`` if *env_name* is in ``SECRET_ENV_VARS``, else ``"non-secret"``."""
    if env_name in SECRET_ENV_VARS:
        return "secret"
    return "non-secret"
|
|
|
|
| def _render_type(annotation: Any) -> str: |
| if annotation is Any: |
| return "Any" |
| if annotation is type(None): |
| return "None" |
| origin = get_origin(annotation) |
| if origin in {Union, UnionType}: |
| return " | ".join(_render_type(arg) for arg in get_args(annotation)) |
| if isinstance(annotation, type): |
| return annotation.__name__ |
| text = str(annotation).replace("typing.", "").replace("pathlib.", "") |
| return text.replace("<class '", "").replace("'>", "") |
|
|
|
|
| def _render_default(value: Any) -> str: |
| if value is None: |
| return "`unset`" |
| if isinstance(value, bool): |
| return f"`{str(value).lower()}`" |
| if isinstance(value, Path): |
| text = value.as_posix() |
| if not value.is_absolute() and not text.startswith("./"): |
| text = f"./{text}" |
| return f"`{text}`" |
| if isinstance(value, str): |
| return '`""`' if value == "" else f"`{value}`" |
| return f"`{value}`" |
|
|
|
|
def iter_reference_rows() -> list[dict[str, str]]:
    """Build one reference row per documented ``Settings`` field.

    Each field in ``Settings.model_fields`` is identified by its alias, or by
    its field name when no alias is set. Fields whose resulting name does not
    start with ``POTS_TRACKER_`` are left out.

    Returns:
        A list (in ``model_fields`` order) of dicts with the keys ``name``,
        ``type``, ``default``, ``description``, ``scope`` and ``sensitivity``,
        all already rendered to text.
    """
    candidates = (
        (field_name, field, field.alias or field_name)
        for field_name, field in Settings.model_fields.items()
    )
    return [
        {
            "name": env_name,
            "type": _render_type(field.annotation),
            "default": _render_default(field.default),
            "description": _render_description(field_name, env_name),
            "scope": _render_scope(env_name),
            "sensitivity": _render_sensitivity(env_name),
        }
        for field_name, field, env_name in candidates
        if env_name.startswith("POTS_TRACKER_")
    ]
|
|
|
|
def _escape_table_cell(text: str) -> str:
    """Backslash-escape pipes so *text* stays inside a single Markdown table cell."""
    return text.replace("|", "\\|")


def render_env_reference() -> str:
    """Render the full environment reference as a Markdown document.

    The document consists of a title, a note on how to regenerate it, and one
    table row per entry returned by ``iter_reference_rows``.

    Returns:
        The Markdown text, ending with a single trailing newline.
    """
    lines = [
        "# Environment Reference",
        "",
        (
            "Generated from `backend/app/pots_shutdown_tracker/config.py`. "
            "Regenerate this file with "
            "`python backend/scripts/dump_env_reference.py > docs/ENV.md` "
            "after changing `Settings`."
        ),
        "",
        "| Name | Type | Default | Description | Scope | Sensitivity |",
        "| --- | --- | --- | --- | --- | --- |",
    ]
    for row in iter_reference_rows():
        # A literal "|" inside a cell (for example the union type
        # "str | None") would be read as a column delimiter and shift every
        # later column of that row, so each cell is escaped before it is
        # placed in the table.
        cells = {key: _escape_table_cell(value) for key, value in row.items()}
        lines.append(
            f"| `{cells['name']}` | {cells['type']} | {cells['default']} | "
            f"{cells['description']} | {cells['scope']} | {cells['sensitivity']} |"
        )
    # The empty final element makes the joined text end with a newline.
    lines.append("")
    return "\n".join(lines)
|
|
|
|
def main() -> None:
    """Write the generated environment reference to standard output."""
    document = render_env_reference()
    sys.stdout.write(document)


# Only emit the reference when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|