Spaces:

crazycrazypete
/

pots-shutdown-tracker

Running

pots-shutdown-tracker / backend /scripts /dump_env_reference.py

github-actions

Deploy e9638c4ddc3ed29a18779b38f43922aa3139b311

611bfd9 about 1 month ago

11.1 kB

	from __future__ import annotations

	import sys
	from types import UnionType
	from pathlib import Path
	from typing import Any, Union, get_args, get_origin

	BACKEND_ROOT = Path(__file__).resolve().parents[1]
	if str(BACKEND_ROOT) not in sys.path:
	sys.path.insert(0, str(BACKEND_ROOT))

	from pots_shutdown_tracker.config import Settings


	# Environment variable names that `_render_sensitivity` labels "secret";
	# every name not listed here is labelled "non-secret".
	SECRET_ENV_VARS = {
	"POTS_TRACKER_ADMIN_API_KEY",
	"POTS_TRACKER_DB_URL",
	"POTS_TRACKER_FCC_ECFS_API_KEY",
	"POTS_TRACKER_HF_STORAGE_TOKEN",
	}

	# Hand-written Description text keyed by settings field name (not env var
	# name). `_render_description` returns these verbatim and only falls back to
	# its suffix-based templates for field names that are missing here.
	FIELD_DESCRIPTION_OVERRIDES = {
	"active_window_post_target_grace_days": "Days a notice can remain active after its target date.",
	"admin_api_key": "Shared secret required to authorize admin endpoints.",
	"admin_api_key_header": "Header name expected on admin requests.",
	"api_prefix": "Base URL prefix mounted in FastAPI.",
	"area_risk_airport_promotes_to_direct": (
	"When true, `<city> Airport` notices promote to a direct match for searches on `<city>`. "
	"Set false to surface them in the nearby-municipality section instead."
	),
	"app_name": "Human-readable application name.",
	"auto_create_schema": "Create the database schema automatically at startup.",
	"bulk_lookup_concurrent_workers": "Maximum number of background bulk lookup jobs processed concurrently.",
	"bulk_lookup_file_size_mb": "Maximum uploaded bulk lookup workbook size in megabytes.",
	"bulk_lookup_max_rows": "Maximum data rows accepted in a bulk lookup workbook.",
	"bulk_lookup_retention_days": "Days to retain bulk lookup input and output blobs before cleanup.",
	"cors_allow_origins": "Comma-separated list of allowed browser origins.",
	"db_max_overflow": "Extra SQLAlchemy pool connections allowed above the base size.",
	"db_pool_pre_ping": "Ping pooled database connections before use.",
	"db_pool_recycle_seconds": "Lifetime of pooled database connections before recycle.",
	"db_pool_size": "Base SQLAlchemy database pool size.",
	"db_pool_timeout_seconds": "Seconds to wait for a pooled database connection.",
	"db_url": "Database connection string.",
	"enable_ai": "Enable AI-backed parsing, summarization, and search helpers.",
	"enable_weekly_jobs": "Enable the weekly APScheduler job set.",
	"fcc_ecfs_api_key": "Optional ECFS API key reserved for FCC watch discovery.",
	"fcc_ecfs_base_url": "Base URL for the FCC ECFS public API.",
	"fcc_watch_lookback_months": "Historical lookback window for FCC watch backfill phases.",
	"fcc_watch_proceedings": "Comma-separated ECFS proceeding numbers for targeted FCC watch scans.",
	"fetch_max_content_length_mb": "Maximum fetched body size in megabytes.",
	"fetch_read_timeout_seconds": "Read timeout for fetched responses.",
	"fetch_timeout_seconds": "Overall fetch timeout in seconds.",
	"frontend_dist_path": "Path to the built frontend bundle served by the app.",
	"hf_storage_path_prefix": "Path prefix used for stored blobs in the dataset repo.",
	"hf_storage_require_private": "Refuse to use a public Hugging Face dataset repo.",
	"hf_storage_repo_id": "Hugging Face dataset repo that stores crawler blobs.",
	"hf_storage_revision": "Revision used for Hugging Face dataset uploads and downloads.",
	"hf_storage_token": "Write token for the Hugging Face dataset repo.",
	"lookback_months": "Active corpus lookback window in months.",
	"log_level": "Application log level.",
	"openai_api_key": "OpenAI API key used by AI features.",
	"query_embedding_cache_enabled": "Enable caching for query embeddings.",
	"query_embedding_cache_size": "Maximum number of cached query embeddings.",
	"request_timing_enabled": "Emit request timing logs.",
	"run_migrations_on_startup": "Run Alembic migrations during app startup.",
	"scheduler_enabled_instances": "Number of instances allowed to run scheduler jobs.",
	"search_candidate_limit": "Maximum number of candidate notices considered during search.",
	"search_refinement_limit": "Maximum number of notices retained after search refinement.",
	"search_prewarm_enabled": "Enable search prewarm at startup.",
	"search_prewarm_queries": "Comma-separated search queries used for prewarming.",
	"search_result_cache_enabled": "Enable caching for search responses.",
	"search_result_cache_size": "Maximum number of cached search responses.",
	"search_result_cache_ttl_seconds": "TTL for cached search responses in seconds.",
	"search_trace_enabled": "Emit detailed search trace logs.",
	"search_vector_candidate_limit": "Maximum number of vector candidates considered during search.",
	"slow_request_threshold_ms": "Threshold for logging slow requests in milliseconds.",
	"source_coverage_matrix_file": "Path to the source coverage matrix JSON file.",
	"startup_db_wait_seconds": "Maximum time to wait for the database during startup.",
	"storage_backend": "Select the storage backend (`filesystem` or `huggingface_dataset`).",
	"storage_path": "Local storage and cache root.",
	"serve_frontend": "Serve the frontend bundle from FastAPI.",
	"ops_presets_file": "Path to the curated ops preset list.",
	"timezone": "Application timezone.",
	"trust_matrix_empty_threshold": "Maximum empty-matrix ratio before queryability is disabled.",
	"trust_stale_days": "Days after which the corpus is considered stale.",
	"user_agent": "User-Agent string used for outbound HTTP requests.",
	"weekly_schedule": "Cron expression for the weekly job schedule.",
	}

	# Scope column value per environment variable name. `_render_scope` looks
	# names up here and reports "both" for any variable that is not listed.
	SPECIAL_SCOPE_OVERRIDES = {
	"POTS_TRACKER_ADMIN_API_KEY": "hosted",
	"POTS_TRACKER_CORS_ALLOW_ORIGINS": "hosted",
	"POTS_TRACKER_FCC_ECFS_API_KEY": "hosted",
	"POTS_TRACKER_FCC_ECFS_BASE_URL": "hosted",
	"POTS_TRACKER_FCC_WATCH_LOOKBACK_MONTHS": "hosted",
	"POTS_TRACKER_FCC_WATCH_PROCEEDINGS": "hosted",
	"POTS_TRACKER_FRONTEND_DIST_PATH": "hosted",
	"POTS_TRACKER_HF_STORAGE_PATH_PREFIX": "hosted",
	"POTS_TRACKER_HF_STORAGE_REPO_ID": "hosted",
	"POTS_TRACKER_HF_STORAGE_REQUIRE_PRIVATE": "hosted",
	"POTS_TRACKER_HF_STORAGE_REVISION": "hosted",
	"POTS_TRACKER_HF_STORAGE_TOKEN": "hosted",
	"POTS_TRACKER_RUN_MIGRATIONS_ON_STARTUP": "hosted",
	"POTS_TRACKER_SERVE_FRONTEND": "hosted",
	}

	# Fixed display spelling for individual name tokens (the pieces between
	# underscores). `_humanize_token` checks this table first, so entries here
	# bypass its default capitalization -- including the ones deliberately kept
	# lowercase such as "lbs", "ops" and "p90".
	TOKEN_DISPLAY = {
	"ai": "AI",
	"api": "API",
	"att": "AT&T",
	"clli": "CLLI",
	"db": "DB",
	"fcc": "FCC",
	"hf": "HF",
	"id": "ID",
	"ip": "IP",
	"json": "JSON",
	"lbs": "lbs",
	"ops": "ops",
	"p90": "p90",
	"png": "PNG",
	"sql": "SQL",
	"ttl": "TTL",
	"ui": "UI",
	"url": "URL",
	"urls": "URLs",
	}


	def _humanize_token(token: str) -> str:
	    """Return the display form of a single name token.

	    Tokens listed in ``TOKEN_DISPLAY`` use their mapped spelling. Tokens made
	    only of digits, or already all uppercase, are returned unchanged. Any
	    other token has its hyphens replaced by spaces and is then capitalized.
	    """
	    display = TOKEN_DISPLAY.get(token)
	    if display is not None:
	        return display
	    if token.isdigit() or token.isupper():
	        return token
	    spaced = token.replace("-", " ")
	    return spaced.capitalize()


	def _humanize_name(name: str) -> str:
	    """Turn a snake_case name into space-separated display words.

	    The name is split on underscores, empty pieces (from leading, trailing
	    or doubled underscores) are dropped, and each remaining piece is passed
	    through ``_humanize_token``.
	    """
	    pieces = filter(None, name.split("_"))
	    return " ".join(map(_humanize_token, pieces))


	def _render_description(field_name: str, env_name: str) -> str:
	    """Return the Description cell text for one settings field.

	    An entry in ``FIELD_DESCRIPTION_OVERRIDES`` always wins. Otherwise the
	    field name is matched against a fixed list of suffixes and the part
	    before the suffix is humanized and inserted into that suffix's sentence.
	    Names matching no suffix get ``"<Humanized name> setting."``.

	    ``env_name`` is accepted but not used by this function.
	    """
	    override = FIELD_DESCRIPTION_OVERRIDES.get(field_name)
	    if override is not None:
	        return override

	    # Order matters: a longer suffix must be tried before any shorter suffix
	    # it ends with (e.g. "_document_urls_file" before "_urls_file", and
	    # "_index_urls" before "_urls").
	    suffix_templates = (
	        ("_index_urls", "Comma-separated index URLs for {carrier}."),
	        ("_tracker_urls", "Comma-separated tracker URLs for {carrier}."),
	        ("_seed_documents_file", "Path to the seed document URL file for {carrier}."),
	        ("_document_urls_file", "Path to extra document URLs for {carrier}."),
	        ("_document_urls", "Comma-separated explicit document URLs for {carrier}."),
	        ("_urls_file", "Path to an additional URL file for {carrier}."),
	        ("_urls", "Comma-separated URLs for {carrier}."),
	    )
	    for suffix, template in suffix_templates:
	        if field_name.endswith(suffix):
	            carrier = _humanize_name(field_name.removesuffix(suffix))
	            return template.format(carrier=carrier)

	    return f"{_humanize_name(field_name)} setting."


	def _render_scope(env_name: str) -> str:
	    """Return the Scope cell for an environment variable.

	    Uses the value from ``SPECIAL_SCOPE_OVERRIDES`` when the variable is
	    listed there and ``"both"`` otherwise.
	    """
	    if env_name in SPECIAL_SCOPE_OVERRIDES:
	        return SPECIAL_SCOPE_OVERRIDES[env_name]
	    return "both"


	def _render_sensitivity(env_name: str) -> str:
	    """Return ``"secret"`` for names in ``SECRET_ENV_VARS``, else ``"non-secret"``."""
	    if env_name in SECRET_ENV_VARS:
	        return "secret"
	    return "non-secret"


	def _render_type(annotation: Any) -> str:
	if annotation is Any:
	return "Any"
	if annotation is type(None):
	return "None"
	origin = get_origin(annotation)
	if origin in {Union, UnionType}:
	return " \| ".join(_render_type(arg) for arg in get_args(annotation))
	if isinstance(annotation, type):
	return annotation.__name__
	text = str(annotation).replace("typing.", "").replace("pathlib.", "")
	return text.replace("<class '", "").replace("'>", "")


	def _render_default(value: Any) -> str:
	if value is None:
	return "`unset`"
	if isinstance(value, bool):
	return f"`{str(value).lower()}`"
	if isinstance(value, Path):
	text = value.as_posix()
	if not value.is_absolute() and not text.startswith("./"):
	text = f"./{text}"
	return f"`{text}`"
	if isinstance(value, str):
	return '`""`' if value == "" else f"`{value}`"
	return f"`{value}`"


	def iter_reference_rows() -> list[dict[str, str]]:
	    """Build one reference-table row per ``POTS_TRACKER_*`` setting.

	    Walks ``Settings.model_fields`` in the order it yields them. The
	    environment variable name is the field's alias when it has one and the
	    field name otherwise; entries whose name does not start with
	    ``POTS_TRACKER_`` are left out.

	    Returns:
	        A list of dicts with the keys ``name``, ``type``, ``default``,
	        ``description``, ``scope`` and ``sensitivity``.
	    """
	    candidates = (
	        (info.alias or attr_name, attr_name, info)
	        for attr_name, info in Settings.model_fields.items()
	    )
	    return [
	        {
	            "name": env_var,
	            "type": _render_type(info.annotation),
	            "default": _render_default(info.default),
	            "description": _render_description(attr_name, env_var),
	            "scope": _render_scope(env_var),
	            "sensitivity": _render_sensitivity(env_var),
	        }
	        for env_var, attr_name, info in candidates
	        if env_var.startswith("POTS_TRACKER_")
	    ]


	def render_env_reference() -> str:
	rows = iter_reference_rows()
	lines = [
	"# Environment Reference",
	"",
	"Generated from `backend/app/pots_shutdown_tracker/config.py`. Regenerate this file with `python backend/scripts/dump_env_reference.py > docs/ENV.md` after changing `Settings`.",
	"",
	"\| Name \| Type \| Default \| Description \| Scope \| Sensitivity \|",
	"\| --- \| --- \| --- \| --- \| --- \| --- \|",
	]
	for row in rows:
	lines.append(
	f"\| `{row['name']}` \| {row['type']} \| {row['default']} \| {row['description']} \| {row['scope']} \| {row['sensitivity']} \|"
	)
	lines.append("")
	return "\n".join(lines)


	def main() -> None:
	    """Write the rendered environment reference to standard output."""
	    document = render_env_reference()
	    sys.stdout.write(document)


	# Only emit the document when the file is executed as a script.
	if __name__ == "__main__":
	    main()