"""Probe for section-level parquet files on Lex CDN.""" import httpx base = "https://lexdownloads.blob.core.windows.net/downloads/latest" patterns = [ "legislation_section.parquet", "legislation_sections.parquet", "legislation_section_1983.parquet", "legislation_section/1983.parquet", "sections/1983.parquet", "legislation_sections/1983.parquet", "legislation_section_2005.parquet", "legislation_section_2014.parquet", "sections_1983.parquet", "legislation-section.parquet", "legislation-sections.parquet", "explanatory_note.parquet", "amendment.parquet", ] for p in patterns: url = f"{base}/{p}" try: r = httpx.head(url, timeout=10, follow_redirects=True) size = r.headers.get("content-length", "?") mb = int(size) // 1024 // 1024 if size.isdigit() else "?" status = "FOUND" if r.status_code == 200 else str(r.status_code) print(f"{status}: {p} ({mb} MB)" if r.status_code == 200 else f" {r.status_code}: {p}") except Exception as e: print(f" ERROR: {p} -> {type(e).__name__}")