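# NOTE(review): the next three lines look like web-page status text captured with the file, not script content.
# Spaces:
# Sleeping
# Sleeping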
"""Probe for section-level parquet files on Lex CDN.

Sends an HTTP HEAD request for each candidate file name under ``base`` and
prints one line per candidate:

* ``FOUND: <name> (<size> MB)`` when the server answers 200,
* `` <status>: <name>`` for any other status code,
* `` ERROR: <name> -> <exception type>`` when the request itself fails.
"""
import httpx

base = "https://lexdownloads.blob.core.windows.net/downloads/latest"

# Candidate object names to try, relative to ``base``.
patterns = [
    "legislation_section.parquet",
    "legislation_sections.parquet",
    "legislation_section_1983.parquet",
    "legislation_section/1983.parquet",
    "sections/1983.parquet",
    "legislation_sections/1983.parquet",
    "legislation_section_2005.parquet",
    "legislation_section_2014.parquet",
    "sections_1983.parquet",
    "legislation-section.parquet",
    "legislation-sections.parquet",
    "explanatory_note.parquet",
    "amendment.parquet",
]

# One shared client so all probes reuse the same connection pool instead of
# opening a new connection for every candidate.
with httpx.Client(timeout=10, follow_redirects=True) as client:
    for p in patterns:
        url = f"{base}/{p}"
        try:
            r = client.head(url)
        except httpx.HTTPError as e:
            # Request-level failure (timeout, connection problem, ...):
            # report it and keep probing the remaining candidates.
            print(f" ERROR: {p} -> {type(e).__name__}")
            continue

        if r.status_code != 200:
            print(f" {r.status_code}: {p}")
            continue

        # Content-Length may be missing or non-numeric; show "?" in that case.
        size = r.headers.get("content-length", "?")
        mb = int(size) // 1024 // 1024 if size.isdigit() else "?"
        print(f"FOUND: {p} ({mb} MB)")