# NurseLex / probe_sections.py
# NurseCitizenDeveloper's picture
# feat: complete local embedding search with i-dot-ai HF model
# 19a3093
"""Probe for section-level parquet files on Lex CDN."""
import httpx
# Root URL under which every candidate file name below is probed.
base = "https://lexdownloads.blob.core.windows.net/downloads/latest"

# Candidate relative paths, tried in this order. They are guesses at how a
# section-level parquet export might be named; nothing here is known to exist.
patterns = [
    # Single-file spellings (singular / plural).
    "legislation_section.parquet",
    "legislation_sections.parquet",
    # Year-specific spellings for 1983, both flat and directory layouts.
    "legislation_section_1983.parquet",
    "legislation_section/1983.parquet",
    "sections/1983.parquet",
    "legislation_sections/1983.parquet",
    # Same flat spelling for two other years.
    "legislation_section_2005.parquet",
    "legislation_section_2014.parquet",
    "sections_1983.parquet",
    # Hyphen-separated spellings.
    "legislation-section.parquet",
    "legislation-sections.parquet",
    # Other dataset names.
    "explanatory_note.parquet",
    "amendment.parquet",
]
# Probe every candidate path with a HEAD request and print one line per path:
#   "FOUND: <path> (<n> MB)"   when the server answers 200,
#   " <status>: <path>"        for any other status code,
#   " ERROR: <path> -> <Exc>"  when the request itself raises.
#
# One Client is shared across all requests so the connection to the host is
# reused instead of being set up and torn down for every candidate.
with httpx.Client(timeout=10, follow_redirects=True) as client:
    for p in patterns:
        url = f"{base}/{p}"
        try:
            r = client.head(url)
        except Exception as e:
            # Best-effort probe: report the failure type and keep going.
            print(f" ERROR: {p} -> {type(e).__name__}")
            continue

        if r.status_code != 200:
            print(f" {r.status_code}: {p}")
            continue

        size = r.headers.get("content-length", "?")
        # isdecimal() accepts exactly the characters int() can parse, whereas
        # isdigit() also accepts e.g. superscript digits that int() rejects.
        mb = int(size) // 1024 // 1024 if size.isdecimal() else "?"
        print(f"FOUND: {p} ({mb} MB)")