# NurseLex / download_sections.py
# NurseCitizenDeveloper's picture
# feat: complete local embedding search with i-dot-ai HF model
# 19a3093
"""
Download all sections for key nursing Acts from the Lex API.
Saves as nursing_sections.json for offline use.
"""
import json
import time
from pathlib import Path

import httpx
# Root URL of the Lex API; every request path in this script is appended to it.
BASE = "https://lex.lab.i.ai.gov.uk"

# Acts to download, mapping a human-readable title to the legislation id that
# is sent to the API. Insertion order is the download order.
NURSING_ACTS = {
    # Mental health and capacity
    "Mental Health Act 1983": "ukpga/1983/20",
    "Mental Capacity Act 2005": "ukpga/2005/9",
    # Care, rights and equality
    "Care Act 2014": "ukpga/2014/23",
    "Human Rights Act 1998": "ukpga/1998/42",
    "Equality Act 2010": "ukpga/2010/15",
    "Health and Social Care Act 2012": "ukpga/2012/7",
    "Mental Health Units (Use of Force) Act 2018": "ukpga/2018/27",
    "Autism Act 2009": "ukpga/2009/15",
    # Children and safeguarding
    "Children Act 1989": "ukpga/1989/41",
    "Children Act 2004": "ukpga/2004/31",
    "Safeguarding Vulnerable Groups Act 2006": "ukpga/2006/47",
}
# Upper bound on sections requested per Act in a single call. It is sent as
# "size" to the search endpoint and as "limit" to the browse endpoint.
_MAX_SECTIONS_PER_REQUEST = 200


def _sections_from(payload, *keys):
    """Pull the list of sections out of a decoded JSON response.

    Args:
        payload: Decoded JSON body; either a bare list of sections or a dict
            wrapping that list.
        *keys: Dict keys to try, in priority order.

    Returns:
        The payload itself when it is a list; otherwise the value stored under
        the first of ``keys`` that is present, or ``[]`` when none is.

    Raises:
        ValueError: If the payload is neither a list nor a dict.
    """
    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        raise ValueError(f"unexpected response type: {type(payload).__name__}")
    for key in keys:
        if key in payload:
            return payload[key]
    return []


def _tag_sections(sections, act_name, leg_id):
    """Stamp each section dict with its Act name and legislation id.

    The dicts are modified in place. The complete list is built before it is
    returned, so a malformed item raises before the caller has stored anything
    for this Act.
    """
    tagged = []
    for section in sections:
        section["act_name"] = act_name
        section["legislation_id"] = leg_id
        tagged.append(section)
    return tagged


def _search_sections(client, act_name, leg_id):
    """Fetch an Act's sections through the section search endpoint."""
    resp = client.post(
        f"{BASE}/legislation/section/search",
        json={
            "query": act_name,
            "legislation_id": leg_id,
            "size": _MAX_SECTIONS_PER_REQUEST,
            "include_text": True,
        },
    )
    resp.raise_for_status()
    return _sections_from(resp.json(), "results", "sections")


def _browse_sections(client, leg_id):
    """Fetch an Act's sections through the per-Act sections endpoint."""
    resp = client.get(
        f"{BASE}/legislation/{leg_id}/sections",
        params={"limit": _MAX_SECTIONS_PER_REQUEST},
    )
    resp.raise_for_status()
    return _sections_from(resp.json(), "sections")


# Every downloaded section, across all Acts, in download order.
all_sections = []

# The context manager closes the client even if the run is interrupted or an
# unexpected error escapes the loop.
with httpx.Client(timeout=60.0) as client:
    for act_name, leg_id in NURSING_ACTS.items():
        print(f"\n--- {act_name} ({leg_id}) ---")
        # Collect this Act's sections separately and add them to the global
        # list only once, so a failure part-way through the primary path
        # cannot leave partial results that the fallback then duplicates.
        act_sections = []
        try:
            act_sections = _tag_sections(
                _search_sections(client, act_name, leg_id), act_name, leg_id
            )
            print(f" Got {len(act_sections)} sections")
        except Exception as e:  # deliberate best-effort: any failure -> fallback
            print(f" ERROR: {type(e).__name__}: {e}")
            # Fallback: ask the per-Act sections endpoint instead.
            try:
                act_sections = _tag_sections(
                    _browse_sections(client, leg_id), act_name, leg_id
                )
                print(f" Fallback got {len(act_sections)} sections")
            except Exception as e2:  # deliberate best-effort: skip this Act
                act_sections = []
                print(f" Fallback also failed: {type(e2).__name__}: {e2}")

        if len(act_sections) >= _MAX_SECTIONS_PER_REQUEST:
            # Hitting the cap exactly suggests the Act has more sections than
            # one request returned.
            print(
                f" WARNING: received {len(act_sections)} sections, which is the "
                f"per-request cap; the result may be truncated"
            )

        all_sections.extend(act_sections)
        time.sleep(1)  # Rate limiting
print(f"\n=== TOTAL: {len(all_sections)} sections ===")

# Save next to this script so the download works on any machine, not only
# where one specific user profile directory exists.
output_path = Path(__file__).resolve().parent / "nursing_sections.json"
with output_path.open("w", encoding="utf-8") as f:
    json.dump(all_sections, f, indent=2, ensure_ascii=False)
print(f"Saved to {output_path}")
# Report the size of the file actually written rather than re-serialising the
# whole dataset with different settings just to measure it.
print(f"File size: {output_path.stat().st_size // 1024} KB")

# Show sample: the keys, title and first 200 characters of text of the first
# section, as a quick sanity check of the response shape.
if all_sections:
    sample = all_sections[0]
    print(f"\nSample keys: {list(sample.keys())}")
    print(f"Sample title: {sample.get('title', '?')}")
    print(f"Sample text preview: {str(sample.get('text', ''))[:200]}")