Spaces:

NurseCitizenDeveloper
/

NurseLex

Sleeping

App Files Files Community

NurseLex / download_sections.py

NurseCitizenDeveloper

feat: complete local embedding search with i-dot-ai HF model

19a3093 12 days ago

raw

history blame contribute delete

3.1 kB

	"""
	Download all sections for key nursing Acts from the Lex API.
	Saves as nursing_sections.json for offline use.
	"""
	import json
	import time
	from pathlib import Path

	import httpx

	# Root URL of the Lex API; endpoint paths are appended to it.
	BASE = "https://lex.lab.i.ai.gov.uk"

	# (Act title, legislation id) pairs, in the order they are downloaded.
	_ACT_TABLE = (
	    ("Mental Health Act 1983", "ukpga/1983/20"),
	    ("Mental Capacity Act 2005", "ukpga/2005/9"),
	    ("Care Act 2014", "ukpga/2014/23"),
	    ("Human Rights Act 1998", "ukpga/1998/42"),
	    ("Equality Act 2010", "ukpga/2010/15"),
	    ("Health and Social Care Act 2012", "ukpga/2012/7"),
	    ("Mental Health Units (Use of Force) Act 2018", "ukpga/2018/27"),
	    ("Autism Act 2009", "ukpga/2009/15"),
	    ("Children Act 1989", "ukpga/1989/41"),
	    ("Children Act 2004", "ukpga/2004/31"),
	    ("Safeguarding Vulnerable Groups Act 2006", "ukpga/2006/47"),
	)

	# Maps each Act's title to the legislation id sent to the API.
	NURSING_ACTS = dict(_ACT_TABLE)

	# Maximum number of sections requested per Act from either endpoint.
	_SECTION_LIMIT = 200


	def _tagged_sections(payload, act_name, leg_id, list_keys):
	    """Return the sections found in *payload*, each tagged with its Act.

	    Args:
	        payload: Decoded JSON body; either a bare list of section dicts or
	            a dict holding that list under one of *list_keys*.
	        act_name: Act title stored on every section under ``"act_name"``.
	        leg_id: Legislation id stored under ``"legislation_id"``.
	        list_keys: Keys probed in order when *payload* is a dict; the first
	            key that is present wins.

	    Returns:
	        A new list of section dicts (empty when no key is present).

	    Raises:
	        TypeError: If *payload* is neither a list nor a dict, or if a
	            section is not a mapping.
	    """
	    if isinstance(payload, list):
	        sections = payload
	    elif isinstance(payload, dict):
	        sections = next(
	            (payload[key] for key in list_keys if key in payload), []
	        )
	    else:
	        raise TypeError(
	            f"unexpected payload type {type(payload).__name__}"
	        )
	    # Build fresh dicts so a failure part-way through leaves nothing behind.
	    return [
	        {**section, "act_name": act_name, "legislation_id": leg_id}
	        for section in sections
	    ]


	all_sections = []

	# The context manager closes the client even if the loop is interrupted.
	with httpx.Client(timeout=60.0) as client:
	    for act_name, leg_id in NURSING_ACTS.items():
	        print(f"\n--- {act_name} ({leg_id}) ---")

	        # Primary route: the section search endpoint, filtered to this Act.
	        try:
	            resp = client.post(
	                f"{BASE}/legislation/section/search",
	                json={
	                    "query": act_name,
	                    "legislation_id": leg_id,
	                    "size": _SECTION_LIMIT,
	                    "include_text": True,
	                },
	            )
	            resp.raise_for_status()
	            sections = _tagged_sections(
	                resp.json(), act_name, leg_id, ("results", "sections")
	            )
	            print(f" Got {len(sections)} sections")
	        except Exception as e:
	            print(f" ERROR: {type(e).__name__}: {e}")

	            # Fallback route: list the Act's sections directly.
	            try:
	                resp2 = client.get(
	                    f"{BASE}/legislation/{leg_id}/sections",
	                    params={"limit": _SECTION_LIMIT},
	                )
	                resp2.raise_for_status()
	                sections = _tagged_sections(
	                    resp2.json(), act_name, leg_id, ("sections",)
	                )
	                print(f" Fallback got {len(sections)} sections")
	            except Exception as e2:
	                print(f" Fallback also failed: {type(e2).__name__}: {e2}")
	                sections = []

	        # A full page suggests the Act has more sections than were returned.
	        if len(sections) >= _SECTION_LIMIT:
	            print(
	                f" WARNING: hit the {_SECTION_LIMIT}-section request limit;"
	                " results may be truncated"
	            )

	        # Only complete batches are added, so a failed attempt followed by
	        # the fallback can never leave duplicate sections behind.
	        all_sections.extend(sections)

	        time.sleep(1)  # Rate limiting: one pause per Act on every path

	print(f"\n=== TOTAL: {len(all_sections)} sections ===")

	# Refuse to replace an existing dataset with an empty list when every
	# request failed (for example, the API was unreachable).
	if not all_sections:
	    raise SystemExit("No sections downloaded; nothing was written.")

	# Save next to this script so the location does not depend on the current
	# working directory, the operating system, or one user's home folder.
	output_path = Path(__file__).resolve().parent / "nursing_sections.json"
	with open(output_path, "w", encoding="utf-8") as f:
	    json.dump(all_sections, f, indent=2, ensure_ascii=False)

	print(f"Saved to {output_path}")
	# Report the bytes actually on disk rather than re-serialising the data
	# with different options, which gave a size that did not match the file.
	print(f"File size: {output_path.stat().st_size // 1024} KB")

	# Show sample
	sample = all_sections[0]
	print(f"\nSample keys: {list(sample.keys())}")
	print(f"Sample title: {sample.get('title', '?')}")
	print(f"Sample text preview: {str(sample.get('text', ''))[:200]}")