"""Smoke-test fetching explanatory notes for a small batch of sections.

Loads sections from ``INPUT_FILE``, picks a small subset, queries the
explanatory-note search endpoint for each one in parallel and writes the
matches to ``test_notes.json``.
"""
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Optional

import httpx

BASE_URL = 'https://lex.lab.i.ai.gov.uk'
INPUT_FILE = 'nursing_sections.json'


def _find_note_for_act(data: Any, parent_leg_id: str) -> Optional[dict]:
    """Return the first hit in *data* whose legislation id contains *parent_leg_id*.

    *data* is the decoded search response; anything other than a list yields
    ``None``. The caller must pass a non-empty *parent_leg_id*.
    """
    if not isinstance(data, list):
        return None
    for note in data:
        if not isinstance(note, dict):
            continue
        note_leg_id = note.get('legislation_id', '')
        if (
            isinstance(note_leg_id, str)
            and note_leg_id
            and parent_leg_id in note_leg_id
        ):
            return note
    return None


def fetch_note_for_section(section: dict) -> Optional[dict]:
    """Search for the explanatory note belonging to *section*.

    Args:
        section: Section record; the keys read are ``act_name``, ``number``,
            ``legislation_id`` and ``uri``.

    Returns:
        A dict with ``section_uri``, ``act_name``, ``section_number`` and
        ``note_text`` for the first search hit whose ``legislation_id``
        contains the section's ``legislation_id``; ``None`` when a required
        field is missing, the request fails, or no hit matches.
    """
    url = f'{BASE_URL}/explanatory_note/section/search'
    act_title = section.get('act_name', '')
    section_number = section.get('number', '')
    parent_leg_id = section.get('legislation_id', '')

    if not act_title or not section_number:
        return None
    # An empty id is a substring of every string, so it would "match" any
    # note. Without a usable parent id no hit can be attributed to this Act.
    if not isinstance(parent_leg_id, str) or not parent_leg_id:
        return None

    query = f'"{act_title}" Section {section_number}'
    payload = {
        'query': query,
        'limit': 5,
    }

    # Best-effort: a failed request or undecodable body is reported and
    # treated as "no note" so one bad section does not abort the batch.
    try:
        r = httpx.post(url, json=payload, timeout=15)
        r.raise_for_status()
        data = r.json()
    except (httpx.HTTPError, ValueError) as e:
        print(f"Error for {query}: {e}")
        return None

    # We need a note that actually belongs to this Act.
    # NOTE(review): the hit is not checked against the section number; the
    # original comment suggests a text/title check ("Section 2: ...") was
    # intended but it was never implemented.
    note = _find_note_for_act(data, parent_leg_id)
    if note is None:
        return None
    return {
        'section_uri': section.get('uri'),
        'act_name': act_title,
        'section_number': section_number,
        'note_text': note.get('text', ''),
    }


def main() -> None:
    """Fetch notes for a small test batch and dump them to ``test_notes.json``."""
    print("Loading sections...")
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        sections = json.load(f)

    print(f"Loaded {len(sections)} sections.")

    # Test on a small but diverse subset (MHA 1983, MCA 2005).
    # NOTE(review): the filter only matches integer section numbers; if the
    # input stores them as strings nothing is selected - confirm the schema.
    test_sections = []
    has_mca = False
    for s in sections:
        if s.get('number') in [2, 3, 5, 136]:
            test_sections.append(s)
            if 'Capacity' in s.get('act_name', ''):
                has_mca = True
        if len(test_sections) > 50 and has_mca:
            break

    # NOTE(review): the slice keeps only the first 20 candidates, so the
    # 'Capacity' section the loop waited for may not survive it.
    test_sections = test_sections[:20]
    print(f"Testing {len(test_sections)} sections...")

    notes = {}
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_section = {
            executor.submit(fetch_note_for_section, s): s
            for s in test_sections
        }
        for future in as_completed(future_to_section):
            s = future_to_section[future]
            result = future.result()
            if result:
                notes[s['uri']] = result
                print(f"✅ Found note for {result['act_name']} S.{result['section_number']}")
            else:
                print(f"❌ No note found for {s.get('act_name')} S.{s.get('number')}")

    print(f"Found {len(notes)} notes in test batch.")
    with open('test_notes.json', 'w', encoding='utf-8') as f:
        json.dump(notes, f, indent=2)


if __name__ == '__main__':
    main()