# Spaces: Sleeping
# (page-status text captured together with the listing; not part of the script)
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

import httpx
# Root of the HTTP API; the explanatory-note search endpoint is built from it.
BASE_URL = 'https://lex.lab.i.ai.gov.uk'
# JSON file holding the list of section records that main() loads.
INPUT_FILE = 'nursing_sections.json'
def fetch_note_for_section(section):
    """Look up the explanatory note for a single legislation section.

    Posts a search for ``"<act_name>" Section <number>`` to the
    ``/explanatory_note/section/search`` endpoint under ``BASE_URL`` and
    returns the first hit whose ``legislation_id`` contains the section's own
    ``legislation_id``.

    Args:
        section: Mapping describing one section. The keys ``act_name``,
            ``number``, ``legislation_id`` and ``uri`` are read.

    Returns:
        A dict with the keys ``section_uri``, ``act_name``,
        ``section_number`` and ``note_text``, or ``None`` when the section
        lacks the fields needed to search for or verify a note, when no hit
        belongs to the section's Act, or when the request fails.
    """
    act_title = section.get('act_name', '')
    section_number = section.get('number', '')
    parent_leg_id = section.get('legislation_id', '')

    if not act_title or not section_number:
        return None
    # An empty id is a substring of every string, so without this guard the
    # ownership check below would accept the first hit from *any* Act.
    if not parent_leg_id:
        return None

    url = f'{BASE_URL}/explanatory_note/section/search'
    query = f'"{act_title}" Section {section_number}'
    payload = {
        'query': query,
        'limit': 5,
    }

    try:
        r = httpx.post(url, json=payload, timeout=15)
        r.raise_for_status()
        data = r.json()

        # We need to find a note that actually belongs to this Act.
        if isinstance(data, list):
            for note in data:
                note_leg_id = note.get('legislation_id', '')
                if note_leg_id and parent_leg_id in note_leg_id:
                    # TODO: the hit is not yet checked for a mention of the
                    # section itself. Explanatory notes usually format it like
                    # "Section 2: ...", "2. ..." or "Paragraph 2".
                    return {
                        'section_uri': section.get('uri'),
                        'act_name': act_title,
                        'section_number': section_number,
                        'note_text': note.get('text', ''),
                    }
    except Exception as e:
        # Deliberately broad: this runs as a best-effort worker, so a failed
        # or malformed response is reported and treated as "no note found".
        print(f"Error for {query}: {e}")
    return None
def _pick_test_sections(sections, limit=20):
    """Return up to *limit* sections numbered 2, 3, 5 or 136 for a test run."""
    # Accept the numbers as ints or as strings; the input file's schema is not
    # fixed here and either spelling should select the same sections.
    wanted_numbers = (2, 3, 5, 136, '2', '3', '5', '136')

    def is_capacity_act(sec):
        return 'Capacity' in (sec.get('act_name') or '')

    candidates = []
    has_mca = False
    for sec in sections:
        if sec.get('number') in wanted_numbers:
            candidates.append(sec)
            if is_capacity_act(sec):
                has_mca = True
        # Keep scanning until a "Capacity" Act section has been collected.
        if len(candidates) > 50 and has_mca:
            break

    sample = candidates[:limit]
    # The scan above waits for a "Capacity" section; make sure truncation does
    # not throw it away again, otherwise the sample is not diverse.
    if has_mca and sample and not any(is_capacity_act(sec) for sec in sample):
        sample[-1] = next(sec for sec in candidates if is_capacity_act(sec))
    return sample


def main():
    """Fetch explanatory notes for a small test batch of sections.

    Loads the section list from ``INPUT_FILE``, selects a small sample,
    looks each one up concurrently with ``fetch_note_for_section`` and writes
    the notes that were found to ``test_notes.json`` keyed by section URI.
    """
    print("Loading sections...")
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        sections = json.load(f)
    print(f"Loaded {len(sections)} sections.")

    # Test on a small but diverse subset (MHA 1983, MCA 2005)
    test_sections = _pick_test_sections(sections)
    print(f"Testing {len(test_sections)} sections...")

    notes = {}
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_section = {
            executor.submit(fetch_note_for_section, s): s
            for s in test_sections
        }
        # Report results in completion order rather than submission order.
        for future in as_completed(future_to_section):
            s = future_to_section[future]
            result = future.result()
            if result:
                notes[s['uri']] = result
                print(f"✅ Found note for {result['act_name']} S.{result['section_number']}")
            else:
                print(f"❌ No note found for {s.get('act_name')} S.{s.get('number')}")

    print(f"Found {len(notes)} notes in test batch.")
    with open('test_notes.json', 'w', encoding='utf-8') as f:
        json.dump(notes, f, indent=2)
# Run the test batch only when executed as a script, not when imported.
if __name__ == '__main__':
    main()