Spaces:

NurseCitizenDeveloper
/

NurseLex

Sleeping

File size: 2,991 Bytes

19a3093

import json
import httpx
from concurrent.futures import ThreadPoolExecutor, as_completed

# Root URL of the service queried for explanatory notes; endpoint paths are appended to it.
BASE_URL = 'https://lex.lab.i.ai.gov.uk'
# JSON file holding the list of legislation sections that main() loads.
INPUT_FILE = 'nursing_sections.json'

def fetch_note_for_section(section):
    """Look up the explanatory note for one legislation section.

    Posts a search query built from the Act title and section number to the
    ``/explanatory_note/section/search`` endpoint and returns the first hit
    whose ``legislation_id`` contains the section's own ``legislation_id``.

    Args:
        section: Mapping read for the keys ``act_name``, ``number``,
            ``legislation_id`` and ``uri``.

    Returns:
        A dict with the keys ``section_uri``, ``act_name``,
        ``section_number`` and ``note_text``, or ``None`` when the section
        lacks the fields needed to search and to verify a match, when the
        request fails, or when no returned note belongs to the same Act.
    """
    act_title = section.get('act_name', '')
    section_number = section.get('number', '')
    parent_leg_id = section.get('legislation_id', '')

    if not act_title or not section_number:
        return None

    # An empty string is a substring of every string, so without a usable
    # parent id the ownership check below would accept any note at all.
    # Bail out before spending a request on a result we could not verify.
    if not isinstance(parent_leg_id, str) or not parent_leg_id:
        return None

    url = f'{BASE_URL}/explanatory_note/section/search'
    query = f'"{act_title}" Section {section_number}'
    payload = {
        'query': query,
        'limit': 5,
    }

    # Only the network call and JSON decoding are expected to fail; keep the
    # lookup best-effort by reporting the problem and returning None.
    try:
        r = httpx.post(url, json=payload, timeout=15)
        r.raise_for_status()
        data = r.json()
    except (httpx.HTTPError, ValueError) as e:
        print(f"Error for {query}: {e}")
        return None

    if not isinstance(data, list):
        return None

    # We need to find a note that actually belongs to this Act.
    # NOTE(review): the note text is not checked for a mention of the section
    # number; the first hit from the matching Act is returned as-is.
    for note in data:
        if not isinstance(note, dict):
            continue
        note_leg_id = note.get('legislation_id', '')
        if isinstance(note_leg_id, str) and parent_leg_id in note_leg_id:
            return {
                'section_uri': section.get('uri'),
                'act_name': act_title,
                'section_number': section_number,
                'note_text': note.get('text', ''),
            }

    return None

def _pick_test_sections(sections, wanted_numbers=(2, 3, 5, 136), limit=20):
    """Return up to *limit* sections whose number is wanted, keeping one 'Capacity' Act section if any matches."""
    matching = [s for s in sections if s.get('number') in wanted_numbers]
    sample = matching[:limit]

    def is_mca(s):
        return 'Capacity' in s.get('act_name', '')

    # Plain truncation could drop every Mental Capacity Act section when they
    # sit late in the input; swap one in so the sample stays diverse.
    if sample and not any(is_mca(s) for s in sample):
        late_mca = next((s for s in matching[limit:] if is_mca(s)), None)
        if late_mca is not None:
            sample[-1] = late_mca
    return sample


def main():
    """Run a small trial of the explanatory-note lookup and save the hits.

    Loads the sections from ``INPUT_FILE``, picks at most 20 sections
    numbered 2, 3, 5 or 136 (including a 'Capacity' Act section when one
    matches), fetches their notes with five worker threads, prints the
    outcome per section, and writes the found notes, keyed by section URI,
    to ``test_notes.json``.
    """
    print("Loading sections...")
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        sections = json.load(f)

    print(f"Loaded {len(sections)} sections.")

    # Test on a small but diverse subset (MHA 1983, MCA 2005)
    test_sections = _pick_test_sections(sections)

    print(f"Testing {len(test_sections)} sections...")
    notes = {}

    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_section = {executor.submit(fetch_note_for_section, s): s for s in test_sections}
        for future in as_completed(future_to_section):
            s = future_to_section[future]
            # A failure in one worker must not throw away the notes already
            # collected, so report it and treat the section as "not found".
            try:
                result = future.result()
            except Exception as e:
                print(f"Error for {s.get('act_name')} S.{s.get('number')}: {e}")
                result = None
            if result:
                notes[s['uri']] = result
                print(f"✅ Found note for {result['act_name']} S.{result['section_number']}")
            else:
                print(f"❌ No note found for {s.get('act_name')} S.{s.get('number')}")

    print(f"Found {len(notes)} notes in test batch.")

    with open('test_notes.json', 'w', encoding='utf-8') as f:
        json.dump(notes, f, indent=2)

# Run the trial batch only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()