File size: 3,852 Bytes
b339b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Parse Congress Legislators JSON files to extract congress numbers from term data."""

from __future__ import annotations

import json
from datetime import date
from pathlib import Path

from .congress_utils import congress_from_date


def parse_legislators_json(json_path: Path) -> list[dict]:
    """Load and decode a legislators JSON file.

    Args:
        json_path: Path to the JSON file; it is read as UTF-8 text.

    Returns:
        The decoded JSON document. Callers in this module treat it as a
        list of legislator dictionaries (each presumably carrying ``id``
        and ``terms`` entries); this function does not validate that shape.
    """
    raw_text = json_path.read_text(encoding="utf-8")
    return json.loads(raw_text)


def _max_congress_for_terms(terms) -> int:
    """Return the highest congress number found among ``terms`` (0 if none)."""
    highest = 0
    for entry in terms:
        # Prefer the term's end date and fall back to its start date.
        # NOTE(review): an end date may coincide with the first day of the
        # following congress; which congress it is attributed to depends on
        # congress_from_date -- confirm against that helper.
        raw_date = entry.get("end") or entry.get("start")
        if raw_date:
            try:
                number = congress_from_date(date.fromisoformat(raw_date))
                if number > highest:
                    highest = number
            except ValueError:
                # Date could not be resolved to a congress; ignore this term.
                pass
    return highest


def extract_bioguide_max_congress(
    current_json_path: Path,
    historical_json_path: Path,
) -> dict[str, int]:
    """Map each bioguide id to the highest congress number found in its terms.

    Both files are read (current first, then historical); a file that does
    not exist is skipped. For every legislator record, each term's end date
    (or start date when no end date is present) is converted to a congress
    number and the largest one is kept. When the same bioguide id shows up
    in both files, the larger value wins.

    Args:
        current_json_path: Path to legislators-current.json
        historical_json_path: Path to legislators-historical.json

    Returns:
        Dictionary mapping bioguide_id to max congress number. Records
        without a bioguide id, without terms, or whose terms yield no
        positive congress number are left out.
    """
    latest_by_bioguide: dict[str, int] = {}

    for source in (current_json_path, historical_json_path):
        if not source.exists():
            continue

        for record in parse_legislators_json(source):
            key = record.get("id", {}).get("bioguide")
            if not key:
                continue

            term_list = record.get("terms", [])
            if not term_list:
                continue

            latest = _max_congress_for_terms(term_list)
            if latest > 0:
                # A legislator may be listed in both files; keep the larger value.
                previous = latest_by_bioguide.get(key, 0)
                latest_by_bioguide[key] = latest if latest > previous else previous

    return latest_by_bioguide


def filter_bioguides_by_congress(
    bioguide_max_congress: dict[str, int],
    min_congress: int,
) -> set[str]:
    """Select the bioguide ids whose max congress is at least ``min_congress``.

    Args:
        bioguide_max_congress: Mapping of bioguide_id to max congress.
        min_congress: Lowest congress number to accept (inclusive).

    Returns:
        Set of bioguide_ids whose mapped value is >= ``min_congress``.
    """
    qualifying: set[str] = set()
    for member_id, latest in bioguide_max_congress.items():
        if latest >= min_congress:
            qualifying.add(member_id)
    return qualifying


def get_congress_stats(bioguide_max_congress: dict[str, int]) -> dict:
    """Summarize the congress numbers stored in the mapping.

    Utility function for debugging and data inspection.

    Args:
        bioguide_max_congress: Mapping of bioguide_id to max congress.

    Returns:
        Dictionary with three keys: ``count`` (number of entries),
        ``min_congress`` and ``max_congress`` (smallest and largest mapped
        values). For an empty mapping, ``count`` is 0 and both bounds are
        ``None``.
    """
    if not bioguide_max_congress:
        return dict(count=0, min_congress=None, max_congress=None)

    values = bioguide_max_congress.values()
    return dict(
        count=len(values),
        min_congress=min(values),
        max_congress=max(values),
    )