File size: 8,003 Bytes
7392937
 
3a373f3
7392937
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a373f3
7392937
3a373f3
 
7392937
3a373f3
 
7392937
3a373f3
7392937
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6eea04f
7392937
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a373f3
 
7392937
 
 
 
 
 
 
 
 
 
 
3a373f3
7392937
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import requests
import xml.etree.ElementTree as ET
import json
import time
import os

# --- Configuration ---
# Replace with your own nation name or contact info.
# Sent as the User-Agent header on every API request (see main()).
USER_AGENT = "NS Issue Search dev update script (Jiangbei)"
# JSON cache of parsed resolutions, relative to the script's working directory.
CACHE_FILE = "../parsed_ga_resolutions.json"
# Base endpoint for all NationStates API queries.
API_BASE_URL = "https://www.nationstates.net/cgi-bin/api.cgi"
COUNCIL_ID = 1  # 1 for General Assembly, 2 for Security Council


def load_cache(filename):
    """Load existing resolutions from the JSON cache file.

    Args:
        filename: Path to the JSON cache file (a list of resolution dicts).

    Returns:
        A dict mapping resolution id -> resolution dict. Empty dict if the
        file is missing or unreadable, so the caller starts from scratch.
    """
    if not os.path.exists(filename):
        # BUGFIX: the message printed the literal '(unknown)' instead of the
        # actual path; interpolate the filename so the user can find the file.
        print(f"Cache file '{filename}' not found. Will start from scratch.")
        return {}

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            resolutions_list = json.load(f)
        # Convert list to a dictionary keyed by resolution ID for fast lookups
        return {res['id']: res for res in resolutions_list}
    except (json.JSONDecodeError, IOError) as e:
        print(f"Error reading cache file '{filename}': {e}. Starting from scratch.")
        return {}


def save_cache(filename, resolutions_dict):
    """Save the resolutions dictionary to the JSON cache file.

    Args:
        filename: Destination path for the JSON cache file.
        resolutions_dict: Mapping of resolution id -> resolution dict.
    """
    try:
        # Persist as a list sorted by ID so the on-disk order is stable.
        sorted_resolutions = sorted(resolutions_dict.values(), key=lambda r: r['id'])
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(sorted_resolutions, f, indent=2)
        # BUGFIX: both messages printed the literal '(unknown)' instead of
        # the actual path; interpolate the filename.
        print(f"Successfully saved {len(sorted_resolutions)} resolutions to '{filename}'.")
    except IOError as e:
        print(f"Error writing to cache file '{filename}': {e}")


def parse_resolution_xml(xml_string):
    """Parse a single resolution XML payload from the NationStates API.

    Args:
        xml_string: The XML content from the API response.

    Returns:
        A dict of resolution data, or None if parsing fails or the
        <RESOLUTION> element is absent/empty (non-existent resolution).
    """
    try:
        root = ET.fromstring(xml_string)
        res_node = root.find('RESOLUTION')

        # An empty <RESOLUTION> element means the resolution doesn't exist.
        if res_node is None or not list(res_node):
            return None

        data = {}
        # Iterate through all direct child tags of <RESOLUTION>
        for child in res_node:
            # COAUTHOR is the one nested tag: it holds multiple <N> children.
            if child.tag == 'COAUTHOR':
                co_authors = [n.text for n in child.findall('N')]
                if co_authors:
                    data['co_authors'] = co_authors
                continue  # Skip to the next tag

            key = child.tag.lower()
            value = child.text

            # Store numeric fields as ints; everything else stays as text.
            try:
                data[key] = int(value)
            except (ValueError, TypeError):
                data[key] = value

        # --- Map API fields to the cache's naming scheme ---
        if 'name' in data: data['title'] = data.pop('name')
        if 'desc' in data: data['body'] = data.pop('desc')  # Keep BBCode as text
        if 'councilid' in data: data['id'] = data.pop('councilid')  # councilid is resolution id

        # Determine status and structure repeal information
        if 'repealed_by' in data:
            data['status'] = 'Repealed'
            data['repealed_by'] = {
                'id': data.pop('repealed_by'),
                'timestamp': data.pop('repealed', None)
            }
        else:
            data['status'] = 'Active'

        # Structure info for resolutions that ARE repeals
        if 'repeals_resid' in data:
            data['repeals'] = {
                'id': data.pop('repeals_resid'),
                # BUGFIX: default to None so a payload carrying REPEALS_RESID
                # without REPEALS_COUNCILID cannot raise KeyError.
                'council': data.pop('repeals_councilid', None)
            }

        return data

    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return None


def main():
    """Fetch new GA resolutions from the API, parse them, and update the cache.

    Resumes from one past the highest resolution ID already cached, honors
    the API's rate-limit headers, and stops at the first non-existent ID.
    """
    print("--- World Assembly Resolution Fetcher ---")

    # Load existing resolutions from cache
    cached_resolutions = load_cache(CACHE_FILE)
    if cached_resolutions:
        # Find the latest resolution ID we already have and start from the next one
        start_id = max(cached_resolutions.keys()) + 1
        print(f"Loaded {len(cached_resolutions)} resolutions from cache. Starting fetch from GA#{start_id}.")
    else:
        start_id = 1

    # --- API Request Loop ---
    session = requests.Session()
    session.headers.update({'User-Agent': USER_AGENT})

    current_id = start_id
    newly_fetched = []

    # Seeded with defaults; refreshed from response headers after each request.
    rate_limit_info = {
        'remaining': 50,
        'reset_in': 30
    }

    while True:
        # Pre-emptively wait when we are about to exhaust the window.
        if rate_limit_info['remaining'] < 2:
            wait_time = rate_limit_info['reset_in'] + 1  # Add a small buffer
            print(f"Rate limit approaching. Waiting for {wait_time} seconds...")
            time.sleep(wait_time)

        print(f"Fetching resolution GA#{current_id}...")

        params = {'wa': COUNCIL_ID, 'id': current_id, 'q': 'resolution'}
        try:
            response = session.get(API_BASE_URL, params=params, timeout=15)

            # Update rate limit info from headers after every request
            rate_limit_info['remaining'] = int(response.headers.get('RateLimit-Remaining', 50))
            rate_limit_info['reset_in'] = int(response.headers.get('RateLimit-Reset', 30))

            # Handle API responses
            if response.status_code == 429:
                retry_after = int(response.headers.get('Retry-After', 30))
                print(f"Rate limit exceeded (429). Waiting for {retry_after} seconds as requested by API.")
                time.sleep(retry_after)
                continue  # Retry the same ID

            response.raise_for_status()  # Raises an error for other bad responses (4xx or 5xx)

        except requests.exceptions.RequestException as e:
            print(f"An error occurred during request for GA#{current_id}: {e}")
            print("Stopping script. Run again to resume.")
            break

        # Parse the response content
        parsed_data = parse_resolution_xml(response.text)

        if parsed_data:
            newly_fetched.append(parsed_data)
            current_id += 1
            time.sleep(0.7)  # Be polite: 50 requests/30s = 0.6s per request. Add a small delay.
        else:
            # API returns empty <RESOLUTION> for non-existent IDs, signaling we are done.
            print(f"GA#{current_id} does not exist. Assuming it's the last one.")
            print("--- Fetching complete. ---")
            break

    # --- Post-Fetch Processing ---
    if not newly_fetched:
        print("No new resolutions found. Cache is up-to-date.")
        return

    print(f"Fetched {len(newly_fetched)} new resolutions.")

    # Update cache with new data
    updates_made = 0
    for res in newly_fetched:
        # BUGFIX: a new resolution that repeals an older one carries a
        # 'repeals' record (built by parse_resolution_xml), not 'repealed_by'.
        # The previous check keyed on res's own id, which is always above the
        # cached maximum (we fetch from max+1), so the update branch was dead
        # code. Use the repeal *target's* id to find the cached entry.
        repeals = res.get('repeals')
        if repeals:
            target_id = repeals['id']
            target = cached_resolutions.get(target_id)
            if target and target.get('status') == 'Active':
                print(
                    f"Updating status for GA#{target_id}: was Active, now Repealed by GA#{res['id']}.")
                target['status'] = 'Repealed'
                # NOTE(review): the repeal payload does not carry the repeal
                # timestamp directly; 'implemented' looks like the repealing
                # resolution's enactment date — confirm against the API docs.
                target['repealed_by'] = {
                    'id': res['id'],
                    'timestamp': res.get('implemented')
                }
                updates_made += 1

        # Add the new resolution to our collection
        cached_resolutions[res['id']] = res

    if updates_made:
        print(f"Updated the status of {updates_made} existing resolutions.")

    # Save the final, complete collection to the cache file
    save_cache(CACHE_FILE, cached_resolutions)


if __name__ == "__main__":
    main()