import json
import os
import time
import xml.etree.ElementTree as ET

# --- Configuration ---
# Replace with your own nation name or contact info. The NationStates API
# requires a User-Agent that identifies the operator of the script.
USER_AGENT = "NS Issue Search dev update script (Jiangbei)"
CACHE_FILE = "../parsed_ga_resolutions.json"
API_BASE_URL = "https://www.nationstates.net/cgi-bin/api.cgi"
COUNCIL_ID = 1  # 1 for General Assembly, 2 for Security Council


def load_cache(filename):
    """Load existing resolutions from the JSON cache file.

    Args:
        filename: Path to the JSON cache (a list of resolution dicts).

    Returns:
        Dict mapping resolution ID -> resolution dict. Empty dict when the
        cache is missing or unreadable (the script then starts from scratch).
    """
    if not os.path.exists(filename):
        # BUGFIX: messages previously printed the literal text '(unknown)'
        # instead of interpolating the filename.
        print(f"Cache file '{filename}' not found. Will start from scratch.")
        return {}
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            resolutions_list = json.load(f)
        # Convert list to a dictionary keyed by resolution ID for fast lookups
        return {res['id']: res for res in resolutions_list}
    except (json.JSONDecodeError, IOError) as e:
        print(f"Error reading cache file '{filename}': {e}. Starting from scratch.")
        return {}


def save_cache(filename, resolutions_dict):
    """Save the resolutions dictionary to the JSON cache file.

    The dict values are written back as a list sorted by resolution ID, the
    same shape `load_cache` expects to read.
    """
    try:
        # Convert the dictionary values back to a list and sort by ID
        sorted_resolutions = sorted(resolutions_dict.values(), key=lambda r: r['id'])
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(sorted_resolutions, f, indent=2)
        print(f"Successfully saved {len(sorted_resolutions)} resolutions to '{filename}'.")
    except IOError as e:
        print(f"Error writing to cache file '{filename}': {e}")


def parse_resolution_xml(xml_string):
    """Parse a single resolution XML response from the NationStates API.

    Args:
        xml_string: The XML content of the API response.

    Returns:
        A dict representing the resolution data, or None if parsing fails or
        the <RESOLUTION> element is absent/empty (non-existent resolution ID).
    """
    try:
        root = ET.fromstring(xml_string)
        res_node = root.find('RESOLUTION')
        # If the RESOLUTION tag is empty, it means the resolution doesn't exist.
        if res_node is None or not list(res_node):
            return None

        data = {}
        # Iterate through all direct child tags of <RESOLUTION>
        for child in res_node:
            # Special case for COAUTHOR, which has multiple <N> children
            if child.tag == 'COAUTHOR':
                co_authors = [n.text for n in child.findall('N')]
                if co_authors:
                    data['co_authors'] = co_authors
                continue  # Skip to the next tag

            key = child.tag.lower()
            value = child.text
            # Try to convert numeric values to integers
            try:
                data[key] = int(value)
            except (ValueError, TypeError):
                data[key] = value

        # --- Map API fields to desired dictionary structure ---
        # Keep required fields with consistent naming
        if 'name' in data:
            data['title'] = data.pop('name')
        if 'desc' in data:
            data['body'] = data.pop('desc')  # Keep BBCode as text
        if 'councilid' in data:
            data['id'] = data.pop('councilid')  # councilid is resolution id

        # Determine status and structure repeal information
        if 'repealed_by' in data:
            data['status'] = 'Repealed'
            data['repealed_by'] = {
                'id': data.pop('repealed_by'),
                'timestamp': data.pop('repealed', None),
            }
        else:
            data['status'] = 'Active'

        # Structure info for resolutions that ARE repeals.
        # ROBUSTNESS: default the council to None instead of raising KeyError
        # if the API ever omits REPEALS_COUNCILID alongside REPEALS_RESID.
        if 'repeals_resid' in data:
            data['repeals'] = {
                'id': data.pop('repeals_resid'),
                'council': data.pop('repeals_councilid', None),
            }

        return data
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return None


def main():
    """Fetch, parse, and cache all GA resolutions not yet in the local cache."""
    # Deferred import: the third-party 'requests' package is only needed for
    # fetching, which keeps the parsing/cache helpers importable without it.
    import requests

    print("--- World Assembly Resolution Fetcher ---")

    # Load existing resolutions from cache
    cached_resolutions = load_cache(CACHE_FILE)
    if cached_resolutions:
        # Find the latest resolution ID we already have and start from the next one
        start_id = max(cached_resolutions.keys()) + 1
        print(f"Loaded {len(cached_resolutions)} resolutions from cache. Starting fetch from GA#{start_id}.")
    else:
        start_id = 1

    # --- API Request Loop ---
    session = requests.Session()
    session.headers.update({'User-Agent': USER_AGENT})

    current_id = start_id
    newly_fetched = []
    rate_limit_info = {'remaining': 50, 'reset_in': 30}

    while True:
        # Check if we are about to exceed the rate limit
        if rate_limit_info['remaining'] < 2:
            wait_time = rate_limit_info['reset_in'] + 1  # Add a small buffer
            print(f"Rate limit approaching. Waiting for {wait_time} seconds...")
            time.sleep(wait_time)

        print(f"Fetching resolution GA#{current_id}...")
        params = {'wa': COUNCIL_ID, 'id': current_id, 'q': 'resolution'}

        try:
            response = session.get(API_BASE_URL, params=params, timeout=15)

            # Update rate limit info from headers after every request
            rate_limit_info['remaining'] = int(response.headers.get('RateLimit-Remaining', 50))
            rate_limit_info['reset_in'] = int(response.headers.get('RateLimit-Reset', 30))

            # Handle API responses
            if response.status_code == 429:
                retry_after = int(response.headers.get('Retry-After', 30))
                print(f"Rate limit exceeded (429). Waiting for {retry_after} seconds as requested by API.")
                time.sleep(retry_after)
                continue  # Retry the same ID

            response.raise_for_status()  # Raises an error for other bad responses (4xx or 5xx)
        except requests.exceptions.RequestException as e:
            print(f"An error occurred during request for GA#{current_id}: {e}")
            print("Stopping script. Run again to resume.")
            break

        # Parse the response content
        parsed_data = parse_resolution_xml(response.text)
        if parsed_data:
            newly_fetched.append(parsed_data)
            current_id += 1
            time.sleep(0.7)  # Be polite: 50 requests/30s = 0.6s per request. Add a small delay.
        else:
            # API returns empty for non-existent IDs, signaling we are done.
            print(f"GA#{current_id} does not exist. Assuming it's the last one.")
            print("--- Fetching complete. ---")
            break

    # --- Post-Fetch Processing ---
    if not newly_fetched:
        print("No new resolutions found. Cache is up-to-date.")
        return

    print(f"Fetched {len(newly_fetched)} new resolutions.")

    # Update cache with new data
    updates_made = 0
    for res in newly_fetched:
        # BUGFIX: a newly fetched resolution that repeals an older one carries
        # a 'repeals' entry (built by parse_resolution_xml). The old code
        # tested res['status'] == 'Repealed' and used res['id'] — the new
        # resolution's own ID, which by construction is never already in the
        # cache — so the status propagation was dead code.
        repeals = res.get('repeals')
        if repeals and repeals.get('council') == COUNCIL_ID:
            target_id = repeals['id']
            # Check if we have the repealed resolution in our cache
            if target_id in cached_resolutions and cached_resolutions[target_id]['status'] == 'Active':
                print(
                    f"Updating status for GA#{target_id}: was Active, now Repealed by GA#{res['id']}.")
                cached_resolutions[target_id]['status'] = 'Repealed'
                cached_resolutions[target_id]['repealed_by'] = {
                    'id': res['id'],
                    # assumes 'implemented' holds the enactment timestamp — TODO confirm
                    'timestamp': res.get('implemented'),
                }
                updates_made += 1

        # Add the new resolution to our collection
        cached_resolutions[res['id']] = res

    if updates_made:
        print(f"Updated the status of {updates_made} existing resolutions.")

    # Save the final, complete collection to the cache file
    save_cache(CACHE_FILE, cached_resolutions)


if __name__ == "__main__":
    main()