import ast
from typing import Any, Dict, List, Tuple, Union

import gradio as gr  # pip install "gradio[mcp]"
import requests
from pycspwrapper import LVStat  # pip install pycspwrapper

# Seconds to wait for the statistics portal before giving up; without a
# timeout a stalled connection would block the calling handler forever.
REQUEST_TIMEOUT = 30

TOPICS_URL = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/'
START_URL = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
PXWEB_URL = 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__'


def _fetch_json(url: str) -> Any:
    """Return the decoded JSON body of a GET request to *url*.

    Raises:
        requests.exceptions.RequestException: on connection problems, timeouts
            or a 4xx/5xx response status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def get_topics(name: str = '') -> dict:
    """Available topics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        name (str): name of the topic. If not defined (or not found), function will return all available topics.

    Returns:
        dict: The dictionary of topics, where key is topic name and value is topic code.

    Examples:
        >>> get_topics('vide')
        {'Vide': 'ENV'}
        >>> print(get_topics())
        {'Iedzīvotāji': 'POP', 'Darbs': 'EMP', 'Sociālā aizsardzība un veselība': 'VES',...
    """
    # Topic names are matched after str.capitalize(), i.e. first letter upper
    # case and the rest lower case.
    name_capit = (name or '').capitalize()
    topics = {item['text']: item['id'] for item in _fetch_json(TOPICS_URL)}
    if name_capit in topics:
        return {name_capit: topics[name_capit]}
    return topics


def get_topic_content(topic: str) -> dict:
    """Available contents of the topic from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        topic (str): topic code. Use get_topics to get topic code.

    Returns:
        dict: The dictionary of the contents of the topic, where key is the topic content and value is the topic content code.

    Examples:
        >>> # First get topic code
        >>> get_topics('vide')
        {'Vide': 'ENV'}
        >>> # Then use this code to get content
        >>> print(get_topic_content('ENV'))
        {'Vides konti': 'VI', 'Atkritumu apsaimniekošana': 'AK', 'Agro-vides rādītāji': 'AV',...
        >>> get_topics('Iedzīvotāji')
        {'Iedzīvotāji': 'POP'}
        >>> print(get_topic_content('POP'))
        {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',...
    """
    # The topic code is upper-cased before it is appended to the URL.
    entries = _fetch_json(START_URL + topic.upper())
    return {item['text']: item['id'] for item in entries}


def get_titles(topic_content_code: str = '', url: str = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*') -> dict:
    """Available data (titles) from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        topic_content_code (str): topic content code. Use get_topic_content to get topic content code. If not defined, function will return all available titles.
        url (str): URL from where to get list of available titles. Default value: 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*'.

    Returns:
        dict: The dictionary of the titles available from Official Statistics Portal, where key is the title name and value is the list of the non-empty path components (topic code, topic content code, topic sub-content code) followed by the report ID.

    Examples:
        >>> # First get topic code
        >>> get_topics('Iedzīvotāji')
        {'Iedzīvotāji': 'POP'}
        >>> # Then use this code to get content
        >>> print(get_topic_content('POP'))
        {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',...
        >>> # Then use this content code to extract report titles
        >>> print(get_titles('ID'))
        {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
    """
    titles = {}
    for entry in _fetch_json(url):
        segments = entry['path'].split('/')
        if topic_content_code:
            # A report belongs to the requested content when the
            # second-to-last raw '/'-separated segment of its path equals the
            # code; entries whose path is too short to have one are skipped.
            if len(segments) < 2 or segments[-2] != topic_content_code:
                continue
        # An empty code applies no filter, as the documentation promises.
        titles[entry['title']] = [part for part in segments if part] + [entry['id']]
    return titles


# NOTE(review): this function is registered with gr.api below. Its signature is
# left untouched (including the list default, which is never mutated, and the
# `List[Dict]` return annotation, although a dict is returned) so the published
# API description does not change — confirm before tightening the annotations.
def get_query_values(topic_params: list[str] = []) -> List[Dict]:
    """Get query code and values for particular report.

    Args:
        topic_params (list[str]): arguments as a list that are needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID. These codes you can get from the function get_titles.

    Returns:
        Dict: A dictionary where each key is a query parameter code and the value is another dictionary mapping possible values to their descriptive texts.

    Raises:
        RuntimeError: if the request fails or the response does not contain a 'variables' entry.

    Examples:
        >>> # First get report topic parameters from get_titles
        >>> print(get_titles('ID'))
        {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
        >>> # Then use these values to get possible query values
        >>> print(get_query_values(['POP', 'ID', 'IDS', 'IDS010']))
        {'SEX_NEWBORN': {'T': 'Pavisam', 'M': 'Vīrieši', 'F': 'Sievietes'}, 'ContentsCode': {'IDS010': 'Dzīvi dzimuši', 'IDS0101': 'Nedzīvi dzimuši', 'IDS0102': 'Nedzīvi dzimuši uz 1000 dzīvi dzimušiem'}, 'TIME': {'1920': '1920' ... '2024': '2024'}}
    """
    url = START_URL + '/'.join(topic_params)
    try:
        # The request itself is inside the try block so that connection
        # errors and timeouts are wrapped like bad HTTP statuses are.
        data = _fetch_json(url)
        if not isinstance(data, dict) or 'variables' not in data:
            raise ValueError("Unexpected JSON structure: 'variables' key missing")
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Request failed: {e}") from e
    except ValueError as ve:
        raise RuntimeError(f"Parsing failed: {ve}") from ve

    # Map every variable code to {value: descriptive text}.
    return {
        var.get('code', ''): dict(zip(var.get('values', []), var.get('valueTexts', [])))
        for var in data['variables']
    }


def construct_csp_link(params: list[str]) -> str:
    """Build the portal (PxWeb) page URL of a report.

    All codes except the last one are joined with '__' after the 'START__'
    prefix and the last code (the report ID) becomes the final path segment.
    The language segment of the URL is always 'lv'.

    Args:
        params (list[str]): path codes followed by the report ID, e.g. ['POP', 'IR', 'IRE', 'IRE010'].

    Returns:
        str: the URL, e.g. 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__POP__IR__IRE/IRE010/'.

    Raises:
        ValueError: if params is empty.
    """
    if not params:
        raise ValueError("At least a report ID is required to build the link")
    # Using "everything but the last" instead of a fixed 3 + 1 split gives the
    # same result for four codes and also works for shorter paths.
    # NOTE(review): the URL shape for reports with fewer than four codes is
    # assumed to follow the same pattern — confirm against the portal.
    mid_path = '__'.join(params[:-1])
    last_part = params[-1]
    return f"{PXWEB_URL}{mid_path}/{last_part}/"


def get_csp_data(lang: str = 'en', topic_params: list[str] = [], **kwargs) -> List[Dict]:
    """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). Use 'Source URL' from the Returns to cite the data source.

    Args:
        lang (str): Language. Default value 'en'.
        topic_params (list[str]): arguments as a list that are needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID. These codes you can get from the function get_titles.
        kwargs: Keyword arguments for query configuration. Possible query argument names and their possible values can be obtained using the function get_query_values.

    Returns:
        list: The list of the dictionaries. First list element is the data source ({'Source URL': URL}); each following element is a data record, where dictionary's key 'key' contains query parameters and key 'values' contains values.

    Raises:
        ValueError: if topic_params is empty.

    Examples:
        >>> topics = ['POP', 'IR', 'IRE', 'IRE010']
        >>> query_args = get_query_values(topics)
        >>> print(query_args)
        {'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši', 'E_ABZ': 'Abāzi', 'E_ABK': 'Abhāzi',... 'E_SWE': 'Zviedri', 'OTH': 'Cita tautība', 'UNK_NSP': 'Nezināma, neizvēlēta'}, 'ContentsCode': {'IRE010': 'Skaits'}, 'TIME': {'1935': '1935', ... '2025': '2025'}}
        >>> # Then use these codes and values to get data for example Latvians for years 2024 and 2025.
        >>> # Value text 'Latvieši' explains what 'E_LAT' means.
        >>> data = get_csp_data(
        ...     lang='en',
        ...     topic_params=topics,
        ...     ETHNICITY=['E_LAT'],
        ...     TIME=['2024', '2025']
        ... )
        >>> print(data[0])
        {'Source URL': 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__POP__IR__IRE/IRE010/'}
        >>> print(data[1])
        {'key': ['E_LAT', '2024'], 'values': ['1186337']}
    """
    # Build the link first so that unusable parameters fail before any request
    # is made. The default list is never mutated.
    link = construct_csp_link(topic_params)
    stat = LVStat(lang, *topic_params)
    stat.set_query(**kwargs)
    return [{"Source URL": link}] + stat.get_data()['data']


with gr.Blocks() as demo:
    gr.Markdown("### Latvian CSP Data Query Interface")
    lang = gr.Dropdown(["en", "lv"], value="en", label="Language")

    # Step 1: Topic Selection (the topic list is fetched once, at start-up)
    topic_dict = get_topics()
    topic_dropdown = gr.Dropdown(choices=list(topic_dict.keys()), label="Select Topic")

    # Step 2: Topic Content (dynamically populated)
    topic_content_dropdown = gr.Dropdown(label="Select Topic Content", visible=False)

    # Step 3: Report Titles (dynamically populated)
    report_dropdown = gr.Dropdown(label="Select Report", visible=False)

    # Dynamic Link & Topic Params Output
    link_output = gr.Markdown(visible=False)
    topic_params_box = gr.Textbox(label="Topic Params", lines=1, interactive=True)
    kwargs_box = gr.Textbox(
        label="Query Parameters (Python dict, e.g., {'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']})",
        lines=4,
    )
    output = gr.JSON(label="Result")
    run_button = gr.Button("Run Query")

    # NOTE(review): the three update_* handlers below are documented with
    # comments rather than docstrings, as in the original file; presumably so
    # the text is not picked up as a tool description — confirm before
    # converting them to docstrings.

    def update_topic_content(topic_name: str) -> Tuple[Any, Dict[str, str], str]:
        # Populate the topic-content dropdown for the selected topic.
        #
        # Args:
        #     topic_name: name of the selected topic (a key of `topic_dict`).
        # Returns a tuple of:
        #     - gr.update with the content names as choices, made visible;
        #     - content_dict: content name -> content code for the topic;
        #     - topic_code: the code of the selected topic.
        # A cleared or unknown selection hides and empties the dropdown
        # instead of raising KeyError.
        topic_code = topic_dict.get(topic_name)
        if topic_code is None:
            return gr.update(choices=[], value=None, visible=False), {}, ''
        content_dict = get_topic_content(topic_code)
        return gr.update(choices=list(content_dict.keys()), visible=True), content_dict, topic_code

    def update_reports(topic_content_name: str, content_dict: dict = None) -> Tuple[Dict[str, List[str]], str, Any]:
        # Populate the report dropdown for the selected topic content.
        #
        # Args:
        #     topic_content_name: name of the selected topic content,
        #         e.g. "Darba samaksa (algas)".
        #     content_dict: content name -> content code, the second value
        #         returned by `update_topic_content`.
        # Returns a tuple of:
        #     - titles_dict: report title -> list of codes ending with the
        #       report ID, as returned by `get_titles`; the string form of
        #       such a list is what `run_get_csp_data` takes as
        #       `topic_params_str`;
        #     - topic_content_code: the code of the selected topic content;
        #     - gr.update with the report titles as choices, made visible.
        # A missing mapping or a cleared/stale selection hides and empties
        # the dropdown instead of raising.
        topic_content_code = (content_dict or {}).get(topic_content_name)
        if topic_content_code is None:
            return {}, '', gr.update(choices=[], value=None, visible=False)
        titles_dict = get_titles(topic_content_code)
        return titles_dict, topic_content_code, gr.update(choices=list(titles_dict.keys()), visible=True)

    def update_topic_params_and_link(report_title: str, titles_dict: dict) -> Tuple[Any, Any, Any]:
        # Fill the "Topic Params" box, the report link and a sample query
        # for the selected report.
        #
        # Args:
        #     report_title: title of the selected report.
        #     titles_dict: report title -> list of codes, the first value
        #         returned by `update_reports`.
        # Returns a tuple of:
        #     - the string form of the report's code list,
        #       e.g. "['POP', 'ID', 'IDS', 'IDS010']";
        #     - gr.update showing a Markdown link to the report page built
        #       by `construct_csp_link`;
        #     - gr.update with a sample query string holding the last three
        #       'TIME' values reported by `get_query_values`,
        #       e.g. "{'TIME': ['2020', '2021', '2022']}", or '{}' when
        #       there is no 'TIME' variable or the lookup fails.
        # A cleared or stale selection hides the link and leaves both text
        # boxes untouched.
        title_value = (titles_dict or {}).get(report_title)
        if title_value is None:
            return gr.update(), gr.update(visible=False), gr.update()

        topic_params_str = str(title_value)
        link = construct_csp_link(title_value)
        try:
            time_values = get_query_values(title_value).get('TIME', {})
            query = {'TIME': list(time_values)[-3:]} if time_values else {}
        except Exception:
            # Best effort: the sample query is only a convenience, so any
            # failure falls back to an empty query.
            query = {}
        return topic_params_str, gr.update(value=f"[{report_title}]({link})", visible=True), gr.update(value=str(query))

    topic_content_state = gr.State()
    titles_state = gr.State()
    topic_code_state = gr.State()
    topic_content_code_state = gr.State()

    topic_dropdown.change(
        fn=update_topic_content,
        inputs=topic_dropdown,
        outputs=[topic_content_dropdown, topic_content_state, topic_code_state],
    )
    topic_content_dropdown.change(
        fn=update_reports,
        inputs=[topic_content_dropdown, topic_content_state],
        outputs=[titles_state, topic_content_code_state, report_dropdown],
    )
    report_dropdown.change(
        fn=update_topic_params_and_link,
        inputs=[report_dropdown, titles_state],
        outputs=[topic_params_box, link_output, kwargs_box],
    )

    def run_get_csp_data(lang: str = 'en', topic_params_str: str = '[]', query_kwargs_str: str = '{}') -> Union[List[Dict], Dict[str, str]]:
        """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

        Args:
            lang (str): Language. Default value 'en'.
            topic_params_str (str): string representation of a list that is needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID. These codes you can get from the function get_titles.
            query_kwargs_str (str): string representation of a dictionary - keyword arguments for query configuration. Possible query argument names and their possible values can be obtained using the function get_query_values.

        Returns:
            list: The list of the dictionaries. First list element is the data source ({'Source URL': URL}); each following element is a data record, where dictionary's key 'key' contains query parameters and key 'values' contains values. If anything fails, a dictionary {'error': message} is returned instead.

        Examples:
            >>> # First get topic code
            >>> get_topics('Iedzīvotāji')
            {'Iedzīvotāji': 'POP'}
            >>> # Then use this code to get topic contents
            >>> print(get_topic_content('POP'))
            {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC'...
            >>> # Then use this content code to extract report titles
            >>> print(get_titles('IR'))
            {'Iedzīvotāju skaits gada sākumā, tā izmaiņas un dabiskās kustības galvenie rādītāji 1920 - 2021': ['POP', 'IR', 'IRS010'],...
            >>> # Use all these previous codes to select data for corresponding report
            >>> report = ['POP', 'IR', 'IRE', 'IRE010']
            >>> # Get 'code' and 'values' for filtering data
            >>> query_args = get_query_values(report)
            >>> print(query_args)
            {'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši',...}, 'ContentsCode': {'IRE010': 'Skaits'}, 'TIME': {'1935': '1935', ..., '2025': '2025'}}
            >>> # Get final result
            >>> data = run_get_csp_data(
            ...     lang='en',
            ...     topic_params_str=str(report),
            ...     query_kwargs_str="{'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']}"
            ... )
            >>> print(data[0])
            {'Source URL': 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__POP__IR__IRE/IRE010/'}
            >>> print(data[1])
            {'key': ['E_LAT', '2024'], 'values': ['1186337']}
        """
        try:
            # ast.literal_eval only accepts Python literals, so the text
            # boxes cannot be used to execute code.
            topic_params = ast.literal_eval(topic_params_str)
            # A blank parameter box means "no filters" rather than a syntax error.
            query_kwargs = ast.literal_eval((query_kwargs_str or '').strip() or '{}')
            if not isinstance(topic_params, list) or not isinstance(query_kwargs, dict):
                raise ValueError("Input format error")
            return get_csp_data(lang=lang, topic_params=topic_params, **query_kwargs)
        except Exception as e:
            # Top-level UI boundary: report the problem in the JSON output
            # instead of failing the request.
            return {"error": str(e)}

    gr.api(get_topics, api_name="get_topic_name_and_id")
    gr.api(get_topic_content, api_name="get_topic_content_name_and_id")
    gr.api(get_titles, api_name="get_report_titles")
    gr.api(get_query_values, api_name="get_query_values")

    run_button.click(fn=run_get_csp_data, inputs=[lang, topic_params_box, kwargs_box], outputs=output)


if __name__ == "__main__":
    demo.launch(mcp_server=True)