|
|
import ast |
|
|
import gradio as gr |
|
|
from pycspwrapper import LVStat |
|
|
import requests |
|
|
from typing import Any, Dict, List, Tuple |
|
|
|
|
|
|
|
|
def get_topics(name:str = '') -> dict: |
|
|
"""Available topics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). |
|
|
|
|
|
Args: |
|
|
name (str): name of the topic. If not defined, function will return all available topics. |
|
|
Returns: |
|
|
dict: The dictionary of topics, where key is topic name and value is topic code. |
|
|
Examples: |
|
|
>>> get_topics('vide') |
|
|
{'Vide': 'ENV'} |
|
|
>>> print(get_topics()) |
|
|
{'Iedzīvotāji': 'POP', 'Darbs': 'EMP', 'Sociālā aizsardzība un veselība': 'VES',... |
|
|
""" |
|
|
name_capit = name.capitalize() |
|
|
base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/' |
|
|
content = requests.get(base_url) |
|
|
content_short = {i['text']: i['id'] for i in content.json()} |
|
|
|
|
|
if name_capit in content_short.keys(): |
|
|
return {name_capit: content_short[name_capit]} |
|
|
else: |
|
|
return content_short |
|
|
|
|
|
|
|
|
def get_topic_content(topic: str) -> dict: |
|
|
"""Available contents of the topic from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). |
|
|
|
|
|
Args: |
|
|
topic (str): topic code. Use get_topics to get topic code. |
|
|
Returns: |
|
|
dict: The dictionary of the contents of the topic, where key is the topic content and value is the topic content code. |
|
|
Examples: |
|
|
>>> # First get topic code |
|
|
... get_topics('vide') |
|
|
{'Vide': 'ENV'} |
|
|
>>> # Then use this code to get content |
|
|
... print(get_topic_content('ENV')) |
|
|
{'Vides konti': 'VI', 'Atkritumu apsaimniekošana': 'AK', 'Agro-vides rādītāji': 'AV',... |
|
|
>>> get_topics('Iedzīvotāji') |
|
|
{'Iedzīvotāji': 'POP'} |
|
|
>>> print(get_topic_content('POP')) |
|
|
{'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',... |
|
|
""" |
|
|
base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/' |
|
|
content = requests.get(base_url+topic.upper()) |
|
|
content_short = {i['text']: i['id'] for i in content.json()} |
|
|
return content_short |
|
|
|
|
|
|
|
|
def get_titles(topic_content_code:str = '', |
|
|
url:str = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*') -> dict: |
|
|
"""Available data (titles) from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). |
|
|
|
|
|
Args: |
|
|
topic_content_code (str): topic content code. Use get_topic_content to get topic content code. |
|
|
If not defined, function will return all available titles. |
|
|
url (str): URL from where to get list of available titles. Default value: 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*'. |
|
|
Returns: |
|
|
dict: The dictionary of the titles available from Official Statistics Portal, |
|
|
where key is the title name and value is the list of 4 elements: topic code, topic content code, topic sub-content code and report ID. |
|
|
Examples: |
|
|
>>> # First get topic code |
|
|
... get_topics('Darbs') |
|
|
{'Darbs': 'EMP'} |
|
|
>>> # Then use this code to get content |
|
|
... print(get_topic_content('EMP')) |
|
|
{'Darba samaksa (algas)': 'DS', 'Darbaspēka izmaksas': 'DI', 'Darbvietas un darba laiks': 'DV',... |
|
|
>>> # Then use this content code to extract report titles |
|
|
... print(get_titles('DS')) |
|
|
{'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],... |
|
|
""" |
|
|
alldb = requests.get(url) |
|
|
dict_result = {} |
|
|
for i in alldb.json(): |
|
|
if topic_content_code == i['path'].split('/')[-2]: |
|
|
dict_result[i['title']] = [j for j in i['path'].split('/') if j]+[i['id']] |
|
|
return dict_result |
|
|
|
|
|
def get_query_values(topic_params: list[str] = []) -> List[Dict]: |
|
|
"""Get query code and values for particular report. |
|
|
|
|
|
Args: |
|
|
topic_params (list[str]): arguments as a list that are needed for data extraction. |
|
|
Arguments in the list should be in the following order: |
|
|
- topic code, |
|
|
- topic content code, |
|
|
- topic sub-content code |
|
|
- report ID. |
|
|
These codes you can get from the function get_titles. |
|
|
Returns: |
|
|
Dict: A dictionary where each key is a query parameter code and the value is another dictionary mapping possible values to their descriptive texts. |
|
|
Examples: |
|
|
>>> # First get report topic parameters from get_titles |
|
|
... print(get_titles('DS')) |
|
|
{'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],... |
|
|
>>> # Then use these values to get possible query values |
|
|
... print(get_query_values(['POP', 'ID', 'IDS', 'IDS010'])) |
|
|
{'SEX_NEWBORN': {'T': 'Pavisam', 'M': 'Vīrieši', 'F': 'Sievietes'}, 'ContentsCode': {'IDS010': 'Dzīvi dzimuši', 'IDS0101': 'Nedzīvi dzimuši', 'IDS0102': 'Nedzīvi dzimuši uz 1000 dzīvi dzimušiem'}, |
|
|
'TIME': {'1920': '1920' ... '2024': '2024'}} |
|
|
""" |
|
|
base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/' |
|
|
url = base_url + '/'.join(topic_params) |
|
|
response = requests.get(url) |
|
|
try: |
|
|
response.raise_for_status() |
|
|
data = response.json() |
|
|
if 'variables' not in data: |
|
|
raise ValueError("Unexpected JSON structure: 'variables' key missing") |
|
|
|
|
|
result = {} |
|
|
|
|
|
for var in data['variables']: |
|
|
code = var.get('code', '') |
|
|
values = var.get('values', []) |
|
|
value_texts = var.get('valueTexts', []) |
|
|
|
|
|
mapping = dict(zip(values, value_texts)) |
|
|
result[code] = mapping |
|
|
|
|
|
return result |
|
|
except requests.exceptions.RequestException as e: |
|
|
raise RuntimeError(f"Request failed: {e}") |
|
|
except ValueError as ve: |
|
|
raise RuntimeError(f"Parsing failed: {ve}") |
|
|
|
|
|
def construct_csp_link(params: list[str]) -> str: |
|
|
base_url = 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__' |
|
|
mid_path = '__'.join(params[:3]) |
|
|
last_part = params[3] |
|
|
return f"{base_url}{mid_path}/{last_part}/" |
|
|
|
|
|
def get_csp_data(lang: str = 'en', topic_params: list[str] = [], **kwargs) -> List[Dict]: |
|
|
"""Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). Use 'Source URL' from the Returns to cite the data source. |
|
|
|
|
|
Args: |
|
|
lang (str): Language. Default value 'en'. |
|
|
topic_params (list[str]): arguments as a list that are needed for data extraction. |
|
|
Arguments in the list should be in the following order: |
|
|
- topic code, |
|
|
- topic content code, |
|
|
- topic sub-content code |
|
|
- report ID. |
|
|
These codes you can get from the function get_titles. |
|
|
kwargs: Keyword arguments for query configuration. |
|
|
Possible query argument names and their possible values |
|
|
can be obtained using the function get_query_values. |
|
|
Returns: |
|
|
list: The list of the dictionaries, where dictionary's key 'key' contains query parameters and key 'values' contains values. First list element is data source (URL) from CSB. |
|
|
Examples: |
|
|
>>> topics = ['POP', 'IR', 'IRE', 'IRE010'] |
|
|
>>> query_args = get_query_values(topics) |
|
|
>>> print(query_args) |
|
|
{'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši', 'E_ABZ': 'Abāzi', 'E_ABK': 'Abhāzi',... |
|
|
'E_SWE': 'Zviedri', 'OTH': 'Cita tautība', 'UNK_NSP': 'Nezināma, neizvēlēta'}, 'ContentsCode': {'IRE010': 'Skaits'}, |
|
|
'TIME': {'1935': '1935', ... '2025': '2025'}} |
|
|
>>> # Then use these codes and values to get data for example Latvians for years 2024 and 2025. |
|
|
>>> # Value text 'Latvieši' explains what 'E_LAT' means. |
|
|
>>> data = get_csp_data( |
|
|
... lang='en', |
|
|
... topic_params=topics, |
|
|
... ETHNICITY=['E_LAT'], |
|
|
... TIME=['2024', '2025'] |
|
|
... ) |
|
|
>>> print(data[0]) |
|
|
{'key': ['E_LAT', '2024'], 'values': ['1186337']} |
|
|
""" |
|
|
csp2 = LVStat(lang, *topic_params) |
|
|
csp2.set_query(**kwargs) |
|
|
|
|
|
link = construct_csp_link(topic_params) |
|
|
|
|
|
cspdata2 = csp2.get_data() |
|
|
return [{"Source URL": link}] + cspdata2['data'] |
|
|
|
|
|
|
|
|
with gr.Blocks() as demo: |
|
|
gr.Markdown("### Latvian CSP Data Query Interface") |
|
|
|
|
|
lang = gr.Dropdown(["en", "lv"], value="en", label="Language") |
|
|
|
|
|
|
|
|
topic_dict = get_topics() |
|
|
topic_dropdown = gr.Dropdown(choices=list(topic_dict.keys()), label="Select Topic") |
|
|
|
|
|
|
|
|
topic_content_dropdown = gr.Dropdown(label="Select Topic Content", visible=False) |
|
|
|
|
|
|
|
|
report_dropdown = gr.Dropdown(label="Select Report", visible=False) |
|
|
|
|
|
|
|
|
link_output = gr.Markdown(visible=False) |
|
|
topic_params_box = gr.Textbox(label="Topic Params", lines=1, interactive=True) |
|
|
|
|
|
|
|
|
kwargs_box = gr.Textbox( |
|
|
label="Query Parameters (Python dict, e.g., {'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']})", |
|
|
lines=4, |
|
|
) |
|
|
output = gr.JSON(label="Result") |
|
|
|
|
|
run_button = gr.Button("Run Query") |
|
|
|
|
|
def update_topic_content(topic_name: str)-> Tuple[Any, Dict[str, str], str]: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
topic_code = topic_dict[topic_name] |
|
|
content_dict = get_topic_content(topic_code) |
|
|
return gr.update(choices=list(content_dict.keys()), visible=True), content_dict, topic_code |
|
|
|
|
|
def update_reports(topic_content_name: str, content_dict: dict = None) -> Tuple[Dict[str, str], str, Any]: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
topic_content_code = content_dict[topic_content_name] |
|
|
titles_dict = get_titles(topic_content_code) |
|
|
return titles_dict, topic_content_code, gr.update(choices=list(titles_dict.keys()), visible=True) |
|
|
|
|
|
def update_topic_params_and_link(report_title: str, titles_dict: dict) -> Tuple[str, Any, Any]: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
title_value = titles_dict[report_title] |
|
|
topic_params_str = str(title_value) |
|
|
link = construct_csp_link(title_value) |
|
|
|
|
|
try: |
|
|
q = get_query_values(title_value) |
|
|
timeval = q.get('TIME','') |
|
|
query = {} |
|
|
|
|
|
if len(timeval) > 0: |
|
|
query = {'TIME': list(timeval.keys())[-3:]} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
query_str = str(query) |
|
|
except Exception as e: |
|
|
query_str = '{}' |
|
|
|
|
|
return topic_params_str, gr.update(value=f"[{report_title}]({link})", visible=True), gr.update(value=query_str) |
|
|
|
|
|
topic_content_state = gr.State() |
|
|
titles_state = gr.State() |
|
|
topic_code_state = gr.State() |
|
|
topic_content_code_state = gr.State() |
|
|
|
|
|
topic_dropdown.change(fn=update_topic_content, inputs=topic_dropdown, outputs=[topic_content_dropdown, topic_content_state, topic_code_state]) |
|
|
topic_content_dropdown.change(fn=update_reports, inputs=[topic_content_dropdown, topic_content_state], |
|
|
outputs=[titles_state, topic_content_code_state, report_dropdown]) |
|
|
report_dropdown.change(fn=update_topic_params_and_link, inputs=[report_dropdown, titles_state], |
|
|
outputs=[topic_params_box, link_output, kwargs_box]) |
|
|
|
|
|
def run_get_csp_data(lang: str = 'en', topic_params_str: str = '[]', query_kwargs_str: str = '{}') -> List[Dict]: |
|
|
"""Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). |
|
|
|
|
|
Args: |
|
|
lang (str): Language. Default value 'en'. |
|
|
topic_params_str (str): string representation of a list that is needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID.\ |
|
|
These codes you can get from the function get_titles. |
|
|
query_kwargs_str (str): string representation of a dictionary - keyword arguments for query configuration.\ |
|
|
Possible query argument names and their possible values can be obtained using the function get_query_values. |
|
|
Returns: |
|
|
list: The list of the dictionaries, where dictionary's key 'key' contains query parameters and key 'values' contains values. |
|
|
Examples: |
|
|
>>> # First get topic code |
|
|
... get_topics('Iedzīvotāji') |
|
|
{'Iedzīvotāji': 'POP'} |
|
|
>>> # Then use this code to get topic contents |
|
|
... print(get_topic_content('POP')) |
|
|
{'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC'... |
|
|
>>> # Then use this content code to extract report titles |
|
|
... print(get_titles('IR')) |
|
|
{'Iedzīvotāju skaits gada sākumā, tā izmaiņas un dabiskās kustības galvenie rādītāji 1920 - 2021': ['POP', 'IR', 'IRS010'],... |
|
|
>>> # Use all these previous codes to select data for corresponding report |
|
|
... report = ['POP', 'IR', 'IRE', 'IRE010'] |
|
|
>>> # Get 'code' and 'values' for filtering data |
|
|
... query_args = get_query_values(report) |
|
|
>>> print(query_args) |
|
|
{'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši',...}, 'ContentsCode': {'IRE010': 'Skaits'}, 'TIME': {'1935': '1935', ..., '2025': '2025'}} |
|
|
>>> # Get final result |
|
|
... data = run_get_csp_data( |
|
|
... lang='en', |
|
|
... topic_params_str = str(report), |
|
|
... query_kwargs_str = '{'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']}' |
|
|
... ) |
|
|
>>> print(data[0]) |
|
|
{'key': ['E_LAT', '2024'], 'values': ['1186337']} |
|
|
""" |
|
|
try: |
|
|
topic_params = ast.literal_eval(topic_params_str) |
|
|
query_kwargs = ast.literal_eval(query_kwargs_str) |
|
|
if not isinstance(topic_params, list) or not isinstance(query_kwargs, dict): |
|
|
raise ValueError("Input format error") |
|
|
result = get_csp_data(lang=lang, topic_params=topic_params, **query_kwargs) |
|
|
return result |
|
|
except Exception as e: |
|
|
return {"error": str(e)} |
|
|
|
|
|
gr.api(get_topics, api_name="get_topic_name_and_id") |
|
|
gr.api(get_topic_content, api_name="get_topic_content_name_and_id") |
|
|
gr.api(get_titles, api_name="get_report_titles") |
|
|
gr.api(get_query_values, api_name="get_query_values") |
|
|
|
|
|
run_button.click(fn=run_get_csp_data, inputs=[lang, topic_params_box, kwargs_box], outputs=output) |
|
|
|
|
|
if __name__ == "__main__": |
|
|
demo.launch(mcp_server=True) |
|
|
|