File size: 19,642 Bytes
5122a51
 
 
 
50f1ec6
5122a51
 
 
 
 
fae50e0
 
 
 
 
 
 
 
 
 
5122a51
 
 
 
 
 
 
 
 
 
 
 
 
 
fae50e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5122a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5c45b3
5122a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e254b2
5122a51
 
 
 
 
 
8e254b2
 
5122a51
 
 
 
 
 
 
 
 
8e254b2
 
 
 
 
 
 
 
 
 
 
 
5122a51
 
 
 
 
cc2dd22
 
 
 
 
5122a51
 
8e254b2
5122a51
 
 
 
 
 
 
 
 
 
 
8e254b2
5122a51
 
cc2dd22
5122a51
 
 
 
8e254b2
 
 
 
 
5122a51
 
 
 
 
 
 
 
 
 
 
cc2dd22
 
5122a51
 
cc2dd22
5122a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
004d11e
d20beae
 
 
 
 
 
 
 
 
 
 
 
5122a51
 
 
 
d20beae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5122a51
 
95bac5d
5122a51
96440be
d20beae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5122a51
 
 
 
 
 
8e254b2
5122a51
8e254b2
 
 
 
 
 
 
 
5122a51
 
 
 
 
 
 
 
 
 
 
 
3491363
95bac5d
86fb94a
5122a51
 
 
 
 
 
 
958a42d
5122a51
96440be
8e254b2
5122a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e254b2
5122a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8dce6d6
 
 
 
 
5122a51
 
 
5a32284
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
import ast
import gradio as gr # pip install "gradio[mcp]"
from pycspwrapper import LVStat # pip install pycspwrapper
import requests
from typing import Any, Dict, List, Tuple


def get_topics(name:str = '') -> dict:
    """Available topics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        name (str): Name of the topic. Letter case and surrounding whitespace are ignored. If empty or not found, function will return all available topics.
    Returns:
        dict: The dictionary of topics, where key is topic name and value is topic code.
    Raises:
        requests.exceptions.RequestException: If the portal cannot be reached, does not answer in time or answers with an HTTP error status.
    Examples:
        >>> get_topics('vide')
        {'Vide': 'ENV'}
        >>> print(get_topics())
        {'Iedzīvotāji': 'POP', 'Darbs': 'EMP', 'Sociālā aizsardzība un veselība': 'VES',...
    """
    base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/'
    # Bound the wait and reject HTTP error statuses so an unavailable portal
    # surfaces as a request error instead of a hang or an obscure parse error.
    response = requests.get(base_url, timeout=30)
    response.raise_for_status()
    topics = {item['text']: item['id'] for item in response.json()}

    wanted = name.strip().casefold()
    if wanted:
        # Compare case-insensitively, but report the topic under the exact
        # name used by the portal.
        for text, code in topics.items():
            if text.casefold() == wanted:
                return {text: code}
    return topics


def get_topic_content(topic: str) -> dict:
    """Available contents of the topic from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        topic (str): Topic code. Use get_topics to get topic code. The code is upper-cased before it is sent.
    Returns:
        dict: The dictionary of the contents of the topic, where key is the topic content and value is the topic content code.
    Raises:
        requests.exceptions.RequestException: If the portal cannot be reached, does not answer in time or answers with an HTTP error status.
    Examples:
        >>> # First get topic code
        ... get_topics('vide')
        {'Vide': 'ENV'}
        >>> # Then use this code to get content
        ... print(get_topic_content('ENV'))
        {'Vides konti': 'VI', 'Atkritumu apsaimniekošana': 'AK', 'Agro-vides rādītāji': 'AV',...
        >>> get_topics('Iedzīvotāji')
        {'Iedzīvotāji': 'POP'}
        >>> print(get_topic_content('POP'))
        {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',...
    """
    base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
    # Bound the wait and reject HTTP error statuses (e.g. an unknown topic
    # code) before trying to interpret the body as a list of entries.
    response = requests.get(base_url + topic.strip().upper(), timeout=30)
    response.raise_for_status()
    return {item['text']: item['id'] for item in response.json()}


def get_titles(topic_content_code:str = '',
               url:str = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*') -> dict:
    """Available data (titles) from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        topic_content_code (str): Topic content code. Use get_topic_content to get topic content code. If empty, function will return all available titles.
        url (str): URL from where to get list of available titles. Default value: 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*'.
    Returns:
        dict: The dictionary of the titles available from Official Statistics Portal, where key is the title name and value is the list of the non-empty path parts followed by the report ID (topic code, topic content code, topic sub-content code and report ID).
    Raises:
        requests.exceptions.RequestException: If the portal cannot be reached, does not answer in time or answers with an HTTP error status.
    Examples:
        >>> # First get topic code
        ... get_topics('Iedzīvotāji')
        {'Iedzīvotāji': 'POP'}
        >>> # Then use this code to get content
        ... print(get_topic_content('POP'))
        {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',...
        >>> # Then use this content code to extract report titles
        ... print(get_titles('ID'))
        {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    titles = {}
    for entry in response.json():
        path_parts = entry['path'].split('/')
        if topic_content_code:
            # The requested code is matched against the second-to-last
            # '/'-separated piece of the path; paths too short to have one
            # cannot match and are skipped instead of raising IndexError.
            if len(path_parts) < 2 or path_parts[-2] != topic_content_code:
                continue
        # An empty code means "no filter", so every entry is kept.
        titles[entry['title']] = [part for part in path_parts if part] + [entry['id']]
    return titles

def get_query_values(topic_params: list[str] = []) -> Dict[str, Dict[str, str]]:
    """Get query code and values for particular report.

    Args:
        topic_params (list[str]): Arguments as a list that are needed for data extraction, in the following order: topic code, topic content code, topic sub-content code, report ID. These codes you can get from the function get_titles.
    Returns:
        dict: A dictionary where each key is a query parameter code and the value is another dictionary mapping possible values to their descriptive texts.
    Raises:
        RuntimeError: If the request fails (connection problem, timeout, HTTP error status) or the response does not have the expected structure.
    Examples:
        >>> # First get report topic parameters from get_titles
        ... print(get_titles('ID'))
        {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
        >>> # Then use these values to get possible query values
        ... print(get_query_values(['POP', 'ID', 'IDS', 'IDS010']))
        {'SEX_NEWBORN': {'T': 'Pavisam', 'M': 'Vīrieši', 'F': 'Sievietes'}, 'ContentsCode': {'IDS010': 'Dzīvi dzimuši', 'IDS0101': 'Nedzīvi dzimuši', 'IDS0102': 'Nedzīvi dzimuši uz 1000 dzīvi dzimušiem'},
        'TIME': {'1920': '1920' ... '2024': '2024'}}
    """
    # NOTE: the list default is kept for signature compatibility; it is only
    # read here, never mutated.
    base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
    url = base_url + '/'.join(topic_params)
    try:
        # The request itself is inside the try block so that connection
        # errors and timeouts are reported the same way as HTTP errors.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raises HTTPError for bad responses (4xx, 5xx)
        data = response.json()
        if 'variables' not in data:
            raise ValueError("Unexpected JSON structure: 'variables' key missing")

        result = {}
        for var in data['variables']:
            code = var.get('code', '')
            # Pair every possible value with its descriptive text.
            result[code] = dict(zip(var.get('values', []), var.get('valueTexts', [])))
        return result
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Request failed: {e}") from e
    except ValueError as ve:
        raise RuntimeError(f"Parsing failed: {ve}") from ve

def construct_csp_link(params: list[str]) -> str:
    """Build the link to a report page of the CSP PxWeb site.

    Args:
        params (list[str]): Report codes in the following order: topic code, topic content code, topic sub-content code, report ID. Elements after the fourth one are ignored.
    Returns:
        str: The link, where the first three codes are joined with '__' after 'START__' and the report ID forms the last path segment.
    Raises:
        ValueError: If fewer than four codes are given.
    Examples:
        >>> construct_csp_link(['POP', 'ID', 'IDS', 'IDS010'])
        'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__POP__ID__IDS/IDS010/'
    """
    if len(params) < 4:
        # Fail with a message that names the problem instead of a bare
        # "list index out of range".
        raise ValueError(
            "params must contain topic code, topic content code, "
            f"topic sub-content code and report ID; got {len(params)} element(s)"
        )
    base_url = 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__'
    mid_path = '__'.join(params[:3])
    report_id = params[3]
    return f"{base_url}{mid_path}/{report_id}/"

def get_csp_data(lang: str = 'en', topic_params: list[str] = [], **kwargs) -> List[Dict]:
    """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). Use 'Source URL' from the Returns to cite the data source.

    Args:
        lang (str): Language. Default value 'en'.
        topic_params (list[str]): Arguments as a list that are needed for data extraction, in the following order: topic code, topic content code, topic sub-content code, report ID. These codes you can get from the function get_titles.
        kwargs: Keyword arguments for query configuration. Possible query argument names and their possible values can be obtained using the function get_query_values.
    Returns:
        list: The list of dictionaries. The first element is {'Source URL': link} with the link of the report page; it is followed by the entries of the 'data' part of the response, where key 'key' contains query parameters and key 'values' contains values.
    Examples:
        >>> topics = ['POP', 'IR', 'IRE', 'IRE010']
        >>> query_args = get_query_values(topics)
        >>> print(query_args)
        {'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši', 'E_ABZ': 'Abāzi', 'E_ABK': 'Abhāzi',...
        'E_SWE': 'Zviedri', 'OTH': 'Cita tautība', 'UNK_NSP': 'Nezināma, neizvēlēta'}, 'ContentsCode': {'IRE010': 'Skaits'},
        'TIME': {'1935': '1935', ... '2025': '2025'}}
        >>> # Then use these codes and values to get data for example Latvians for years 2024 and 2025.
        >>> # Value text 'Latvieši' explains what 'E_LAT' means.
        >>> data = get_csp_data(
        ...     lang='en',
        ...     topic_params=topics,
        ...     ETHNICITY=['E_LAT'],
        ...     TIME=['2024', '2025']
        ... )
        >>> print(data[0])
        {'Source URL': 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__POP__IR__IRE/IRE010/'}
        >>> print(data[1])
        {'key': ['E_LAT', '2024'], 'values': ['1186337']}
    """
    # Configure the pycspwrapper client for the report and apply the filters.
    stat_client = LVStat(lang, *topic_params)
    stat_client.set_query(**kwargs)

    # The citation entry always goes first in the returned list.
    source_entry = {"Source URL": construct_csp_link(topic_params)}

    payload = stat_client.get_data()
    return [source_entry] + payload['data']


# Build the Gradio UI. Components are declared here; the event handlers and
# their wiring follow further down inside the same Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("### Latvian CSP Data Query Interface")
    
    # Language passed through to the data query when "Run Query" is pressed.
    lang = gr.Dropdown(["en", "lv"], value="en", label="Language")

    # Step 1: Topic Selection
    # NOTE: get_topics() performs an HTTP request, so the topic list is
    # fetched once, while this module is being executed.
    topic_dict = get_topics()
    topic_dropdown = gr.Dropdown(choices=list(topic_dict.keys()), label="Select Topic")

    # Step 2: Topic Content (dynamically populated)
    # Created hidden; the topic handler fills in the choices and shows it.
    topic_content_dropdown = gr.Dropdown(label="Select Topic Content", visible=False)

    # Step 3: Report Titles (dynamically populated)
    # Created hidden; the topic-content handler fills in the choices and shows it.
    report_dropdown = gr.Dropdown(label="Select Report", visible=False)

    # Dynamic Link & Topic Params Output
    # Both are filled in by the report handler; the text box stays editable
    # so the codes can also be typed in by hand.
    link_output = gr.Markdown(visible=False)
    topic_params_box = gr.Textbox(label="Topic Params", lines=1, interactive=True)

    #topic_params = gr.Textbox(label="Topic Params (Python list, e.g., ['POP', 'IR', 'IRE', 'IRE010'])")
    # Query filters entered as the text of a Python dict literal.
    kwargs_box = gr.Textbox(
        label="Query Parameters (Python dict, e.g., {'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']})",
        lines=4,
    )
    # Shows the result of the query (or an error dictionary).
    output = gr.JSON(label="Result")

    run_button = gr.Button("Run Query")

    def update_topic_content(topic_name: str)-> Tuple[Any, Dict[str, str], str]:
        # Handler for a change of the topic dropdown: looks up the code of the
        # selected topic, fetches what content is available under it and
        # prepares the topic content dropdown.
        #
        # NOTE(review): as in the original, this description is kept in
        # comments rather than in a docstring, so the function's __doc__ stays
        # empty - presumably on purpose; confirm before converting it.
        #
        # Args:
        #     topic_name (str): The name of the selected topic. It must be one
        #         of the names returned by get_topics() (a key of topic_dict),
        #         e.g. 'Darbs', 'Iedzīvotāji', 'Vide'.
        # Returns:
        #     tuple:
        #         - gr.update: dropdown update with the content names as
        #           choices and visibility set to True.
        #         - dict: content name -> content code for the selected topic.
        #         - str: code of the selected topic.
        selected_code = topic_dict[topic_name]
        contents = get_topic_content(selected_code)
        dropdown_update = gr.update(choices=list(contents), visible=True)
        return dropdown_update, contents, selected_code

    def update_reports(topic_content_name: str, content_dict: dict = None) -> Tuple[Dict[str, str], str, Any]:
        # Handler for a change of the topic content dropdown: resolves the
        # selected content name to its code, fetches the report titles for
        # that code via get_titles and prepares the report dropdown.
        #
        # NOTE(review): as in the original, this description is kept in
        # comments rather than in a docstring, so the function's __doc__ stays
        # empty - presumably on purpose; confirm before converting it.
        #
        # Args:
        #     topic_content_name (str): The name of the selected topic content
        #         (e.g. "Darba samaksa (algas)").
        #     content_dict (dict): Mapping of topic content names to their
        #         codes, as produced by update_topic_content. Although the
        #         default is None, a mapping is required: it is indexed
        #         unconditionally.
        # Returns:
        #     tuple:
        #         - dict: report title -> list of codes
        #           [topic code, topic content code, sub-content code, report ID].
        #         - str: the code of the selected topic content.
        #         - gr.update: dropdown update with the report titles as
        #           choices and visibility set to True.
        content_code = content_dict[topic_content_name]
        report_titles = get_titles(content_code)
        dropdown_update = gr.update(choices=list(report_titles), visible=True)
        return report_titles, content_code, dropdown_update

    def update_topic_params_and_link(report_title: str, titles_dict: dict) -> Tuple[str, Any, Any]:
        # Handler for a change of the report dropdown: prepares the report
        # codes, a link to the report page and a sample query for the
        # selected report.
        #
        # NOTE(review): as in the original, this description is kept in
        # comments rather than in a docstring, so the function's __doc__ stays
        # empty - presumably on purpose; confirm before converting it.
        #
        # Args:
        #     report_title (str): The title of the selected report, as shown
        #         in the dropdown.
        #     titles_dict (dict): Mapping of report titles to their code list
        #         [topic code, content code, sub-content code, report ID],
        #         as returned by get_titles.
        # Returns:
        #     tuple:
        #         - str: string representation of the code list, e.g.
        #           "['POP', 'ID', 'IDS', 'IDS010']"; this string can be used
        #           as topic_params_str of run_get_csp_data.
        #         - gr.update: visible Markdown hyperlink "[title](link)"
        #           built with construct_csp_link.
        #         - gr.update: sample query text, e.g.
        #           "{'TIME': ['2020', '2021', '2022']}", or '{}'.
        report_codes = titles_dict[report_title]
        report_url = construct_csp_link(report_codes)

        try:
            # Suggest the last three values of the TIME dimension when the
            # report has one; otherwise the sample query stays empty.
            time_values = get_query_values(report_codes).get('TIME', '')
            if len(time_values) > 0:
                sample_query = {'TIME': list(time_values.keys())[-3:]}
            else:
                sample_query = {}
            query_str = str(sample_query)
        except Exception:
            # Best effort only: the sample query is a convenience, so any
            # failure while fetching it falls back to an empty dictionary.
            query_str = '{}'

        link_update = gr.update(value=f"[{report_title}]({report_url})", visible=True)
        query_update = gr.update(value=query_str)
        return str(report_codes), link_update, query_update

    # Per-session values passed between the handlers:
    #   topic_content_state      - content name -> content code of the chosen topic
    #   titles_state             - report title -> code list of the chosen content
    #   topic_code_state         - code of the chosen topic
    #   topic_content_code_state - code of the chosen topic content
    # The last two are only written below; no handler wired here reads them.
    topic_content_state = gr.State()
    titles_state = gr.State()
    topic_code_state = gr.State()
    topic_content_code_state = gr.State()

    # Cascade: topic -> topic content -> report -> topic params, link and sample query.
    # The order of each `outputs` list must match the order of the values
    # returned by the corresponding handler.
    topic_dropdown.change(fn=update_topic_content, inputs=topic_dropdown, outputs=[topic_content_dropdown, topic_content_state, topic_code_state])
    topic_content_dropdown.change(fn=update_reports, inputs=[topic_content_dropdown, topic_content_state], #, topic_code_state],
                                  outputs=[titles_state, topic_content_code_state, report_dropdown])
    report_dropdown.change(fn=update_topic_params_and_link, inputs=[report_dropdown, titles_state], #, topic_code_state, topic_content_code_state],
                           outputs=[topic_params_box, link_output, kwargs_box])
    
    def run_get_csp_data(lang: str = 'en', topic_params_str: str = '[]', query_kwargs_str: str = '{}') -> List[Dict]:
        """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

        Args:
            lang (str): Language. Default value 'en'.
            topic_params_str (str): String representation of a list that is needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID. These codes you can get from the function get_titles. A blank string is treated as '[]'.
            query_kwargs_str (str): String representation of a dictionary - keyword arguments for query configuration. Possible query argument names and their possible values can be obtained using the function get_query_values. A blank string is treated as '{}'.
        Returns:
            list: The list of dictionaries. The first element is {'Source URL': link}; in the following elements key 'key' contains query parameters and key 'values' contains values. If the inputs cannot be parsed or the query fails, a dictionary {'error': message} is returned instead.
        Examples:
            >>> # First get topic code
            ... get_topics('Iedzīvotāji')
            {'Iedzīvotāji': 'POP'}
            >>> # Then use this code to get topic contents
            ... print(get_topic_content('POP'))
            {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC'...
            >>> # Then use this content code to extract report titles and their codes
            ... print(get_titles('IR'))
            {..., '<report title>': ['POP', 'IR', 'IRE', 'IRE010'], ...}
            >>> # Use all these previous codes to select data for corresponding report
            ... report = ['POP', 'IR', 'IRE', 'IRE010']
            >>> # Get 'code' and 'values' for filtering data
            ... query_args = get_query_values(report)
            >>> print(query_args)
            {'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši',...}, 'ContentsCode': {'IRE010': 'Skaits'}, 'TIME': {'1935': '1935', ..., '2025': '2025'}}
            >>> # Get final result
            ... data = run_get_csp_data(
            ...        lang='en',
            ...        topic_params_str=str(report),
            ...        query_kwargs_str="{'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']}"
            ...        )
            >>> print(data[1])
            {'key': ['E_LAT', '2024'], 'values': ['1186337']}
        """
        try:
            # A cleared text box arrives as a blank string; treat it like the
            # parameter default instead of failing with a syntax error.
            topic_text = (topic_params_str or '').strip() or '[]'
            query_text = (query_kwargs_str or '').strip() or '{}'
            # literal_eval only accepts Python literals, so the text typed
            # into the UI is parsed without being executed.
            topic_params = ast.literal_eval(topic_text)
            query_kwargs = ast.literal_eval(query_text)
            if not isinstance(topic_params, list) or not isinstance(query_kwargs, dict):
                raise ValueError("Input format error")
            return get_csp_data(lang=lang, topic_params=topic_params, **query_kwargs)
        except Exception as e:
            # UI/API boundary: report every failure as data so the caller
            # gets a readable message in the result.
            return {"error": str(e)}

    # Register the plain helper functions with gr.api under explicit API
    # names, in addition to the UI event handlers wired in this Blocks context.
    gr.api(get_topics, api_name="get_topic_name_and_id")
    gr.api(get_topic_content, api_name="get_topic_content_name_and_id")
    gr.api(get_titles, api_name="get_report_titles")
    gr.api(get_query_values, api_name="get_query_values")    

    # Run the query with the language, topic params and query text currently
    # in the UI and show the returned value in the JSON output.
    run_button.click(fn=run_get_csp_data, inputs=[lang, topic_params_box, kwargs_box], outputs=output)

# Launch the app only when this file is run as a script; launch() is called
# with mcp_server=True (the "gradio[mcp]" extra is needed, see the import note).
if __name__ == "__main__":
    demo.launch(mcp_server=True)