Aivis committed on
Commit
8e254b2
·
verified ·
1 Parent(s): cc2dd22

Update app.py

Browse files

Change the output of `get_query_values` to be more readable for LLMs, update `update_topic_params_and_link` to handle the new output format, and correct the descriptions.

Files changed (1) hide show
  1. app.py +33 -17
app.py CHANGED
@@ -95,14 +95,15 @@ def get_query_values(topic_params: list[str] = []) -> List[Dict]:
95
  - report ID.
96
  These codes you can get from the function get_titles.
97
  Returns:
98
- list: The list of the dictionaries, where dictionary's 'code' value you should use as an argument name and 'values' value(s) as an argument values.
99
  Examples:
100
  >>> # First get report topic parameters from get_titles
101
  ... print(get_titles('DS'))
102
  {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
103
  >>> # Then use these values to get possible query values
104
  ... print(get_query_values(['POP', 'ID', 'IDS', 'IDS010']))
105
- [{'code': 'SEX_NEWBORN', 'values': ['T', 'M', 'F'], 'valueTexts': ['Pavisam', 'Vīrieši', 'Sievietes']}, {'code': 'TIME', 'values': ['1920', '1921', '1922', '1923',...
 
106
  """
107
  base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
108
  url = base_url + '/'.join(topic_params)
@@ -112,10 +113,18 @@ def get_query_values(topic_params: list[str] = []) -> List[Dict]:
112
  data = response.json()
113
  if 'variables' not in data:
114
  raise ValueError("Unexpected JSON structure: 'variables' key missing")
115
- return [{'code': i.get('code', ''),
116
- 'values': i.get('values', ''),
117
- 'valueTexts': i.get('valueTexts', '')}
118
- for i in data['variables']]
 
 
 
 
 
 
 
 
119
  except requests.exceptions.RequestException as e:
120
  raise RuntimeError(f"Request failed: {e}")
121
  except ValueError as ve:
@@ -128,7 +137,7 @@ def construct_csp_link(params: list[str]) -> str:
128
  return f"{base_url}{mid_path}/{last_part}/"
129
 
130
  def get_csp_data(lang: str = 'en', topic_params: list[str] = [], **kwargs) -> List[Dict]:
131
- """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). Always cite 'Source URL' in the output.
132
 
133
  Args:
134
  lang (str): Language. Default value 'en'.
@@ -140,16 +149,19 @@ def get_csp_data(lang: str = 'en', topic_params: list[str] = [], **kwargs) -> Li
140
  - report ID.
141
  These codes you can get from the function get_titles.
142
  kwargs: Keyword arguments for query configuration.
143
- Possible query argument names ('code') and their possible values ('values')
144
  can be obtained using the function get_query_values.
145
- 'valueTexts' from get_query_values could help if 'values' is unclear.
146
  Returns:
147
  list: The list of the dictionaries, where dictionary's key 'key' contains query parameters and key 'values' contains values. First list element is data source (URL) from CSB.
148
  Examples:
149
  >>> topics = ['POP', 'IR', 'IRE', 'IRE010']
150
  >>> query_args = get_query_values(topics)
151
  >>> print(query_args)
152
- [{'code': 'ETHNICITY', 'values': ['TOTAL', 'E_LAT',...], 'valueTexts': ['Pavisam', 'Latvieši',...]}, {'code': 'TIME', 'values': ['1935', '1959',..., '2025'],...}]
 
 
 
 
153
  >>> data = get_csp_data(
154
  ... lang='en',
155
  ... topic_params=topics,
@@ -272,11 +284,16 @@ with gr.Blocks() as demo:
272
 
273
  try:
274
  q = get_query_values(title_value)
 
275
  query = {}
276
- for i in q:
277
- if i.get('code', '') == 'TIME':
278
- query = {'TIME': i['values'][-3:]}
279
- break
 
 
 
 
280
  query_str = str(query)
281
  except Exception as e:
282
  query_str = '{}'
@@ -302,8 +319,7 @@ with gr.Blocks() as demo:
302
  topic_params_str (str): string representation of a list that is needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID.\
303
  These codes you can get from the function get_titles.
304
  query_kwargs_str (str): string representation of a dictionary - keyword arguments for query configuration.\
305
- Possible query argument names ('code') and their possible values ('values') can be obtained using the function get_query_values.\
306
- 'valueTexts' from get_query_values could help if 'values' is unclear.
307
  Returns:
308
  list: The list of the dictionaries, where dictionary's key 'key' contains query parameters and key 'values' contains values.
309
  Examples:
@@ -321,7 +337,7 @@ with gr.Blocks() as demo:
321
  >>> # Get 'code' and 'values' for filtering data
322
  ... query_args = get_query_values(report)
323
  >>> print(query_args)
324
- [{'code': 'ETHNICITY', 'values': ['TOTAL', 'E_LAT',...], 'valueTexts': ['Pavisam', 'Latvieši',...]}, {'code': 'TIME', 'values': ['1935', '1959',..., '2025'],...}]
325
  >>> # Get final result
326
  ... data = run_get_csp_data(
327
  ... lang='en',
 
95
  - report ID.
96
  These codes you can get from the function get_titles.
97
  Returns:
98
+ Dict: A dictionary where each key is a query parameter code and the value is another dictionary mapping possible values to their descriptive texts.
99
  Examples:
100
  >>> # First get report topic parameters from get_titles
101
  ... print(get_titles('DS'))
102
  {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
103
  >>> # Then use these values to get possible query values
104
  ... print(get_query_values(['POP', 'ID', 'IDS', 'IDS010']))
105
+ {'SEX_NEWBORN': {'T': 'Pavisam', 'M': 'Vīrieši', 'F': 'Sievietes'}, 'ContentsCode': {'IDS010': 'Dzīvi dzimuši', 'IDS0101': 'Nedzīvi dzimuši', 'IDS0102': 'Nedzīvi dzimuši uz 1000 dzīvi dzimušiem'},
106
+ 'TIME': {'1920': '1920' ... '2024': '2024'}}
107
  """
108
  base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
109
  url = base_url + '/'.join(topic_params)
 
113
  data = response.json()
114
  if 'variables' not in data:
115
  raise ValueError("Unexpected JSON structure: 'variables' key missing")
116
+
117
+ result = {}
118
+
119
+ for var in data['variables']:
120
+ code = var.get('code', '')
121
+ values = var.get('values', [])
122
+ value_texts = var.get('valueTexts', [])
123
+ # build dict mapping value -> valueText
124
+ mapping = dict(zip(values, value_texts))
125
+ result[code] = mapping
126
+
127
+ return result
128
  except requests.exceptions.RequestException as e:
129
  raise RuntimeError(f"Request failed: {e}")
130
  except ValueError as ve:
 
137
  return f"{base_url}{mid_path}/{last_part}/"
138
 
139
  def get_csp_data(lang: str = 'en', topic_params: list[str] = [], **kwargs) -> List[Dict]:
140
+ """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde). Use 'Source URL' from the Returns to cite the data source.
141
 
142
  Args:
143
  lang (str): Language. Default value 'en'.
 
149
  - report ID.
150
  These codes you can get from the function get_titles.
151
  kwargs: Keyword arguments for query configuration.
152
+ Possible query argument names and their possible values
153
  can be obtained using the function get_query_values.
 
154
  Returns:
155
  list: The list of the dictionaries, where dictionary's key 'key' contains query parameters and key 'values' contains values. First list element is data source (URL) from CSB.
156
  Examples:
157
  >>> topics = ['POP', 'IR', 'IRE', 'IRE010']
158
  >>> query_args = get_query_values(topics)
159
  >>> print(query_args)
160
+ {'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši', 'E_ABZ': 'Abāzi', 'E_ABK': 'Abhāzi',...
161
+ 'E_SWE': 'Zviedri', 'OTH': 'Cita tautība', 'UNK_NSP': 'Nezināma, neizvēlēta'}, 'ContentsCode': {'IRE010': 'Skaits'},
162
+ 'TIME': {'1935': '1935', ... '2025': '2025'}}
163
+ >>> # Then use these codes and values to get data for example Latvians for years 2024 and 2025.
164
+ >>> # Value text 'Latvieši' explains what 'E_LAT' means.
165
  >>> data = get_csp_data(
166
  ... lang='en',
167
  ... topic_params=topics,
 
284
 
285
  try:
286
  q = get_query_values(title_value)
287
+ timeval = q.get('TIME','')
288
  query = {}
289
+
290
+ if len(timeval) > 0:
291
+ query = {'TIME': list(timeval.keys())[-3:]}
292
+
293
+ # for i in q:
294
+ # if i.get('code', '') == 'TIME':
295
+ # query = {'TIME': i['values'][-3:]}
296
+ # break
297
  query_str = str(query)
298
  except Exception as e:
299
  query_str = '{}'
 
319
  topic_params_str (str): string representation of a list that is needed for data extraction. Arguments in the list should be in the following order: topic code, topic content code, topic sub-content code, report ID.\
320
  These codes you can get from the function get_titles.
321
  query_kwargs_str (str): string representation of a dictionary - keyword arguments for query configuration.\
322
+ Possible query argument names and their possible values can be obtained using the function get_query_values.
 
323
  Returns:
324
  list: The list of the dictionaries, where dictionary's key 'key' contains query parameters and key 'values' contains values.
325
  Examples:
 
337
  >>> # Get 'code' and 'values' for filtering data
338
  ... query_args = get_query_values(report)
339
  >>> print(query_args)
340
+ {'ETHNICITY': {'TOTAL': 'Pavisam', 'E_LAT': 'Latvieši',...}, 'ContentsCode': {'IRE010': 'Skaits'}, 'TIME': {'1935': '1935', ..., '2025': '2025'}}
341
  >>> # Get final result
342
  ... data = run_get_csp_data(
343
  ... lang='en',