# ESGen / esgen /utils.py
# Bohui Zhang
# Update the second version
# 41bef2b
import re
from typing import Optional
import requests
from esgen.config import N_SEARCH_RESULTS, PREFIXES
def wikidata_api_search(inputs: str = " ", search_type: str = "item", timeout: float = 10.0) -> list:
    """
    Query the Wikidata ``wbsearchentities`` endpoint and turn the hits into (name, value) choices.

    :param inputs: free-text search string; it is sent as a query parameter and therefore URL-encoded
    :param search_type: "item" or "property"; any other value falls back to "item"
    :param timeout: seconds to wait for the HTTP response before giving up
    :return: list of choices of the form (name, value), where name is
        "<label> (<id>): <description>" and value is the entity id; an empty list when the
        request fails, the response has no "search" entry, or there are no hits
    """
    if search_type not in ("item", "property"):
        search_type = "item"

    # Let requests build the query string so that characters such as "&", "#" or "+" in
    # ``inputs`` are escaped instead of corrupting the URL.
    params = {
        "action": "wbsearchentities",
        "search": inputs,
        "language": "en",
        "type": search_type,
        "limit": N_SEARCH_RESULTS,
        "format": "json",
    }
    try:
        response = requests.get("https://www.wikidata.org/w/api.php", params=params, timeout=timeout)
        data = response.json()['search']
    except (requests.RequestException, ValueError, KeyError, IndexError):
        # Network failure, timeout, non-JSON body, or a payload without "search":
        # treated the same as "no results".
        return []

    if not data:
        return []

    choices = []
    for item in data:
        try:
            name = f"{item['display']['label']['value']} ({item['id']}): {item['display']['description']['value']}"
            value = f"{item['id']}"
        except (KeyError, IndexError):  # skip item if elements are missing
            continue
        # (name: the displayed name of the checkbox button, value: the value to be passed to the function)
        choices.append((name, value))
    return choices
def get_id(uri: str) -> str:
    """
    Return the trailing segment of a URI.

    :param uri: string to cut, e.g. "http://www.wikidata.org/entity/Q42"
    :return: everything after the last "/" in ``uri``; the whole string when it has no "/",
        and an empty string when it ends with "/"
    """
    _, _, tail = uri.rpartition("/")
    return tail
def get_uri(item_id: str, prefix: Optional[str] = None) -> str:
    """
    Expand an identifier into a URI using the PREFIXES table.

    :param item_id: a local name ("Q42"), a prefixed name ("wd:Q42"), or an already complete URI
    :param prefix: key into PREFIXES; when None, the prefix is taken from the part of
        ``item_id`` before its first ":" (if any)
    :return: ``PREFIXES[prefix] + local name`` when a prefix applies; otherwise ``item_id`` unchanged
    :raises KeyError: if an explicitly passed ``prefix`` is not a key of PREFIXES
    """
    if prefix is None and ":" in item_id:
        # Split on the first ":" only, so a local name that itself contains ":" stays intact.
        inferred, _, local_name = item_id.partition(":")
        if inferred not in PREFIXES:
            # Not a known prefix (e.g. the "http" scheme of a full URI): pass the value through.
            return item_id
        return PREFIXES[inferred] + local_name
    if prefix:
        return PREFIXES[prefix] + item_id
    return item_id
def position_start_line(shexc_text: str) -> int:
    """
    Find the first line of a ShExC text that begins with "start" or "<".

    :param shexc_text: text to scan, split on "\\n"
    :return: zero-based index of the first matching line; 0 when no line matches
    """
    rows = shexc_text.split("\n")
    hits = (number for number, row in enumerate(rows) if row.startswith(("start", "<")))
    return next(hits, 0)
def locate_comment(document: list[str], case: str) -> str | int:
"""
locate comment in document
:param document: lines after the comment
:param case:
:return: the next nonempty line in document
"""
if case == "general":
for line in document[1:]:
if line:
return line
else: # case == "constraint"
line = document[0]
if line[:line.index('#')].strip():
return line[:line.index('#')].rstrip()
else:
for line in document[1:]:
if line:
return line
return 0