|
|
import os
|
|
|
|
|
|
import gradio as gr
|
|
|
|
|
|
from esgen.config import NODE_CONSTRAINTS
|
|
|
from esgen.model import ESGenerationModel
|
|
|
from esgen.queries import collect_examples, query_value_type
|
|
|
from esgen.verbalizer import chatbot_verbaliser, init_comment_verbaliser
|
|
|
from esgen.utils import wikidata_api_search, get_id
|
|
|
|
|
|
|
|
|
def entity_schema_generation(name, class_id, threshold, property_types):
    """
    Create the initial EntitySchema and the chatbot prompt for its first pending property.

    The four arguments are forwarded, in order, to ``ESGenerationModel``.

    :param name: first positional argument of ``ESGenerationModel``
    :param class_id: second positional argument of ``ESGenerationModel``
    :param threshold: third positional argument of ``ESGenerationModel``
    :param property_types: fourth positional argument of ``ESGenerationModel``
    :return: ``(str(model), model.pending, [[None, prompt]])`` on success; if an
        ``IndexError`` or ``KeyError`` is raised anywhere along the way, returns
        ``(error_message, {}, [[None, error_message]])`` instead
    """
    failure_text = "Failed to generate the initial EntitySchema. Please check your input class, retry with a lower cutoff, or raise an issue."
    try:
        schema = ESGenerationModel(name, class_id, threshold, property_types)
        schema.generate_initial_schema()
        # An empty pending list raises IndexError here and is reported as a failure.
        first = schema.pending["pending"][0]
        # The examples are stored on the pending entry itself, so they travel
        # with ``schema.pending`` in the returned tuple.
        first["examples"] = collect_examples(schema.class_id, get_id(first["uri"]))
        prompt = chatbot_verbaliser(
            first["label"],
            get_id(first["uri"]),
            first["frequency"],
            first["examples"],
        )
        return str(schema), schema.pending, [[None, prompt]]
    except (IndexError, KeyError):
        return failure_text, {}, [[None, failure_text]]
|
|
|
|
|
|
|
|
|
def input_yes(es_output, es_json, history):
    """
    Handle a "Yes" answer: add the first pending property with cardinality
    "has 1 or more matching statements", then prompt for the next property.

    :param es_output: schema text, parsed with ``ESGenerationModel.load_es_shexc``
    :param es_json: object whose ``"pending"`` entry is the list of properties
        still to be reviewed; its first element is removed in place
    :param history: chat history list; a ``("Yes", reply)`` pair is appended in
        place unless the pending list was already empty on entry
    :return: ``(str(model), model.pending, history)``
    """
    queue = es_json["pending"]
    if not queue:
        # Nothing to review: rebuild the model from the text and return as-is.
        schema = ESGenerationModel(name="")
        schema.load_es_shexc(es_output)
        schema.update_pending(queue)
        return str(schema), schema.pending, history

    accepted = queue.pop(0)
    schema = ESGenerationModel(name="")
    schema.load_es_shexc(es_output)

    # Dict literal entries are evaluated top to bottom, matching the order in
    # which the keyword arguments were originally evaluated.
    constraint = {
        "shape_id": schema.name,
        "predicate": get_id(accepted["uri"]),
        "allowed_values": "datatypes",
        "classes": None,
        "datatype": query_value_type(
            class_id=schema.class_id,
            property_id=get_id(accepted["uri"]),
            return_format="str",
        ),
        "cardinality": "has 1 or more matching statements",
        "comment": init_comment_verbaliser(prop_label=accepted["label"], freq=accepted["frequency"]),
    }
    schema.insert_triple_constraint(**constraint)
    schema.update_pending(queue)

    try:
        upcoming = schema.pending["pending"][0]
    except IndexError:
        # No property left to ask about.
        reply = "The EntitySchema is complete. Thank you for your input."
    else:
        upcoming["examples"] = collect_examples(schema.class_id, get_id(upcoming["uri"]))
        reply = chatbot_verbaliser(
            upcoming["label"],
            get_id(upcoming["uri"]),
            upcoming["frequency"],
            upcoming["examples"],
        )

    history.append(("Yes", reply))
    return str(schema), schema.pending, history
|
|
|
|
|
|
|
|
|
def input_optional(es_output, es_json, history):
    """
    Handle an "Optional" answer: add the first pending property with cardinality
    "has 0 or more matching statements", then prompt for the next property.

    :param es_output: schema text, parsed with ``ESGenerationModel.load_es_shexc``
    :param es_json: object whose ``"pending"`` entry is the list of properties
        still to be reviewed; its first element is removed in place
    :param history: chat history list; an ``("Optional", reply)`` pair is appended
        in place unless the pending list was already empty on entry
    :return: ``(str(model), model.pending, history)``
    """
    queue = es_json["pending"]
    if not queue:
        # Nothing to review: rebuild the model from the text and return as-is.
        schema = ESGenerationModel(name="")
        schema.load_es_shexc(es_output)
        schema.update_pending(queue)
        return str(schema), schema.pending, history

    optional = queue.pop(0)
    schema = ESGenerationModel(name="")
    schema.load_es_shexc(es_output)

    # Dict literal entries are evaluated top to bottom, matching the order in
    # which the keyword arguments were originally evaluated.
    constraint = {
        "shape_id": schema.name,
        "predicate": get_id(optional["uri"]),
        "allowed_values": "datatypes",
        "classes": None,
        "datatype": query_value_type(
            class_id=schema.class_id,
            property_id=get_id(optional["uri"]),
            return_format="str",
        ),
        "cardinality": "has 0 or more matching statements",
        "comment": init_comment_verbaliser(prop_label=optional["label"], freq=optional["frequency"]),
    }
    schema.insert_triple_constraint(**constraint)
    schema.update_pending(queue)

    try:
        upcoming = schema.pending["pending"][0]
    except IndexError:
        # No property left to ask about.
        reply = "The EntitySchema is complete. Thank you for your input."
    else:
        upcoming["examples"] = collect_examples(schema.class_id, get_id(upcoming["uri"]))
        reply = chatbot_verbaliser(
            upcoming["label"],
            get_id(upcoming["uri"]),
            upcoming["frequency"],
            upcoming["examples"],
        )

    history.append(("Optional", reply))
    return str(schema), schema.pending, history
|
|
|
|
|
|
|
|
|
def input_no(es_output, es_json, history):
    """
    Handle a "No" answer: add the first pending property with cardinality
    "has no matching statements", then prompt for the next property.

    :param es_output: schema text, parsed with ``ESGenerationModel.load_es_shexc``
    :param es_json: object whose ``"pending"`` entry is the list of properties
        still to be reviewed; its first element is removed in place
    :param history: chat history list; a ``("No", reply)`` pair is appended in
        place unless the pending list was already empty on entry
    :return: ``(str(model), model.pending, history)``
    """
    queue = es_json["pending"]
    if not queue:
        # Nothing to review: rebuild the model from the text and return as-is.
        schema = ESGenerationModel(name="")
        schema.load_es_shexc(es_output)
        schema.update_pending(queue)
        return str(schema), schema.pending, history

    rejected = queue.pop(0)
    schema = ESGenerationModel(name="")
    schema.load_es_shexc(es_output)

    # Dict literal entries are evaluated top to bottom, matching the order in
    # which the keyword arguments were originally evaluated.
    constraint = {
        "shape_id": schema.name,
        "predicate": get_id(rejected["uri"]),
        "allowed_values": "datatypes",
        "classes": None,
        "datatype": query_value_type(
            class_id=schema.class_id,
            property_id=get_id(rejected["uri"]),
            return_format="str",
        ),
        "cardinality": "has no matching statements",
        "comment": init_comment_verbaliser(prop_label=rejected["label"], freq=rejected["frequency"]),
    }
    schema.insert_triple_constraint(**constraint)
    schema.update_pending(queue)

    try:
        upcoming = schema.pending["pending"][0]
    except IndexError:
        # No property left to ask about.
        reply = "The EntitySchema is complete. Thank you for your input."
    else:
        upcoming["examples"] = collect_examples(schema.class_id, get_id(upcoming["uri"]))
        reply = chatbot_verbaliser(
            upcoming["label"],
            get_id(upcoming["uri"]),
            upcoming["frequency"],
            upcoming["examples"],
        )

    history.append(("No", reply))
    return str(schema), schema.pending, history
|
|
|
|
|
|
|
|
|
def input_skip(es_output, es_json, history):
    """
    Handle a "Skip" answer: drop the first pending property without adding any
    constraint, then prompt for the next property.

    :param es_output: schema text, parsed with ``ESGenerationModel.load_es_shexc``
    :param es_json: object whose ``"pending"`` entry is the list of properties
        still to be reviewed; its first element is removed in place
    :param history: chat history list; a ``("Skip", reply)`` pair is appended in
        place unless the pending list was already empty on entry
    :return: ``(str(model), model.pending, history)``
    """
    queue = es_json["pending"]
    if not queue:
        # Nothing to review: rebuild the model from the text and return as-is.
        schema = ESGenerationModel(name="")
        schema.load_es_shexc(es_output)
        schema.update_pending(queue)
        return str(schema), schema.pending, history

    # The skipped entry is discarded; it is not recorded in the schema.
    queue.pop(0)
    schema = ESGenerationModel(name="")
    schema.load_es_shexc(es_output)
    schema.update_pending(queue)

    try:
        upcoming = schema.pending["pending"][0]
    except IndexError:
        # No property left to ask about.
        reply = "The EntitySchema is complete. Thank you for your input."
    else:
        upcoming["examples"] = collect_examples(schema.class_id, get_id(upcoming["uri"]))
        reply = chatbot_verbaliser(
            upcoming["label"],
            get_id(upcoming["uri"]),
            upcoming["frequency"],
            upcoming["examples"],
        )

    history.append(("Skip", reply))
    return str(schema), schema.pending, history
|
|
|
|
|
|
|
|
|
def update_property_choices(inputs):
    """
    Build the radio list of property search results for the given text.

    :param inputs: search text; when falsy no search is performed
    :return: a hidden ``gr.Radio`` when there is no input or the search yields
        nothing, otherwise a visible, interactive ``gr.Radio`` whose choices are
        the results of ``wikidata_api_search(..., search_type="property")``
    """
    matches = wikidata_api_search(inputs, search_type="property") if inputs else None
    if matches:
        return gr.Radio(choices=matches, label="", interactive=True, visible=True)
    return gr.Radio(visible=False)
|
|
|
|
|
|
|
|
|
def update_class_choices(inputs):
    """
    Build the radio list of item search results for the last comma-separated
    entry of the given text.

    Only the text after the final comma is searched. Surrounding whitespace is
    stripped from it, and if nothing remains (for example the text ends with a
    comma) no search is performed.

    :param inputs: comma-separated text; when falsy no search is performed
    :return: a hidden ``gr.Radio`` when there is nothing to search for or the
        search yields nothing, otherwise a visible, interactive ``gr.Radio``
        whose choices are the results of
        ``wikidata_api_search(..., search_type="item")``
    """
    if not inputs:
        return gr.Radio(visible=False)

    # Text ending in "," leaves an empty last segment; guard against sending
    # an empty query to the search helper.
    term = inputs.split(",")[-1].strip()
    if not term:
        return gr.Radio(visible=False)

    choices = wikidata_api_search(term, search_type="item")
    if not choices:
        return gr.Radio(visible=False)
    return gr.Radio(choices=choices, label="", interactive=True, visible=True)
|
|
|
|
|
|
|
|
|
def select_property(inputs):
    """
    Pass the selected value through and reset the radio list.

    :param inputs: the selected value, returned unchanged
    :return: ``(inputs, gr.Radio)`` where the radio is built with no choices,
        no value, and ``visible=False``
    """
    cleared_radio = gr.Radio(choices=[], value=None, interactive=True, visible=False)
    return inputs, cleared_radio
|
|
|
|
|
|
|
|
|
def select_class(inputs):
    """
    Pass the selected value through and reset the radio list.

    :param inputs: the selected value, returned unchanged
    :return: ``(inputs, gr.Radio)`` where the radio is built with no choices,
        no value, and ``visible=False``
    """
    cleared_radio = gr.Radio(choices=[], value=None, interactive=True, visible=False)
    return inputs, cleared_radio
|
|
|
|
|
|
|
|
|
def add_class(history, inputs):
    """
    Replace the last comma-separated entry of ``history`` with ``inputs`` and
    reset the radio list.

    Everything after the final comma in ``history`` is discarded (all of it if
    there is no comma). ``inputs`` is then appended, followed by a trailing comma.

    :param history: comma-separated text accumulated so far
    :param inputs: entry to append
    :return: ``(classes, gr.Radio)`` where ``classes`` is the updated text and
        the radio is built with no choices, no value, and ``visible=False``
    """
    # rpartition yields the text before the last comma, or "" without a comma.
    kept, _, _ = history.rpartition(",")
    prefix = kept + "," if kept else ""
    classes = prefix + inputs + ","
    return classes, gr.Radio(choices=[], value=None, interactive=True, visible=False)
|
|
|
|
|
|
|
|
|
def select_allowed_values(choice):
    """
    Choose which value-constraint inputs are shown for the selected option.

    :param choice: the selected option; ``"with any value"`` and ``"datatypes"``
        are handled specially, every other value takes the final branch
    :return: ``(class_names, class_choices, datatypes)`` as a ``gr.Textbox``,
        ``gr.Radio`` and ``gr.Dropdown``. The radio is always hidden. For
        ``"with any value"`` everything is hidden; for ``"datatypes"`` only the
        dropdown (filled from ``NODE_CONSTRAINTS``) is visible; otherwise only
        the textbox is visible.
    """
    if choice == "with any value":
        return (
            gr.Textbox(placeholder="class name", visible=False),
            gr.Radio(visible=False),
            gr.Dropdown(visible=False),
        )

    if choice == "datatypes":
        return (
            gr.Textbox(visible=False),
            gr.Radio(visible=False),
            gr.Dropdown(choices=NODE_CONSTRAINTS, label="Datatypes", interactive=True, visible=True),
        )

    return (
        gr.Textbox(placeholder="class name", visible=True),
        gr.Radio(visible=False),
        gr.Dropdown(visible=False),
    )
|
|
|
|
|
|
|
|
|
def insert_constraint(es_str, shape_id, prop_id, allowed_values, classes, datatypes, cardinality, comment):
    """
    Parse a schema, insert one triple constraint, and return the updated text.

    Every argument after ``es_str`` is forwarded positionally, in the order
    given, to ``ESGenerationModel.insert_triple_constraint``.

    :param es_str: schema text, parsed with ``ESGenerationModel.load_es_shexc``
    :param shape_id: first argument of ``insert_triple_constraint``
    :param prop_id: second argument of ``insert_triple_constraint``
    :param allowed_values: third argument of ``insert_triple_constraint``
    :param classes: fourth argument of ``insert_triple_constraint``
    :param datatypes: fifth argument of ``insert_triple_constraint``
    :param cardinality: sixth argument of ``insert_triple_constraint``
    :param comment: seventh argument of ``insert_triple_constraint``
    :return: ``str(model)`` after the insertion
    """
    schema = ESGenerationModel(name="")
    schema.load_es_shexc(es_str)
    constraint_args = (shape_id, prop_id, allowed_values, classes, datatypes, cardinality, comment)
    schema.insert_triple_constraint(*constraint_args)
    return str(schema)
|
|
|
|
|
|
|
|
|
def load_examples(es_name: str, es_str: str):
    """
    Load a schema from the given text, or from a bundled example file, and
    build the shape-name dropdown for it.

    :param es_name: base name of the example file, read from
        ``../data/<es_name>.txt`` relative to this module; only used when
        ``es_str`` is falsy
    :param es_str: schema text; when truthy it is parsed and the returned
        content is ``str(model)``
    :return: ``(content, gr.Dropdown)`` where ``content`` is either
        ``str(model)`` (text given) or the raw file text (example loaded), and
        the dropdown's choices are ``model.shapes``
    :raises OSError: if the example file cannot be opened
    """
    if es_str:
        model = ESGenerationModel(name="")
        model.load_es_shexc(es_str)
        content = str(model)
    else:
        example_path = os.path.join(os.path.dirname(__file__), f"../data/{es_name}.txt")
        print(f"Loading examples from {example_path}")
        # Explicit encoding: without it open() uses the locale's default, so
        # the same file could decode differently across platforms.
        # NOTE(review): assumes the bundled example files are UTF-8 - confirm.
        with open(example_path, "r", encoding="utf-8") as fp:
            content = fp.read()
        model = ESGenerationModel(name="")
        model.load_es_shexc(content)

    return content, gr.Dropdown(choices=model.shapes, label="Shape Name")
|
|
|
|