File size: 1,638 Bytes
from pathlib import Path
from django.conf import settings
# Lazily populated cache; stays None until the first lookup request.
_freebase_ent2name = None


def _get_freebase_ent2name():
    """Return the Freebase entity-to-name mapping, loading it on first use.

    The mapping is read from ``<COINS_DATA_DIR>/FB15k-237/ent2name.txt`` and
    stored in the module-level ``_freebase_ent2name`` so later calls reuse it.
    Each line is split on its first run of whitespace: the leading token is
    the key and the remainder, with underscores turned into spaces, is the
    value. Lines without two fields are skipped, and a missing file yields
    an empty mapping.
    """
    global _freebase_ent2name
    if _freebase_ent2name is not None:
        return _freebase_ent2name

    source = Path(settings.COINS_DATA_DIR) / "FB15k-237" / "ent2name.txt"
    names = {}
    if source.exists():
        with open(source, encoding="utf-8") as handle:
            for raw_line in handle:
                fields = raw_line.rstrip("\n").split(None, 1)
                if len(fields) != 2:
                    continue
                entity_id, label = fields
                names[entity_id] = label.replace("_", " ")

    _freebase_ent2name = names
    return names
def clean_entity_name(name: str, dataset_id: str) -> str:
    """Apply dataset-specific entity name cleaning for display.

    Args:
        name: Raw entity identifier.
        dataset_id: ``"freebase"`` or ``"wordnet"``; any other value is
            handled with the NELL rules.

    Returns:
        The cleaned, display-ready name. Names that match no cleaning rule
        are returned unchanged.
    """
    if dataset_id == "freebase":
        # Prefer the label from the ent2name mapping; when there is none (or
        # it is empty) fall back to the identifier without its "/m/" marker.
        resolved = _get_freebase_ent2name().get(name)
        if resolved:
            return resolved
        return name.replace("/m/", "")

    if dataset_id == "wordnet":
        # Keep only the text before the first dot, e.g. "dog.n.01" -> "dog".
        return name.split(".")[0]

    # Everything else is treated as NELL.
    if "concept" not in name:
        return name
    segments = name.split(":")
    if "new" not in name:
        return segments[-1]
    # NOTE(review): the "new" test is a plain substring match, so it also
    # fires for names such as "concept:city:newyork" -- confirm intended.
    if len(segments) < 2:
        # No ":" separator, so there is no second-to-last segment to pick;
        # return the name as-is instead of raising IndexError.
        return name
    return segments[-2]
def clean_relation_name(name: str, dataset_id: str) -> str:
    """Apply dataset-specific relation name cleaning for display.

    Args:
        name: Raw relation identifier.
        dataset_id: ``"freebase"`` or ``"wordnet"``; any other value is
            handled with the NELL rules.

    Returns:
        The cleaned, display-ready relation name. Names that match no
        cleaning rule are returned unchanged.
    """
    if dataset_id == "freebase":
        # A name may hold several "."-separated hops; each hop is reduced to
        # its last two "/"-separated components joined by "_".
        return ".".join(
            "_".join(hop.split("/")[-2:]) for hop in name.split(".")
        )

    if dataset_id == "wordnet":
        # Drop a single leading underscore, e.g. "_hypernym" -> "hypernym".
        return name[1:] if name.startswith("_") else name

    # Everything else is treated as NELL.
    if "concept" not in name:
        return name
    segments = name.split(":")
    if "new" not in name:
        return segments[-1]
    # NOTE(review): the "new" test is a plain substring match on the whole
    # name -- confirm that is the intended trigger.
    if len(segments) < 2:
        # No ":" separator, so there is no second-to-last segment to pick;
        # return the name as-is instead of raising IndexError.
        return name
    return segments[-2]
|