from pathlib import Path

from django.conf import settings

# Lazily populated cache of Freebase entity id -> display name.
# Stays None until the first call to _get_freebase_ent2name().
_freebase_ent2name = None


def _get_freebase_ent2name():
    """Return the Freebase id-to-name mapping, loading it on first use.

    The mapping is read from ``FB15k-237/ent2name.txt`` under
    ``settings.COINS_DATA_DIR`` and stored in the module-level
    ``_freebase_ent2name`` so the file is parsed at most once per
    successful load. Each line is split on the first run of whitespace
    into an id and a name; underscores in the name are replaced with
    spaces. Lines that do not yield exactly two fields are skipped. If
    the file does not exist, an empty mapping is cached and returned.
    """
    global _freebase_ent2name
    if _freebase_ent2name is not None:
        return _freebase_ent2name

    source = Path(settings.COINS_DATA_DIR) / "FB15k-237" / "ent2name.txt"
    lookup = {}
    if source.exists():
        with open(source, encoding="utf-8") as handle:
            for raw in handle:
                fields = raw.rstrip("\n").split(None, 1)
                if len(fields) != 2:
                    # Blank line or an id without a name: nothing to map.
                    continue
                ident, label = fields
                lookup[ident] = label.replace("_", " ")

    _freebase_ent2name = lookup
    return lookup


def _nell_segment(name):
    """Pick the display segment out of a colon-separated NELL name.

    Names that do not contain ``"concept"`` are returned untouched.
    Otherwise the name is split on ``":"`` and the last segment is
    returned, unless the name contains ``"new"``, in which case the
    second-to-last segment is returned instead.
    """
    if "concept" not in name:
        return name
    pieces = name.split(":")
    position = -2 if "new" in name else -1
    return pieces[position]


def clean_entity_name(name: str, dataset_id: str) -> str:
    """Return a display-friendly entity name for the given dataset.

    * ``"freebase"``: use the name from the Freebase id-to-name mapping
      when a non-empty one exists; otherwise strip every ``"/m/"``
      occurrence from the raw id.
    * ``"wordnet"``: keep only the text before the first ``"."``.
    * anything else is treated as NELL (see ``_nell_segment``).
    """
    if dataset_id == "freebase":
        label = _get_freebase_ent2name().get(name)
        if label:
            return label
        return name.replace("/m/", "")

    if dataset_id == "wordnet":
        head, _, _ = name.partition(".")
        return head

    # nell
    return _nell_segment(name)


def clean_relation_name(name: str, dataset_id: str) -> str:
    """Return a display-friendly relation name for the given dataset.

    * ``"freebase"``: the name is a ``"."``-separated chain of hops; each
      hop is reduced to its last two ``"/"``-separated components joined
      by ``"_"``, and the hops are re-joined with ``"."``.
    * ``"wordnet"``: drop a single leading underscore if present.
    * anything else is treated as NELL (see ``_nell_segment``).
    """
    if dataset_id == "freebase":
        shortened = []
        for hop in name.split("."):
            tail = hop.split("/")[-2:]
            shortened.append("_".join(tail))
        return ".".join(shortened)

    if dataset_id == "wordnet":
        if name.startswith("_"):
            return name[1:]
        return name

    # nell
    return _nell_segment(name)