| from pathlib import Path |
|
|
| from django.conf import settings |
|
|
# Lazily built cache of the Freebase name mapping; stays None until
# _get_freebase_ent2name() has run once.
_freebase_ent2name = None


def _get_freebase_ent2name():
    """Return the entity-name lookup table, loading it on first use.

    The table is read from ``<COINS_DATA_DIR>/FB15k-237/ent2name.txt``.
    Each line is split on its first run of whitespace: the first token
    becomes the key and the remainder, with underscores turned into
    spaces, becomes the value. Lines that do not yield two fields are
    skipped. If the file does not exist, an empty dict is cached and
    returned.
    """
    global _freebase_ent2name
    if _freebase_ent2name is not None:
        return _freebase_ent2name

    names_file = Path(settings.COINS_DATA_DIR) / "FB15k-237" / "ent2name.txt"
    lookup = {}
    if names_file.exists():
        with open(names_file, encoding="utf-8") as handle:
            for raw_line in handle:
                fields = raw_line.rstrip("\n").split(None, 1)
                if len(fields) != 2:
                    continue
                key, label = fields
                lookup[key] = label.replace("_", " ")

    # Cache the result, including an empty table, so the file is read at most once.
    _freebase_ent2name = lookup
    return lookup
|
|
|
|
def clean_entity_name(name: str, dataset_id: str) -> str:
    """Apply dataset-specific entity name cleaning for display.

    Args:
        name: Raw entity identifier as stored in the dataset.
        dataset_id: ``"freebase"`` and ``"wordnet"`` get dedicated rules;
            every other value is handled by the "concept" rules below.

    Returns:
        The cleaned name. Names that match no rule are returned unchanged.
    """
    if dataset_id == "freebase":
        # Prefer the human-readable name; a missing or empty entry falls
        # back to the raw id with every "/m/" occurrence removed.
        resolved = _get_freebase_ent2name().get(name)
        if resolved:
            return resolved
        return name.replace("/m/", "")

    if dataset_id == "wordnet":
        # Keep only the text before the first dot.
        return name.split(".")[0]

    if "concept" not in name:
        return name

    parts = name.split(":")
    # Names containing "new" use the second-to-last colon-separated field.
    # If there is no such field (no colon in the name), fall back to the
    # last field instead of raising IndexError.
    if "new" in name and len(parts) >= 2:
        return parts[-2]
    return parts[-1]
|
|
|
|
def clean_relation_name(name: str, dataset_id: str) -> str:
    """Apply dataset-specific relation name cleaning for display.

    Args:
        name: Raw relation identifier as stored in the dataset.
        dataset_id: ``"freebase"`` and ``"wordnet"`` get dedicated rules;
            every other value is handled by the "concept" rules below.

    Returns:
        The cleaned name. Names that match no rule are returned unchanged.
    """
    if dataset_id == "freebase":
        # A name may hold several dot-separated hops; each hop is reduced
        # to its last two "/"-separated segments joined by "_".
        hops = name.split(".")
        return ".".join("_".join(hop.split("/")[-2:]) for hop in hops)

    if dataset_id == "wordnet":
        # Drop a single leading underscore, if present.
        return name[1:] if name.startswith("_") else name

    if "concept" not in name:
        return name

    parts = name.split(":")
    # Names containing "new" use the second-to-last colon-separated field.
    # If there is no such field (no colon in the name), fall back to the
    # last field instead of raising IndexError.
    if "new" in name and len(parts) >= 2:
        return parts[-2]
    return parts[-1]
|
|