| import os |
| from typing import List, Dict, Tuple |
|
|
|
|
def load_labels_from_file(file_path: str) -> List[str]:
    """
    Load labels from a text file (one label per line).

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or that start with "#" after stripping, are skipped.
    A UTF-8 byte-order mark at the start of the file is ignored.

    Args:
        file_path: Path of the label file to read.

    Returns:
        The remaining labels, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8", but also drops
    # a leading BOM. With plain "utf-8" the BOM survives strip() and ends up
    # glued to the first label (or hides a first-line "#" comment marker).
    with open(file_path, "r", encoding="utf-8-sig") as f:
        stripped_lines = (raw_line.strip() for raw_line in f)
        return [
            label
            for label in stripped_lines
            if label and not label.startswith("#")
        ]
|
|
|
|
def get_label_mappings(args):
    """
    Build the label vocabularies and their index lookups from label files.

    The intent and slot label files are read from
    ``os.path.join(args.data_dir, args.task, <file name>)`` via
    ``load_labels_from_file``. Two further vocabularies are derived from the
    intent labels: one with "O" prepended and one with "PAD" prepended.

    Args:
        args: Object providing the ``data_dir``, ``task``,
            ``intent_label_file`` and ``slot_label_file`` attributes.

    Returns:
        Tuple ``(INTENT_LABELS, SLOT_LABELS, mappings)``. ``mappings`` is a
        dict holding the four label lists plus, for each list, a
        label -> index dict and the matching index -> label dict.
    """

    def build_index(vocabulary):
        # Forward lookup assigns each label its position in the list; the
        # reverse lookup is derived from the forward one, not from the list.
        label_to_id = dict(zip(vocabulary, range(len(vocabulary))))
        id_to_label = {idx: label for label, idx in label_to_id.items()}
        return label_to_id, id_to_label

    INTENT_LABELS = load_labels_from_file(
        os.path.join(args.data_dir, args.task, args.intent_label_file)
    )
    SLOT_LABELS = load_labels_from_file(
        os.path.join(args.data_dir, args.task, args.slot_label_file)
    )

    # Derived intent vocabularies: the extra entry always takes index 0.
    # NOTE(review): "O" / "PAD" presumably serve as the token-level "no intent"
    # tag and the padding tag respectively; confirm against the callers.
    TOKEN_INTENT_LABELS = ["O", *INTENT_LABELS]
    INTENT_LABELS_WITH_PAD = ["PAD", *INTENT_LABELS]

    INTENT2ID, ID2INTENT = build_index(INTENT_LABELS)
    SLOT2ID, ID2SLOT = build_index(SLOT_LABELS)
    TOKINT2ID, ID2TOKINT = build_index(TOKEN_INTENT_LABELS)
    TAGINT2ID, ID2TAGINT = build_index(INTENT_LABELS_WITH_PAD)

    mappings = dict(
        INTENT_LABELS=INTENT_LABELS,
        SLOT_LABELS=SLOT_LABELS,
        TOKEN_INTENT_LABELS=TOKEN_INTENT_LABELS,
        INTENT_LABELS_WITH_PAD=INTENT_LABELS_WITH_PAD,
        INTENT2ID=INTENT2ID,
        ID2INTENT=ID2INTENT,
        SLOT2ID=SLOT2ID,
        ID2SLOT=ID2SLOT,
        TOKINT2ID=TOKINT2ID,
        ID2TOKINT=ID2TOKINT,
        TAGINT2ID=TAGINT2ID,
        ID2TAGINT=ID2TAGINT,
    )

    return INTENT_LABELS, SLOT_LABELS, mappings
|
|
|
|