DazKha's picture
initial commit
f32bfa2
import os
from typing import List, Dict, Tuple
def load_labels_from_file(file_path: str) -> List[str]:
    """
    Read a label vocabulary from a UTF-8 text file, one label per line.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping, and lines whose stripped text begins with "#", are
    ignored. The remaining labels are returned in file order.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        # Lazily strip every raw line; consumed below while the file is open.
        candidates = (raw.strip() for raw in handle)
        # Keep only real labels: drop blanks and "#" comment lines.
        return [
            entry
            for entry in candidates
            if entry and not entry.startswith("#")
        ]
def get_label_mappings(args):
    """
    Load the intent and slot label files and build every lookup table.

    The label files are read from
    os.path.join(args.data_dir, args.task, args.intent_label_file) and
    os.path.join(args.data_dir, args.task, args.slot_label_file).

    Two derived label lists are also produced: the token-intent labels
    ("O" followed by the intents) and the tag-intent labels ("PAD"
    followed by the intents). For each of the four label lists a
    label -> id dict and an id -> label dict are created.

    Returns: tuple of (INTENT_LABELS, SLOT_LABELS, mappings_dict)
    """

    def _index(labels):
        # Forward table maps each label to its position in the list; the
        # reverse table is derived from the forward table's items.
        forward = dict(zip(labels, range(len(labels))))
        backward = dict(zip(forward.values(), forward.keys()))
        return forward, backward

    # Resolve both file locations before touching the filesystem.
    intent_file = os.path.join(args.data_dir, args.task, args.intent_label_file)
    slot_file = os.path.join(args.data_dir, args.task, args.slot_label_file)

    intents = load_labels_from_file(intent_file)
    slots = load_labels_from_file(slot_file)

    # Token-level intents get a leading "O"; tag-level intents a leading "PAD".
    token_intents = ["O", *intents]
    padded_intents = ["PAD", *intents]

    intent_to_id, id_to_intent = _index(intents)
    slot_to_id, id_to_slot = _index(slots)
    tokint_to_id, id_to_tokint = _index(token_intents)
    tagint_to_id, id_to_tagint = _index(padded_intents)

    mappings = dict(
        INTENT_LABELS=intents,
        SLOT_LABELS=slots,
        TOKEN_INTENT_LABELS=token_intents,
        INTENT_LABELS_WITH_PAD=padded_intents,
        INTENT2ID=intent_to_id,
        ID2INTENT=id_to_intent,
        SLOT2ID=slot_to_id,
        ID2SLOT=id_to_slot,
        TOKINT2ID=tokint_to_id,
        ID2TOKINT=id_to_tokint,
        TAGINT2ID=tagint_to_id,
        ID2TAGINT=id_to_tagint,
    )
    return intents, slots, mappings