Spaces:
Runtime error
Runtime error
| import os | |
def vocab_process(data_dir):
    """Build the intent and slot label vocabulary files for one dataset.

    Reads ``<data_dir>/train/label`` (one intent label per line) and
    ``<data_dir>/train/seq.out`` (whitespace-separated slot tags per line),
    then writes ``<data_dir>/intent_label.txt`` and
    ``<data_dir>/slot_label.txt`` with one label per line. Special tokens are
    written first: ``UNK`` for intents, ``PAD`` then ``UNK`` for slots.

    Args:
        data_dir: Directory that contains the ``train`` sub-directory. The two
            vocabulary files are written directly into this directory.

    Raises:
        OSError: If an input file cannot be opened or an output file cannot
            be written.
    """
    slot_label_vocab = 'slot_label.txt'
    intent_label_vocab = 'intent_label.txt'
    train_dir = os.path.join(data_dir, 'train')

    # intent
    additional_tokens = ["UNK"]
    with open(os.path.join(train_dir, 'label'), 'r', encoding='utf-8') as f_r:
        # Skip blank lines so the empty string never becomes an intent label
        # (the slot branch below already ignores them via ``split()``).
        intent_vocab = {label for label in map(str.strip, f_r) if label}
    # Special tokens are written explicitly; drop them from the data-derived
    # set so they cannot appear twice in the vocabulary file.
    intent_vocab.difference_update(additional_tokens)
    # The output is opened only after the input was read completely, so a
    # failed read does not leave a truncated vocabulary file behind.
    with open(os.path.join(data_dir, intent_label_vocab), 'w', encoding='utf-8') as f_w:
        f_w.writelines(token + '\n' for token in additional_tokens)
        f_w.writelines(intent + '\n' for intent in sorted(intent_vocab))

    # slot
    additional_tokens = ["PAD", "UNK"]
    with open(os.path.join(train_dir, 'seq.out'), 'r', encoding='utf-8') as f_r:
        slot_vocab = {slot for line in f_r for slot in line.split()}
    slot_vocab.difference_update(additional_tokens)
    # Order by the text after the two-character prefix first and by the prefix
    # second, so tags such as "B-x" and "I-x" end up next to each other and a
    # bare "O" (empty remainder) sorts first.
    ordered_slots = sorted(slot_vocab, key=lambda x: (x[2:], x[:2]))
    with open(os.path.join(data_dir, slot_label_vocab), 'w', encoding='utf-8') as f_w:
        f_w.writelines(token + '\n' for token in additional_tokens)
        f_w.writelines(slot + '\n' for slot in ordered_slots)
if __name__ == "__main__":
    # Script entry point: rebuild the label vocabularies for the 'atis' and
    # 'snips' data directories, resolved relative to the working directory.
    for dataset_dir in ('atis', 'snips'):
        vocab_process(dataset_dir)