File size: 339 Bytes
549c270
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
# scripts/utils/item_token_ids.py

from transformers import AutoTokenizer

def get_item_token_ids(tokenizer=None, model_path="data/processed/beauty/cove/model"):
    """Return the ids of every token that was added to the tokenizer's vocabulary.

    Args:
        tokenizer: Object exposing ``get_added_vocab()`` (e.g. a Hugging Face
            tokenizer). When ``None``, one is loaded from ``model_path``.
        model_path: Directory to load the tokenizer from when none is supplied.

    Returns:
        list[int]: Token ids of the added-vocabulary entries, in the order
        produced by ``get_added_vocab().values()``.
    """
    # Lazy-load only when the caller did not provide a tokenizer.
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
    added_vocab = tokenizer.get_added_vocab()
    return list(added_vocab.values())