| | import torch
|
| | from transformers import AutoTokenizer, AutoModel
|
| |
|
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
|
| |
|
def _load_tokenizer_and_embedding(model_name):
    """Load one checkpoint's tokenizer and its input-embedding weight.

    Args:
        model_name: Checkpoint identifier passed unchanged to
            ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``.

    Returns:
        A ``(tokenizer, embedding_weight)`` tuple. ``embedding_weight`` is the
        ``weight`` of the model's input embedding layer, taken after the model
        has been moved to the module-level ``device``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device)
    # get_input_embeddings() is the architecture-independent accessor; for
    # BERT/RoBERTa-style models it returns `embeddings.word_embeddings`.
    return tokenizer, model.get_input_embeddings().weight


def load_tokenizers_and_embeddings(
    model_name_vi="vinai/phobert-base",
    model_name_en="bert-base-cased-finetuned-mrpc",
):
    """Load the tokenizers and input-embedding matrices for both checkpoints.

    The ``vi`` checkpoint is loaded first, then the ``en`` checkpoint. Only
    the tokenizers and the embedding weights are returned; the model objects
    themselves are not part of the result.

    Args:
        model_name_vi: Checkpoint used for the ``*_vi`` entries. Defaults to
            ``"vinai/phobert-base"``.
        model_name_en: Checkpoint used for the ``*_en`` entries. Defaults to
            ``"bert-base-cased-finetuned-mrpc"``.

    Returns:
        A dict with the keys ``"tokenizer_vi"``, ``"embedding_vi"``,
        ``"tokenizer_en"``, ``"embedding_en"`` and ``"device"`` (the
        module-level ``device`` the models were moved to).
    """
    tokenizer_vi, embedding_matrix_vi = _load_tokenizer_and_embedding(model_name_vi)
    tokenizer_en, embedding_matrix_en = _load_tokenizer_and_embedding(model_name_en)

    return {
        "tokenizer_vi": tokenizer_vi,
        "embedding_vi": embedding_matrix_vi,
        "tokenizer_en": tokenizer_en,
        "embedding_en": embedding_matrix_en,
        "device": device,
    }
|
| |
|