Spaces:
Configuration error
Configuration error
| import numpy as np | |
| import gensim.downloader as api | |
| import config | |
def get_word2vec_enc(corpus: list, gensim_pretrained_emb: str) -> np.ndarray:
    """
    Build an embedding weight matrix from a pretrained gensim model.

    Starts from a zero matrix of shape
    ``(config.VOCAB_SIZE, config.EMBED_SIZE)`` and copies the pretrained
    vector of every word found in the model into the row given by that
    word's index. Rows of words the model does not know stay all-zero.

    :param corpus: Iterable of ``(word, index)`` pairs. Presumably the items
        of a tokenizer's word-to-index mapping -- confirm against the caller.
    :param gensim_pretrained_emb: Name of the pretrained model, passed
        unchanged to ``gensim.downloader.load``.
    :return: The embedding weight matrix as a NumPy array. The pretrained
        vectors are expected to have ``config.EMBED_SIZE`` components so
        they fit a matrix row.
    """
    word_vecs = api.load(gensim_pretrained_emb)
    vocab_size = config.VOCAB_SIZE
    embedding_weights = np.zeros((vocab_size, config.EMBED_SIZE))
    for word, i in corpus:
        # Only indices in [0, vocab_size) have a row of their own: a larger
        # index would raise IndexError and a negative one would silently
        # wrap around and overwrite a row counted from the end.
        if not 0 <= i < vocab_size:
            continue
        if word in word_vecs:
            embedding_weights[i] = word_vecs[word]
    return embedding_weights