File size: 676 Bytes
9e2ba5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import numpy as np
import gensim.downloader as api

import config

def get_word2vec_enc(corpus: list,  gensim_pretrained_emb: str) -> np.ndarray:
    """
    Build an embedding matrix from a pretrained gensim model.

    Row ``i`` of the result holds the pretrained vector of the word that
    ``corpus`` pairs with index ``i``. Rows whose word is missing from the
    pretrained vocabulary, or that no pair refers to, stay all-zero.

    :param corpus: iterable of ``(word, index)`` pairs
    :param gensim_pretrained_emb: model name passed to ``gensim.downloader.load``
    :return: array of shape ``(config.VOCAB_SIZE, config.EMBED_SIZE)``
    """
    # NOTE(review): api.load presumably downloads the model on first use --
    # confirm against the gensim downloader documentation.
    word_vecs = api.load(gensim_pretrained_emb)

    vocab_size = config.VOCAB_SIZE
    # The pretrained vectors are expected to have config.EMBED_SIZE dimensions.
    embedding_weights = np.zeros((vocab_size, config.EMBED_SIZE))

    for word, i in corpus:
        # Skip indices that do not address a row of the matrix: an index
        # >= vocab_size would raise IndexError, and a negative one would
        # silently wrap around and overwrite an unrelated row.
        if not 0 <= i < vocab_size:
            continue
        if word in word_vecs:
            embedding_weights[i] = word_vecs[word]
    return embedding_weights