import numpy as np
from typing import List


class BaseEmbedder:
    """
    The Base Embedder used for creating embedding models

    The base class only stores the supplied models; `embed` is a no-op
    placeholder here, and `embed_words` / `embed_documents` both delegate
    to it.

    Arguments:
        embedding_model: The main embedding model to be used for extracting
                         document and word embedding
        word_embedding_model: The embedding model used for extracting word
                              embeddings only. If this model is selected,
                              then the `embedding_model` is purely used for
                              creating document embeddings.
    """

    def __init__(self, embedding_model=None, word_embedding_model=None):
        self.embedding_model = embedding_model
        self.word_embedding_model = word_embedding_model

    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
        """
        Embed a list of n documents/words into an (n, m) matrix of embeddings

        This base implementation does nothing and returns `None`; concrete
        embedders are expected to override it.

        Arguments:
            documents: A list of documents or words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document/words embeddings with shape (n, m) with `n`
            documents/words that each have an embeddings size of `m`
        """
        pass

    def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
        """
        Embed a list of n words into an (n, m) matrix of embeddings

        Delegates to `embed`; this base class does not consult
        `word_embedding_model` here.

        Arguments:
            words: A list of words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Word embeddings with shape (n, m) with `n` words
            that each have an embeddings size of `m`
        """
        return self.embed(words, verbose)

    # NOTE: the parameter is named `document` (singular) although it takes a
    # list; the name is kept so existing keyword callers continue to work.
    def embed_documents(self, document: List[str], verbose: bool = False) -> np.ndarray:
        """
        Embed a list of n documents into an (n, m) matrix of embeddings

        Delegates to `embed`.

        Arguments:
            document: A list of documents to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document embeddings with shape (n, m) with `n` documents
            that each have an embeddings size of `m`
        """
        return self.embed(document, verbose)