| import numpy as np |
| from typing import List |
|
|
|
|
class BaseEmbedder:
    """ The Base Embedder used for creating embedding models

    This class only stores the supplied models and defines the embedding
    interface. Its own `embed` is a placeholder that returns `None`;
    `embed_words` and `embed_documents` both delegate to `embed`, so a
    subclass that overrides `embed` gets working versions of both.

    Arguments:
        embedding_model: The main embedding model to be used for extracting
                         document and word embedding
        word_embedding_model: The embedding model used for extracting word
                              embeddings only. If this model is selected,
                              then the `embedding_model` is purely used for
                              creating document embeddings. The base class
                              only stores this value; it does not use it.
    """
    def __init__(self,
                 embedding_model=None,
                 word_embedding_model=None):
        self.embedding_model = embedding_model
        self.word_embedding_model = word_embedding_model

    def embed(self,
              documents: List[str],
              verbose: bool = False) -> np.ndarray:
        """ Embed a list of n documents/words into an (n, m) matrix
        of embeddings

        The base implementation is a placeholder: it performs no work and
        returns `None`. Subclasses are expected to override it.

        Arguments:
            documents: A list of documents or words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document/words embeddings with shape (n, m) with `n` documents/words
            that each have an embeddings size of `m`
        """
        # Intentionally a no-op (returns None) so the base class can still be
        # instantiated and passed around without a concrete model.
        pass

    def embed_words(self,
                    words: List[str],
                    verbose: bool = False) -> np.ndarray:
        """ Embed a list of n words into an (n, m) matrix of embeddings

        Delegates to `embed`.

        Arguments:
            words: A list of words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Word embeddings with shape (n, m) with `n` words
            that each have an embeddings size of `m`
        """
        # Arguments are forwarded positionally so that subclass overrides of
        # `embed` with differently named parameters keep working.
        return self.embed(words, verbose)

    def embed_documents(self,
                        document: List[str],
                        verbose: bool = False) -> np.ndarray:
        """ Embed a list of n documents into an (n, m) matrix of embeddings

        Delegates to `embed`.

        Arguments:
            document: A list of documents to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document embeddings with shape (n, m) with `n` documents
            that each have an embeddings size of `m`
        """
        # Forwarded positionally for the same reason as in `embed_words`.
        return self.embed(document, verbose)
|
|