| import numpy as np |
| from typing import List |
| from bertopic.backend._base import BaseEmbedder |
| from bertopic.backend._utils import select_backend |
|
|
|
|
class WordDocEmbedder(BaseEmbedder):
    """ Pair a document-level embedder with a word-level embedder

    Both models are passed through `select_backend` when the object is
    created. Documents are embedded with `embedding_model` and words
    with `word_embedding_model`.
    """
    def __init__(self, embedding_model, word_embedding_model):
        super().__init__()

        # Store whatever `select_backend` returns for each model; the
        # embed_* methods below only rely on the result exposing `.embed`.
        self.embedding_model = select_backend(embedding_model)
        self.word_embedding_model = select_backend(word_embedding_model)

    def embed_words(self,
                    words: List[str],
                    verbose: bool = False) -> np.ndarray:
        """ Embed a list of n words with the word-level model

        Arguments:
            words: A list of words to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Word embeddings with shape (n, m) with `n` words
            that each have an embeddings size of `m`
        """
        word_backend = self.word_embedding_model
        word_embeddings = word_backend.embed(words, verbose)
        return word_embeddings

    def embed_documents(self,
                        document: List[str],
                        verbose: bool = False) -> np.ndarray:
        """ Embed a list of n documents with the document-level model

        Arguments:
            document: A list of documents to be embedded
            verbose: Controls the verbosity of the process

        Returns:
            Document embeddings with shape (n, m) with `n` documents
            that each have an embeddings size of `m`
        """
        document_backend = self.embedding_model
        document_embeddings = document_backend.embed(document, verbose)
        return document_embeddings
|
|