import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.base import BaseEstimator
from typing import Mapping, List, Tuple


class BaseRepresentation(BaseEstimator):
    """Baseline representation model for fine-tuning topic representations.

    This class defines the `extract_topics` interface. Its own
    implementation performs no fine-tuning: it hands back the
    representations already stored on the topic model.
    """

    def extract_topics(
        self,
        topic_model,
        documents: pd.DataFrame,
        c_tf_idf: csr_matrix,
        topics: Mapping[str, List[Tuple[str, float]]],
    ) -> Mapping[str, List[Tuple[str, float]]]:
        """Extract topics.

        Every representation model inheriting from this class has the
        arguments below (topic_model, documents, c_tf_idf, topics) passed
        to it automatically, so the topic-related information available to
        a representation model is limited to what these arguments carry.

        Arguments:
            topic_model: The BERTopic model, fitted up to the point where
                         topic representations are calculated.
            documents: A dataframe with the columns "Document" and "Topic",
                       holding every document together with its topic.
            c_tf_idf: A c-TF-IDF representation. It is typically identical
                      to `topic_model.c_tf_idf_`, except for dynamic,
                      class-based, and hierarchical topic modeling, where
                      it is calculated on a subset of the documents.
            topics: A dictionary mapping each topic (key) to tuples of word
                    and weight (value) as calculated by c-TF-IDF. These are
                    the default topics used when no representation model
                    is applied.

        Returns:
            The `topic_representations_` attribute of `topic_model`,
            returned as-is. `documents`, `c_tf_idf` and `topics` are not
            consulted by this base implementation.
        """
        # Pass-through: no fine-tuning happens in the base class.
        existing_representations = topic_model.topic_representations_
        return existing_representations