kisejin commited on
Commit
547860b
·
verified ·
1 Parent(s): 19b102a

Upload 255 files

Browse files
BERTopic/.gitignore CHANGED
@@ -81,3 +81,6 @@ venv.bak/
81
  .idea/
82
  .vscode
83
  .DS_Store
 
 
 
 
81
  .idea/
82
  .vscode
83
  .DS_Store
84
+
85
+ # mkdocs
86
+ site/
BERTopic/bertopic/_bertopic.py CHANGED
@@ -158,7 +158,7 @@ class BERTopic:
158
  NOTE: This param will not be used if you pass in your own
159
  CountVectorizer.
160
  min_topic_size: The minimum size of the topic. Increasing this value will lead
161
- to a lower number of clusters/topics and vice versa.
162
  It is the same parameter as `min_cluster_size` in HDBSCAN.
163
  NOTE: This param will not be used if you are using `hdbscan_model`.
164
  nr_topics: Specifying the number of topics will reduce the initial
@@ -321,7 +321,7 @@ class BERTopic:
321
  embeddings: np.ndarray = None,
322
  images: List[str] = None,
323
  y: Union[List[int], np.ndarray] = None) -> Tuple[List[int],
324
- Union[np.ndarray, None]]:
325
  """ Fit the models on a collection of documents, generate topics,
326
  and return the probabilities and topic per document.
327
 
@@ -699,10 +699,11 @@ class BERTopic:
699
 
700
  def topics_over_time(self,
701
  docs: List[str],
702
- timestamps: Union[List[str],
703
- List[int]],
704
  topics: List[int] = None,
 
705
  nr_bins: int = None,
 
706
  datetime_format: str = None,
707
  evolution_tuning: bool = True,
708
  global_tuning: bool = True) -> pd.DataFrame:
@@ -826,7 +827,8 @@ class BERTopic:
826
 
827
  # Fill dataframe with results
828
  topics_at_timestamp = [(topic,
829
- ", ".join([words[0] for words in values][:5]),
 
830
  topic_frequency[topic],
831
  timestamp) for topic, values in words_per_topic.items()]
832
  topics_over_time.extend(topics_at_timestamp)
@@ -835,7 +837,7 @@ class BERTopic:
835
  previous_topics = sorted(list(documents_per_topic.Topic.values))
836
  previous_c_tf_idf = c_tf_idf.copy()
837
 
838
- return pd.DataFrame(topics_over_time, columns=["Topic", "Words", "Frequency", "Timestamp"])
839
 
840
  def topics_per_class(self,
841
  docs: List[str],
@@ -932,8 +934,8 @@ class BERTopic:
932
  `lambda x: sch.linkage(x, 'ward', optimal_ordering=True)`
933
  distance_function: The distance function to use on the c-TF-IDF matrix. Default is:
934
  `lambda x: 1 - cosine_similarity(x)`.
935
- You can pass any function that returns either a square matrix of
936
- shape (n_samples, n_samples) with zeros on the diagonal and
937
  non-negative values or condensed distance matrix of shape
938
  (n_samples * (n_samples - 1) / 2,) containing the upper
939
  triangular of the distance matrix.
@@ -1067,7 +1069,7 @@ class BERTopic:
1067
  use_embedding_model: bool = False,
1068
  calculate_tokens: bool = False,
1069
  separator: str = " ") -> Tuple[np.ndarray,
1070
- Union[List[np.ndarray], None]]:
1071
  """ A post-hoc approximation of topic distributions across documents.
1072
 
1073
  In order to perform this approximation, each document is split into tokens
@@ -1977,8 +1979,8 @@ class BERTopic:
1977
  for key, val in sorted(mapping.items()):
1978
  mappings[val].append(key)
1979
  mappings = {topic_from:
1980
- {"topics_to": topics_to,
1981
- "topic_sizes": [self.topic_sizes_[topic] for topic in topics_to]}
1982
  for topic_from, topics_to in mappings.items()}
1983
 
1984
  # Update topics
@@ -2464,7 +2466,7 @@ class BERTopic:
2464
  specific points. Helps to speed up generation of visualizations.
2465
  nr_levels: The number of levels to be visualized in the hierarchy. First, the distances
2466
  in `hierarchical_topics.Distance` are split in `nr_levels` lists of distances with
2467
- equal length. Then, for each list of distances, the merged topics, that have
2468
  a distance less or equal to the maximum distance of the selected list of distances, are selected.
2469
  NOTE: To get all possible merged steps, make sure that `nr_levels` is equal to
2470
  the length of `hierarchical_topics`.
@@ -3264,7 +3266,7 @@ class BERTopic:
3264
  sims = np.max(sim_matrix, axis=1)
3265
  to_merge = {
3266
  a - selected_topics["_outliers"]:
3267
- b - merged_topics["_outliers"] for a, (b, val) in enumerate(zip(sims_idx, sims))
3268
  if val >= min_similarity
3269
  }
3270
  to_merge.update(new_topics_dict)
@@ -3295,7 +3297,7 @@ class BERTopic:
3295
  serialization: str = "safetensors",
3296
  save_embedding_model: Union[str, bool] = True,
3297
  save_ctfidf: bool = False,
3298
- ):
3299
  """ Push your BERTopic model to a HuggingFace Hub
3300
 
3301
  Whenever you want to upload files to the Hub, you need to log in to your HuggingFace account:
@@ -3469,7 +3471,7 @@ class BERTopic:
3469
  documents: pd.DataFrame,
3470
  partial_fit: bool = False,
3471
  y: np.ndarray = None) -> Tuple[pd.DataFrame,
3472
- np.ndarray]:
3473
  """ Cluster UMAP embeddings with HDBSCAN
3474
 
3475
  Arguments:
@@ -3520,7 +3522,7 @@ class BERTopic:
3520
  return documents, probabilities
3521
 
3522
  def _zeroshot_topic_modeling(self, documents: pd.DataFrame, embeddings: np.ndarray) -> Tuple[pd.DataFrame, np.array,
3523
- pd.DataFrame, np.array]:
3524
  """ Find documents that could be assigned to either one of the topics in self.zeroshot_topic_list
3525
 
3526
  We transform the topics in `self.zeroshot_topic_list` to embeddings and
@@ -3605,16 +3607,16 @@ class BERTopic:
3605
  empty_dimensionality_model = BaseDimensionalityReduction()
3606
  empty_cluster_model = BaseCluster()
3607
  zeroshot_model = BERTopic(
3608
- n_gram_range=self.n_gram_range,
3609
- low_memory=self.low_memory,
3610
- calculate_probabilities=self.calculate_probabilities,
3611
- embedding_model=self.embedding_model,
3612
- umap_model=empty_dimensionality_model,
3613
- hdbscan_model=empty_cluster_model,
3614
- vectorizer_model=self.vectorizer_model,
3615
- ctfidf_model=self.ctfidf_model,
3616
- representation_model=self.representation_model,
3617
- verbose=self.verbose
3618
  ).fit(docs, embeddings=embeddings, y=y)
3619
  logger.info("Zeroshot Step 2 - Completed \u2713")
3620
  logger.info("Zeroshot Step 3 - Combining clustered topics with the zeroshot model")
@@ -3824,9 +3826,9 @@ class BERTopic:
3824
  # Sample documents per topic
3825
  documents_per_topic = (
3826
  documents.drop("Image", axis=1, errors="ignore")
3827
- .groupby('Topic')
3828
- .sample(n=nr_samples, replace=True, random_state=42)
3829
- .drop_duplicates()
3830
  )
3831
 
3832
  # Find and extract documents that are most similar to the topic
@@ -4007,7 +4009,7 @@ class BERTopic:
4007
  documents: pd.DataFrame,
4008
  c_tf_idf: csr_matrix = None,
4009
  calculate_aspects: bool = True) -> Mapping[str,
4010
- List[Tuple[str, float]]]:
4011
  """ Based on tf_idf scores per topic, extract the top n words per topic
4012
 
4013
  If the top words per topic need to be extracted, then only the `words` parameter
@@ -4126,8 +4128,8 @@ class BERTopic:
4126
  for key, val in sorted(mapped_topics.items()):
4127
  mappings[val].append(key)
4128
  mappings = {topic_from:
4129
- {"topics_to": topics_to,
4130
- "topic_sizes": [self.topic_sizes_[topic] for topic in topics_to]}
4131
  for topic_from, topics_to in mappings.items()}
4132
 
4133
  # Map topics
@@ -4177,8 +4179,8 @@ class BERTopic:
4177
  for key, val in sorted(mapped_topics.items()):
4178
  mappings[val].append(key)
4179
  mappings = {topic_from:
4180
- {"topics_to": topics_to,
4181
- "topic_sizes": [self.topic_sizes_[topic] for topic in topics_to]}
4182
  for topic_from, topics_to in mappings.items()}
4183
 
4184
  # Update documents and topics
@@ -4479,10 +4481,10 @@ def _create_model_from_files(
4479
 
4480
  # Fit BERTopic without actually performing any clustering
4481
  topic_model = BERTopic(
4482
- embedding_model=embedding_model,
4483
- umap_model=empty_dimensionality_model,
4484
- hdbscan_model=empty_cluster_model,
4485
- **params
4486
  )
4487
  topic_model.topic_embeddings_ = tensors["topic_embeddings"].numpy()
4488
  topic_model.topic_representations_ = {int(key): val for key, val in topics["topic_representations"].items()}
 
158
  NOTE: This param will not be used if you pass in your own
159
  CountVectorizer.
160
  min_topic_size: The minimum size of the topic. Increasing this value will lead
161
+ to a lower number of clusters/topics and vice versa.
162
  It is the same parameter as `min_cluster_size` in HDBSCAN.
163
  NOTE: This param will not be used if you are using `hdbscan_model`.
164
  nr_topics: Specifying the number of topics will reduce the initial
 
321
  embeddings: np.ndarray = None,
322
  images: List[str] = None,
323
  y: Union[List[int], np.ndarray] = None) -> Tuple[List[int],
324
+ Union[np.ndarray, None]]:
325
  """ Fit the models on a collection of documents, generate topics,
326
  and return the probabilities and topic per document.
327
 
 
699
 
700
  def topics_over_time(self,
701
  docs: List[str],
702
+ timestamps: Union[List[str]],
 
703
  topics: List[int] = None,
704
+ n_topics: int = 5,
705
  nr_bins: int = None,
706
+ n_keywords: int = 5,
707
  datetime_format: str = None,
708
  evolution_tuning: bool = True,
709
  global_tuning: bool = True) -> pd.DataFrame:
 
827
 
828
  # Fill dataframe with results
829
  topics_at_timestamp = [(topic,
830
+ ", ".join([words[0] for words in values][:n_topics]),
831
+ [words[1] for weights in values][:n_topics],
832
  topic_frequency[topic],
833
  timestamp) for topic, values in words_per_topic.items()]
834
  topics_over_time.extend(topics_at_timestamp)
 
837
  previous_topics = sorted(list(documents_per_topic.Topic.values))
838
  previous_c_tf_idf = c_tf_idf.copy()
839
 
840
+ return pd.DataFrame(topics_over_time, columns=["Topic", "Words", "Weight", "Frequency", "Timestamp"])
841
 
842
  def topics_per_class(self,
843
  docs: List[str],
 
934
  `lambda x: sch.linkage(x, 'ward', optimal_ordering=True)`
935
  distance_function: The distance function to use on the c-TF-IDF matrix. Default is:
936
  `lambda x: 1 - cosine_similarity(x)`.
937
+ You can pass any function that returns either a square matrix of
938
+ shape (n_samples, n_samples) with zeros on the diagonal and
939
  non-negative values or condensed distance matrix of shape
940
  (n_samples * (n_samples - 1) / 2,) containing the upper
941
  triangular of the distance matrix.
 
1069
  use_embedding_model: bool = False,
1070
  calculate_tokens: bool = False,
1071
  separator: str = " ") -> Tuple[np.ndarray,
1072
+ Union[List[np.ndarray], None]]:
1073
  """ A post-hoc approximation of topic distributions across documents.
1074
 
1075
  In order to perform this approximation, each document is split into tokens
 
1979
  for key, val in sorted(mapping.items()):
1980
  mappings[val].append(key)
1981
  mappings = {topic_from:
1982
+ {"topics_to": topics_to,
1983
+ "topic_sizes": [self.topic_sizes_[topic] for topic in topics_to]}
1984
  for topic_from, topics_to in mappings.items()}
1985
 
1986
  # Update topics
 
2466
  specific points. Helps to speed up generation of visualizations.
2467
  nr_levels: The number of levels to be visualized in the hierarchy. First, the distances
2468
  in `hierarchical_topics.Distance` are split in `nr_levels` lists of distances with
2469
+ equal length. Then, for each list of distances, the merged topics, that have
2470
  a distance less or equal to the maximum distance of the selected list of distances, are selected.
2471
  NOTE: To get all possible merged steps, make sure that `nr_levels` is equal to
2472
  the length of `hierarchical_topics`.
 
3266
  sims = np.max(sim_matrix, axis=1)
3267
  to_merge = {
3268
  a - selected_topics["_outliers"]:
3269
+ b - merged_topics["_outliers"] for a, (b, val) in enumerate(zip(sims_idx, sims))
3270
  if val >= min_similarity
3271
  }
3272
  to_merge.update(new_topics_dict)
 
3297
  serialization: str = "safetensors",
3298
  save_embedding_model: Union[str, bool] = True,
3299
  save_ctfidf: bool = False,
3300
+ ):
3301
  """ Push your BERTopic model to a HuggingFace Hub
3302
 
3303
  Whenever you want to upload files to the Hub, you need to log in to your HuggingFace account:
 
3471
  documents: pd.DataFrame,
3472
  partial_fit: bool = False,
3473
  y: np.ndarray = None) -> Tuple[pd.DataFrame,
3474
+ np.ndarray]:
3475
  """ Cluster UMAP embeddings with HDBSCAN
3476
 
3477
  Arguments:
 
3522
  return documents, probabilities
3523
 
3524
  def _zeroshot_topic_modeling(self, documents: pd.DataFrame, embeddings: np.ndarray) -> Tuple[pd.DataFrame, np.array,
3525
+ pd.DataFrame, np.array]:
3526
  """ Find documents that could be assigned to either one of the topics in self.zeroshot_topic_list
3527
 
3528
  We transform the topics in `self.zeroshot_topic_list` to embeddings and
 
3607
  empty_dimensionality_model = BaseDimensionalityReduction()
3608
  empty_cluster_model = BaseCluster()
3609
  zeroshot_model = BERTopic(
3610
+ n_gram_range=self.n_gram_range,
3611
+ low_memory=self.low_memory,
3612
+ calculate_probabilities=self.calculate_probabilities,
3613
+ embedding_model=self.embedding_model,
3614
+ umap_model=empty_dimensionality_model,
3615
+ hdbscan_model=empty_cluster_model,
3616
+ vectorizer_model=self.vectorizer_model,
3617
+ ctfidf_model=self.ctfidf_model,
3618
+ representation_model=self.representation_model,
3619
+ verbose=self.verbose
3620
  ).fit(docs, embeddings=embeddings, y=y)
3621
  logger.info("Zeroshot Step 2 - Completed \u2713")
3622
  logger.info("Zeroshot Step 3 - Combining clustered topics with the zeroshot model")
 
3826
  # Sample documents per topic
3827
  documents_per_topic = (
3828
  documents.drop("Image", axis=1, errors="ignore")
3829
+ .groupby('Topic')
3830
+ .sample(n=nr_samples, replace=True, random_state=42)
3831
+ .drop_duplicates()
3832
  )
3833
 
3834
  # Find and extract documents that are most similar to the topic
 
4009
  documents: pd.DataFrame,
4010
  c_tf_idf: csr_matrix = None,
4011
  calculate_aspects: bool = True) -> Mapping[str,
4012
+ List[Tuple[str, float]]]:
4013
  """ Based on tf_idf scores per topic, extract the top n words per topic
4014
 
4015
  If the top words per topic need to be extracted, then only the `words` parameter
 
4128
  for key, val in sorted(mapped_topics.items()):
4129
  mappings[val].append(key)
4130
  mappings = {topic_from:
4131
+ {"topics_to": topics_to,
4132
+ "topic_sizes": [self.topic_sizes_[topic] for topic in topics_to]}
4133
  for topic_from, topics_to in mappings.items()}
4134
 
4135
  # Map topics
 
4179
  for key, val in sorted(mapped_topics.items()):
4180
  mappings[val].append(key)
4181
  mappings = {topic_from:
4182
+ {"topics_to": topics_to,
4183
+ "topic_sizes": [self.topic_sizes_[topic] for topic in topics_to]}
4184
  for topic_from, topics_to in mappings.items()}
4185
 
4186
  # Update documents and topics
 
4481
 
4482
  # Fit BERTopic without actually performing any clustering
4483
  topic_model = BERTopic(
4484
+ embedding_model=embedding_model,
4485
+ umap_model=empty_dimensionality_model,
4486
+ hdbscan_model=empty_cluster_model,
4487
+ **params
4488
  )
4489
  topic_model.topic_embeddings_ = tensors["topic_embeddings"].numpy()
4490
  topic_model.topic_representations_ = {int(key): val for key, val in topics["topic_representations"].items()}
BERTopic/docs/api/plotting/document_datamap.md CHANGED
@@ -1,3 +1,3 @@
1
- # `Document Data Map`
2
 
3
  ::: bertopic.plotting._datamap.visualize_document_datamap
 
1
+ # `Documents with DataMapPlot`
2
 
3
  ::: bertopic.plotting._datamap.visualize_document_datamap
BERTopic/docs/faq.md CHANGED
@@ -311,3 +311,23 @@ are important in understanding the general topic of the document. Although this
311
  have data that contains a lot of noise, for example, HTML-tags, then it would be best to remove them. HTML-tags
312
  typically do not contribute to the meaning of a document and should therefore be removed. However, if you apply
313
  topic modeling to HTML-code to extract topics of code, then it becomes important.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  have data that contains a lot of noise, for example, HTML-tags, then it would be best to remove them. HTML-tags
312
  typically do not contribute to the meaning of a document and should therefore be removed. However, if you apply
313
  topic modeling to HTML-code to extract topics of code, then it becomes important.
314
+
315
+ ## **I run into issues running on Apple Silicon. What should I do?**
316
+ Apple Silicon chips (M1 & M2) are based on the ARM64 (aka [AArch64](https://apple.stackexchange.com/questions/451238/is-m1-chip-aarch64-or-amd64), not to be confused with AMD64). There are known issues with upstream dependencies for this architecture, for example [numba](https://github.com/numba/numba/issues/5520). You may not always run into this issue, depending on the extras that you need.
317
+
318
+ One possible solution to this is to use [VS Code Dev Containers](https://code.visualstudio.com/docs/devcontainers/containers), which allows you to set up a Linux-based environment. To run BERTopic effectively you need to be aware of two things:
319
+
320
+ - Make sure to use a Docker image specifically compiled for ARM64
321
+ - Make sure to use `volume` instead of `mount-bind`, since the latter significantly reduces I/O speeds to disk
322
+
323
+ Using the pre-configured [Data Science Devcontainers](https://github.com/b-data/data-science-devcontainers) makes sure these settings are optimized. To start using them, do the following:
324
+
325
+ - Install and run Docker
326
+ - Install `python-base` or `python-scipy` [devcontainer](https://github.com/b-data/data-science-devcontainers)
327
+ - ℹ️ Change PYTHON_VERSION to 3.11 in the `devcontainer.json` to work with the latest version of Python 3.11 (currently 3.11.8)
328
+ - Open VS Code, build the container and start working
329
+ - Note that data is persisted in the container
330
+ - When using an unmodified devcontainer.json: work in `/home/vscode` which is the `home` directory of user `vscode`
331
+ - Python packages are installed to the home directory by default. This is due to env variable `PIP_USER=1`
332
+ - Note that the directory `/workspaces` is also persisted
333
+
BERTopic/docs/getting_started/visualization/visualize_documents.md CHANGED
@@ -1,3 +1,5 @@
 
 
1
  Using the `.visualize_topics`, we can visualize the topics and get insight into their relationships. However,
2
  you might want a more fine-grained approach where we can visualize the documents inside the topics to see
3
  if they were assigned correctly or whether they make sense. To do so, we can use the `topic_model.visualize_documents()`
@@ -43,6 +45,30 @@ When you visualize the documents, you might not always want to see the complete
43
  topic_model.visualize_documents(titles, reduced_embeddings=reduced_embeddings)
44
  ```
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  ## **Visualize Probabilities or Distribution**
47
 
48
  We can generate the topic-document probability matrix by simply setting `calculate_probabilities=True` if a HDBSCAN model is used:
@@ -100,3 +126,4 @@ df
100
  the distribution of the frequencies of topics across a document. It merely shows
101
  how confident BERTopic is that certain topics can be found in a document.
102
 
 
 
1
+ ## **Visualize documents with Plotly**
2
+
3
  Using the `.visualize_topics`, we can visualize the topics and get insight into their relationships. However,
4
  you might want a more fine-grained approach where we can visualize the documents inside the topics to see
5
  if they were assigned correctly or whether they make sense. To do so, we can use the `topic_model.visualize_documents()`
 
45
  topic_model.visualize_documents(titles, reduced_embeddings=reduced_embeddings)
46
  ```
47
 
48
+ ## **Visualize documents with DataMapPlot**
49
+
50
+ `.visualize_document_datamap` provides an alternative way to visualize the documents inside the topics as a static [DataMapPlot](https://datamapplot.readthedocs.io/en/latest/intro_splash.html). Using the same pipeline as above, you can generate a DataMapPlot by running:
51
+
52
+ ```python
53
+
54
+ # with the original embeddings
55
+ topic_model.visualize_document_datamap(docs, embeddings=embeddings)
56
+
57
+ # with the reduced embeddings
58
+ topic_model.visualize_document_datamap(docs, reduced_embeddings=reduced_embeddings)
59
+ ```
60
+
61
+ <br><br>
62
+ <img src="./datamapplot.png">
63
+ <br><br>
64
+
65
+ Or if you want to save the resulting figure:
66
+
67
+ ```python
68
+ fig = topic_model.visualize_document_datamap(docs, reduced_embeddings=reduced_embeddings)
69
+ fig.savefig("path/to/file.png", bbox_inches="tight")
70
+ ```
71
+
72
  ## **Visualize Probabilities or Distribution**
73
 
74
  We can generate the topic-document probability matrix by simply setting `calculate_probabilities=True` if a HDBSCAN model is used:
 
126
  the distribution of the frequencies of topics across a document. It merely shows
127
  how confident BERTopic is that certain topics can be found in a document.
128
 
129
+
BERTopic/docs/index.md CHANGED
@@ -246,6 +246,7 @@ to tweak the model to your liking.
246
  |-----------------------|---|
247
  | Visualize Topics | `.visualize_topics()` |
248
  | Visualize Documents | `.visualize_documents()` |
 
249
  | Visualize Document Hierarchy | `.visualize_hierarchical_documents()` |
250
  | Visualize Topic Hierarchy | `.visualize_hierarchy()` |
251
  | Visualize Topic Tree | `.get_topic_tree(hierarchical_topics)` |
@@ -254,7 +255,8 @@ to tweak the model to your liking.
254
  | Visualize Term Score Decline | `.visualize_term_rank()` |
255
  | Visualize Topic Probability Distribution | `.visualize_distribution(probs[0])` |
256
  | Visualize Topics over Time | `.visualize_topics_over_time(topics_over_time)` |
257
- | Visualize Topics per Class | `.visualize_topics_per_class(topics_per_class)` |
 
258
 
259
 
260
  ## **Citation**
 
246
  |-----------------------|---|
247
  | Visualize Topics | `.visualize_topics()` |
248
  | Visualize Documents | `.visualize_documents()` |
249
+ | Visualize Documents with DataMapPlot | `.visualize_document_datamap()` |
250
  | Visualize Document Hierarchy | `.visualize_hierarchical_documents()` |
251
  | Visualize Topic Hierarchy | `.visualize_hierarchy()` |
252
  | Visualize Topic Tree | `.get_topic_tree(hierarchical_topics)` |
 
255
  | Visualize Term Score Decline | `.visualize_term_rank()` |
256
  | Visualize Topic Probability Distribution | `.visualize_distribution(probs[0])` |
257
  | Visualize Topics over Time | `.visualize_topics_over_time(topics_over_time)` |
258
+ | Visualize Topics per Class | `.visualize_topics_per_class(topics_per_class)` |
259
+
260
 
261
 
262
  ## **Citation**
BERTopic/mkdocs.yml CHANGED
@@ -83,6 +83,7 @@ nav:
83
  - Plotting:
84
  - Barchart: api/plotting/barchart.md
85
  - Documents: api/plotting/documents.md
 
86
  - DTM: api/plotting/dtm.md
87
  - Hierarchical documents: api/plotting/hierarchical_documents.md
88
  - Hierarchical topics: api/plotting/hierarchy.md
 
83
  - Plotting:
84
  - Barchart: api/plotting/barchart.md
85
  - Documents: api/plotting/documents.md
86
+ - Documents with DataMapPlot: api/plotting/document_datamap.md
87
  - DTM: api/plotting/dtm.md
88
  - Hierarchical documents: api/plotting/hierarchical_documents.md
89
  - Hierarchical topics: api/plotting/hierarchy.md