Update BERTopic/my_topic_modeling.py
Browse files
BERTopic/my_topic_modeling.py
CHANGED
|
@@ -589,34 +589,7 @@ def working(args: argparse.Namespace, name_dataset: str):
|
|
| 589 |
topic_dtm_path_out = os.path.join(output_subdir, f'topics_dtm_{name_dataset}'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
|
| 590 |
topics_over_time.to_csv(topic_dtm_path_out, encoding='utf-8')
|
| 591 |
fandc_logger.log(logging.INFO, f'Save topics over time for {name_dataset} dataset successfully!')
|
| 592 |
-
|
| 593 |
-
# ****** 2
|
| 594 |
-
# Get reduce topics and topic over time for each n_topics
|
| 595 |
-
if args.need_reduce_n_topics == 'yes':
|
| 596 |
-
fandc_logger.log(logging.INFO, f'Staring reduce topics and topic over time from 10 to 50...')
|
| 597 |
-
for n_topics in [10,20,30,40,50]:
|
| 598 |
-
topic_model_copy = copy.deepcopy(topic_model)
|
| 599 |
-
topic_model_copy.reduce_topics(df_documents, nr_topics=n_topics)
|
| 600 |
-
fig = topic_model_copy.visualize_topics(title=f"Intertopic Distance Map: {n_topics} topics")
|
| 601 |
-
fig.show()
|
| 602 |
-
vis_save_dir = os.path.join(output_subdir, f'bertopic_reduce_vis_{name_dataset}'+str(n_topics)+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.html')
|
| 603 |
-
fig.write_html(vis_save_dir)
|
| 604 |
-
|
| 605 |
-
topic_info = topic_model_copy.get_topic_info()
|
| 606 |
-
topic_info_path_out = os.path.join(output_subdir, f'topic_reduce_info_{name_dataset}'+str(n_topics)+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
|
| 607 |
-
topic_info.to_csv(topic_info_path_out, encoding='utf-8')
|
| 608 |
-
|
| 609 |
-
for key in timestamps_dict.keys():
|
| 610 |
-
topics_over_time_ = topic_model_copy.topics_over_time(df_documents, timestamps_dict[key])
|
| 611 |
-
fig = topic_model_copy.visualize_topics_over_time(topics_over_time_, top_n_topics=10, title=f"Topics over time following {key}")
|
| 612 |
-
fig.show()
|
| 613 |
-
vis_save_dir = os.path.join(output_subdir, f'bertopic_reduce_dtm_vis_{name_dataset}'+str(n_topics)+'_'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.html')
|
| 614 |
-
fig.write_html(vis_save_dir)
|
| 615 |
-
|
| 616 |
-
topic_dtm_path_out = os.path.join(output_subdir, f'topics_reduce_dtm_{name_dataset}'+str(n_topics)+'_'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
|
| 617 |
-
topics_over_time_.to_csv(topic_dtm_path_out, encoding='utf-8')
|
| 618 |
-
|
| 619 |
-
fandc_logger.log(logging.INFO, f'Save topics reduce and topic over time for {name_dataset} dataset successfully!')
|
| 620 |
###################################
|
| 621 |
fandc_logger.log(logging.INFO, f'ENDING TRAINING TOPIC MODELING {name_dataset} dataset\n')
|
| 622 |
|
|
|
|
| 589 |
topic_dtm_path_out = os.path.join(output_subdir, f'topics_dtm_{name_dataset}'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
|
| 590 |
topics_over_time.to_csv(topic_dtm_path_out, encoding='utf-8')
|
| 591 |
fandc_logger.log(logging.INFO, f'Save topics over time for {name_dataset} dataset successfully!')
|
| 592 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
###################################
|
| 594 |
fandc_logger.log(logging.INFO, f'ENDING TRAINING TOPIC MODELING {name_dataset} dataset\n')
|
| 595 |
|