kisejin committed on
Commit
b0f7b59
·
verified ·
1 Parent(s): 0e76626

Update BERTopic/my_topic_modeling.py

Browse files
Files changed (1) hide show
  1. BERTopic/my_topic_modeling.py +1 -28
BERTopic/my_topic_modeling.py CHANGED
@@ -589,34 +589,7 @@ def working(args: argparse.Namespace, name_dataset: str):
589
  topic_dtm_path_out = os.path.join(output_subdir, f'topics_dtm_{name_dataset}'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
590
  topics_over_time.to_csv(topic_dtm_path_out, encoding='utf-8')
591
  fandc_logger.log(logging.INFO, f'Save topics over time for {name_dataset} dataset successfully!')
592
-
593
- # ****** 2
594
- # Get reduce topics and topic over time for each n_topics
595
- if args.need_reduce_n_topics == 'yes':
596
- fandc_logger.log(logging.INFO, f'Staring reduce topics and topic over time from 10 to 50...')
597
- for n_topics in [10,20,30,40,50]:
598
- topic_model_copy = copy.deepcopy(topic_model)
599
- topic_model_copy.reduce_topics(df_documents, nr_topics=n_topics)
600
- fig = topic_model_copy.visualize_topics(title=f"Intertopic Distance Map: {n_topics} topics")
601
- fig.show()
602
- vis_save_dir = os.path.join(output_subdir, f'bertopic_reduce_vis_{name_dataset}'+str(n_topics)+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.html')
603
- fig.write_html(vis_save_dir)
604
-
605
- topic_info = topic_model_copy.get_topic_info()
606
- topic_info_path_out = os.path.join(output_subdir, f'topic_reduce_info_{name_dataset}'+str(n_topics)+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
607
- topic_info.to_csv(topic_info_path_out, encoding='utf-8')
608
-
609
- for key in timestamps_dict.keys():
610
- topics_over_time_ = topic_model_copy.topics_over_time(df_documents, timestamps_dict[key])
611
- fig = topic_model_copy.visualize_topics_over_time(topics_over_time_, top_n_topics=10, title=f"Topics over time following {key}")
612
- fig.show()
613
- vis_save_dir = os.path.join(output_subdir, f'bertopic_reduce_dtm_vis_{name_dataset}'+str(n_topics)+'_'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.html')
614
- fig.write_html(vis_save_dir)
615
-
616
- topic_dtm_path_out = os.path.join(output_subdir, f'topics_reduce_dtm_{name_dataset}'+str(n_topics)+'_'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
617
- topics_over_time_.to_csv(topic_dtm_path_out, encoding='utf-8')
618
-
619
- fandc_logger.log(logging.INFO, f'Save topics reduce and topic over time for {name_dataset} dataset successfully!')
620
  ###################################
621
  fandc_logger.log(logging.INFO, f'ENDING TRAINING TOPIC MODELING {name_dataset} dataset\n')
622
 
 
589
  topic_dtm_path_out = os.path.join(output_subdir, f'topics_dtm_{name_dataset}'+key+'_'+doc_type+'_'+doc_level+'_'+doc_time+'.csv')
590
  topics_over_time.to_csv(topic_dtm_path_out, encoding='utf-8')
591
  fandc_logger.log(logging.INFO, f'Save topics over time for {name_dataset} dataset successfully!')
592
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  ###################################
594
  fandc_logger.log(logging.INFO, f'ENDING TRAINING TOPIC MODELING {name_dataset} dataset\n')
595