kisejin committed on
Commit
44399fe
·
verified ·
1 Parent(s): 5ecdc19

Update BERTopic/my_topic_modeling.py

Browse files
Files changed (1) hide show
  1. BERTopic/my_topic_modeling.py +14 -12
BERTopic/my_topic_modeling.py CHANGED
@@ -76,13 +76,13 @@ target_col = 'normalized_content'
76
 
77
 
78
 
79
- def create_logger_file_and_console():
80
  # create logger for "Sample App"
81
  logger = logging.getLogger('automated_testing')
82
  logger.setLevel(logging.DEBUG)
83
 
84
  # create file handler which logs even debug messages
85
- fileh = logging.FileHandler('info.log', mode='a')
86
  fileh.setLevel(logging.DEBUG)
87
 
88
  # create console handler with a higher log level
@@ -100,13 +100,13 @@ def create_logger_file_and_console():
100
 
101
  return logger
102
 
103
- def create_logger_file():
104
  # create logger for "Sample App"
105
  logger = logging.getLogger('automated_testing')
106
  logger.setLevel(logging.INFO)
107
 
108
  # create file handler which logs even debug messages
109
- fileh = logging.FileHandler('info.log', mode='a')
110
  fileh.setLevel(logging.INFO)
111
 
112
  # create formatter and add it to the handlers
@@ -413,11 +413,17 @@ def coherence_score(topic_model, df_documents):
413
  return coherence
414
 
415
  def working(args: argparse.Namespace, name_dataset: str):
 
 
 
 
 
 
416
 
417
-
418
  ############# Create logger##################################
419
- fandc_logger = create_logger_file_and_console()
420
- file_logger = create_logger_file()
421
  console_logger = create_logger_console()
422
  ##############################################################
423
 
@@ -440,11 +446,6 @@ def working(args: argparse.Namespace, name_dataset: str):
440
  fandc_logger.log(logging.INFO, f'Processing data for {name_dataset} dataset successfully!')
441
  #######################################################################
442
 
443
- source = f'en_{name_dataset}'
444
- output_subdir_name = source + f'/bertopic2_non_zeroshot_{args.n_topics}topic_'+doc_type+'_'+doc_level+'_'+doc_time
445
- output_subdir = os.path.join(output_dir, output_subdir_name)
446
- if not os.path.exists(output_subdir):
447
- os.makedirs(output_subdir)
448
 
449
 
450
  # Create model
@@ -558,6 +559,7 @@ def working(args: argparse.Namespace, name_dataset: str):
558
 
559
  # ****** 2
560
  # Get reduce topics and topic over time for each n_topics
 
561
  fandc_logger.log(logging.INFO, f'Staring reduce topics and topic over time from 10 to 50...')
562
  for n_topics in [10,20,30,40,50]:
563
  topic_model_copy = copy.deepcopy(topic_model)
 
76
 
77
 
78
 
79
+ def create_logger_file_and_console(path_file):
80
  # create logger for "Sample App"
81
  logger = logging.getLogger('automated_testing')
82
  logger.setLevel(logging.DEBUG)
83
 
84
  # create file handler which logs even debug messages
85
+ fileh = logging.FileHandler(path_file, mode='a')
86
  fileh.setLevel(logging.DEBUG)
87
 
88
  # create console handler with a higher log level
 
100
 
101
  return logger
102
 
103
+ def create_logger_file(path_file):
104
  # create logger for "Sample App"
105
  logger = logging.getLogger('automated_testing')
106
  logger.setLevel(logging.INFO)
107
 
108
  # create file handler which logs even debug messages
109
+ fileh = logging.FileHandler(path_file, mode='a')
110
  fileh.setLevel(logging.INFO)
111
 
112
  # create formatter and add it to the handlers
 
413
  return coherence
414
 
415
  def working(args: argparse.Namespace, name_dataset: str):
416
+
417
+ source = f'en_{name_dataset}'
418
+ output_subdir_name = source + f'/bertopic2_non_zeroshot_{args.n_topics}topic_'+doc_type+'_'+doc_level+'_'+doc_time
419
+ output_subdir = os.path.join(output_dir, output_subdir_name)
420
+ if not os.path.exists(output_subdir):
421
+ os.makedirs(output_subdir)
422
 
423
+ info_log_out = os.path.join(output_subdir, 'info.log')
424
  ############# Create logger##################################
425
+ fandc_logger = create_logger_file_and_console(info_log_out)
426
+ file_logger = create_logger_file(info_log_out)
427
  console_logger = create_logger_console()
428
  ##############################################################
429
 
 
446
  fandc_logger.log(logging.INFO, f'Processing data for {name_dataset} dataset successfully!')
447
  #######################################################################
448
 
 
 
 
 
 
449
 
450
 
451
  # Create model
 
559
 
560
  # ****** 2
561
  # Get reduce topics and topic over time for each n_topics
562
+ if args.need_reduce_n_tops == 'yes':
563
  fandc_logger.log(logging.INFO, f'Staring reduce topics and topic over time from 10 to 50...')
564
  for n_topics in [10,20,30,40,50]:
565
  topic_model_copy = copy.deepcopy(topic_model)