Update BERTopic/my_topic_modeling.py
Browse files
- BERTopic/my_topic_modeling.py +14 -12
BERTopic/my_topic_modeling.py
CHANGED
|
@@ -76,13 +76,13 @@ target_col = 'normalized_content'
|
|
| 76 |
|
| 77 |
|
| 78 |
|
| 79 |
-
def create_logger_file_and_console():
|
| 80 |
# create logger for "Sample App"
|
| 81 |
logger = logging.getLogger('automated_testing')
|
| 82 |
logger.setLevel(logging.DEBUG)
|
| 83 |
|
| 84 |
# create file handler which logs even debug messages
|
| 85 |
-
fileh = logging.FileHandler(
|
| 86 |
fileh.setLevel(logging.DEBUG)
|
| 87 |
|
| 88 |
# create console handler with a higher log level
|
|
@@ -100,13 +100,13 @@ def create_logger_file_and_console():
|
|
| 100 |
|
| 101 |
return logger
|
| 102 |
|
| 103 |
-
def create_logger_file():
|
| 104 |
# create logger for "Sample App"
|
| 105 |
logger = logging.getLogger('automated_testing')
|
| 106 |
logger.setLevel(logging.INFO)
|
| 107 |
|
| 108 |
# create file handler which logs even debug messages
|
| 109 |
-
fileh = logging.FileHandler(
|
| 110 |
fileh.setLevel(logging.INFO)
|
| 111 |
|
| 112 |
# create formatter and add it to the handlers
|
|
@@ -413,11 +413,17 @@ def coherence_score(topic_model, df_documents):
|
|
| 413 |
return coherence
|
| 414 |
|
| 415 |
def working(args: argparse.Namespace, name_dataset: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
-
|
| 418 |
############# Create logger##################################
|
| 419 |
-
fandc_logger = create_logger_file_and_console()
|
| 420 |
-
file_logger = create_logger_file()
|
| 421 |
console_logger = create_logger_console()
|
| 422 |
##############################################################
|
| 423 |
|
|
@@ -440,11 +446,6 @@ def working(args: argparse.Namespace, name_dataset: str):
|
|
| 440 |
fandc_logger.log(logging.INFO, f'Processing data for {name_dataset} dataset successfully!')
|
| 441 |
#######################################################################
|
| 442 |
|
| 443 |
-
source = f'en_{name_dataset}'
|
| 444 |
-
output_subdir_name = source + f'/bertopic2_non_zeroshot_{args.n_topics}topic_'+doc_type+'_'+doc_level+'_'+doc_time
|
| 445 |
-
output_subdir = os.path.join(output_dir, output_subdir_name)
|
| 446 |
-
if not os.path.exists(output_subdir):
|
| 447 |
-
os.makedirs(output_subdir)
|
| 448 |
|
| 449 |
|
| 450 |
# Create model
|
|
@@ -558,6 +559,7 @@ def working(args: argparse.Namespace, name_dataset: str):
|
|
| 558 |
|
| 559 |
# ****** 2
|
| 560 |
# Get reduce topics and topic over time for each n_topics
|
|
|
|
| 561 |
fandc_logger.log(logging.INFO, f'Staring reduce topics and topic over time from 10 to 50...')
|
| 562 |
for n_topics in [10,20,30,40,50]:
|
| 563 |
topic_model_copy = copy.deepcopy(topic_model)
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
|
| 79 |
+
def create_logger_file_and_console(path_file):
|
| 80 |
# create logger for "Sample App"
|
| 81 |
logger = logging.getLogger('automated_testing')
|
| 82 |
logger.setLevel(logging.DEBUG)
|
| 83 |
|
| 84 |
# create file handler which logs even debug messages
|
| 85 |
+
fileh = logging.FileHandler(path_file, mode='a')
|
| 86 |
fileh.setLevel(logging.DEBUG)
|
| 87 |
|
| 88 |
# create console handler with a higher log level
|
|
|
|
| 100 |
|
| 101 |
return logger
|
| 102 |
|
| 103 |
+
def create_logger_file(path_file):
|
| 104 |
# create logger for "Sample App"
|
| 105 |
logger = logging.getLogger('automated_testing')
|
| 106 |
logger.setLevel(logging.INFO)
|
| 107 |
|
| 108 |
# create file handler which logs even debug messages
|
| 109 |
+
fileh = logging.FileHandler(path_file, mode='a')
|
| 110 |
fileh.setLevel(logging.INFO)
|
| 111 |
|
| 112 |
# create formatter and add it to the handlers
|
|
|
|
| 413 |
return coherence
|
| 414 |
|
| 415 |
def working(args: argparse.Namespace, name_dataset: str):
|
| 416 |
+
|
| 417 |
+
source = f'en_{name_dataset}'
|
| 418 |
+
output_subdir_name = source + f'/bertopic2_non_zeroshot_{args.n_topics}topic_'+doc_type+'_'+doc_level+'_'+doc_time
|
| 419 |
+
output_subdir = os.path.join(output_dir, output_subdir_name)
|
| 420 |
+
if not os.path.exists(output_subdir):
|
| 421 |
+
os.makedirs(output_subdir)
|
| 422 |
|
| 423 |
+
info_log_out = os.path.join(output_subdir, 'info.log')
|
| 424 |
############# Create logger##################################
|
| 425 |
+
fandc_logger = create_logger_file_and_console(info_log_out)
|
| 426 |
+
file_logger = create_logger_file(info_log_out)
|
| 427 |
console_logger = create_logger_console()
|
| 428 |
##############################################################
|
| 429 |
|
|
|
|
| 446 |
fandc_logger.log(logging.INFO, f'Processing data for {name_dataset} dataset successfully!')
|
| 447 |
#######################################################################
|
| 448 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
|
| 450 |
|
| 451 |
# Create model
|
|
|
|
| 559 |
|
| 560 |
# ****** 2
|
| 561 |
# Get reduce topics and topic over time for each n_topics
|
| 562 |
+
if args.need_reduce_n_tops == 'yes':
|
| 563 |
fandc_logger.log(logging.INFO, f'Staring reduce topics and topic over time from 10 to 50...')
|
| 564 |
for n_topics in [10,20,30,40,50]:
|
| 565 |
topic_model_copy = copy.deepcopy(topic_model)
|