Update BERTopic/my_topic_modeling.py
Browse files
BERTopic/my_topic_modeling.py
CHANGED
|
@@ -52,9 +52,13 @@ from cuml.manifold import UMAP
|
|
| 52 |
import gensim.corpora as corpora
|
| 53 |
from gensim.models.coherencemodel import CoherenceModel
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
# Get working directory
|
| 57 |
-
working_dir = os.path.abspath(os.path.join("/
|
| 58 |
data_dir = os.path.join(working_dir, 'data')
|
| 59 |
lib_dir = os.path.join(working_dir, 'libs')
|
| 60 |
outer_output_dir = os.path.join(working_dir, 'outputs')
|
|
@@ -74,6 +78,18 @@ doc_type = 'reviews'
|
|
| 74 |
doc_level = 'sentence'
|
| 75 |
target_col = 'normalized_content'
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def create_logger_file_and_console(path_file):
|
|
@@ -459,7 +475,7 @@ def working(args: argparse.Namespace, name_dataset: str):
|
|
| 459 |
|
| 460 |
# Create model
|
| 461 |
fandc_logger.log(logging.INFO, f'Create model for {name_dataset} dataset')
|
| 462 |
-
topic_model = create_model_bertopic_booking(args.n_topics)
|
| 463 |
|
| 464 |
# Fitting model
|
| 465 |
fandc_logger.log(logging.INFO, f'Training model for {name_dataset} dataset')
|
|
@@ -607,3 +623,5 @@ if __name__ == "__main__":
|
|
| 607 |
working(args, 'tripadvisor')
|
| 608 |
else:
|
| 609 |
working(args, args.name_dataset)
|
|
|
|
|
|
|
|
|
| 52 |
import gensim.corpora as corpora
|
| 53 |
from gensim.models.coherencemodel import CoherenceModel
|
| 54 |
|
| 55 |
+
import torch
|
| 56 |
+
from GPUtil import showUtilization as gpu_usage
|
| 57 |
+
from numba import cuda
|
| 58 |
+
|
| 59 |
|
| 60 |
# Get working directory
|
| 61 |
+
working_dir = os.path.abspath(os.path.join("/workspace", "TopicModelingRepo"))
|
| 62 |
data_dir = os.path.join(working_dir, 'data')
|
| 63 |
lib_dir = os.path.join(working_dir, 'libs')
|
| 64 |
outer_output_dir = os.path.join(working_dir, 'outputs')
|
|
|
|
| 78 |
doc_level = 'sentence'
|
| 79 |
target_col = 'normalized_content'
|
| 80 |
|
| 81 |
+
def free_gpu_cache():
    """Free cached GPU memory and print utilization before and after.

    Steps, in order:
      1. Print the current GPU utilization table (GPUtil).
      2. Ask PyTorch to empty its CUDA cache.
      3. Select device 0 through numba, close the CUDA context, and
         select device 0 again.
      4. Print the GPU utilization table once more.

    Takes no arguments and returns ``None``; all output goes to stdout.
    """
    # The device index is fixed to the first GPU.
    device_id = 0

    print("Initial GPU Usage")
    gpu_usage()

    # Drop memory that PyTorch is caching but no longer using.
    torch.cuda.empty_cache()

    # Tear down numba's CUDA context on the device, then re-select the
    # device so it is current again for any later GPU work.
    cuda.select_device(device_id)
    cuda.close()
    cuda.select_device(device_id)

    print("GPU Usage after emptying the cache")
    gpu_usage()
|
| 93 |
|
| 94 |
|
| 95 |
def create_logger_file_and_console(path_file):
|
|
|
|
| 475 |
|
| 476 |
# Create model
|
| 477 |
fandc_logger.log(logging.INFO, f'Create model for {name_dataset} dataset')
|
| 478 |
+
topic_model = create_model_bertopic_booking(args.n_topics)
|
| 479 |
|
| 480 |
# Fitting model
|
| 481 |
fandc_logger.log(logging.INFO, f'Training model for {name_dataset} dataset')
|
|
|
|
| 623 |
working(args, 'tripadvisor')
|
| 624 |
else:
|
| 625 |
working(args, args.name_dataset)
|
| 626 |
+
|
| 627 |
+
free_gpu_cache()
|