try bertopic
Browse files
app.py
CHANGED
|
@@ -449,7 +449,7 @@ def compute_coherence_value_bertopic(topic_model):
|
|
| 449 |
|
| 450 |
return coherence_score
|
| 451 |
|
| 452 |
-
def base_bertopic():
|
| 453 |
df['lemma_tokens_string'] = df['lemma_tokens'].apply(lambda x: ' '.join(x))
|
| 454 |
global id2word
|
| 455 |
id2word = Dictionary(df['lemma_tokens'])
|
|
@@ -472,7 +472,7 @@ def base_bertopic():
|
|
| 472 |
except:
|
| 473 |
print('Unable to generate meaningful topics (Base BERTopic model)')
|
| 474 |
|
| 475 |
-
def optimized_bertopic():
|
| 476 |
vectorizer_model = CountVectorizer(max_features=1_000, stop_words="english")
|
| 477 |
optimized_topic_model = BERTopic(umap_model=umap_model,
|
| 478 |
language="multilingual",
|
|
@@ -505,6 +505,7 @@ def optimized_bertopic():
|
|
| 505 |
tweets.append(df.loc[index, 'original_tweets'])
|
| 506 |
print(tweets)
|
| 507 |
top_tweets.append(tweets)
|
|
|
|
| 508 |
|
| 509 |
global examples
|
| 510 |
|
|
@@ -536,8 +537,8 @@ def main(dataset, model, progress=gr.Progress(track_tqdm=True)):
|
|
| 536 |
print('done lda')
|
| 537 |
place_data = 'test'
|
| 538 |
else:
|
| 539 |
-
base_bertopic()
|
| 540 |
-
optimized_bertopic()
|
| 541 |
|
| 542 |
print('doing topic summarization')
|
| 543 |
headlines = topic_summarization(top_tweets)
|
|
|
|
| 449 |
|
| 450 |
return coherence_score
|
| 451 |
|
| 452 |
+
def base_bertopic(df):
|
| 453 |
df['lemma_tokens_string'] = df['lemma_tokens'].apply(lambda x: ' '.join(x))
|
| 454 |
global id2word
|
| 455 |
id2word = Dictionary(df['lemma_tokens'])
|
|
|
|
| 472 |
except:
|
| 473 |
print('Unable to generate meaningful topics (Base BERTopic model)')
|
| 474 |
|
| 475 |
+
def optimized_bertopic(df):
|
| 476 |
vectorizer_model = CountVectorizer(max_features=1_000, stop_words="english")
|
| 477 |
optimized_topic_model = BERTopic(umap_model=umap_model,
|
| 478 |
language="multilingual",
|
|
|
|
| 505 |
tweets.append(df.loc[index, 'original_tweets'])
|
| 506 |
print(tweets)
|
| 507 |
top_tweets.append(tweets)
|
| 508 |
+
return top_tweets
|
| 509 |
|
| 510 |
global examples
|
| 511 |
|
|
|
|
| 537 |
print('done lda')
|
| 538 |
place_data = 'test'
|
| 539 |
else:
|
| 540 |
+
base_bertopic(df)
|
| 541 |
+
top_tweets = optimized_bertopic(df)
|
| 542 |
|
| 543 |
print('doing topic summarization')
|
| 544 |
headlines = topic_summarization(top_tweets)
|