more debugging
Browse files
app.py
CHANGED
|
@@ -185,6 +185,8 @@ def get_topic_value(row, i):
|
|
| 185 |
print(e)
|
| 186 |
|
| 187 |
def full_lda(df):
|
|
|
|
|
|
|
| 188 |
df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
|
| 189 |
|
| 190 |
# Apply the function above to get tweets free of emojis
|
|
@@ -243,6 +245,7 @@ def full_lda(df):
|
|
| 243 |
# Apply tokenizer
|
| 244 |
df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
|
| 245 |
|
|
|
|
| 246 |
# Create an id2word dictionary
|
| 247 |
global id2word
|
| 248 |
id2word = Dictionary(df['lemma_tokens'])
|
|
@@ -289,6 +292,7 @@ def full_lda(df):
|
|
| 289 |
global num_topics
|
| 290 |
num_topics = coherence_averages.index(k_max) + 2
|
| 291 |
|
|
|
|
| 292 |
grid = {}
|
| 293 |
grid['Validation_Set'] = {}
|
| 294 |
|
|
@@ -360,6 +364,7 @@ def full_lda(df):
|
|
| 360 |
|
| 361 |
lda_topics = lda_model_final.show_topics(num_words=10)
|
| 362 |
|
|
|
|
| 363 |
topics = []
|
| 364 |
filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
|
| 365 |
|
|
@@ -377,6 +382,7 @@ def full_lda(df):
|
|
| 377 |
topic_clusters.append(df[df['max_topic'].isin(([i]))])
|
| 378 |
topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
|
| 379 |
|
|
|
|
| 380 |
global top_tweets
|
| 381 |
top_tweets = []
|
| 382 |
for i in range(len(topic_clusters)):
|
|
|
|
| 185 |
print(e)
|
| 186 |
|
| 187 |
def full_lda(df):
|
| 188 |
+
|
| 189 |
+
print('cleaning')
|
| 190 |
df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
|
| 191 |
|
| 192 |
# Apply the function above to get tweets free of emojis
|
|
|
|
| 245 |
# Apply tokenizer
|
| 246 |
df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
|
| 247 |
|
| 248 |
+
print('base model setup')
|
| 249 |
# Create an id2word dictionary
|
| 250 |
global id2word
|
| 251 |
id2word = Dictionary(df['lemma_tokens'])
|
|
|
|
| 292 |
global num_topics
|
| 293 |
num_topics = coherence_averages.index(k_max) + 2
|
| 294 |
|
| 295 |
+
print('hyperparameter opt')
|
| 296 |
grid = {}
|
| 297 |
grid['Validation_Set'] = {}
|
| 298 |
|
|
|
|
| 364 |
|
| 365 |
lda_topics = lda_model_final.show_topics(num_words=10)
|
| 366 |
|
| 367 |
+
print('assign topics')
|
| 368 |
topics = []
|
| 369 |
filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
|
| 370 |
|
|
|
|
| 382 |
topic_clusters.append(df[df['max_topic'].isin(([i]))])
|
| 383 |
topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
|
| 384 |
|
| 385 |
+
print('rep topics')
|
| 386 |
global top_tweets
|
| 387 |
top_tweets = []
|
| 388 |
for i in range(len(topic_clusters)):
|
appv1.py
CHANGED
|
@@ -555,5 +555,6 @@ iface = gr.Interface(fn=main,
|
|
| 555 |
],
|
| 556 |
# examples=examples,
|
| 557 |
outputs=["text",
|
| 558 |
-
"text"]
|
|
|
|
| 559 |
iface.launch()
|
|
|
|
| 555 |
],
|
| 556 |
# examples=examples,
|
| 557 |
outputs=["text",
|
| 558 |
+
"text"]
|
| 559 |
+
)
|
| 560 |
iface.launch()
|