Spaces:

MarMont
/

MARITESS

Sleeping

MarMont committed on Oct 15, 2023

Commit

49e4936

1 Parent(s): 95dd02a

more debugging

Files changed (2) hide show

app.py CHANGED Viewed

@@ -185,6 +185,8 @@ def get_topic_value(row, i):
             print(e)
 def full_lda(df):
     df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
     # Apply the function above and get tweets free of emoji's
@@ -243,6 +245,7 @@ def full_lda(df):
     # Apply tokenizer
     df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
     # Create a id2word dictionary
     global id2word
     id2word = Dictionary(df['lemma_tokens'])
@@ -289,6 +292,7 @@ def full_lda(df):
     global num_topics
     num_topics = coherence_averages.index(k_max) + 2
     grid = {}
     grid['Validation_Set'] = {}
@@ -360,6 +364,7 @@ def full_lda(df):
     lda_topics = lda_model_final.show_topics(num_words=10)
     topics = []
     filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
@@ -377,6 +382,7 @@ def full_lda(df):
         topic_clusters.append(df[df['max_topic'].isin(([i]))])
         topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
     global top_tweets
     top_tweets = []
     for i in range(len(topic_clusters)):

             print(e)
 def full_lda(df):
+    print('cleaning')
     df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
     # Apply the function above and get tweets free of emoji's
     # Apply tokenizer
     df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
+    print('base model setup')
     # Create a id2word dictionary
     global id2word
     id2word = Dictionary(df['lemma_tokens'])
     global num_topics
     num_topics = coherence_averages.index(k_max) + 2
+    print('hyperparameter opt')
     grid = {}
     grid['Validation_Set'] = {}
     lda_topics = lda_model_final.show_topics(num_words=10)
+    print('assign topics')
     topics = []
     filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
         topic_clusters.append(df[df['max_topic'].isin(([i]))])
         topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
+    print('rep topics')
     global top_tweets
     top_tweets = []
     for i in range(len(topic_clusters)):

appv1.py CHANGED Viewed

@@ -555,5 +555,6 @@ iface = gr.Interface(fn=main,
                             ],
                     # examples=examples,
                     outputs=["text",
-                            "text"])
 iface.launch()

                             ],
                     # examples=examples,
                     outputs=["text",
+                            "text"]
+                    )
 iface.launch()