Spaces:

MarMont
/

MARITESS

Sleeping

MarMont committed on Oct 15, 2023

Commit

12bb295

1 Parent(s): 802e30e

separate function for cleaning

Files changed (1) hide show

app.py CHANGED Viewed

@@ -185,9 +185,7 @@ def get_topic_value(row, i):
         except Exception as e:
             print(e)
-def full_lda(df):
-    print('cleaning')
     df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
     # Apply the function above and get tweets free of emoji's
@@ -246,6 +244,13 @@ def full_lda(df):
     # Apply tokenizer
     df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
     print('base model setup')
     # Create a id2word dictionary
     global id2word
@@ -532,11 +537,13 @@ def main(dataset, model, progress=gr.Progress(track_tqdm=True)):
     print(df)
     if model == 'LDA':
         print('doing lda')
         top_tweets = full_lda(df)
         print('done lda')
         place_data = 'test'
     else:
         base_bertopic(df)
         top_tweets = optimized_bertopic()

         except Exception as e:
             print(e)
+def cleaning(df):
     df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
     # Apply the function above and get tweets free of emoji's
     # Apply tokenizer
     df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
+    return df
+def full_lda(df):
+    print('cleaning')
     print('base model setup')
     # Create a id2word dictionary
     global id2word
     print(df)
     if model == 'LDA':
+        df = cleaning(df)
         print('doing lda')
         top_tweets = full_lda(df)
         print('done lda')
         place_data = 'test'
     else:
+        df = cleaning(df)
         base_bertopic(df)
         top_tweets = optimized_bertopic()