Spaces:

somosnlp-hackathon-2022
/

Sexismdetection

Runtime error

App Files Files Community

robertou2 committed on Mar 21, 2022

Commit

315486a

1 Parent(s): a1d3912

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -27

app.py CHANGED Viewed

@@ -28,33 +28,7 @@ auth.set_access_token(access_token, access_token_secret)
 api = tw.API(auth, wait_on_rate_limit=True)
-def preprocess(text):
-    text=text.lower()
-    # remove hyperlinks
-    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
-    text = re.sub(r'http?:\/\/.*[\r\n]*', '', text)
-    #Replace &amp, &lt, &gt with &,<,> respectively
-    text=text.replace(r'&amp;?',r'and')
-    text=text.replace(r'&lt;',r'<')
-    text=text.replace(r'&gt;',r'>')
-    #remove hashtag sign
-    #text=re.sub(r"#","",text)
-    #remove mentions
-    text = re.sub(r"(?:\@)\w+", '', text)
-    #text=re.sub(r"@","",text)
-    #remove non ascii chars
-    text=text.encode("ascii",errors="ignore").decode()
-    #remove some puncts (except . ! ?)
-    text=re.sub(r'[:"#$%&\*+,-/:;<=>@\\^_`{|}~]+','',text)
-    text=re.sub(r'[!]+','!',text)
-    text=re.sub(r'[?]+','?',text)
-    text=re.sub(r'[.]+','.',text)
-    text=re.sub(r"'","",text)
-    text=re.sub(r"\(","",text)
-    text=re.sub(r"\)","",text)
-    text=" ".join(text.split())
-    return text
 st.title('Analisis de comentarios sexistas en Twitter con Tweepy and HuggingFace Transformers')
 st.markdown('Esta app utiliza tweepy para descargar tweets de twitter en base a la información de entrada y procesa los tweets usando transformers de HuggingFace para detectar comentarios sexistas. El resultado y los tweets correspondientes se almacenan en un dataframe para mostrarlo que es lo que se ve como resultado')
@@ -68,7 +42,6 @@ def run():
             tweets =tw.Cursor(api.search_tweets,q=search_words).items(number_of_tweets)
             tweet_list = [i.text for i in tweets]
             text= pd.DataFrame(tweet_list)
-            text[0] = text[0].apply(preprocess)
             text1=text[0].values
             indices1=tokenizer.batch_encode_plus(text1.tolist(),
                                      max_length=128,

 api = tw.API(auth, wait_on_rate_limit=True)
 st.title('Analisis de comentarios sexistas en Twitter con Tweepy and HuggingFace Transformers')
 st.markdown('Esta app utiliza tweepy para descargar tweets de twitter en base a la información de entrada y procesa los tweets usando transformers de HuggingFace para detectar comentarios sexistas. El resultado y los tweets correspondientes se almacenan en un dataframe para mostrarlo que es lo que se ve como resultado')
             tweets =tw.Cursor(api.search_tweets,q=search_words).items(number_of_tweets)
             tweet_list = [i.text for i in tweets]
             text= pd.DataFrame(tweet_list)
             text1=text[0].values
             indices1=tokenizer.batch_encode_plus(text1.tolist(),
                                      max_length=128,