Spaces:

somosnlp-hackathon-2022
/

Sexismdetection

Runtime error

robertou2 committed on Apr 9, 2022

Commit

77b96fb

1 Parent(s): f7445d8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,12 +4,12 @@ import pandas as pd
 import torch
 import numpy as np
 import re
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
-tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-exist2021-metwo')
-model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-exist2021-metwo")
 import torch
 if torch.cuda.is_available():
@@ -119,7 +119,8 @@ def run():
                 tweet_list = [i.text for i in tweets]
                 #tweet_list = [strip_undesired_chars(i.text) for i in tweets]
                 text= pd.DataFrame(tweet_list)
-                text[0] = text[0].apply(preprocess)
                 text1=text[0].values
                 indices1=tokenizer.batch_encode_plus(text1.tolist(),
                                          max_length=128,

 import torch
 import numpy as np
 import re
+from pysentimiento.preprocessing import preprocess_tweet
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
+tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
+model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
 import torch
 if torch.cuda.is_available():
                 tweet_list = [i.text for i in tweets]
                 #tweet_list = [strip_undesired_chars(i.text) for i in tweets]
                 text= pd.DataFrame(tweet_list)
+                #text[0] = text[0].apply(preprocess)
+                text[0] = text[0].apply(preprocess_tweet)
                 text1=text[0].values
                 indices1=tokenizer.batch_encode_plus(text1.tolist(),
                                          max_length=128,