Spaces:

zArabi
/

Persian-Sentiment-Analysis

Runtime error

App Files Community

zArabi committed on Nov 6, 2022

Commit

5d1cadc

1 Parent(s): 2c051ce

add the latest model

Browse files

Files changed (1) hide show

app.py +5 -92

app.py CHANGED Viewed

@@ -5,96 +5,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 import huggingface_hub
 from huggingface_hub import hf_hub_download
-import hazm
-from cleantext import clean
-import regex as re
-huggingface_hub.Repository = 'zArabi/Persian-Sentiment-Analysis'
-def cleanhtml(raw_html):
-    cleanr = re.compile('<.*?>')
-    cleantext = re.sub(cleanr, '', raw_html)
-    return cleantext
-def cleaning(text):
-    text = text.strip()
-    # regular cleaning
-    # https://pypi.org/project/clean-text/ >> works well for eng and de languages
-    text = clean(text,
-        fix_unicode=True,
-        to_ascii=False,
-        lower=True,
-        no_line_breaks=True,
-        no_urls=True,
-        no_emails=True,
-        no_phone_numbers=True,
-        no_numbers=False,
-        no_digits=False,
-        no_currency_symbols=True,
-        no_punct=False, #Keep the punc
-        replace_with_url="",
-        replace_with_email="",
-        replace_with_phone_number="",
-        replace_with_number="",
-        replace_with_digit="0",
-        replace_with_currency_symbol="",
-    )
-    # cleaning htmls
-    text = cleanhtml(text)
-    # normalizing > https://github.com/sobhe/hazm
-    normalizer = hazm.Normalizer()
-    text = normalizer.normalize(text)
-    # removing wierd patterns
-    wierd_pattern = re.compile("["
-        u"\U0001F600-\U0001F64F"  # emoticons
-        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
-        u"\U0001F680-\U0001F6FF"  # transport & map symbols
-        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
-        u"\U00002702-\U000027B0"
-        u"\U000024C2-\U0001F251"
-        u"\U0001f926-\U0001f937"
-        u'\U00010000-\U0010ffff'
-        u"\u200d"
-        u"\u2640-\u2642"
-        u"\u2600-\u2B55"
-        u"\u23cf"
-        u"\u23e9"
-        u"\u231a"
-        u"\u3030"
-        u"\ufe0f"
-        u"\u2069"
-        u"\u2066"
-        # u"\u200c"
-        u"\u2068"
-        u"\u2067"
-        "]+", flags=re.UNICODE)
-    text = wierd_pattern.sub(r'', text)
-    # removing extra spaces, hashtags
-    text = re.sub("#", "", text)
-    text = re.sub("\s+", " ", text)
-    return text
-class SentimentModel(nn.Module):
-    def __init__(self, config):
-        super(SentimentModel, self).__init__()
-        self.bert = BertModel.from_pretrained(modelName, return_dict=False)
-        self.dropout = nn.Dropout(0.3)
-        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
-    def forward(self, input_ids, attention_mask):
-        _, pooled_output = self.bert(
-            input_ids=input_ids,
-            attention_mask=attention_mask)
-        pooled_output = self.dropout(pooled_output)
-        logits = self.classifier(pooled_output)
-        return logits
 modelName = 'HooshvareLab/bert-fa-base-uncased'
 class_names = ['negative', 'neutral', 'positive']
@@ -107,8 +18,10 @@ config = BertConfig.from_pretrained(
     id2label=id2label,
     label2id=label2id)
-downloadedModelFile = hf_hub_download(repo_id="zArabi/Persian-Sentiment-Analysis", filename="persianModel")
 loaded_model = torch.load(downloadedModelFile,map_location="cpu")
 tokenizer = BertTokenizer.from_pretrained(modelName)
@@ -129,7 +42,7 @@ def predict(text):
   )
   input_ids = encoding["input_ids"].to(device)
   attention_mask = encoding["attention_mask"].to(device)
-  outputs = loaded_model (input_ids, attention_mask)
   probs = F.softmax(outputs,dim=1)
   values, indices = torch.max(probs, dim=1)
   data = {

 import torch.nn.functional as F
 import huggingface_hub
 from huggingface_hub import hf_hub_download
+from preprocessing import *
 modelName = 'HooshvareLab/bert-fa-base-uncased'
 class_names = ['negative', 'neutral', 'positive']
     id2label=id2label,
     label2id=label2id)
+path="HooshvareLab-bert-fa-base-uncased-3class-best-epoch-weight-decay=.001.bin"
+downloadedModelFile = hf_hub_download(repo_id="zArabi/Persian-Sentiment-Analysis", filename=path)
 loaded_model = torch.load(downloadedModelFile,map_location="cpu")
+loaded_model.eval()
 tokenizer = BertTokenizer.from_pretrained(modelName)
   )
   input_ids = encoding["input_ids"].to(device)
   attention_mask = encoding["attention_mask"].to(device)
+  outputs = loaded_model(input_ids, attention_mask)
   probs = F.softmax(outputs,dim=1)
   values, indices = torch.max(probs, dim=1)
   data = {