Spaces:
Runtime error
Runtime error
"""Persian sentiment-analysis Gradio Space (negative / neutral / positive)."""
import gradio as gr
import huggingface_hub
from huggingface_hub import hf_hub_download
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel, BertConfig, BertTokenizer
import hazm
from cleantext import clean
import regex as re

# FIX: the original assigned a string to `huggingface_hub.Repository`,
# clobbering that class object to no effect — `hf_hub_download` below takes
# `repo_id` explicitly. The assignment has been removed.
def cleanhtml(raw_html):
    """Return *raw_html* with all HTML tags stripped out."""
    # Non-greedy `<.*?>` so each tag is matched and removed individually.
    return re.sub('<.*?>', '', raw_html)
def cleaning(text):
    """Normalize a Persian text snippet before tokenization.

    Pipeline: strip -> generic cleaning (clean-text: urls/emails/phones/
    currency removed, lowercased, line breaks collapsed) -> HTML tag removal
    -> hazm normalization -> emoji/pictograph/bidi-control removal ->
    hashtag-marker removal and whitespace collapsing.
    """
    text = text.strip()

    # Regular cleaning — https://pypi.org/project/clean-text/
    # (documented as working well for en/de; used here only for the
    # language-agnostic substitutions below).
    text = clean(
        text,
        fix_unicode=True,
        to_ascii=False,
        lower=True,
        no_line_breaks=True,
        no_urls=True,
        no_emails=True,
        no_phone_numbers=True,
        no_numbers=False,
        no_digits=False,
        no_currency_symbols=True,
        no_punct=False,  # keep punctuation
        replace_with_url="",
        replace_with_email="",
        replace_with_phone_number="",
        replace_with_number="",
        replace_with_digit="0",
        replace_with_currency_symbol="",
    )

    # Remove HTML tags.
    text = cleanhtml(text)

    # Persian-specific normalization — https://github.com/sobhe/hazm
    normalizer = hazm.Normalizer()
    text = normalizer.normalize(text)

    # Remove emoji / pictographs / bidirectional control characters.
    # (Pattern content is unchanged from the original.)
    weird_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "\U00002702-\U000027B0"
        "\U000024C2-\U0001F251"
        "\U0001f926-\U0001f937"
        "\U00010000-\U0010ffff"
        "\u200d"
        "\u2640-\u2642"
        "\u2600-\u2B55"
        "\u23cf"
        "\u23e9"
        "\u231a"
        "\u3030"
        "\ufe0f"
        "\u2069"
        "\u2066"
        # "\u200c"  (ZWNJ deliberately kept — meaningful in Persian)
        "\u2068"
        "\u2067"
        "]+",
        flags=re.UNICODE,
    )
    text = weird_pattern.sub(r'', text)

    # Drop hashtag markers and collapse runs of whitespace.
    # FIX: raw string for the regex — the original "\s+" is an invalid
    # (deprecated) escape sequence in a plain string literal.
    text = re.sub("#", "", text)
    text = re.sub(r"\s+", " ", text)
    return text
class SentimentModel(nn.Module):
    """BERT encoder + dropout + linear head for 3-way sentiment classification."""

    def __init__(self, config):
        super().__init__()
        # NOTE(review): the checkpoint name comes from the module-level
        # `modelName` global rather than `config` — confirm they stay in sync.
        self.bert = BertModel.from_pretrained(modelName, return_dict=False)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, input_ids, attention_mask):
        """Return raw (unnormalized) class logits for a batch of token ids."""
        # With return_dict=False the second output is the pooled [CLS] vector.
        _, pooled = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.classifier(self.dropout(pooled))
# --- Model / tokenizer setup -------------------------------------------------
modelName = 'HooshvareLab/bert-fa-base-uncased'
class_names = ['negative', 'neutral', 'positive']
label2id = {label: i for i, label in enumerate(class_names)}
id2label = {v: k for k, v in label2id.items()}

config = BertConfig.from_pretrained(
    modelName,
    num_labels=len(class_names),
    id2label=id2label,
    label2id=label2id)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

downloadedModelFile = hf_hub_download(repo_id="zArabi/Persian-Sentiment-Analysis", filename="persianModel")
# FIX: load the checkpoint onto the active device. The original used
# map_location="cpu" while `predict` moved input tensors to `device`,
# crashing with a device mismatch whenever CUDA was available.
# NOTE(security): torch.load unpickles arbitrary objects; acceptable here
# only because the file comes from a fixed, trusted repo.
loaded_model = torch.load(downloadedModelFile, map_location=device)
# FIX: switch to eval mode so dropout is disabled at inference time
# (the original left the model in training mode — nondeterministic output).
loaded_model.eval()

tokenizer = BertTokenizer.from_pretrained(modelName)
max_len = 512
def predict(text):
    """Classify *text*'s sentiment.

    Returns a dict mapping each class name ('negative'/'neutral'/'positive')
    to its softmax probability rounded to 3 decimal places.
    """
    text = cleaning(text)
    encoding = tokenizer.encode_plus(
        text,
        max_length=max_len,
        truncation=True,
        padding="max_length",
        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors='pt',  # Return PyTorch tensors
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)
    # FIX: run inference under no_grad — no autograd graph is needed and the
    # original wasted memory building one. Also removed the unused `data`
    # dict and the unused torch.max() results (dead code).
    with torch.no_grad():
        logits = loaded_model(input_ids, attention_mask)
    probs = F.softmax(logits, dim=1)[0]
    return {class_names[i]: round(p.item(), 3) for i, p in enumerate(probs)}
# Build and launch the UI.
# FIX: `gr.outputs.Label` was removed in modern Gradio releases — use
# `gr.Label` instead (this is the likely cause of the Space's runtime error).
# Also fixed the title typo ("How are feeling?!").
gr.Interface(
    predict,
    inputs=gr.Textbox(label="Explore your sentence!", lines=2, placeholder="Type Here..."),
    outputs=gr.Label(num_top_classes=3),
    title="How are you feeling?!",
).launch()