Spaces:

F-allahmoradi
/

SocialAnalyzer

Sleeping

F-allahmoradi committed on Dec 26, 2024

Commit

0b7a269

verified ·

1 Parent(s): e7b6607

Update model.py

Files changed (1) hide show

model.py CHANGED Viewed

@@ -25,17 +25,19 @@ stopwords = stopwords_list()
 # Load the BERT model for sentiment analysis
 dataset = Dataset.from_pandas(pd.DataFrame({"Comment": []}))
-config = AutoConfig.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2")
-tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2")
-model = BertForSequenceClassification.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2", num_labels=3)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 # Tokenization function for sentiment analysis
 def tokenize_function(examples):
-    return tokenizer(examples["Comment"], padding="max_length", truncation=True, max_length=256, return_tensors='pt')
 # Sentiment prediction function
 def predict_sentiment(batch):
@@ -49,8 +51,13 @@ def predict_sentiment(batch):
     return {'sentiment': predictions.cpu()}
 # Mapping sentiment labels
-sentiment_labels_en = { 2: 'منفی', 0: 'خنثی',  1: 'مثبت'}
 # Adding sentiment prediction to tokenized dataset
 def predict_sentiment_labels(text):
@@ -58,7 +65,7 @@ def predict_sentiment_labels(text):
     tokenized_dataset = dataset.map(tokenize_function, batched=True)
     predicted_sentiments = tokenized_dataset.map(predict_sentiment, batched=True)
     sentiment = predicted_sentiments[0]['sentiment']
-    return  sentiment_labels_en.get(sentiment, 'نامشخص')

 # Load the BERT model for sentiment analysis
 dataset = Dataset.from_pandas(pd.DataFrame({"Comment": []}))
+# بارگذاری مدل و توکنایزر
+model_name = "m3hrdadfi/albert-fa-base-v2-sentiment-deepsentipers-multi"
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5, ignore_mismatched_sizes=True)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# انتخاب دستگاه (GPU یا CPU)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 # Tokenization function for sentiment analysis
 def tokenize_function(examples):
+    return tokenizer(examples["Comment"], padding="max_length", truncation=True, max_length=128, return_tensors='pt')
 # Sentiment prediction function
 def predict_sentiment(batch):
     return {'sentiment': predictions.cpu()}
 # Mapping sentiment labels
+sentiment_labels = {
+    0: 'Furious',
+    1: 'Angry',
+    2: 'Neutral',
+    3: 'Happy',
+    4: 'Delighted'
+}
 # Adding sentiment prediction to tokenized dataset
 def predict_sentiment_labels(text):
     tokenized_dataset = dataset.map(tokenize_function, batched=True)
     predicted_sentiments = tokenized_dataset.map(predict_sentiment, batched=True)
     sentiment = predicted_sentiments[0]['sentiment']
+    return sentiment_labels.get(sentiment, 'نامشخص')