Spaces:
Sleeping
Sleeping
Update model.py
Browse files
model.py
CHANGED
|
@@ -25,17 +25,19 @@ stopwords = stopwords_list()
|
|
| 25 |
# Start from an empty "Comment" dataset; the ALBERT-based sentiment model is loaded below
|
| 26 |
dataset = Dataset.from_pandas(pd.DataFrame({"Comment": []}))
|
| 27 |
|
| 28 |
-
config = AutoConfig.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2")
|
| 29 |
-
tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2")
|
| 30 |
-
model = BertForSequenceClassification.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2", num_labels=3)
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 33 |
model.to(device)
|
| 34 |
|
| 35 |
-
|
| 36 |
# Tokenization function for sentiment analysis
|
| 37 |
def tokenize_function(examples):
|
| 38 |
-
return tokenizer(examples["Comment"], padding="max_length", truncation=True, max_length=
|
| 39 |
|
| 40 |
# Sentiment prediction function
|
| 41 |
def predict_sentiment(batch):
|
|
@@ -49,8 +51,13 @@ def predict_sentiment(batch):
|
|
| 49 |
return {'sentiment': predictions.cpu()}
|
| 50 |
|
| 51 |
# Mapping sentiment labels
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Adding sentiment prediction to tokenized dataset
|
| 56 |
def predict_sentiment_labels(text):
|
|
@@ -58,7 +65,7 @@ def predict_sentiment_labels(text):
|
|
| 58 |
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
| 59 |
predicted_sentiments = tokenized_dataset.map(predict_sentiment, batched=True)
|
| 60 |
sentiment = predicted_sentiments[0]['sentiment']
|
| 61 |
-
return
|
| 62 |
|
| 63 |
|
| 64 |
|
|
|
|
| 25 |
# Start from an empty "Comment" dataset; the ALBERT-based sentiment model is loaded below
|
| 26 |
dataset = Dataset.from_pandas(pd.DataFrame({"Comment": []}))
|
| 27 |
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
# Load the model and tokenizer
|
| 30 |
+
model_name = "m3hrdadfi/albert-fa-base-v2-sentiment-deepsentipers-multi"
|
| 31 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5, ignore_mismatched_sizes=True)
|
| 32 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 33 |
+
|
| 34 |
+
# Select the device (GPU or CPU)
|
| 35 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 36 |
model.to(device)
|
| 37 |
|
|
|
|
| 38 |
# Tokenization function for sentiment analysis
|
| 39 |
def tokenize_function(examples):
|
| 40 |
+
return tokenizer(examples["Comment"], padding="max_length", truncation=True, max_length=128, return_tensors='pt')
|
| 41 |
|
| 42 |
# Sentiment prediction function
|
| 43 |
def predict_sentiment(batch):
|
|
|
|
| 51 |
return {'sentiment': predictions.cpu()}
|
| 52 |
|
| 53 |
# Mapping sentiment labels
|
| 54 |
+
sentiment_labels = {
|
| 55 |
+
0: 'Furious',
|
| 56 |
+
1: 'Angry',
|
| 57 |
+
2: 'Neutral',
|
| 58 |
+
3: 'Happy',
|
| 59 |
+
4: 'Delighted'
|
| 60 |
+
}
|
| 61 |
|
| 62 |
# Adding sentiment prediction to tokenized dataset
|
| 63 |
def predict_sentiment_labels(text):
|
|
|
|
| 65 |
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
| 66 |
predicted_sentiments = tokenized_dataset.map(predict_sentiment, batched=True)
|
| 67 |
sentiment = predicted_sentiments[0]['sentiment']
|
| 68 |
+
return sentiment_labels.get(sentiment, 'نامشخص')
|
| 69 |
|
| 70 |
|
| 71 |
|