Spaces:
Sleeping
Sleeping
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
| import string | |
| from transformers import BertTokenizer, TFBertForSequenceClassification, TextClassificationPipeline | |
| import tensorflow as tf | |
# Download NLTK resources (one-time step).
# word_tokenize loads the 'punkt_tab' tokenizer data on NLTK >= 3.8.2 and the
# older 'punkt' data on earlier releases, so both are requested. By default
# nltk.download() prints an error and returns False for a package it cannot
# fetch rather than raising, so the extra request is harmless on old versions.
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)
# Define stopwords and punctuation (sets, for O(1) membership tests)
stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)
# Function to preprocess text
def preprocess_text(text):
    """Return *text* lowercased, tokenized, and stripped of noise tokens.

    The input is coerced to ``str`` first, so non-string values are accepted.
    A token is dropped when it is an English stopword (``stop_words``) or is
    itself a member of ``punctuations``; the surviving tokens are joined back
    together with single spaces.
    """
    lowered = str(text).lower()

    kept_tokens = []
    for candidate in word_tokenize(lowered):
        # Skip stopwords and tokens that are a single punctuation character.
        if candidate in stop_words or candidate in punctuations:
            continue
        kept_tokens.append(candidate)

    return ' '.join(kept_tokens)
# Checkpoint id shared by the tokenizer and the classifier below
_SENTIMENT_CHECKPOINT = 'mainakhf/bert-base-uncased-sentiment-analysis'

# Load tokenizer
bert_tokenizer = BertTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT)
# Load model
bert_model = TFBertForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
def Get_sentiment(Review, Tokenizer=bert_tokenizer, Model=bert_model):
    """Classify one or more reviews as "Negative" or "Positive".

    Args:
        Review: A single review, or a list of reviews. Anything that is not
            a ``list`` is treated as one review and wrapped in a list.
        Tokenizer: Tokenizer passed to the classification pipeline. Defaults
            to the module-level ``bert_tokenizer``.
        Model: Sequence-classification model passed to the pipeline. Defaults
            to the module-level ``bert_model``. Its ``config.id2label`` is
            overwritten in place with the Negative/Positive mapping.

    Returns:
        A list of label strings, one per review, in input order. An empty
        input list yields an empty list.
    """
    # Convert Review to a list if it's not already a list
    if not isinstance(Review, list):
        Review = [Review]
    # Nothing to classify: avoid building the pipeline for an empty batch.
    if not Review:
        return []
    # Honor the caller-supplied model/tokenizer instead of the globals.
    Model.config.id2label = {0: "Negative", 1: "Positive"}
    pipe = TextClassificationPipeline(model=Model, tokenizer=Tokenizer)
    predictions = pipe(Review)
    # Return a label for every review, not just the first one.
    return [prediction['label'] for prediction in predictions]