Spaces:
Sleeping
Sleeping
| import joblib | |
| import re | |
| from Sastrawi.Stemmer.StemmerFactory import StemmerFactory | |
| import emoji | |
# Load the persisted classifier and the vectorizer used to featurize text.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only ever point these paths at trusted, locally produced files.
model = joblib.load("hard_voting_classifier.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Load custom stopwords, one per line. The encoding is given explicitly so the
# file decodes the same way regardless of the platform's locale default, and
# the words are kept in a frozenset so that the per-token membership test in
# preprocess_data is a hash lookup rather than a linear scan of a list.
with open("Indonesia_stopwords.txt", "r", encoding="utf-8") as f:
    custom_stopwords = frozenset(word.strip() for word in f)
# Patterns used by preprocess_data, compiled once at import time.
_URL_RE = re.compile(r'http[s]?://\S+')
_DIGITS_RE = re.compile(r'\d+')
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')

# Shared stemmer instance; created on first use by _get_stemmer().
_STEMMER = None


def _get_stemmer():
    """Return the shared Sastrawi stemmer, creating it on first use."""
    global _STEMMER
    if _STEMMER is None:
        # Building the stemmer is far more expensive than stemming a short
        # text, so it is done once instead of on every preprocess_data call.
        _STEMMER = StemmerFactory().create_stemmer()
    return _STEMMER


def preprocess_data(text):
    """Normalize raw text into a space-joined string of stemmed tokens.

    The steps run in this order: lowercase the text; replace emojis with
    their textual names via ``emoji.demojize``; remove URLs; remove runs of
    digits; remove every character that is not an ASCII letter, a digit or
    whitespace; split on whitespace; drop tokens present in
    ``custom_stopwords``; stem each remaining token.

    Args:
        text: The input string to clean.

    Returns:
        The stemmed tokens joined by single spaces, or an empty string when
        no token survives the filtering.
    """
    # Case folding. Doing this first also lets the lowercase-only URL
    # pattern below match links that were typed in upper case.
    text = text.lower()

    # Sentence normalization.
    text = emoji.demojize(text)  # Translate emojis to their word representation
    text = _URL_RE.sub('', text)  # Remove URLs
    text = _DIGITS_RE.sub('', text)  # Remove numbers
    # Remove everything except ASCII letters, digits and whitespace. This
    # also strips ':' and '_', so a demojized name such as ':red_heart:'
    # (assuming emoji's default output format) collapses into one token.
    # NOTE(review): left as-is on purpose -- changing the token shape would
    # presumably no longer match what the loaded vectorizer was fitted on.
    text = _NON_ALNUM_RE.sub('', text)

    # Tokenization, stopword removal and stemming.
    stemmer = _get_stemmer()
    tokens = [word for word in text.split() if word not in custom_stopwords]
    return ' '.join(stemmer.stem(word) for word in tokens)
def predict_sentiment(text):
    """Classify *text* as ``"Positive"`` or ``"Negative"``.

    The text is cleaned with ``preprocess_data``, turned into features by the
    module-level ``vectorizer`` and scored by the module-level ``model``.

    Args:
        text: The raw input string.

    Returns:
        ``"Positive"`` when the first predicted label equals ``1``;
        ``"Negative"`` for any other label.
    """
    cleaned = preprocess_data(text)
    # The vectorizer is given a one-element batch, so exactly one prediction
    # comes back and only its first entry is inspected.
    features = vectorizer.transform([cleaned])
    label = model.predict(features)[0]
    if label == 1:
        return "Positive"
    return "Negative"