Spaces:
Sleeping
Sleeping
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.pipeline import Pipeline | |
| import joblib | |
| import re | |
| import string | |
| import nltk | |
| nltk.download('stopwords') | |
| nltk.download('punkt') | |
| import streamlit as st | |
| # Preprocess function | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
# Patterns are compiled once at import time instead of on every call.
_URL_PATTERN = re.compile(r'https?://\S+')
_HTML_TAG_PATTERN = re.compile(r'<[^<>]+>')
# Anything that is neither a word character nor whitespace. Digits and
# underscores are word characters, so they are kept.
_NON_WORD_PATTERN = re.compile(r'[^\w\s]')
# NOTE(review): the range \U000024C2-\U0001F251 is far wider than emoji and
# also strips non-emoji text that falls inside it (e.g. CJK ideographs).
# Several entries are duplicated or already covered by that range. The class
# is kept exactly as it was so the output stays identical to what the saved
# models presumably saw during training -- confirm before narrowing it.
_EMOJI_PATTERN = re.compile("["
                            u"\U0001F600-\U0001F64F"
                            u"\U0001F300-\U0001F5FF"
                            u"\U0001F680-\U0001F6FF"
                            u"\U0001F1E0-\U0001F1FF"
                            u"\U0001F1F2-\U0001F1F4"
                            u"\U0001F1E6-\U0001F1FF"
                            u"\U0001F600-\U0001F64F"
                            u"\U00002702-\U000027B0"
                            u"\U000024C2-\U0001F251"
                            u"\U0001f926-\U0001f937"
                            u"\U0001F1F2"
                            u"\U0001F1F4"
                            u"\U0001F620"
                            u"\u200d"
                            u"\u2640-\u2642"
                            "]+", flags=re.UNICODE)

# Filled on first use so the stop-word list is not rebuilt on every call.
_STOP_WORDS_CACHE = None


def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, built only once."""
    global _STOP_WORDS_CACHE
    if _STOP_WORDS_CACHE is None:
        _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE


def preprocess_text(text):
    """Normalize raw text before it is handed to a TF-IDF vectorizer.

    Steps, in order:
      1. replace ``http://`` / ``https://`` URLs with a space;
      2. replace ``<...>`` HTML-like tags with a space;
      3. replace every character that is neither a word character nor
         whitespace with a space (digits and underscores are kept);
      4. replace characters matched by the emoji character class with a space;
      5. lowercase the text;
      6. tokenize with ``word_tokenize`` and drop English stop words.

    Args:
        text: The input string.

    Returns:
        The remaining tokens joined by single spaces; an empty string when
        no token survives.
    """
    text = _URL_PATTERN.sub(' ', text)
    text = _HTML_TAG_PATTERN.sub(' ', text)
    text = _NON_WORD_PATTERN.sub(' ', text)
    text = _EMOJI_PATTERN.sub(' ', text)
    text = text.lower()

    stop_words = _english_stop_words()
    return ' '.join(
        token for token in word_tokenize(text) if token not in stop_words
    )
# Load the two serialized models from the working directory.
# NOTE(review): joblib.load unpickles the file, so these .sav files must come
# from a trusted source.
model_NB_path, model_LR_path = './model_NB.sav', './model_LR.sav'
model_NB = joblib.load(model_NB_path)
model_LR = joblib.load(model_LR_path)
def sentiment_analysis_LR(input):
    """Classify a piece of text with the loaded ``model_LR`` pipeline.

    The text is cleaned with ``preprocess_text``, vectorized by the
    pipeline's ``'tfidfvectorizer'`` step and classified by its
    ``'logisticregression'`` step.

    Args:
        input: Raw text to classify.

    Returns:
        0 when the predicted label equals 0, otherwise 1.
    """
    cleaned = preprocess_text(input)

    steps = model_LR.named_steps
    tfidf, classifier = steps['tfidfvectorizer'], steps['logisticregression']

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf.transform([cleaned])
    label = classifier.predict(features)[0]
    return 0 if label == 0 else 1
def sentiment_analysis_NB(input):
    """Classify a piece of text with the loaded ``model_NB`` pipeline.

    The text is cleaned with ``preprocess_text``, vectorized by the
    pipeline's ``'tfidf'`` step and classified by its ``'nb'`` step.

    Args:
        input: Raw text to classify.

    Returns:
        0 when the predicted label equals 0, otherwise 1.
    """
    cleaned = preprocess_text(input)

    steps = model_NB.named_steps
    tfidf, classifier = steps['tfidf'], steps['nb']

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf.transform([cleaned])
    label = classifier.predict(features)[0]
    return 0 if label == 0 else 1
# Streamlit UI: read text from the user and show the logistic-regression
# verdict (0 is reported as negative, anything else as positive).
text = st.text_area('Enter some text !!! (English text : D )')
# Skip empty and whitespace-only input: there is nothing to classify, and the
# model would otherwise report a sentiment for a blank entry.
if text and text.strip():
    out = sentiment_analysis_LR(text)
    if out == 0:
        st.write('The sentence is negative')
    else:
        st.write('The sentence is positive')