Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from nltk.stem import WordNetLemmatizer | |
| import streamlit as st | |
| import pickle | |
| import pandas as pd | |
| import numpy as np | |
| import nltk | |
| import regex as re | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| from sklearn.ensemble import RandomForestClassifier | |
| import transformers | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig | |
| from scipy.special import softmax | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import ast | |
# Fetch the NLTK resources used when cleaning reviews: the English stop-word
# list and the WordNet data that WordNetLemmatizer relies on.
for _resource in ('stopwords', 'wordnet'):
    nltk.download(_resource)
| # Load the model | |
def load_model():
    """Unpickle and return the object stored in ``random_forest_model.pkl``.

    The file is opened relative to the current working directory.
    Unpickling can execute arbitrary code, so the file must come from a
    trusted source.
    """
    with open('random_forest_model.pkl', 'rb') as model_file:
        return pickle.load(model_file)
def load_vectorizer():
    """Unpickle and return the object stored in ``tfidf_vectorizer.pkl``.

    The file is opened relative to the current working directory.
    Unpickling can execute arbitrary code, so the file must come from a
    trusted source.
    """
    with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
        return pickle.load(vectorizer_file)
def ratings(list_of_reviews):
    """Predict a rating from review text.

    Despite the parameter name, ``list_of_reviews`` is treated as a single
    string: it is cleaned into one document, vectorized with the pickled
    TF-IDF vectorizer and passed to the pickled model.

    Args:
        list_of_reviews: Review text as one string.

    Returns:
        Whatever ``model.predict`` returns for the single transformed
        document.
    """
    # Build the stop-word set once, not once per token. It is named
    # ``stop_words`` so it does not shadow the imported
    # ``nltk.corpus.stopwords`` module inside this function.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Keep ASCII letters only, lower-case, then split on whitespace.
    letters_only = re.sub('[^a-zA-Z]', ' ', list_of_reviews)
    tokens = letters_only.lower().split()
    cleaned_review = ' '.join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )

    # Transform the cleaned review with the fitted vectorizer; it expects an
    # iterable of documents, hence the one-element list.
    tf_idf_vectorizer = load_vectorizer()
    tf_review = tf_idf_vectorizer.transform([cleaned_review])

    model = load_model()
    return model.predict(tf_review)
def sentiment_analysis(texts):
    """Score each text with ``cardiffnlp/twitter-roberta-base-sentiment``.

    The tokenizer and model are loaded with ``from_pretrained`` on every
    call.

    Args:
        texts: Iterable of strings to score.

    Returns:
        A list with one entry per input text. Each entry is a list of
        softmax probabilities over the model's output classes; the caller
        in this file reads indices 0, 1 and 2 as negative, neutral and
        positive.
    """
    # Local import: the file does not import torch at module level, and it
    # is only needed here to switch off gradient tracking during inference.
    import torch

    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    results = []
    with torch.no_grad():
        for text in texts:
            # Truncate to 512 tokens so long reviews do not exceed the limit
            # passed to the tokenizer.
            encoded_input = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
            output = model(**encoded_input)
            # output[0] holds the logits; [0] selects the only item in the batch.
            scores = softmax(output[0][0].detach().numpy())
            results.append(scores.tolist())
    return results
def get_sentiment_label(row):
    """Return the label whose score is strictly greater than the other two.

    ``row`` must support lookup by the keys ``'positive_score'``,
    ``'neutral_score'`` and ``'negative_score'``. When neither the positive
    nor the negative score strictly beats both other scores (ties
    included), the result is ``'neutral'``.
    """
    positive = row['positive_score']
    neutral = row['neutral_score']
    negative = row['negative_score']

    if neutral < positive and negative < positive:
        return 'positive'
    if neutral < negative and positive < negative:
        return 'negative'
    return 'neutral'
# Page metadata is configured first, before any element is rendered.
# NOTE: the former st.set_option('deprecation.showPyplotGlobalUse', False)
# call is intentionally not made. That option only silences the warning shown
# when st.pyplot() is called with no figure argument, which this script never
# does, and newer Streamlit releases appear to reject the key as unrecognized.
st.set_page_config(
    page_title="Sustainable Shipping and Logistics Advisor",
    page_icon="GH",
    initial_sidebar_state="expanded",
)

# Hide Streamlit's default main menu and footer with injected CSS.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Style overrides keyed by component part; nothing in the visible part of
# this script reads it.
css_style = {
    "icon": {"color": "white"},
    "nav-link": {"--hover-color": "grey"},
    "nav-link-selected": {"background-color": "#FF4C1B"},
}

# Header row: a narrow column for the image and a wider one for the text.
col1, col2 = st.columns([0.5, 1.2])  # Adjust the ratio as needed

with col1:
    st.image("img2.png", width=200)  # Adjust the path and width as needed

with col2:
    # The heading line starts at column 0 inside the string so Markdown
    # renders it as a level-1 heading.
    st.write("""
# Ratings Prediction & Reviews Sentiment Analysis App
""")
    st.write(" This app predicts **the average rating of a product, given a list of reviews and also displays the sentiment of these reviews**!")

st.write('---')
# --- User input -------------------------------------------------------------
# The sidebar radio picks the mode; both modes share one text box and one
# submit button.
sidebar_selection = st.sidebar.radio("Select an option:", ("Ratings Prediction", "Sentiment Analysis"))
list_reviews = st.text_input("Enter the list of reviews: ")
sentiment_review = list_reviews
ratings_review = list_reviews
submit_button = st.button("Submit")

if sidebar_selection == "Ratings Prediction":
    if submit_button and ratings_review:
        rating_pred = ratings(ratings_review)
        # ratings() returns the predictor's output for a single document;
        # show its one element, not the array repr (e.g. "5" instead of "[5]").
        st.write(f"The predicted average rating for a product with the list of reviews above is: {rating_pred[0]}")
    elif submit_button:
        # Submit was clicked but no review text was provided.
        st.write("Please enter a review to get a prediction.")

elif sidebar_selection == "Sentiment Analysis":
    # Reviews are comma-separated. Trim whitespace and drop blank entries so
    # stray or trailing commas are not scored as empty reviews.
    review_list = [part.strip() for part in sentiment_review.split(',') if part.strip()]

    if submit_button and review_list:
        df = pd.DataFrame(review_list, columns=['Review'])

        # One [negative, neutral, positive] score triple per review.
        scores = sentiment_analysis(df['Review'])
        df['negative_score'] = [score[0] for score in scores]
        df['neutral_score'] = [score[1] for score in scores]
        df['positive_score'] = [score[2] for score in scores]
        df['sentiment'] = df.apply(get_sentiment_label, axis=1)

        st.write("**Sentiment Distribution:**")

        # Draw on an explicit figure/axes pair so the figure can be handed to
        # Streamlit and closed afterwards; otherwise every rerun of the script
        # leaves another open matplotlib figure behind.
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.countplot(data=df, x='sentiment', color='blue', ax=ax)

        # Write each bar's count just above the bar.
        for p in ax.patches:
            ax.annotate(
                f'{p.get_height()}',
                (p.get_x() + p.get_width() / 2, p.get_height()),
                ha='center',
                va='bottom',
            )

        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        ax.set_title('Sentiment Distribution')

        st.pyplot(fig)
        plt.close(fig)
    elif submit_button:
        # Mirror the ratings branch: tell the user why nothing was shown.
        st.write("Please enter at least one review to analyse its sentiment.")