Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import torch
|
|
| 6 |
import io
|
| 7 |
import base64
|
| 8 |
from stqdm import stqdm
|
| 9 |
-
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
import numpy as np
|
| 12 |
|
|
@@ -17,6 +17,12 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 18 |
st.set_page_config(layout="wide")
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
#defs
|
| 21 |
def classify_reviews(reviews):
|
| 22 |
inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
|
|
@@ -35,6 +41,15 @@ def get_table_download_link(df):
|
|
| 35 |
b64 = base64.b64encode(csv.encode()).decode()
|
| 36 |
return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def main():
|
| 39 |
st.title('Sentiment Analysis')
|
| 40 |
st.markdown('Upload an Excel file to get sentiment analytics')
|
|
@@ -42,6 +57,7 @@ def main():
|
|
| 42 |
file = st.file_uploader("Upload an excel file", type=['xlsx'])
|
| 43 |
review_column = None
|
| 44 |
df = None
|
|
|
|
| 45 |
|
| 46 |
if file is not None:
|
| 47 |
try:
|
|
@@ -53,6 +69,8 @@ def main():
|
|
| 53 |
df = df.dropna(how='all')
|
| 54 |
review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
|
| 55 |
df[review_column] = df[review_column].astype(str)
|
|
|
|
|
|
|
| 56 |
except Exception as e:
|
| 57 |
st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
|
| 58 |
return
|
|
@@ -64,9 +82,13 @@ def main():
|
|
| 64 |
df = df[df[review_column].notna()]
|
| 65 |
df = df[df[review_column].str.strip() != '']
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
if review_column in df.columns:
|
| 68 |
with st.spinner('Performing sentiment analysis...'):
|
| 69 |
-
df, df_display = process_reviews(df, review_column)
|
| 70 |
|
| 71 |
display_ratings(df, review_column) # updated this line
|
| 72 |
display_dataframe(df, df_display)
|
|
@@ -76,7 +98,8 @@ def main():
|
|
| 76 |
|
| 77 |
|
| 78 |
|
| 79 |
-
|
|
|
|
| 80 |
with st.spinner('Classifying reviews...'):
|
| 81 |
progress_bar = st.progress(0)
|
| 82 |
total_reviews = len(df[review_column].tolist())
|
|
@@ -91,6 +114,16 @@ def process_reviews(df, review_column):
|
|
| 91 |
raw_scores.extend(batch_scores)
|
| 92 |
review_counter += len(batch_reviews)
|
| 93 |
progress_bar.progress(review_counter / total_reviews)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
df_new = df.copy()
|
| 96 |
df_new['raw_scores'] = raw_scores
|
|
@@ -101,29 +134,15 @@ def process_reviews(df, review_column):
|
|
| 101 |
remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']]
|
| 102 |
|
| 103 |
# Reorder the dataframe with selected columns first, created columns next, then the remaining columns
|
| 104 |
-
df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + remaining_columns]
|
| 105 |
|
| 106 |
# Reorder df_display as well
|
| 107 |
-
df_display = df_display[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + remaining_columns]
|
| 108 |
|
| 109 |
return df_new, df_display
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
for i in range(1, 6):
|
| 114 |
-
# Create a sub-dataframe for each rating category
|
| 115 |
-
sub_df = df[df['Rating'] == i]
|
| 116 |
-
# Join all the reviews in this sub-dataframe
|
| 117 |
-
text = ' '.join(review for review in sub_df[review_column])
|
| 118 |
-
# Generate a word cloud
|
| 119 |
-
wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
|
| 120 |
-
# Display the generated image with matplotlib
|
| 121 |
-
plt.figure()
|
| 122 |
-
plt.imshow(wordcloud, interpolation="bilinear")
|
| 123 |
-
plt.axis("off")
|
| 124 |
-
plt.title(f"Rating {i}")
|
| 125 |
-
st.pyplot(plt)
|
| 126 |
-
plt.close()
|
| 127 |
|
| 128 |
|
| 129 |
def scores_to_df(df):
|
|
@@ -172,23 +191,6 @@ def display_ratings(df, review_column):
|
|
| 172 |
cols[i-1].markdown(f"### {rating_counts}")
|
| 173 |
cols[i-1].markdown(f"{'⭐' * i}")
|
| 174 |
|
| 175 |
-
# Generate wordcloud for the given rating category
|
| 176 |
-
sub_df = df[df['Rating'] == i]
|
| 177 |
-
text = ' '.join(review for review in sub_df[review_column])
|
| 178 |
-
|
| 179 |
-
if text.strip(): # Only generate a word cloud if text is not empty
|
| 180 |
-
wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
|
| 181 |
-
|
| 182 |
-
# Display the generated image with matplotlib
|
| 183 |
-
plt.figure()
|
| 184 |
-
plt.imshow(wordcloud, interpolation="bilinear")
|
| 185 |
-
plt.axis("off")
|
| 186 |
-
plt.title(f"Rating {i}")
|
| 187 |
-
cols[i-1].pyplot(plt)
|
| 188 |
-
plt.close()
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
|
| 193 |
|
| 194 |
if __name__ == "__main__":
|
|
|
|
| 6 |
import io
|
| 7 |
import base64
|
| 8 |
from stqdm import stqdm
|
| 9 |
+
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
import numpy as np
|
| 12 |
|
|
|
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 18 |
st.set_page_config(layout="wide")
|
| 19 |
|
| 20 |
+
# Load the additional classification model (facebook/bart-large-mnli) and its tokenizer
|
| 21 |
+
class_model_name = 'facebook/bart-large-mnli'
|
| 22 |
+
class_model = AutoModelForSequenceClassification.from_pretrained(class_model_name)
|
| 23 |
+
class_tokenizer = AutoTokenizer.from_pretrained(class_model_name)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
#defs
|
| 27 |
def classify_reviews(reviews):
|
| 28 |
inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
|
|
|
|
| 41 |
b64 = base64.b64encode(csv.encode()).decode()
|
| 42 |
return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
|
| 43 |
|
| 44 |
+
|
| 45 |
+
# Function for classifying with the new model
|
| 46 |
+
def classify_with_new_classes(reviews, class_name, batch_size=16):
    """Score how strongly each review matches ``class_name`` (zero-shot NLI).

    Each review is paired, as the premise, with the hypothesis
    ``"This example is {class_name}."`` and run through the module-level
    ``class_model`` / ``class_tokenizer`` (loaded from
    ``facebook/bart-large-mnli``).

    The previous implementation never used ``class_name`` and returned
    ``prob[1]`` under a "binary classification" assumption. The MNLI head
    has three outputs, so every class received the same score for a review.

    Args:
        reviews: Iterable of review strings.
        class_name: Candidate label to test each review against.
        batch_size: Number of reviews per forward pass; bounds peak memory.

    Returns:
        A list of floats in [0, 1], one per review, in input order: the
        probability of "entailment" versus "contradiction" for the label.
    """
    reviews = list(reviews)
    if not reviews:
        # Nothing to score; avoid handing an empty batch to the tokenizer.
        return []

    # Local import so the block does not depend on the file's import list.
    import torch

    hypothesis = f'This example is {class_name}.'
    class_scores = []

    for start in range(0, len(reviews), batch_size):
        batch = reviews[start:start + batch_size]
        inputs = class_tokenizer(
            batch,
            [hypothesis] * len(batch),
            return_tensors='pt',
            # Truncate only the review so the hypothesis is never cut off.
            truncation='only_first',
            padding=True,
            max_length=512,
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            logits = class_model(**inputs).logits

        # Label order per the facebook/bart-large-mnli model card:
        # 0 = contradiction, 1 = neutral, 2 = entailment.
        # Drop "neutral" and normalise entailment against contradiction.
        pair_probs = torch.softmax(logits[:, [0, 2]], dim=1)
        class_scores.extend(pair_probs[:, 1].tolist())

    return class_scores
|
| 52 |
+
|
| 53 |
def main():
|
| 54 |
st.title('Sentiment Analysis')
|
| 55 |
st.markdown('Upload an Excel file to get sentiment analytics')
|
|
|
|
| 57 |
file = st.file_uploader("Upload an excel file", type=['xlsx'])
|
| 58 |
review_column = None
|
| 59 |
df = None
|
| 60 |
+
class_names = None # New variable for class names
|
| 61 |
|
| 62 |
if file is not None:
|
| 63 |
try:
|
|
|
|
| 69 |
df = df.dropna(how='all')
|
| 70 |
review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
|
| 71 |
df[review_column] = df[review_column].astype(str)
|
| 72 |
+
|
| 73 |
+
class_names = st.text_input('Enter the possible class names separated by comma') # New input field for class names
|
| 74 |
except Exception as e:
|
| 75 |
st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
|
| 76 |
return
|
|
|
|
| 82 |
df = df[df[review_column].notna()]
|
| 83 |
df = df[df[review_column].str.strip() != '']
|
| 84 |
|
| 85 |
+
class_names = [name.strip() for name in class_names.split(',')] # Split class names into a list
|
| 86 |
+
for name in class_names: # Add a new column for each class name
|
| 87 |
+
df[name] = 0.0
|
| 88 |
+
|
| 89 |
if review_column in df.columns:
|
| 90 |
with st.spinner('Performing sentiment analysis...'):
|
| 91 |
+
df, df_display = process_reviews(df, review_column, class_names)
|
| 92 |
|
| 93 |
display_ratings(df, review_column) # updated this line
|
| 94 |
display_dataframe(df, df_display)
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
|
| 101 |
+
|
| 102 |
+
def process_reviews(df, review_column, class_names):
|
| 103 |
with st.spinner('Classifying reviews...'):
|
| 104 |
progress_bar = st.progress(0)
|
| 105 |
total_reviews = len(df[review_column].tolist())
|
|
|
|
| 114 |
raw_scores.extend(batch_scores)
|
| 115 |
review_counter += len(batch_reviews)
|
| 116 |
progress_bar.progress(review_counter / total_reviews)
|
| 117 |
+
|
| 118 |
+
class_scores_dict = {} # New dictionary to store class scores
|
| 119 |
+
for name in class_names:
|
| 120 |
+
with st.spinner(f'Generating classes for {name}...'):
|
| 121 |
+
class_scores = classify_with_new_classes(df[review_column].tolist(), name)
|
| 122 |
+
df[name] = class_scores
|
| 123 |
+
class_scores_dict[name] = class_scores # Store class scores in the dictionary
|
| 124 |
+
|
| 125 |
+
# Add a new column with the class that has the highest score
|
| 126 |
+
df['Highest Class'] = df[class_names].idxmax(axis=1)
|
| 127 |
|
| 128 |
df_new = df.copy()
|
| 129 |
df_new['raw_scores'] = raw_scores
|
|
|
|
| 134 |
remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']]
|
| 135 |
|
| 136 |
# Reorder the dataframe with selected columns first, created columns next, then the remaining columns
|
| 137 |
+
df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + remaining_columns]
|
| 138 |
|
| 139 |
# Reorder df_display as well
|
| 140 |
+
df_display = df_display[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + remaining_columns]
|
| 141 |
|
| 142 |
return df_new, df_display
|
| 143 |
|
| 144 |
+
|
| 145 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
|
| 148 |
def scores_to_df(df):
|
|
|
|
| 191 |
cols[i-1].markdown(f"### {rating_counts}")
|
| 192 |
cols[i-1].markdown(f"{'⭐' * i}")
|
| 193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
|
| 196 |
if __name__ == "__main__":
|