Spaces:

BulatF
/

StreamlitSentiment

Runtime error

App Files Files Community

BulatF committed on Jul 5, 2023

Commit

b968a85

1 Parent(s): a8cc4f6

Upload app.py

Browse files

Files changed (1) hide show

app.py +40 -38

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import torch
 import io
 import base64
 from stqdm import stqdm
-from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 import numpy as np
@@ -17,6 +17,12 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 st.set_page_config(layout="wide")
 #defs
 def classify_reviews(reviews):
     inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
@@ -35,6 +41,15 @@ def get_table_download_link(df):
     b64 = base64.b64encode(csv.encode()).decode()
     return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
 def main():
     st.title('Sentiment Analysis')
     st.markdown('Upload an Excel file to get sentiment analytics')
@@ -42,6 +57,7 @@ def main():
     file = st.file_uploader("Upload an excel file", type=['xlsx'])
     review_column = None
     df = None
     if file is not None:
         try:
@@ -53,6 +69,8 @@ def main():
             df = df.dropna(how='all')
             review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
             df[review_column] = df[review_column].astype(str)
         except Exception as e:
             st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
             return
@@ -64,9 +82,13 @@ def main():
         df = df[df[review_column].notna()]
         df = df[df[review_column].str.strip() != '']
         if review_column in df.columns:
             with st.spinner('Performing sentiment analysis...'):
-                df, df_display = process_reviews(df, review_column)
             display_ratings(df, review_column)  # updated this line
             display_dataframe(df, df_display)
@@ -76,7 +98,8 @@ def main():
-def process_reviews(df, review_column):
     with st.spinner('Classifying reviews...'):
         progress_bar = st.progress(0)
         total_reviews = len(df[review_column].tolist())
@@ -91,6 +114,16 @@ def process_reviews(df, review_column):
             raw_scores.extend(batch_scores)
             review_counter += len(batch_reviews)
             progress_bar.progress(review_counter / total_reviews)
     df_new = df.copy()
     df_new['raw_scores'] = raw_scores
@@ -101,29 +134,15 @@ def process_reviews(df, review_column):
     remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']]
     # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
-    df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + remaining_columns]
     # Reorder df_display as well
-    df_display = df_display[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + remaining_columns]
     return df_new, df_display
-def generate_wordclouds(df, review_column):
-    st.markdown("# Word Clouds for each rating category")
-    for i in range(1, 6):
-        # Create a sub-dataframe for each rating category
-        sub_df = df[df['Rating'] == i]
-        # Join all the reviews in this sub-dataframe
-        text = ' '.join(review for review in sub_df[review_column])
-        # Generate a word cloud
-        wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
-        # Display the generated image with matplotlib
-        plt.figure()
-        plt.imshow(wordcloud, interpolation="bilinear")
-        plt.axis("off")
-        plt.title(f"Rating {i}")
-        st.pyplot(plt)
-        plt.close()
 def scores_to_df(df):
@@ -172,23 +191,6 @@ def display_ratings(df, review_column):
         cols[i-1].markdown(f"### {rating_counts}")
         cols[i-1].markdown(f"{'⭐' * i}")
-        # Generate wordcloud for the given rating category
-        sub_df = df[df['Rating'] == i]
-        text = ' '.join(review for review in sub_df[review_column])
-        if text.strip():  # Only generate a word cloud if text is not empty
-            wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
-            # Display the generated image with matplotlib
-            plt.figure()
-            plt.imshow(wordcloud, interpolation="bilinear")
-            plt.axis("off")
-            plt.title(f"Rating {i}")
-            cols[i-1].pyplot(plt)
-            plt.close()
 if __name__ == "__main__":

 import io
 import base64
 from stqdm import stqdm
 import matplotlib.pyplot as plt
 import numpy as np
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 st.set_page_config(layout="wide")
+# Import the new model and tokenizer
+class_model_name = 'facebook/bart-large-mnli'
+class_model = AutoModelForSequenceClassification.from_pretrained(class_model_name)
+class_tokenizer = AutoTokenizer.from_pretrained(class_model_name)
 #defs
 def classify_reviews(reviews):
     inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
     b64 = base64.b64encode(csv.encode()).decode()
     return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
+# Function for classifying with the new model
+def classify_with_new_classes(reviews, class_name):
+    inputs = class_tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
+    outputs = class_model(**inputs)
+    probabilities = F.softmax(outputs.logits, dim=1).tolist()
+    class_scores = [prob[1] for prob in probabilities]  # Assuming binary classification
+    return class_scores
 def main():
     st.title('Sentiment Analysis')
     st.markdown('Upload an Excel file to get sentiment analytics')
     file = st.file_uploader("Upload an excel file", type=['xlsx'])
     review_column = None
     df = None
+    class_names = None  # New variable for class names
     if file is not None:
         try:
             df = df.dropna(how='all')
             review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
             df[review_column] = df[review_column].astype(str)
+            class_names = st.text_input('Enter the possible class names separated by comma')  # New input field for class names
         except Exception as e:
             st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
             return
         df = df[df[review_column].notna()]
         df = df[df[review_column].str.strip() != '']
+        class_names = [name.strip() for name in class_names.split(',')]  # Split class names into a list
+        for name in class_names:  # Add a new column for each class name
+            df[name] = 0.0
         if review_column in df.columns:
             with st.spinner('Performing sentiment analysis...'):
+                df, df_display = process_reviews(df, review_column, class_names)
             display_ratings(df, review_column)  # updated this line
             display_dataframe(df, df_display)
+def process_reviews(df, review_column, class_names):
     with st.spinner('Classifying reviews...'):
         progress_bar = st.progress(0)
         total_reviews = len(df[review_column].tolist())
             raw_scores.extend(batch_scores)
             review_counter += len(batch_reviews)
             progress_bar.progress(review_counter / total_reviews)
+    class_scores_dict = {}  # New dictionary to store class scores
+    for name in class_names:
+        with st.spinner(f'Generating classes for {name}...'):
+            class_scores = classify_with_new_classes(df[review_column].tolist(), name)
+            df[name] = class_scores
+            class_scores_dict[name] = class_scores  # Store class scores in the dictionary
+    # Add a new column with the class that has the highest score
+    df['Highest Class'] = df[class_names].idxmax(axis=1)
     df_new = df.copy()
     df_new['raw_scores'] = raw_scores
     remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']]
     # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
+    df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + remaining_columns]
     # Reorder df_display as well
+    df_display = df_display[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + remaining_columns]
     return df_new, df_display
 def scores_to_df(df):
         cols[i-1].markdown(f"### {rating_counts}")
         cols[i-1].markdown(f"{'⭐' * i}")
 if __name__ == "__main__":