Spaces:

sumit-ai-ml
/

ExcelWordCloud

Sleeping

App Files Community

sumit-ai-ml committed on Mar 10, 2024

Commit

6d2fd9a

verified ·

1 Parent(s): ac9031a

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -31

app.py CHANGED Viewed

@@ -1,52 +1,82 @@
 import gradio as gr
 import pandas as pd
 from wordcloud import WordCloud, STOPWORDS
-import matplotlib.pyplot as plt
 from nltk.corpus import opinion_lexicon
 from nltk.tokenize import word_tokenize
 import nltk
-from PIL import Image
-import numpy as np
-import io
 # Ensure NLTK data is downloaded
 nltk.download('opinion_lexicon')
 nltk.download('punkt')
-# Your existing logic for generate_word_cloud goes here
-def generate_word_cloud(excel_file, column_name):
-    # Adapt your existing word cloud generation code to work with the input Excel file and column name.
-    # Instead of displaying the plot, save it to a buffer and return the image.
-    # Placeholder for your existing logic
-    # Save the plot to a buffer
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    image = Image.open(buf)
-    # Convert to numpy array for Gradio output
-    image_array = np.array(image)
-    return image_array
-# Define Gradio interface
-def process_excel(file_obj, column_name):
-    # Save the uploaded file to a temporary location
-    with open("temp_excel_file.xlsx", "wb") as f:
-        f.write(file_obj.read())
-    # Generate the word cloud
-    image = generate_word_cloud("temp_excel_file.xlsx", column_name)
-    # Return the image
-    return image
-iface = gr.Interface(fn=process_excel,
-                     inputs=[gr.File(file_count=1, label="Upload Excel File"), gr.Textbox(label="Column Name")],
-                     outputs=gr.Image(type="numpy", label="Word Cloud"),
-                     title="Word Cloud Generator",
-                     description="Upload an Excel file and enter the column name to generate a word cloud of positive and negative words.")
 if __name__ == "__main__":
     iface.launch()

 import gradio as gr
 import pandas as pd
 from wordcloud import WordCloud, STOPWORDS
 from nltk.corpus import opinion_lexicon
 from nltk.tokenize import word_tokenize
 import nltk
+import tempfile
+import os
 # Ensure NLTK data is downloaded
 nltk.download('opinion_lexicon')
 nltk.download('punkt')
+# Load positive and negative words
+positive_words = set(opinion_lexicon.positive())
+negative_words = set(opinion_lexicon.negative())
+class SimpleGroupedColorFunc(object):
+    """Create a color function object which assigns specified colors to words based on the membership in predefined lists."""
+    def __init__(self, color_to_words, default_color):
+        self.color_to_words = color_to_words
+        self.default_color = default_color
+        self.words_to_color = {word: color for (color, words) in color_to_words.items() for word in words}
+    def __call__(self, word, **kwargs):
+        return self.words_to_color.get(word, self.default_color)
+def generate_wordcloud(excel_file, column_name):
+    # Read the Excel file from the uploaded file
+    df = pd.read_excel(excel_file)
+    # Use the user-provided column name
+    if column_name not in df.columns:
+        return "The specified column name does not exist in the Excel file."
+    text_data = ' '.join(df[column_name].dropna().astype(str))
+    # Tokenize the text
+    tokens = word_tokenize(text_data.lower())
+    # Combine positive and negative tokens
+    combined_tokens = [word for word in tokens if word in positive_words or word in negative_words]
+    # Generate word cloud text
+    combined_text = ' '.join(combined_tokens)
+    # Generate a word cloud
+    wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white', width=800, height=400).generate(combined_text)
+    # Map of words to colors
+    color_to_words = {
+        'green': positive_words,
+        'red': negative_words
+    }
+    # Default color for words not in any predefined list
+    default_color = 'gray'
+    # Create the custom color function
+    grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color)
+    # Apply our color function
+    wordcloud.recolor(color_func=grouped_color_func)
+    # Save the word cloud to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
+        wordcloud.to_image().save(tmp.name)
+        # Return the path to the temporary file
+        return tmp.name
+# Define Gradio interface with an additional input for the column name
+iface = gr.Interface(fn=generate_wordcloud,
+                     inputs=[gr.File(label="Upload Excel File"), gr.Text(label="Column Name")],
+                     outputs=gr.Image(label="Word Cloud"),
+                     title="Sentiment Analysis Word Cloud Generator",
+                     description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.")
 if __name__ == "__main__":
     iface.launch()