Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,52 +1,82 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
from wordcloud import WordCloud, STOPWORDS
|
| 4 |
-
import matplotlib.pyplot as plt
|
| 5 |
from nltk.corpus import opinion_lexicon
|
| 6 |
from nltk.tokenize import word_tokenize
|
| 7 |
import nltk
|
| 8 |
-
|
| 9 |
-
import
|
| 10 |
-
import io
|
| 11 |
|
| 12 |
# Ensure NLTK data is downloaded
|
| 13 |
nltk.download('opinion_lexicon')
|
| 14 |
nltk.download('punkt')
|
| 15 |
|
| 16 |
-
#
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
buf = io.BytesIO()
|
| 26 |
-
plt.savefig(buf, format='png')
|
| 27 |
-
buf.seek(0)
|
| 28 |
-
image = Image.open(buf)
|
| 29 |
-
# Convert to numpy array for Gradio output
|
| 30 |
-
image_array = np.array(image)
|
| 31 |
-
return image_array
|
| 32 |
|
| 33 |
-
#
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
# Generate the word cloud
|
| 40 |
-
image = generate_word_cloud("temp_excel_file.xlsx", column_name)
|
| 41 |
|
| 42 |
-
# Return the image
|
| 43 |
-
return image
|
| 44 |
|
| 45 |
-
iface = gr.Interface(fn=process_excel,
|
| 46 |
-
inputs=[gr.File(file_count=1, label="Upload Excel File"), gr.Textbox(label="Column Name")],
|
| 47 |
-
outputs=gr.Image(type="numpy", label="Word Cloud"),
|
| 48 |
-
title="Word Cloud Generator",
|
| 49 |
-
description="Upload an Excel file and enter the column name to generate a word cloud of positive and negative words.")
|
| 50 |
|
| 51 |
if __name__ == "__main__":
|
| 52 |
iface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
from wordcloud import WordCloud, STOPWORDS
|
|
|
|
| 4 |
from nltk.corpus import opinion_lexicon
|
| 5 |
from nltk.tokenize import word_tokenize
|
| 6 |
import nltk
|
| 7 |
+
import tempfile
|
| 8 |
+
import os
|
|
|
|
| 9 |
|
| 10 |
# Ensure the NLTK resources used below are downloaded.
nltk.download('opinion_lexicon')
nltk.download('punkt')
# NLTK 3.9+ loads the tokenizer tables from 'punkt_tab' rather than 'punkt';
# requesting both keeps word_tokenize working on older and newer releases.
nltk.download('punkt_tab')

# Load positive and negative words once at import time. They are stored as
# sets because generate_wordcloud tests every token for membership.
positive_words = set(opinion_lexicon.positive())
negative_words = set(opinion_lexicon.negative())
|
| 17 |
|
| 18 |
+
class SimpleGroupedColorFunc:
    """Color function that assigns colors to words by group membership.

    Instances are callable with the word as first positional argument and
    arbitrary keyword arguments, which is how this file hands them to
    ``WordCloud.recolor(color_func=...)``.

    Args:
        color_to_words: Mapping of color -> iterable of words that should be
            drawn in that color. If a word appears under several colors, the
            color that comes last in the mapping's iteration order wins.
        default_color: Color returned for any word not listed in
            ``color_to_words``.
    """

    def __init__(self, color_to_words, default_color):
        self.color_to_words = color_to_words
        self.default_color = default_color
        # Invert the mapping once so each call is a single dict lookup.
        self.words_to_color = {
            word: color
            for color, words in color_to_words.items()
            for word in words
        }

    def __call__(self, word, **kwargs):
        """Return the color for *word*; extra keyword arguments are ignored."""
        return self.words_to_color.get(word, self.default_color)
|
| 27 |
|
| 28 |
+
def generate_wordcloud(excel_file, column_name):
    """Build a sentiment-colored word cloud from one column of an Excel file.

    The text of the chosen column is lower-cased and tokenized, only tokens
    found in the positive or negative opinion lexicon are kept, and the
    resulting cloud is colored green (positive) / red (negative).

    Args:
        excel_file: The upload received from the Gradio ``File`` input. Either
            a path string or an object exposing the path as ``.name`` is
            accepted; anything else is passed to ``pandas.read_excel`` as is.
        column_name: Name of the column whose text should be analysed.

    Returns:
        Path of a temporary PNG file containing the rendered word cloud.

    Raises:
        gr.Error: If no file was uploaded, the column does not exist, or the
            column contains no usable sentiment words. Raising (instead of
            returning a message string) keeps text from being handed to the
            image output component.
    """
    if excel_file is None:
        raise gr.Error("Please upload an Excel file.")

    # NOTE(review): Gradio may deliver the upload as a path string or as a
    # temp-file wrapper depending on version/config - resolve both to a path.
    excel_source = getattr(excel_file, "name", excel_file)
    df = pd.read_excel(excel_source)

    # Use the user-provided column name
    if column_name not in df.columns:
        raise gr.Error("The specified column name does not exist in the Excel file.")

    text_data = ' '.join(df[column_name].dropna().astype(str))

    # Tokenize the text
    tokens = word_tokenize(text_data.lower())

    # Keep only tokens that carry positive or negative sentiment
    combined_tokens = [
        word for word in tokens
        if word in positive_words or word in negative_words
    ]
    combined_text = ' '.join(combined_tokens)

    # Generate the word cloud. WordCloud raises ValueError when nothing is
    # left to draw (no sentiment words, or all of them are stop words).
    try:
        wordcloud = WordCloud(
            stopwords=STOPWORDS,
            background_color='white',
            width=800,
            height=400,
        ).generate(combined_text)
    except ValueError as err:
        raise gr.Error(
            "No positive or negative words were found in the selected column."
        ) from err

    # Map of colors to the words drawn in that color
    color_to_words = {
        'green': positive_words,
        'red': negative_words,
    }

    # Default color for words not in any predefined list
    default_color = 'gray'

    # Create and apply the custom color function
    grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color)
    wordcloud.recolor(color_func=grouped_color_func)

    # Save the word cloud to a temporary file. delete=False because the path
    # is returned and must still exist after this function exits. The image
    # is written through the open handle rather than re-opening the file by
    # name while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
        wordcloud.to_image().save(tmp, format='PNG')

    # Return the path to the temporary file
    return tmp.name
|
| 70 |
+
|
| 71 |
+
# Define Gradio interface with an additional input for the column name.
# The two inputs are passed positionally to generate_wordcloud as
# (excel_file, column_name); its return value feeds the image output.
iface = gr.Interface(
    fn=generate_wordcloud,
    inputs=[gr.File(label="Upload Excel File"), gr.Text(label="Column Name")],
    outputs=gr.Image(label="Word Cloud"),
    title="Sentiment Analysis Word Cloud Generator",
    description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|