Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from wordcloud import WordCloud, STOPWORDS | |
| from nltk.corpus import opinion_lexicon | |
| from nltk.tokenize import word_tokenize | |
| import nltk | |
| import tempfile | |
| import os | |
# Ensure the NLTK data used by this module is downloaded.
nltk.download('opinion_lexicon')
nltk.download('punkt')
# Newer NLTK releases (3.8.2+/3.9) load the word_tokenize models from the
# 'punkt_tab' resource instead of the pickled 'punkt' one; request both so
# tokenization works regardless of the installed NLTK version.
nltk.download('punkt_tab')

# Load positive and negative words as sets for fast membership tests.
positive_words = set(opinion_lexicon.positive())
negative_words = set(opinion_lexicon.negative())
class SimpleGroupedColorFunc:
    """Callable that colors a word according to the group it belongs to.

    Args:
        color_to_words: Mapping of color -> iterable of words that should be
            drawn in that color. If a word appears under several colors, the
            color listed last in the mapping is used.
        default_color: Color returned for any word not listed in the mapping.
    """

    def __init__(self, color_to_words, default_color):
        self.color_to_words = color_to_words
        self.default_color = default_color

        # Invert the mapping once so each lookup is a single dict access.
        lookup = {}
        for color, words in color_to_words.items():
            for word in words:
                lookup[word] = color
        self.words_to_color = lookup

    def __call__(self, word, **kwargs):
        """Return the color for *word*; extra keyword arguments are ignored."""
        if word in self.words_to_color:
            return self.words_to_color[word]
        return self.default_color
def generate_wordcloud(excel_file, column_name):
    """Render a sentiment-colored word cloud from one column of an Excel file.

    The text of the chosen column is lower-cased and tokenized, only tokens
    found in the positive or negative opinion lexicon are kept, and the
    resulting cloud is recolored: positive words green, negative words red.

    Args:
        excel_file: The uploaded file as delivered by the ``gr.File`` input;
            it is passed straight to ``pandas.read_excel``.
        column_name: Name of the column whose text should be analysed.

    Returns:
        Path of a temporary ``.png`` file containing the rendered word cloud.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If no file was uploaded, the column does not exist, or the
            column yields no words that can be plotted.
    """
    if excel_file is None:
        raise gr.Error("Please upload an Excel file.")

    # Read the Excel file from the uploaded file
    df = pd.read_excel(excel_file)

    # Use the user-provided column name. The output component is an image, so
    # a returned message string could not be displayed; raise a Gradio error
    # so the message actually reaches the user.
    if column_name not in df.columns:
        raise gr.Error("The specified column name does not exist in the Excel file.")

    text_data = ' '.join(df[column_name].dropna().astype(str))

    # Tokenize the text and keep only words from the sentiment lexicons
    tokens = word_tokenize(text_data.lower())
    combined_tokens = [
        word for word in tokens
        if word in positive_words or word in negative_words
    ]
    combined_text = ' '.join(combined_tokens)

    # collocations=False: the filtered text is just sentiment words strung
    # together, so bigrams would pair words that were never adjacent in the
    # source and would not match the single-word color lookup below.
    cloud = WordCloud(
        stopwords=STOPWORDS,
        background_color='white',
        width=800,
        height=400,
        collocations=False,
    )
    try:
        wordcloud = cloud.generate(combined_text)
    except ValueError as err:
        # WordCloud raises ValueError when there is no word left to plot.
        raise gr.Error(
            "No sentiment words could be plotted from the specified column."
        ) from err

    # Map of words to colors
    color_to_words = {
        'green': positive_words,
        'red': negative_words,
    }
    # Default color for words not in any predefined list
    default_color = 'gray'

    # Create and apply the custom color function
    grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color)
    wordcloud.recolor(color_func=grouped_color_func)

    # Reserve a temporary file name, then close the handle before writing:
    # saving by path while the handle is still open fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
        image_path = tmp.name
    wordcloud.to_image().save(image_path)

    # Return the path to the temporary file
    return image_path
# Gradio UI: an Excel upload plus a free-text column name as inputs, and the
# rendered word cloud image as the single output.
iface = gr.Interface(
    title="Sentiment Analysis Word Cloud Generator",
    description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.",
    fn=generate_wordcloud,
    inputs=[
        gr.File(label="Upload Excel File"),
        gr.Text(label="Column Name"),
    ],
    outputs=gr.Image(label="Word Cloud"),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()