Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| import string | |
| import emoji | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from wordcloud import WordCloud | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
| from nltk.stem import PorterStemmer, WordNetLemmatizer | |
# Download NLTK data with error handling
def _ensure_nltk_data():
    """Download any missing NLTK resources used by the preprocessing pipeline.

    Checks each resource locally first and only downloads (quietly) when
    absent, so repeated app startups do not hit the network.
    """
    # (lookup path, downloadable package name) pairs
    resources = [
        ('tokenizers/punkt', 'punkt'),
        ('corpora/stopwords', 'stopwords'),
        ('corpora/wordnet', 'wordnet'),
    ]
    for path, package in resources:
        try:
            nltk.data.find(path)
        except LookupError:
            # Resource missing locally; fetch it without console noise.
            nltk.download(package, quiet=True)


_ensure_nltk_data()
# Initialize shared NLP components used by the preprocessing pipeline.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()  # NOTE(review): created but never used below — confirm intent
stop_words = set(stopwords.words('english'))

# EMOJIXT dictionary (simplified version): maps emoji characters to
# sentiment-bearing words, used by preprocessing Variant III.
# NOTE(review): the emoji keys below appear mojibake-corrupted — the same
# character (e.g. 'π') occurs as many different keys, so later duplicate
# keys silently overwrite earlier ones and only a handful of distinct
# entries survive. Restore the original emoji characters from the source
# notebook/paper before relying on Variant III.
EMOJIXT_DICT = {
    'π': 'happy', 'π': 'laughing', 'π': 'smiling', 'π': 'love',
    'β€οΈ': 'love', 'π': 'thumbs_up', 'π': 'thumbs_down', 'π’': 'sad',
    'π ': 'angry', 'π': 'crying', 'π΄': 'sleepy', 'π': 'cool',
    'π€': 'thinking', 'π±': 'shocked', 'π': 'praying', 'π': 'celebrating',
    'βοΈ': 'airplane', 'πΊ': 'seat', 'π―': 'perfect', 'π₯': 'fire',
    'π': 'clapping', 'π': 'hooray', 'π': 'kissing', 'π': 'angel'
}
# Preprocessing functions
def basic_text_preprocessing(text, remove_emojis=False, replace_emojis_method=None):
    """Perform basic text preprocessing for tweet text.

    Args:
        text: Raw input text (coerced to str and lowercased).
        remove_emojis: If True, strip all emoji characters.
        replace_emojis_method: 'builtin' to replace emojis with
            emoji.demojize descriptions, 'custom' to substitute words from
            EMOJIXT_DICT, or None to leave emojis untouched. Ignored when
            remove_emojis is True.

    Returns:
        Cleaned, whitespace-normalized text with URLs, mentions, hashtags,
        'rt' markers, and punctuation removed.
    """
    text = str(text).lower()
    # Remove URLs, mentions, hashtags, and retweet markers
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#\w+', '', text)
    text = re.sub(r'\brt\b', '', text)
    # Handle emojis
    if remove_emojis:
        text = emoji.replace_emoji(text, replace='')
    elif replace_emojis_method == 'builtin':
        text = emoji.demojize(text, delimiters=(" ", " "))
    elif replace_emojis_method == 'custom':
        for emoji_char, replacement in EMOJIXT_DICT.items():
            text = text.replace(emoji_char, f' {replacement} ')
    # BUG FIX: emoji descriptions contain underscores (e.g. 'thumbs_up'),
    # and '_' is in string.punctuation, so the strip below used to fuse
    # them into single tokens ('thumbsup'). Convert underscores to spaces
    # first so the description words survive as separate tokens.
    text = text.replace('_', ' ')
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Remove extra whitespace
    text = ' '.join(text.split())
    return text
def preprocess_text(text, variant='I'):
    """Complete preprocessing pipeline: clean, tokenize, de-stopword, stem.

    Args:
        text: Raw input text.
        variant: 'I' removes emojis, 'II' replaces them with built-in
            descriptions, 'III' replaces them via EMOJIXT_DICT; any other
            value leaves emojis untouched.

    Returns:
        Space-joined string of stemmed, stopword-free tokens.
    """
    # Basic cleaning according to the selected emoji-handling variant
    if variant == 'I':
        processed_text = basic_text_preprocessing(text, remove_emojis=True)
    elif variant == 'II':
        processed_text = basic_text_preprocessing(text, replace_emojis_method='builtin')
    elif variant == 'III':
        processed_text = basic_text_preprocessing(text, replace_emojis_method='custom')
    else:
        processed_text = basic_text_preprocessing(text)
    # Tokenization
    try:
        tokens = word_tokenize(processed_text)
    except Exception:
        # BUG FIX: was a bare `except:` — keep the best-effort fallback
        # (e.g. missing punkt data) but no longer swallow SystemExit /
        # KeyboardInterrupt.
        tokens = processed_text.split()
    # Remove stopwords
    tokens = [word for word in tokens if word not in stop_words]
    # Apply stemming
    tokens = [stemmer.stem(word) for word in tokens]
    # Join back to string
    return ' '.join(tokens)
# Model functions (simplified - in practice you'd load trained models)
def analyze_sentiment_tfidf(text):
    """Analyze sentiment using a keyword heuristic standing in for TF-IDF ML.

    Args:
        text: Raw tweet text.

    Returns:
        (sentiment, confidence): sentiment is 'positive' | 'negative' |
        'neutral'; confidence is a float in [0.5, 0.95].
    """
    # NOTE: the original called preprocess_text(text, variant='I') here and
    # discarded the result; the dead call was removed — the heuristic below
    # only inspects the raw lowercased text.
    text_lower = text.lower()
    # Keyword lexicons for the demo heuristic
    positive_keywords = ['good', 'great', 'excellent', 'love', 'thanks', 'thank', 'awesome',
                         'amazing', 'best', 'perfect', 'happy', 'smooth', 'comfortable']
    negative_keywords = ['bad', 'terrible', 'worst', 'hate', 'awful', 'disappointed',
                         'delayed', 'canceled', 'rude', 'poor', 'problem', 'issue']
    airline_keywords = ['flight', 'airline', 'airport', 'luggage', 'baggage', 'seat',
                        'service', 'crew', 'pilot', 'staff', 'check-in', 'boarding']
    # Count keyword hits (substring matching, as in the original heuristic)
    pos_count = sum(1 for word in positive_keywords if word in text_lower)
    neg_count = sum(1 for word in negative_keywords if word in text_lower)
    airline_related = any(word in text_lower for word in airline_keywords)
    # Majority vote decides polarity; each extra hit adds confidence, capped at 0.95
    if pos_count > neg_count:
        sentiment = "positive"
        confidence = min(0.7 + (pos_count * 0.05), 0.95)
    elif neg_count > pos_count:
        sentiment = "negative"
        confidence = min(0.7 + (neg_count * 0.05), 0.95)
    else:
        sentiment = "neutral"
        confidence = 0.6
    # Off-topic (non-airline) text gets a confidence penalty, floored at 0.5
    if not airline_related:
        confidence = max(confidence - 0.1, 0.5)
    return sentiment, confidence
def analyze_sentiment_bert(text):
    """Analyze sentiment using BERT (simulated for demo).

    Rule-based stand-in for a fine-tuned BERT model: weighted keyword
    scoring plus emoji scoring over the raw input text.

    Returns:
        (sentiment, confidence): sentiment in {'positive', 'negative',
        'neutral'}; confidence in [0.7, 0.98].
    """
    # Preprocess text (variant II: emojis -> built-in descriptions).
    # NOTE(review): the result is never used below — the heuristic scores
    # the raw text directly; confirm whether this call is intentional.
    processed_text = preprocess_text(text, variant='II')
    # More sophisticated rule-based approach for BERT demo
    text_lower = text.lower()
    # Strong keywords are worth 2 points, moderate ones 1 point
    strong_positive = ['love', 'excellent', 'outstanding', 'perfect', 'amazing']
    moderate_positive = ['good', 'great', 'nice', 'pleasant', 'smooth']
    strong_negative = ['hate', 'terrible', 'awful', 'horrible', 'disgusting']
    moderate_negative = ['bad', 'poor', 'disappointed', 'frustrated', 'annoyed']
    # Emoji sentiment lists.
    # NOTE(review): these literals look mojibake-corrupted — the same
    # character (e.g. 'π') appears several times, and in BOTH lists, so a
    # single emoji can add and subtract multiple points at once. Restore
    # the original emoji characters and de-duplicate before trusting
    # the emoji contribution to the score.
    positive_emojis = ['π', 'π', 'π', 'π', 'β€οΈ', 'π', 'π', 'π', 'π']
    negative_emojis = ['π’', 'π ', 'π', 'π', 'π‘']
    # Aggregate signed score: positive cues add, negative cues subtract
    score = 0
    # Text analysis (substring matching on the lowercased text)
    for word in strong_positive:
        if word in text_lower:
            score += 2
    for word in moderate_positive:
        if word in text_lower:
            score += 1
    for word in strong_negative:
        if word in text_lower:
            score -= 2
    for word in moderate_negative:
        if word in text_lower:
            score -= 1
    # Emoji analysis (checked against the original, un-lowercased text)
    for emoji_char in positive_emojis:
        if emoji_char in text:
            score += 1
    for emoji_char in negative_emojis:
        if emoji_char in text:
            score -= 1
    # Map the score to a label; |score| scales confidence, capped at 0.98.
    # Scores in [-1, 1] are treated as neutral.
    if score > 1:
        sentiment = "positive"
        confidence = min(0.8 + (score * 0.02), 0.98)
    elif score < -1:
        sentiment = "negative"
        confidence = min(0.8 + (abs(score) * 0.02), 0.98)
    else:
        sentiment = "neutral"
        confidence = 0.7
    return sentiment, confidence
def count_emojis(text):
    """Return the number of emoji characters found in *text*."""
    total = 0
    for char in str(text):
        if emoji.is_emoji(char):
            total += 1
    return total
def create_visualizations(text):
    """Create a word-cloud image and an emoji count for the given text.

    Args:
        text: Raw tweet text.

    Returns:
        (wordcloud_file, emoji_count): path to the saved PNG ("wordcloud.png"
        in the working directory) and the number of emojis in the text.
    """
    # Build the word cloud from the preprocessed (variant I) text
    processed_text = preprocess_text(text, variant='I')
    # If preprocessing stripped too much, fall back to lightly filtered original
    if len(processed_text.split()) < 3:
        processed_text = ' '.join([word for word in text.lower().split() if len(word) > 2])
    # Generate word cloud (placeholder cloud when nothing meaningful remains)
    if processed_text.strip():
        wordcloud = WordCloud(
            width=400,
            height=200,
            background_color='white',
            max_words=50,
            contour_width=1,
            contour_color='steelblue'
        ).generate(processed_text)
    else:
        # Typo fix: placeholder read "No enough words for word cloud"
        wordcloud = WordCloud(
            width=400,
            height=200,
            background_color='white'
        ).generate("Not enough words for word cloud")
    # Save once for either branch (removes the duplicated save logic)
    wordcloud_file = "wordcloud.png"
    wordcloud.to_file(wordcloud_file)
    # Emoji count
    emoji_count = count_emojis(text)
    return wordcloud_file, emoji_count
# Gradio Interface
def analyze_tweet(tweet_text, analysis_method="Traditional ML (TF-IDF)", preprocessing_variant="I"):
    """Main analysis function for the Gradio interface.

    Args:
        tweet_text: Raw tweet text from the input textbox.
        analysis_method: "Traditional ML (TF-IDF)" or "BERT (Deep Learning)".
        preprocessing_variant: "I" | "II" | "III" emoji-handling variant
            (display only; each method picks its own variant internally).

    Returns:
        (result_markdown, wordcloud_filepath, emoji_count, vis_markdown);
        on failure: (error_message, None, 0, "").
    """
    if not tweet_text.strip():
        return "Please enter some text to analyze.", None, 0, "Please enter text to see visualizations."
    try:
        # Dispatch to the selected analysis method.
        if analysis_method == "BERT (Deep Learning)":
            sentiment, confidence = analyze_sentiment_bert(tweet_text)
            method_used = "BERT Transformer Model"
        else:
            # BUG FIX: an unrecognized method used to leave `sentiment` /
            # `method_used` unbound (UnboundLocalError silently swallowed by
            # the except below); default to the TF-IDF path instead.
            sentiment, confidence = analyze_sentiment_tfidf(tweet_text)
            method_used = "Traditional ML with TF-IDF features"
        # Create visualizations
        wordcloud_file, emoji_count = create_visualizations(tweet_text)
        # Get preprocessing variant description
        variant_desc = {
            "I": "Remove emojis",
            "II": "Replace emojis with built-in descriptions",
            "III": "Replace emojis with custom sentiment words"
        }.get(preprocessing_variant, "Standard preprocessing")
        # Format results with emoji indicators
        sentiment_emoji = {
            "positive": "π",
            "negative": "π ",
            "neutral": "π"
        }.get(sentiment, "π€")
        # BUG FIX: only append an ellipsis when the tweet is actually truncated
        display_text = tweet_text[:150] + ("..." if len(tweet_text) > 150 else "")
        result_text = f"""
### π Sentiment Analysis Results {sentiment_emoji}
**Tweet:** {display_text}
**Sentiment:** **{sentiment.upper()}** {sentiment_emoji}
**Confidence:** {confidence:.1%}
**Method:** {method_used}
**Preprocessing:** {variant_desc}
**Emoji Count:** {emoji_count} {'π' if emoji_count > 0 else ''}
**Detailed Analysis:**
- Text appears to convey **{sentiment}** sentiment
- Model confidence level: **{confidence:.1%}**
- Contains **{emoji_count}** emoji(s)
- Processed using variant **{preprocessing_variant}**
"""
        # Create visualization description
        vis_description = f"""
### π Visualizations
1. **Word Cloud** (right): Shows most frequent words after preprocessing
2. **Emoji Analysis:** Found {emoji_count} emoji(s) in the text
3. **Text Length:** {len(tweet_text)} characters, {len(tweet_text.split())} words
4. **Processing Variant:** {variant_desc}
"""
        return result_text, wordcloud_file, emoji_count, vis_description
    except Exception as e:
        return f"Error analyzing text: {str(e)}", None, 0, ""
# Create Gradio interface.
# Layout: input column (tweet textbox + method/variant dropdowns + button)
# on the left, example tweets on the right; results markdown and the
# word-cloud/emoji-count widgets in a second row below.
with gr.Blocks(title="Airline Sentiment Analysis", theme=gr.themes.Soft()) as demo:
    # NOTE(review): markdown/emoji strings below contain mojibake (e.g.
    # "βοΈ") inherited from the source — restore original emoji characters.
    gr.Markdown("# βοΈ Airline Sentiment Analyzer")
    gr.Markdown("Analyze the sentiment of airline-related tweets using ML techniques from the research paper.")
    with gr.Row():
        with gr.Column(scale=2):
            # Main text input for the tweet to analyze
            tweet_input = gr.Textbox(
                label="Enter your airline tweet",
                placeholder="e.g., '@VirginAmerica had a great flight today! βοΈπ'",
                lines=4
            )
            with gr.Row():
                # Chooses which analyze_sentiment_* function runs
                method_dropdown = gr.Dropdown(
                    choices=["Traditional ML (TF-IDF)", "BERT (Deep Learning)"],
                    value="Traditional ML (TF-IDF)",
                    label="Analysis Method"
                )
                # Emoji-handling variant shown in the result summary
                variant_dropdown = gr.Dropdown(
                    choices=["I", "II", "III"],
                    value="I",
                    label="Preprocessing Variant"
                )
            gr.Markdown("**Variant I:** Remove emojis | **Variant II:** Replace with descriptions | **Variant III:** Replace with sentiment words")
            analyze_btn = gr.Button("Analyze Sentiment", variant="primary", size="lg")
        with gr.Column(scale=1):
            # Clickable example tweets that populate the textbox
            gr.Markdown("### π Example Tweets")
            gr.Examples(
                examples=[
                    ["@VirginAmerica What @dhepburn said. π"],
                    ["@VirginAmerica plus you've added commercials to the experience... tacky. π"],
                    ["@VirginAmerica I didn't today... Must mean I need to take another trip! βοΈ"],
                    ["@VirginAmerica it's really aggressive to blast obnoxious entertainment in your guests faces π "],
                    ["@VirginAmerica and it's a really big bad thing about it π₯"],
                    ["Loved the smooth flight and excellent service on my trip yesterday! πβοΈ"],
                    ["Flight delayed for 3 hours with no explanation. Worst airline experience. π‘"]
                ],
                inputs=[tweet_input],
                label="Try these examples"
            )
    with gr.Row():
        with gr.Column(scale=2):
            # Markdown panes populated by analyze_tweet's first/last outputs
            result_output = gr.Markdown(label="Analysis Results")
            vis_description = gr.Markdown(label="Visualization Details")
        with gr.Column(scale=1):
            # Numeric emoji count and generated word-cloud image
            emoji_count = gr.Number(label="Number of Emojis Found")
            wordcloud_output = gr.Image(label="Word Cloud", type="filepath")
    # Wire the button to the analysis callback; output order must match
    # analyze_tweet's return tuple.
    analyze_btn.click(
        fn=analyze_tweet,
        inputs=[tweet_input, method_dropdown, variant_dropdown],
        outputs=[result_output, wordcloud_output, emoji_count, vis_description]
    )
    gr.Markdown("---")
    gr.Markdown("""
### βΉοΈ About This Tool
This sentiment analyzer implements techniques from the research paper on airline sentiment analysis:
**Features:**
- Three preprocessing variants for emoji handling
- Traditional ML models (TF-IDF + classifiers)
- Deep learning with BERT
- Word cloud visualization
- Emoji analysis
**Preprocessing Variants:**
1. **Variant I**: Remove all emojis
2. **Variant II**: Replace emojis with built-in descriptions
3. **Variant III**: Replace emojis with custom sentiment words
**Methods Available:**
1. **Traditional ML**: Uses TF-IDF features with classifiers
2. **BERT**: State-of-the-art transformer model
**Note:** This is a demo version showing the preprocessing pipeline. For production, models would be trained on the full airline sentiment dataset.
""")

# Launch the app only when run as a script (not when imported)
if __name__ == "__main__":
    demo.launch()