# app.py — Airline tweet sentiment analysis demo (Gradio).
import gradio as gr
import pandas as pd
import numpy as np
import re
import string
import emoji
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Download required NLTK data with error handling.
# NOTE: NLTK >= 3.8.2 loads the 'punkt_tab' resource for word_tokenize
# instead of 'punkt', so fetch both to stay compatible across versions.
_NLTK_RESOURCES = [
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
    ('corpora/wordnet', 'wordnet'),
]
for _resource_path, _package in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # quiet=True suppresses download chatter; a failed download for an
        # unknown package (e.g. punkt_tab on old NLTK) does not raise.
        nltk.download(_package, quiet=True)
# Initialize components shared by the preprocessing pipeline below.
stemmer = PorterStemmer()  # used in preprocess_text to stem tokens
lemmatizer = WordNetLemmatizer()  # initialized but not used in the visible pipeline
stop_words = set(stopwords.words('english'))  # English stopword set for filtering
# EMOJIXT mapping (simplified): emoji character -> sentiment-bearing word.
# Used by preprocessing variant III to keep the emoji signal as plain text.
EMOJIXT_DICT = {
    # faces / emotions
    '😀': 'happy',
    '😂': 'laughing',
    '😊': 'smiling',
    '😍': 'love',
    '😢': 'sad',
    '😠': 'angry',
    '😭': 'crying',
    '😴': 'sleepy',
    '😎': 'cool',
    '🤔': 'thinking',
    '😱': 'shocked',
    '😘': 'kissing',
    '😇': 'angel',
    # gestures / symbols
    '❤️': 'love',
    '👍': 'thumbs_up',
    '👎': 'thumbs_down',
    '🙏': 'praying',
    '🎉': 'celebrating',
    '💯': 'perfect',
    '🔥': 'fire',
    '👏': 'clapping',
    '🙌': 'hooray',
    # travel
    '✈️': 'airplane',
    '💺': 'seat',
}
# Preprocessing functions
def basic_text_preprocessing(text, remove_emojis=False, replace_emojis_method=None):
    """Normalise a raw tweet into clean lowercase text.

    Strips URLs, @mentions, #hashtags and 'rt' markers, applies the
    chosen emoji strategy, then removes punctuation and collapses
    whitespace runs.

    Args:
        text: Raw input (coerced to str).
        remove_emojis: When True, delete emoji characters outright.
        replace_emojis_method: 'builtin' -> emoji.demojize descriptions,
            'custom' -> EMOJIXT_DICT sentiment words, None -> leave emojis.

    Returns:
        The cleaned, single-space-separated string.
    """
    cleaned = str(text).lower()

    # Strip Twitter noise: links, mentions, hashtags, retweet markers.
    cleaned = re.sub(r'http\S+|www\S+|https\S+', '', cleaned, flags=re.MULTILINE)
    cleaned = re.sub(r'@\w+', '', cleaned)
    cleaned = re.sub(r'#\w+', '', cleaned)
    cleaned = re.sub(r'\brt\b', '', cleaned)

    # Emoji strategy: drop, describe, or map to sentiment words.
    if remove_emojis:
        cleaned = emoji.replace_emoji(cleaned, replace='')
    elif replace_emojis_method == 'builtin':
        cleaned = emoji.demojize(cleaned, delimiters=(" ", " "))
    elif replace_emojis_method == 'custom':
        for symbol, word in EMOJIXT_DICT.items():
            cleaned = cleaned.replace(symbol, f' {word} ')

    # Drop punctuation in one C-level pass, then collapse whitespace.
    cleaned = cleaned.translate(str.maketrans('', '', string.punctuation))
    return ' '.join(cleaned.split())
def preprocess_text(text, variant='I'):
    """Full preprocessing pipeline: clean, tokenize, de-stopword, stem.

    Args:
        text: Raw input text.
        variant: 'I' removes emojis, 'II' replaces them with built-in
            descriptions, 'III' replaces them with custom sentiment words;
            any other value leaves emojis untouched.

    Returns:
        A single space-joined string of stemmed, stopword-free tokens.
    """
    # Basic cleaning with the variant-specific emoji strategy.
    if variant == 'I':
        processed_text = basic_text_preprocessing(text, remove_emojis=True)
    elif variant == 'II':
        processed_text = basic_text_preprocessing(text, replace_emojis_method='builtin')
    elif variant == 'III':
        processed_text = basic_text_preprocessing(text, replace_emojis_method='custom')
    else:
        processed_text = basic_text_preprocessing(text)

    # Tokenize; fall back to whitespace splitting if NLTK fails (e.g.
    # punkt data missing).  `except Exception` instead of a bare except
    # keeps KeyboardInterrupt / SystemExit propagating as they should.
    try:
        tokens = word_tokenize(processed_text)
    except Exception:
        tokens = processed_text.split()

    # Drop stopwords, then stem what remains.
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [stemmer.stem(word) for word in tokens]
    return ' '.join(tokens)
# Model functions (simplified - in practice you'd load trained models)
def analyze_sentiment_tfidf(text):
    """Rule-based stand-in for the traditional ML (TF-IDF) classifier.

    Counts positive/negative keyword hits in the raw text and derives a
    sentiment label plus a heuristic confidence.  Confidence is reduced
    slightly when the text does not look airline-related.

    Args:
        text: Raw tweet text.

    Returns:
        Tuple of (sentiment, confidence): sentiment is one of
        'positive' / 'negative' / 'neutral'; confidence is in [0.5, 0.95].
    """
    # NOTE: the original also ran preprocess_text(text, variant='I') here
    # but never used the result; that dead call has been removed.
    text_lower = text.lower()

    # Keyword lexicons for the demo heuristic.
    positive_keywords = ['good', 'great', 'excellent', 'love', 'thanks', 'thank', 'awesome',
                         'amazing', 'best', 'perfect', 'happy', 'smooth', 'comfortable']
    negative_keywords = ['bad', 'terrible', 'worst', 'hate', 'awful', 'disappointed',
                         'delayed', 'canceled', 'rude', 'poor', 'problem', 'issue']
    airline_keywords = ['flight', 'airline', 'airport', 'luggage', 'baggage', 'seat',
                        'service', 'crew', 'pilot', 'staff', 'check-in', 'boarding']

    # Count hits (plain substring matching, not word-boundary aware).
    pos_count = sum(1 for word in positive_keywords if word in text_lower)
    neg_count = sum(1 for word in negative_keywords if word in text_lower)
    airline_related = any(word in text_lower for word in airline_keywords)

    # Majority keyword class wins; confidence grows with hits, capped at 0.95.
    if pos_count > neg_count:
        sentiment = "positive"
        confidence = min(0.7 + (pos_count * 0.05), 0.95)
    elif neg_count > pos_count:
        sentiment = "negative"
        confidence = min(0.7 + (neg_count * 0.05), 0.95)
    else:
        sentiment = "neutral"
        confidence = 0.6

    # Off-topic text is penalised, but never below 0.5.
    if not airline_related:
        confidence = max(confidence - 0.1, 0.5)

    return sentiment, confidence
def analyze_sentiment_bert(text):
    """Rule-based stand-in for the BERT classifier (demo only).

    Scores the raw text with weighted lexicons: strong keywords count
    +/-2, moderate keywords and emojis +/-1, then maps the total score
    to a sentiment label.

    Args:
        text: Raw tweet text.

    Returns:
        Tuple of (sentiment, confidence): sentiment is one of
        'positive' / 'negative' / 'neutral'; confidence is in [0.7, 0.98].
    """
    # NOTE: the original also ran preprocess_text(text, variant='II') here
    # but never used the result; that dead call has been removed.
    text_lower = text.lower()

    # Weighted keyword lexicons (substring matching, as in the TF-IDF demo).
    strong_positive = ['love', 'excellent', 'outstanding', 'perfect', 'amazing']
    moderate_positive = ['good', 'great', 'nice', 'pleasant', 'smooth']
    strong_negative = ['hate', 'terrible', 'awful', 'horrible', 'disgusting']
    moderate_negative = ['bad', 'poor', 'disappointed', 'frustrated', 'annoyed']

    # Emoji lexicons are checked against the original (uncased) text.
    positive_emojis = ['😀', '😂', '😊', '😍', '❤️', '👍', '🎉', '👏', '🙌']
    negative_emojis = ['😢', '😠', '😭', '👎', '😡']

    score = 0
    for word in strong_positive:
        if word in text_lower:
            score += 2
    for word in moderate_positive:
        if word in text_lower:
            score += 1
    for word in strong_negative:
        if word in text_lower:
            score -= 2
    for word in moderate_negative:
        if word in text_lower:
            score -= 1
    for emoji_char in positive_emojis:
        if emoji_char in text:
            score += 1
    for emoji_char in negative_emojis:
        if emoji_char in text:
            score -= 1

    # Map score to a label; |score| <= 1 is treated as neutral.
    if score > 1:
        sentiment = "positive"
        confidence = min(0.8 + (score * 0.02), 0.98)
    elif score < -1:
        sentiment = "negative"
        confidence = min(0.8 + (abs(score) * 0.02), 0.98)
    else:
        sentiment = "neutral"
        confidence = 0.7

    return sentiment, confidence
def count_emojis(text):
    """Return how many characters of *text* are emoji (per-codepoint)."""
    return sum(map(emoji.is_emoji, str(text)))
def create_visualizations(text):
    """Build the word-cloud image and emoji count for *text*.

    Generates a word cloud from the variant-I preprocessed text, falling
    back to lightly filtered raw words when fewer than three tokens
    survive preprocessing, and counts emojis in the original text.

    Args:
        text: Raw tweet text.

    Returns:
        Tuple of (wordcloud_file_path, emoji_count).
    """
    processed_text = preprocess_text(text, variant='I')

    # If preprocessing left too little, fall back to the raw words
    # (lowercased, keeping only words longer than 2 characters).
    if len(processed_text.split()) < 3:
        processed_text = ' '.join(
            word for word in text.lower().split() if len(word) > 2
        )

    # Render the cloud; use a placeholder message when nothing remains.
    if processed_text.strip():
        wordcloud = WordCloud(
            width=400,
            height=200,
            background_color='white',
            max_words=50,
            contour_width=1,
            contour_color='steelblue'
        ).generate(processed_text)
    else:
        # Placeholder cloud (typo fixed: was "No enough words ...").
        wordcloud = WordCloud(
            width=400,
            height=200,
            background_color='white'
        ).generate("Not enough words for word cloud")

    # Single save path for both branches (was duplicated).
    wordcloud_file = "wordcloud.png"
    wordcloud.to_file(wordcloud_file)

    return wordcloud_file, count_emojis(text)
# Gradio Interface
def analyze_tweet(tweet_text, analysis_method="Traditional ML (TF-IDF)", preprocessing_variant="I"):
    """Main analysis entry point for the Gradio interface.

    Args:
        tweet_text: Raw tweet text from the input box.
        analysis_method: One of the method dropdown choices.
        preprocessing_variant: 'I', 'II' or 'III'.

    Returns:
        Tuple of (result_markdown, wordcloud_file, emoji_count,
        visualization_markdown).  On error the file is None and the
        first element carries the message.
    """
    if not tweet_text.strip():
        return "Please enter some text to analyze.", None, 0, "Please enter text to see visualizations."

    try:
        # Dispatch on the selected method.  The explicit else fixes the
        # original bug where an unknown method left `sentiment` and
        # `method_used` unbound and surfaced as a confusing NameError.
        if analysis_method == "Traditional ML (TF-IDF)":
            sentiment, confidence = analyze_sentiment_tfidf(tweet_text)
            method_used = "Traditional ML with TF-IDF features"
        elif analysis_method == "BERT (Deep Learning)":
            sentiment, confidence = analyze_sentiment_bert(tweet_text)
            method_used = "BERT Transformer Model"
        else:
            return f"Unknown analysis method: {analysis_method}", None, 0, ""

        wordcloud_file, emoji_count = create_visualizations(tweet_text)

        variant_desc = {
            "I": "Remove emojis",
            "II": "Replace emojis with built-in descriptions",
            "III": "Replace emojis with custom sentiment words"
        }.get(preprocessing_variant, "Standard preprocessing")

        sentiment_emoji = {
            "positive": "😊",
            "negative": "😠",
            "neutral": "😐"
        }.get(sentiment, "🤔")

        # Only append an ellipsis when the tweet is actually truncated
        # (the original added "..." unconditionally).
        preview = tweet_text[:150] + ('...' if len(tweet_text) > 150 else '')

        result_text = f"""
### 📊 Sentiment Analysis Results {sentiment_emoji}
**Tweet:** {preview}
**Sentiment:** **{sentiment.upper()}** {sentiment_emoji}
**Confidence:** {confidence:.1%}
**Method:** {method_used}
**Preprocessing:** {variant_desc}
**Emoji Count:** {emoji_count} {'🎭' if emoji_count > 0 else ''}
**Detailed Analysis:**
- Text appears to convey **{sentiment}** sentiment
- Model confidence level: **{confidence:.1%}**
- Contains **{emoji_count}** emoji(s)
- Processed using variant **{preprocessing_variant}**
"""

        vis_description = f"""
### 📈 Visualizations
1. **Word Cloud** (right): Shows most frequent words after preprocessing
2. **Emoji Analysis:** Found {emoji_count} emoji(s) in the text
3. **Text Length:** {len(tweet_text)} characters, {len(tweet_text.split())} words
4. **Processing Variant:** {variant_desc}
"""
        return result_text, wordcloud_file, emoji_count, vis_description
    except Exception as e:
        return f"Error analyzing text: {str(e)}", None, 0, ""
# Create Gradio interface
# --- Gradio UI definition -------------------------------------------------
# Layout: an input column (textbox, method/variant dropdowns, analyze button)
# beside an examples column; below, a results column beside the emoji-count /
# word-cloud column.  analyze_tweet() is wired to the button click.
with gr.Blocks(title="Airline Sentiment Analysis", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✈️ Airline Sentiment Analyzer")
    gr.Markdown("Analyze the sentiment of airline-related tweets using ML techniques from the research paper.")
    with gr.Row():
        with gr.Column(scale=2):
            # Main tweet input box.
            tweet_input = gr.Textbox(
                label="Enter your airline tweet",
                placeholder="e.g., '@VirginAmerica had a great flight today! ✈️👍'",
                lines=4
            )
            with gr.Row():
                # Model choice: rule-based TF-IDF demo vs BERT demo.
                method_dropdown = gr.Dropdown(
                    choices=["Traditional ML (TF-IDF)", "BERT (Deep Learning)"],
                    value="Traditional ML (TF-IDF)",
                    label="Analysis Method"
                )
                # Emoji preprocessing variant (explained in markdown below).
                variant_dropdown = gr.Dropdown(
                    choices=["I", "II", "III"],
                    value="I",
                    label="Preprocessing Variant"
                )
            gr.Markdown("**Variant I:** Remove emojis | **Variant II:** Replace with descriptions | **Variant III:** Replace with sentiment words")
            analyze_btn = gr.Button("Analyze Sentiment", variant="primary", size="lg")
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Example Tweets")
            # Clicking an example fills the tweet input box.
            gr.Examples(
                examples=[
                    ["@VirginAmerica What @dhepburn said. 👍"],
                    ["@VirginAmerica plus you've added commercials to the experience... tacky. 👎"],
                    ["@VirginAmerica I didn't today... Must mean I need to take another trip! ✈️"],
                    ["@VirginAmerica it's really aggressive to blast obnoxious entertainment in your guests faces 😠"],
                    ["@VirginAmerica and it's a really big bad thing about it 🔥"],
                    ["Loved the smooth flight and excellent service on my trip yesterday! 😊✈️"],
                    ["Flight delayed for 3 hours with no explanation. Worst airline experience. 😡"]
                ],
                inputs=[tweet_input],
                label="Try these examples"
            )
    with gr.Row():
        with gr.Column(scale=2):
            # Markdown panes for the analysis result and the visualization
            # summary text returned by analyze_tweet().
            result_output = gr.Markdown(label="Analysis Results")
            vis_description = gr.Markdown(label="Visualization Details")
        with gr.Column(scale=1):
            emoji_count = gr.Number(label="Number of Emojis Found")
            wordcloud_output = gr.Image(label="Word Cloud", type="filepath")
    # Set up button click: output order must match the 4-tuple returned
    # by analyze_tweet().
    analyze_btn.click(
        fn=analyze_tweet,
        inputs=[tweet_input, method_dropdown, variant_dropdown],
        outputs=[result_output, wordcloud_output, emoji_count, vis_description]
    )
    gr.Markdown("---")
    # Static "about" section rendered below the interface.
    gr.Markdown("""
### ℹ️ About This Tool
This sentiment analyzer implements techniques from the research paper on airline sentiment analysis:
**Features:**
- Three preprocessing variants for emoji handling
- Traditional ML models (TF-IDF + classifiers)
- Deep learning with BERT
- Word cloud visualization
- Emoji analysis
**Preprocessing Variants:**
1. **Variant I**: Remove all emojis
2. **Variant II**: Replace emojis with built-in descriptions
3. **Variant III**: Replace emojis with custom sentiment words
**Methods Available:**
1. **Traditional ML**: Uses TF-IDF features with classifiers
2. **BERT**: State-of-the-art transformer model
**Note:** This is a demo version showing the preprocessing pipeline. For production, models would be trained on the full airline sentiment dataset.
""")
# Launch the app only when run as a script (not when imported, e.g. by tests
# or by a hosting platform that mounts `demo` itself).
if __name__ == "__main__":
    demo.launch()