# NOTE: Hugging Face Spaces page scaffolding ("Spaces: Sleeping") removed — not part of app.py.
| # app.py | |
| import gradio as gr | |
| import torch | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| import json | |
class AdvancedSentimentAnalyzer:
    """5-point multilingual sentiment analyzer.

    Wraps a Hugging Face ``text-classification`` pipeline (default model:
    ``tabularisai/multilingual-sentiment-analysis``) and layers on top of it:
    keyword-based language detection, a weighted sentiment score on a
    -2..+2 scale, and an "emotional intensity" metric (spread between the
    strongest and weakest class probabilities).
    """

    def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
        """Load the tokenizer/model; degrade instead of crashing on failure.

        If loading fails (no network, bad model id), ``self.classifier`` is
        left as ``None`` and analyze_sentiment() returns an error result.
        """
        print("Loading model and tokenizer...")
        self.model_name = model_name
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
            # top_k=None returns the score of every class, replacing the
            # deprecated return_all_scores=True pipeline argument.
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                top_k=None,
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback: analyze_sentiment() checks for None before running.
            self.classifier = None
        # Class index -> human-readable name, in ascending positivity order.
        self.sentiment_map = {
            0: "Very Negative",
            1: "Negative",
            2: "Neutral",
            3: "Positive",
            4: "Very Positive",
        }
        # Chart/HTML colors keyed by sentiment name.
        self.sentiment_colors = {
            "Very Negative": "#FF6B6B",
            "Negative": "#FFA8A8",
            "Neutral": "#FFD93D",
            "Positive": "#6BCF7F",
            "Very Positive": "#4ECDC4",
        }
        # High-frequency function words per language, used by the substring
        # heuristic in detect_language().
        self.language_detection_keywords = {
            'english': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for'],
            'spanish': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'por'],
            'french': ['le', 'la', 'de', 'et', 'que', 'en', 'un', 'pour'],
            'german': ['der', 'die', 'das', 'und', 'zu', 'in', 'den', 'mit'],
            'italian': ['il', 'la', 'di', 'e', 'che', 'in', 'un', 'per'],
            'portuguese': ['o', 'a', 'de', 'e', 'que', 'em', 'um', 'para'],
            'dutch': ['de', 'het', 'en', 'van', 'te', 'in', 'een', 'voor'],
            'russian': ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с'],
            'chinese': ['的', '是', '在', '了', '有', '和', '为', '我'],
            'japanese': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て'],
            # BUGFIX: the original 'korean' list mixed in Japanese kana
            # ('は', 'を', 'が', 'で', 'て'); these are Korean particles.
            'korean': ['이', '가', '은', '는', '을', '를', '에', '의'],
            'arabic': ['ال', 'في', 'من', 'على', 'أن', 'ما', 'هو', 'إلى'],
            'hindi': ['की', 'से', 'है', 'और', 'के', 'में', 'यह', 'को'],
            'turkish': ['ve', 'bir', 'bu', 'ile', 'için', 'ama', 'da', 'de'],
        }
        # BUGFIX: the original printed success unconditionally, even after
        # the except branch above had already reported a load failure.
        if self.classifier is not None:
            print("Model loaded successfully!")

    def detect_language(self, text):
        """Best-effort language guess from common-word substring counts.

        Returns a capitalized language name, or 'Unknown' when the input is
        empty/non-string or no keyword matched. NOTE: matching is substring
        based (not word-boundary) so short tokens can over-match; this is a
        UI hint, not a reliable detector.
        """
        if not text or not isinstance(text, str):
            return 'Unknown'
        text_lower = text.lower()
        scores = {}
        for lang, keywords in self.language_detection_keywords.items():
            scores[lang] = sum(1 for keyword in keywords if keyword in text_lower)
        # Only return a language if at least one keyword matched.
        if scores and max(scores.values()) > 0:
            detected_lang = max(scores, key=scores.get)
        else:
            detected_lang = 'unknown'
        return detected_lang.capitalize()

    def _error_result(self, text, message):
        """Uniform neutral placeholder returned when analysis cannot run."""
        return {
            'text': text,
            'sentiment': 'Neutral',
            'confidence': 0.0,
            'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
            'sentiment_score': 0,
            'language': 'Unknown',
            'emotional_intensity': 0.0,
            'error': message,
        }

    def analyze_sentiment(self, text):
        """Classify one text.

        Returns a dict with: sentiment (dominant class name), confidence,
        scores (per-class probabilities), sentiment_score (-2..+2 weighted
        mean), language, emotional_intensity and an ISO timestamp. On empty
        input, a missing model, or a pipeline error, a neutral placeholder
        carrying an 'error' key is returned instead of raising.
        """
        if not text or not text.strip():
            return self._error_result(text, 'No text provided')
        if self.classifier is None:
            # BUGFIX: the model failed to load in __init__; the original
            # called None(text) and only survived via the blanket except.
            return self._error_result(text, 'Model not loaded')
        try:
            # With top_k=None the pipeline returns a list of dicts per input.
            predictions = self.classifier(text)[0]
            sentiment_scores = {}
            for pred in predictions:
                label_lc = pred['label'].lower()
                score = pred['score']
                # BUGFIX: the 'very ...' labels must be matched BEFORE the
                # plain ones — 'very positive' contains the substring
                # 'positive', so the original ordering routed every
                # "Very Positive" probability into the "Positive" bucket.
                if 'very negative' in label_lc or pred['label'] == 'LABEL_0':
                    sentiment_scores["Very Negative"] = score
                elif 'very positive' in label_lc or pred['label'] == 'LABEL_4':
                    sentiment_scores["Very Positive"] = score
                elif 'negative' in label_lc or pred['label'] == 'LABEL_1':
                    sentiment_scores["Negative"] = score
                elif 'neutral' in label_lc or pred['label'] == 'LABEL_2':
                    sentiment_scores["Neutral"] = score
                elif 'positive' in label_lc or pred['label'] == 'LABEL_3':
                    sentiment_scores["Positive"] = score
                else:
                    # Unrecognized label: fill the first empty slot in order.
                    for key in self.sentiment_map.values():
                        if key not in sentiment_scores:
                            sentiment_scores[key] = score
                            break
            # Ensure all five categories are present for downstream charts.
            for sentiment in self.sentiment_map.values():
                sentiment_scores.setdefault(sentiment, 0.0)
            dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
            confidence = sentiment_scores[dominant_sentiment]
            # Weighted mean on a -2..+2 scale (Neutral contributes 0).
            sentiment_score = (
                sentiment_scores["Very Positive"] * 2 +
                sentiment_scores["Positive"] * 1 +
                sentiment_scores["Neutral"] * 0 +
                sentiment_scores["Negative"] * -1 +
                sentiment_scores["Very Negative"] * -2
            )
            detected_language = self.detect_language(text)
            # Spread between the strongest and weakest class probabilities.
            emotional_intensity = max(sentiment_scores.values()) - min(sentiment_scores.values())
            return {
                'text': text,
                'sentiment': dominant_sentiment,
                'confidence': confidence,
                'scores': sentiment_scores,
                'sentiment_score': sentiment_score,
                'language': detected_language,
                'emotional_intensity': emotional_intensity,
                'timestamp': datetime.now().isoformat(),
            }
        except Exception as e:
            print(f"Error in sentiment analysis: {e}")
            return self._error_result(text, str(e))

    def batch_analyze(self, texts):
        """Run analyze_sentiment() over *texts*, logging progress every 10."""
        results = []
        for i, text in enumerate(texts):
            if i % 10 == 0:
                print(f"Processing {i}/{len(texts)}...")
            results.append(self.analyze_sentiment(text))
        return results
# Initialize analyzer
print("Initializing sentiment analyzer...")
# Module-level singleton shared by all Gradio callbacks below; loading the
# model happens once at import time (Space startup), not per request.
analyzer = AdvancedSentimentAnalyzer()
def create_sentiment_chart(scores):
    """Render the per-class confidence scores as a colored bar chart.

    *scores* maps sentiment name -> probability. Returns a plotly Figure,
    or None if chart construction fails for any reason.
    """
    try:
        labels = list(scores)
        values = [scores[name] for name in labels]
        bars = go.Bar(
            x=labels,
            y=values,
            marker_color=[analyzer.sentiment_colors[name] for name in labels],
            text=[f'{value:.1%}' for value in values],
            textposition='auto',
        )
        figure = go.Figure(data=[bars])
        figure.update_layout(
            title="Sentiment Distribution",
            xaxis_title="Sentiment",
            yaxis_title="Confidence Score",
            template="plotly_white",
            height=300,
        )
        return figure
    except Exception as e:
        print(f"Error creating chart: {e}")
        return None
def create_radar_chart(scores):
    """Plot the five sentiment scores on a 0-1 radial (radar) axis.

    Returns a plotly Figure, or None if chart construction fails.
    """
    try:
        trace = go.Scatterpolar(
            r=[scores[name] for name in scores],
            theta=[name for name in scores],
            fill='toself',
            line=dict(color='#4ECDC4'),
            marker=dict(size=8),
        )
        figure = go.Figure(data=trace)
        figure.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=False,
            template="plotly_white",
            height=300,
        )
        return figure
    except Exception as e:
        print(f"Error creating radar chart: {e}")
        return None
def analyze_single_review(review_text):
    """Analyze one review; return (HTML summary, bar chart, radar chart)."""
    # Guard clause: nothing to analyze.
    if not review_text or not review_text.strip():
        return "❌ Please enter some text to analyze.", None, None
    print(f"Analyzing: {review_text[:100]}...")
    result = analyzer.analyze_sentiment(review_text)
    # Pull the fields used by the HTML template into locals for readability.
    sentiment = result['sentiment']
    color = analyzer.sentiment_colors.get(sentiment, '#FFD93D')
    quoted = result['text']
    confidence = result['confidence']
    language = result['language']
    intensity = result['emotional_intensity']
    output_html = f"""
    <div style="padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {color}20, {color}40); border-left: 5px solid {color};">
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
            <h3 style="margin: 0; color: #2D3748;">🎯 Analysis Result</h3>
            <span style="background-color: {color}; color: white; padding: 5px 15px; border-radius: 20px; font-weight: bold;">
                {sentiment.upper()}
            </span>
        </div>
        <div style="background: white; padding: 15px; border-radius: 10px; margin: 10px 0;">
            <p style="margin: 0; font-style: italic;">"{quoted}"</p>
        </div>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 20px;">
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {color}; margin-bottom: 5px;">📊</div>
                <div style="font-weight: bold; color: #4A5568;">Confidence</div>
                <div style="font-size: 18px; color: #2D3748;">{confidence:.1%}</div>
            </div>
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {color}; margin-bottom: 5px;">🌐</div>
                <div style="font-weight: bold; color: #4A5568;">Language</div>
                <div style="font-size: 18px; color: #2D3748;">{language}</div>
            </div>
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {color}; margin-bottom: 5px;">⚡</div>
                <div style="font-weight: bold; color: #4A5568;">Intensity</div>
                <div style="font-size: 18px; color: #2D3748;">{intensity:.2f}</div>
            </div>
        </div>
    </div>
    """
    # Companion visualisations built from the same per-class score dict.
    return (
        output_html,
        create_sentiment_chart(result['scores']),
        create_radar_chart(result['scores']),
    )
def analyze_csv_file(csv_file):
    """Batch-analyze a CSV of reviews.

    The first column of the uploaded file is treated as review text.
    Returns (markdown summary, path of the generated results CSV, plotly
    dashboard figure), or (error message, None, None) on any failure.
    """
    try:
        if csv_file is None:
            return "❌ Please upload a CSV file.", None, None
        print("Reading CSV file...")
        # BUGFIX: depending on the Gradio version, gr.File passes the handler
        # either a plain filepath string or a tempfile wrapper exposing
        # .name; accept both instead of assuming .name exists.
        csv_path = csv_file if isinstance(csv_file, str) else csv_file.name
        df = pd.read_csv(csv_path)
        # Assume first column contains reviews.
        review_column = df.columns[0]
        # BUGFIX: astype(str) guards against numeric columns —
        # analyze_sentiment() calls .strip() on its input, and a float/int
        # cell would raise AttributeError and abort the entire batch.
        reviews = df[review_column].dropna().astype(str).tolist()
        if not reviews:
            return "❌ No reviews found in the CSV file.", None, None
        print(f"Analyzing {len(reviews)} reviews...")
        results = analyzer.batch_analyze(reviews)
        # Flatten the per-review result dicts into a tabular frame.
        results_df = pd.DataFrame({
            'Review': [r['text'] for r in results],
            'Sentiment': [r['sentiment'] for r in results],
            'Confidence': [r['confidence'] for r in results],
            'Sentiment_Score': [r['sentiment_score'] for r in results],
            'Language': [r['language'] for r in results],
            'Emotional_Intensity': [r['emotional_intensity'] for r in results],
            'Very_Negative_Score': [r['scores']['Very Negative'] for r in results],
            'Negative_Score': [r['scores']['Negative'] for r in results],
            'Neutral_Score': [r['scores']['Neutral'] for r in results],
            'Positive_Score': [r['scores']['Positive'] for r in results],
            'Very_Positive_Score': [r['scores']['Very Positive'] for r in results],
        })
        # Aggregate analytics for the summary text and the dashboard.
        sentiment_counts = results_df['Sentiment'].value_counts()
        avg_confidence = results_df['Confidence'].mean()
        avg_sentiment_score = results_df['Sentiment_Score'].mean()
        language_distribution = results_df['Language'].value_counts()
        # 2x2 dashboard: two pies on top, two histograms below.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Sentiment Distribution', 'Language Distribution',
                            'Confidence Distribution', 'Sentiment Scores'),
            specs=[[{"type": "pie"}, {"type": "pie"}],
                   [{"type": "histogram"}, {"type": "histogram"}]]
        )
        # Sentiment pie chart, colored consistently with the single view.
        fig.add_trace(
            go.Pie(
                labels=sentiment_counts.index,
                values=sentiment_counts.values,
                marker_colors=[analyzer.sentiment_colors.get(sent, '#FFD93D') for sent in sentiment_counts.index]
            ), 1, 1
        )
        # Language pie chart (top 10 languages only, to stay readable).
        top_languages = language_distribution.head(10)
        fig.add_trace(
            go.Pie(labels=top_languages.index, values=top_languages.values),
            1, 2
        )
        # Confidence and sentiment-score histograms.
        fig.add_trace(go.Histogram(x=results_df['Confidence'], nbinsx=20), 2, 1)
        fig.add_trace(go.Histogram(x=results_df['Sentiment_Score'], nbinsx=20), 2, 2)
        fig.update_layout(height=600, showlegend=False, template="plotly_white")
        # Persist detailed results to the working directory for download.
        output_filename = f"advanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        results_df.to_csv(output_filename, index=False)
        # Markdown summary rendered in the "Analysis Summary" panel.
        summary = f"""
## 📊 BATCH ANALYSIS COMPLETE
**Dataset Overview:**
- 📝 **Total Reviews Analyzed:** {len(results):,}
- 🌐 **Languages Detected:** {len(language_distribution)}
- ⏱️ **Analysis Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
**Sentiment Breakdown:**
- 🟢 **Very Positive:** {sentiment_counts.get('Very Positive', 0):,}
- 🟡 **Positive:** {sentiment_counts.get('Positive', 0):,}
- ⚪ **Neutral:** {sentiment_counts.get('Neutral', 0):,}
- 🟠 **Negative:** {sentiment_counts.get('Negative', 0):,}
- 🔴 **Very Negative:** {sentiment_counts.get('Very Negative', 0):,}
**Performance Metrics:**
- 📈 **Average Confidence:** {avg_confidence:.1%}
- 🎯 **Average Sentiment Score:** {avg_sentiment_score:.2f}
- 🏆 **Most Common Language:** {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}
**Files Generated:**
- 💾 **Results CSV:** `{output_filename}`
- 📊 **Analytics Dashboard:** See chart below
**Next Steps:**
- Download the CSV for detailed analysis
- Use filters to segment by sentiment or language
- Identify trends and patterns in customer feedback
"""
        return summary, output_filename, fig
    except Exception as e:
        error_msg = f"❌ Error processing file: {str(e)}"
        print(error_msg)
        return error_msg, None, None
# Create simple Gradio interface without any unsupported parameters
# UI layout: three tabs (single review, batch CSV, about). The callbacks
# wired below are analyze_single_review and analyze_csv_file.
with gr.Blocks() as demo:
    # App header shown above all tabs.
    gr.Markdown("""
    # 🌍 Advanced Multilingual Sentiment Analysis
    *Powered by fine-tuned multilingual transformer model supporting 23 languages*
    Analyze customer reviews, social media posts, and feedback across multiple languages with state-of-the-art accuracy.
    """)
    with gr.Tab("🔍 Single Review Analysis"):
        with gr.Row():
            # Left column: free-text input and trigger button.
            with gr.Column():
                gr.Markdown("### 📥 Input Review")
                single_review = gr.Textbox(
                    label="Enter text in any supported language",
                    placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
                    lines=4
                )
                analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                gr.Markdown("""
                **Supported Languages:**
                English, Chinese, Spanish, Hindi, Arabic, Bengali, Portuguese, Russian,
                Japanese, German, Malay, Telugu, Vietnamese, Korean, French, Turkish,
                Italian, Polish, Ukrainian, Tagalog, Dutch, Swiss German, Swahili
                """)
            # Right column: HTML summary plus two plotly charts.
            with gr.Column():
                gr.Markdown("### 📊 Analysis Results")
                output_html = gr.HTML(label="Detailed Analysis")
                with gr.Row():
                    bar_chart = gr.Plot(label="Sentiment Distribution")
                    radar_chart = gr.Plot(label="Sentiment Radar")
        # Wire the button to the single-review handler (3 outputs).
        analyze_btn.click(
            analyze_single_review,
            inputs=single_review,
            outputs=[output_html, bar_chart, radar_chart]
        )
    with gr.Tab("📁 Batch CSV Analysis"):
        with gr.Row():
            # Left column: CSV upload, format notes, trigger button.
            with gr.Column():
                gr.Markdown("### 📤 Upload CSV File")
                csv_upload = gr.File(
                    label="Upload CSV file with reviews",
                    file_types=[".csv"]
                )
                gr.Markdown("""
                **CSV Format Requirements:**
                - First column should contain the review text
                - File should be UTF-8 encoded
                - Maximum file size: 100MB
                - Supports up to 10,000 reviews per batch
                """)
                batch_analyze_btn = gr.Button("📈 Analyze Batch", variant="primary")
            # Right column: markdown summary, downloadable CSV, dashboard.
            with gr.Column():
                gr.Markdown("### 📋 Analysis Summary")
                batch_output = gr.Markdown(label="Batch Summary")
                download_output = gr.File(label="Download Results")
        batch_chart = gr.Plot(label="Batch Analytics")
        # Wire the button to the batch handler (summary, file, figure).
        batch_analyze_btn.click(
            analyze_csv_file,
            inputs=csv_upload,
            outputs=[batch_output, download_output, batch_chart]
        )
    with gr.Tab("ℹ️ About & Instructions"):
        # Static documentation tab; no callbacks.
        gr.Markdown("""
        ## 🎯 About This Tool
        This advanced sentiment analysis system uses a fine-tuned multilingual transformer model to analyze text in 23 languages.
        ### 🌟 Key Features
        - **Multilingual Support**: Analyze sentiment in 23 languages
        - **5-Point Scale**: Very Negative → Negative → Neutral → Positive → Very Positive
        - **Advanced Analytics**: Confidence scores, emotional intensity, language detection
        - **Batch Processing**: Analyze thousands of reviews via CSV upload
        - **Visual Analytics**: Interactive charts and comprehensive dashboards
        ### 🚀 Use Cases
        - **E-commerce**: Product reviews from global marketplaces
        - **Customer Support**: Analyze support tickets and feedback
        - **Social Media**: Monitor brand sentiment across languages
        - **Market Research**: Understand international customer opinions
        ### 🔧 Technical Details
        - **Base Model**: DistilBERT Multilingual
        - **Languages**: 23 languages
        - **Sentiment Scale**: 5-point (Very Negative to Very Positive)
        - **Processing**: Real-time analysis with batch capabilities
        """)
# Launch the application
if __name__ == "__main__":
    # share=False: no public gradio.live tunnel; debug=True enables verbose
    # server logging (intended for Space/local development).
    demo.launch(share=False, debug=True)