# Spaces: DevNumb / Costumerfeelings (status: Sleeping)
# File size: 21,610 Bytes

# app.py
import html
import json
from datetime import datetime

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import torch
from plotly.subplots import make_subplots
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

class AdvancedSentimentAnalyzer:
    """Five-point sentiment analyzer built on a transformers text-classification pipeline.

    The analyzer loads ``model_name`` once at construction time.  If loading
    fails, the instance is still usable: ``classifier`` stays ``None`` and
    every analysis returns a neutral placeholder result carrying an
    ``'error'`` message instead of raising.

    Attributes:
        sentiment_map: Class index -> display label, most negative first.
        sentiment_colors: Display label -> hex colour used by the UI.
        language_detection_keywords: Language name -> common words used by
            the heuristic in :meth:`detect_language`.
        classifier: The loaded pipeline, or ``None`` when loading failed.
    """

    # Class index -> display label, ordered from most negative to most positive.
    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive",
    }

    # Display label -> hex colour shared by result cards and charts.
    sentiment_colors = {
        "Very Negative": "#FF6B6B",
        "Negative": "#FFA8A8",
        "Neutral": "#FFD93D",
        "Positive": "#6BCF7F",
        "Very Positive": "#4ECDC4",
    }

    # A handful of very common words per language for the keyword heuristic.
    language_detection_keywords = {
        'english': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for'],
        'spanish': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'por'],
        'french': ['le', 'la', 'de', 'et', 'que', 'en', 'un', 'pour'],
        'german': ['der', 'die', 'das', 'und', 'zu', 'in', 'den', 'mit'],
        'italian': ['il', 'la', 'di', 'e', 'che', 'in', 'un', 'per'],
        'portuguese': ['o', 'a', 'de', 'e', 'que', 'em', 'um', 'para'],
        'dutch': ['de', 'het', 'en', 'van', 'te', 'in', 'een', 'voor'],
        'russian': ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с'],
        'chinese': ['的', '是', '在', '了', '有', '和', '为', '我'],
        'japanese': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て'],
        # Korean particles/endings (the list previously contained Japanese kana).
        'korean': ['이', '에', '는', '를', '다', '가', '을', '의'],
        'arabic': ['ال', 'في', 'من', 'على', 'أن', 'ما', 'هو', 'إلى'],
        'hindi': ['की', 'से', 'है', 'और', 'के', 'में', 'यह', 'को'],
        'turkish': ['ve', 'bir', 'bu', 'ile', 'için', 'ama', 'da', 'de'],
    }

    # Languages whose keywords are matched as substrings: their keywords are
    # characters, particles or prefixes that are not separated by spaces.
    # Every other language is matched on whole words, so that e.g. the
    # Portuguese keyword 'a' does not match every text containing that letter.
    _SUBSTRING_LANGUAGES = frozenset({'chinese', 'japanese', 'korean', 'arabic'})

    # Punctuation stripped from both ends of a word before keyword comparison.
    _EDGE_PUNCTUATION = '.,;:!?"\'()[]{}<>«»¡¿…।'

    # Exact (lower-cased) model labels -> display label.
    _LABEL_ALIASES = {
        'very negative': "Very Negative",
        'label_0': "Very Negative",
        'negative': "Negative",
        'label_1': "Negative",
        'neutral': "Neutral",
        'label_2': "Neutral",
        'positive': "Positive",
        'label_3': "Positive",
        'very positive': "Very Positive",
        'label_4': "Very Positive",
    }

    # Weights for the scalar sentiment score on a -2 .. +2 scale.
    _SENTIMENT_WEIGHTS = {
        "Very Negative": -2,
        "Negative": -1,
        "Neutral": 0,
        "Positive": 1,
        "Very Positive": 2,
    }

    def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
        """Load the tokenizer, model and pipeline for ``model_name``.

        Loading errors are printed and swallowed on purpose so that the
        application can still start; ``classifier`` is then ``None``.
        """
        print("Loading model and tokenizer...")
        self.model_name = model_name
        # Defined up front so the attributes exist even when loading fails.
        self.tokenizer = None
        self.model = None
        self.classifier = None
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

            # top_k=None makes the pipeline return a score for every class
            # (it replaces the deprecated return_all_scores=True).
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                top_k=None,
            )
        except Exception as e:
            # Best-effort start-up: keep running without a model.
            print(f"Error loading model: {e}")
            self.classifier = None
        else:
            print("Model loaded successfully!")

    def detect_language(self, text):
        """Guess the language of ``text`` from a small list of common words.

        Space-delimited languages are scored by whole-word matches; the
        languages in ``_SUBSTRING_LANGUAGES`` are scored by substring matches.
        Ties go to the language listed first in
        ``language_detection_keywords``.

        Returns:
            The capitalised language name, or ``'Unknown'`` when ``text`` is
            empty, not a string, or matches no keyword at all.
        """
        if not text or not isinstance(text, str):
            return 'Unknown'

        text_lower = text.lower()
        words = {token.strip(self._EDGE_PUNCTUATION) for token in text_lower.split()}

        scores = {}
        for lang, keywords in self.language_detection_keywords.items():
            if lang in self._SUBSTRING_LANGUAGES:
                scores[lang] = sum(1 for keyword in keywords if keyword in text_lower)
            else:
                scores[lang] = sum(1 for keyword in keywords if keyword in words)

        best = max(scores, key=scores.get, default=None)
        if best is None or scores[best] == 0:
            return 'Unknown'
        return best.capitalize()

    def _resolve_label(self, label):
        """Map a raw model label to a display label, or ``None`` if unknown."""
        normalized = str(label).strip().lower()
        exact = self._LABEL_ALIASES.get(normalized)
        if exact is not None:
            return exact
        # Substring fallback: try the longest names first so that
        # "very positive" is never mistaken for plain "positive".
        for name in sorted(self.sentiment_map.values(), key=len, reverse=True):
            if name.lower() in normalized:
                return name
        return None

    def _fallback_result(self, text, error):
        """Build the neutral placeholder result returned when analysis is impossible."""
        return {
            'text': text,
            'sentiment': 'Neutral',
            'confidence': 0.0,
            'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
            'sentiment_score': 0,
            'language': 'Unknown',
            'emotional_intensity': 0.0,
            'timestamp': datetime.now().isoformat(),
            'error': error,
        }

    def analyze_sentiment(self, text):
        """Classify ``text`` and derive summary metrics.

        Args:
            text: The text to analyze.  Non-string values other than ``None``
                are converted with ``str()``.

        Returns:
            A dict with keys ``text``, ``sentiment`` (dominant label),
            ``confidence`` (score of the dominant label), ``scores`` (label ->
            score for all five labels), ``sentiment_score`` (weighted sum on a
            -2..+2 scale), ``language``, ``emotional_intensity`` (highest
            minus lowest score) and ``timestamp``.  When the text is blank,
            the model is unavailable, or classification fails, a neutral
            placeholder with an additional ``error`` key is returned instead.
        """
        if text is not None and not isinstance(text, str):
            text = str(text)

        if not text or not text.strip():
            return self._fallback_result(text, 'No text provided')

        classifier = getattr(self, 'classifier', None)
        if classifier is None:
            return self._fallback_result(text, 'Sentiment model is not available')

        try:
            # truncation=True keeps over-length input from failing in the model.
            raw_output = classifier(text, truncation=True)

            # Accept both the nested ([[{...}, ...]]) and the flat
            # ([{...}, ...]) output shapes.
            if isinstance(raw_output, dict):
                raw_output = [raw_output]
            predictions = raw_output[0] if isinstance(raw_output[0], list) else raw_output

            sentiment_scores = {}
            for pred in predictions:
                name = self._resolve_label(pred['label'])
                if name is None:
                    # Unknown label: best-effort assignment to the first
                    # category that has no score yet.
                    # NOTE(review): this depends on the order of the
                    # predictions, which may not follow class order.
                    name = next(
                        (
                            candidate
                            for candidate in self.sentiment_map.values()
                            if candidate not in sentiment_scores
                        ),
                        None,
                    )
                if name is not None:
                    sentiment_scores[name] = pred['score']

            # Ensure all sentiment categories are present.
            for sentiment in self.sentiment_map.values():
                sentiment_scores.setdefault(sentiment, 0.0)

            dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
            confidence = sentiment_scores[dominant_sentiment]

            sentiment_score = sum(
                weight * sentiment_scores[name]
                for name, weight in self._SENTIMENT_WEIGHTS.items()
            )

            all_scores = sentiment_scores.values()
            emotional_intensity = max(all_scores) - min(all_scores)

            return {
                'text': text,
                'sentiment': dominant_sentiment,
                'confidence': confidence,
                'scores': sentiment_scores,
                'sentiment_score': sentiment_score,
                'language': self.detect_language(text),
                'emotional_intensity': emotional_intensity,
                'timestamp': datetime.now().isoformat(),
            }

        except Exception as e:
            # Callers expect a result dict for every input, so report the
            # failure inside the result rather than raising.
            print(f"Error in sentiment analysis: {e}")
            return self._fallback_result(text, str(e))

    def batch_analyze(self, texts):
        """Analyze each item of ``texts`` in order and return the list of results.

        Progress is printed every ten items.
        """
        results = []
        for i, text in enumerate(texts):
            if i % 10 == 0:
                print(f"Processing {i}/{len(texts)}...")
            results.append(self.analyze_sentiment(text))
        return results

# Initialize analyzer
# Module-level instance shared by the chart helpers and the Gradio callbacks
# defined below. Constructing it loads the model, so this runs at import time.
print("Initializing sentiment analyzer...")
analyzer = AdvancedSentimentAnalyzer()

def create_sentiment_chart(scores):
    """Build a bar chart of the per-sentiment scores.

    ``scores`` maps sentiment labels to numeric scores.  Each bar is coloured
    with the analyzer's colour for that label and annotated with the score as
    a percentage.  Any failure is printed and reported by returning ``None``.
    """
    try:
        labels = list(scores.keys())
        values = list(scores.values())
        bar_colors = [analyzer.sentiment_colors[label] for label in labels]
        bar_text = [f'{value:.1%}' for value in values]

        bars = go.Bar(
            x=labels,
            y=values,
            marker_color=bar_colors,
            text=bar_text,
            textposition='auto',
        )
        figure = go.Figure(data=[bars])

        layout = dict(
            title="Sentiment Distribution",
            xaxis_title="Sentiment",
            yaxis_title="Confidence Score",
            template="plotly_white",
            height=300,
        )
        figure.update_layout(**layout)
    except Exception as exc:
        print(f"Error creating chart: {exc}")
        return None

    return figure

def create_radar_chart(scores):
    """Build a filled radar (polar) chart of the per-sentiment scores.

    ``scores`` maps sentiment labels to numeric scores; the radial axis is
    fixed to the 0..1 range.  Any failure is printed and reported by
    returning ``None``.
    """
    try:
        radii = list(scores.values())
        axes = list(scores.keys())

        trace = go.Scatterpolar(
            r=radii,
            theta=axes,
            fill='toself',
            line=dict(color='#4ECDC4'),
            marker=dict(size=8),
        )
        figure = go.Figure(data=trace)

        radial_axis = dict(visible=True, range=[0, 1])
        figure.update_layout(
            polar=dict(radialaxis=radial_axis),
            showlegend=False,
            template="plotly_white",
            height=300,
        )
    except Exception as exc:
        print(f"Error creating radar chart: {exc}")
        return None

    return figure

def analyze_single_review(review_text):
    """Analyze one review and build the HTML card plus two charts for the UI.

    Args:
        review_text: Text entered by the user.

    Returns:
        A ``(html, bar_chart, radar_chart)`` tuple.  When the input is blank,
        or the analyzer reports an error, the first element is a plain
        message starting with "❌" and both charts are ``None``.
    """
    if not review_text or not review_text.strip():
        return "❌ Please enter some text to analyze.", None, None

    print(f"Analyzing: {review_text[:100]}...")
    result = analyzer.analyze_sentiment(review_text)

    # The analyzer signals failures through an 'error' key on a neutral
    # placeholder; show the failure instead of presenting it as a real result.
    if result.get('error'):
        return f"❌ Analysis failed: {html.escape(str(result['error']))}", None, None

    sentiment_color = analyzer.sentiment_colors.get(result['sentiment'], '#FFD93D')

    # User-supplied text is interpolated into markup, so it must be escaped
    # to keep characters such as '<' or '&' from being interpreted as HTML.
    safe_text = html.escape(str(result['text']))
    safe_language = html.escape(str(result['language']))

    output_html = f"""
    <div style="padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {sentiment_color}20, {sentiment_color}40); border-left: 5px solid {sentiment_color};">
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
            <h3 style="margin: 0; color: #2D3748;">🎯 Analysis Result</h3>
            <span style="background-color: {sentiment_color}; color: white; padding: 5px 15px; border-radius: 20px; font-weight: bold;">
                {result['sentiment'].upper()}
            </span>
        </div>
        
        <div style="background: white; padding: 15px; border-radius: 10px; margin: 10px 0;">
            <p style="margin: 0; font-style: italic;">"{safe_text}"</p>
        </div>
        
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 20px;">
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {sentiment_color}; margin-bottom: 5px;">📊</div>
                <div style="font-weight: bold; color: #4A5568;">Confidence</div>
                <div style="font-size: 18px; color: #2D3748;">{result['confidence']:.1%}</div>
            </div>
            
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {sentiment_color}; margin-bottom: 5px;">🌐</div>
                <div style="font-weight: bold; color: #4A5568;">Language</div>
                <div style="font-size: 18px; color: #2D3748;">{safe_language}</div>
            </div>
            
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {sentiment_color}; margin-bottom: 5px;">⚡</div>
                <div style="font-weight: bold; color: #4A5568;">Intensity</div>
                <div style="font-size: 18px; color: #2D3748;">{result['emotional_intensity']:.2f}</div>
            </div>
        </div>
    </div>
    """

    bar_chart = create_sentiment_chart(result['scores'])
    radar_chart = create_radar_chart(result['scores'])

    return output_html, bar_chart, radar_chart

def analyze_csv_file(csv_file):
    """Analyze every review in the first column of an uploaded CSV file.

    Args:
        csv_file: The upload handed over by the UI: either a path string or
            an object exposing the path as ``.name``.  ``None`` means nothing
            was uploaded.

    Returns:
        A ``(summary_markdown, results_csv_path, figure)`` tuple.  On any
        problem the first element is a message starting with "❌" and the
        other two are ``None``.

    Side effects:
        Writes the per-review results to a timestamped CSV file in the
        current working directory.
    """
    try:
        if csv_file is None:
            return "❌ Please upload a CSV file.", None, None

        # Accept both a plain path and a file-like wrapper with a .name path.
        csv_path = getattr(csv_file, "name", csv_file)

        print("Reading CSV file...")
        df = pd.read_csv(csv_path)

        # Assume first column contains reviews
        review_column = df.columns[0]
        # Cast to str so numeric-looking cells can be analyzed, and drop
        # blank cells so they are not counted as reviews.
        reviews = [
            review
            for review in df[review_column].dropna().astype(str)
            if review.strip()
        ]

        if not reviews:
            return "❌ No reviews found in the CSV file.", None, None

        print(f"Analyzing {len(reviews)} reviews...")
        results = analyzer.batch_analyze(reviews)

        # One row per review, including the raw per-class scores.
        results_df = pd.DataFrame({
            'Review': [r['text'] for r in results],
            'Sentiment': [r['sentiment'] for r in results],
            'Confidence': [r['confidence'] for r in results],
            'Sentiment_Score': [r['sentiment_score'] for r in results],
            'Language': [r['language'] for r in results],
            'Emotional_Intensity': [r['emotional_intensity'] for r in results],
            'Very_Negative_Score': [r['scores']['Very Negative'] for r in results],
            'Negative_Score': [r['scores']['Negative'] for r in results],
            'Neutral_Score': [r['scores']['Neutral'] for r in results],
            'Positive_Score': [r['scores']['Positive'] for r in results],
            'Very_Positive_Score': [r['scores']['Very Positive'] for r in results],
        })

        # Aggregate statistics for the summary and the dashboard.
        sentiment_counts = results_df['Sentiment'].value_counts()
        avg_confidence = results_df['Confidence'].mean()
        avg_sentiment_score = results_df['Sentiment_Score'].mean()
        language_distribution = results_df['Language'].value_counts()

        # 2x2 dashboard: two pie charts on top, two histograms below.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Sentiment Distribution', 'Language Distribution',
                            'Confidence Distribution', 'Sentiment Scores'),
            specs=[[{"type": "pie"}, {"type": "pie"}],
                   [{"type": "histogram"}, {"type": "histogram"}]]
        )

        # Sentiment pie chart
        fig.add_trace(
            go.Pie(
                labels=sentiment_counts.index,
                values=sentiment_counts.values,
                marker_colors=[
                    analyzer.sentiment_colors.get(sent, '#FFD93D')
                    for sent in sentiment_counts.index
                ]
            ), 1, 1
        )

        # Language pie chart (top 10 languages)
        top_languages = language_distribution.head(10)
        fig.add_trace(
            go.Pie(labels=top_languages.index, values=top_languages.values),
            1, 2
        )

        # Confidence histogram
        fig.add_trace(go.Histogram(x=results_df['Confidence'], nbinsx=20), 2, 1)

        # Sentiment score histogram
        fig.add_trace(go.Histogram(x=results_df['Sentiment_Score'], nbinsx=20), 2, 2)

        fig.update_layout(height=600, showlegend=False, template="plotly_white")

        # Save results
        output_filename = f"advanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        results_df.to_csv(output_filename, index=False)

        # Generate comprehensive summary
        summary = f"""
        ## 📊 BATCH ANALYSIS COMPLETE
        
        **Dataset Overview:**
        - 📝 **Total Reviews Analyzed:** {len(results):,}
        - 🌐 **Languages Detected:** {len(language_distribution)}
        - ⏱️ **Analysis Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        
        **Sentiment Breakdown:**
        - 🟢 **Very Positive:** {sentiment_counts.get('Very Positive', 0):,}
        - 🟡 **Positive:** {sentiment_counts.get('Positive', 0):,}  
        - ⚪ **Neutral:** {sentiment_counts.get('Neutral', 0):,}
        - 🟠 **Negative:** {sentiment_counts.get('Negative', 0):,}
        - 🔴 **Very Negative:** {sentiment_counts.get('Very Negative', 0):,}
        
        **Performance Metrics:**
        - 📈 **Average Confidence:** {avg_confidence:.1%}
        - 🎯 **Average Sentiment Score:** {avg_sentiment_score:.2f}
        - 🏆 **Most Common Language:** {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}
        
        **Files Generated:**
        - 💾 **Results CSV:** `{output_filename}`
        - 📊 **Analytics Dashboard:** See chart below
        
        **Next Steps:**
        - Download the CSV for detailed analysis
        - Use filters to segment by sentiment or language
        - Identify trends and patterns in customer feedback
        """

        return summary, output_filename, fig

    except Exception as e:
        # UI callback boundary: report the problem to the user instead of raising.
        error_msg = f"❌ Error processing file: {str(e)}"
        print(error_msg)
        return error_msg, None, None

# Create simple Gradio interface without any unsupported parameters
# Layout: three tabs (single review, batch CSV, about). Components are laid
# out in the order they are created inside the nested context managers.
with gr.Blocks() as demo:
    
    # Page header.
    gr.Markdown("""
    # 🌍 Advanced Multilingual Sentiment Analysis
    
    *Powered by fine-tuned multilingual transformer model supporting 23 languages*
    
    Analyze customer reviews, social media posts, and feedback across multiple languages with state-of-the-art accuracy.
    """)
    
    # Tab 1: free-text input on the left, result card and two plots on the right.
    with gr.Tab("🔍 Single Review Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📥 Input Review")
                single_review = gr.Textbox(
                    label="Enter text in any supported language",
                    placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
                    lines=4
                )
                analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                
                gr.Markdown("""
                **Supported Languages:** 
                English, Chinese, Spanish, Hindi, Arabic, Bengali, Portuguese, Russian, 
                Japanese, German, Malay, Telugu, Vietnamese, Korean, French, Turkish, 
                Italian, Polish, Ukrainian, Tagalog, Dutch, Swiss German, Swahili
                """)
            
            with gr.Column():
                gr.Markdown("### 📊 Analysis Results")
                output_html = gr.HTML(label="Detailed Analysis")
                
                with gr.Row():
                    bar_chart = gr.Plot(label="Sentiment Distribution")
                    radar_chart = gr.Plot(label="Sentiment Radar")
        
        # analyze_single_review returns (html, bar figure, radar figure), in
        # the same order as the outputs list.
        analyze_btn.click(
            analyze_single_review,
            inputs=single_review,
            outputs=[output_html, bar_chart, radar_chart]
        )
    
    # Tab 2: CSV upload on the left, summary, download and dashboard on the right.
    with gr.Tab("📁 Batch CSV Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📤 Upload CSV File")
                csv_upload = gr.File(
                    label="Upload CSV file with reviews",
                    file_types=[".csv"]
                )
                # NOTE(review): the size and row limits stated below are not
                # enforced by analyze_csv_file in this file.
                gr.Markdown("""
                **CSV Format Requirements:**
                - First column should contain the review text
                - File should be UTF-8 encoded
                - Maximum file size: 100MB
                - Supports up to 10,000 reviews per batch
                """)
                
                batch_analyze_btn = gr.Button("📈 Analyze Batch", variant="primary")
            
            with gr.Column():
                gr.Markdown("### 📋 Analysis Summary")
                batch_output = gr.Markdown(label="Batch Summary")
                download_output = gr.File(label="Download Results")
                batch_chart = gr.Plot(label="Batch Analytics")
        
        # analyze_csv_file returns (summary markdown, results CSV path, figure),
        # in the same order as the outputs list.
        batch_analyze_btn.click(
            analyze_csv_file,
            inputs=csv_upload,
            outputs=[batch_output, download_output, batch_chart]
        )
    
    # Tab 3: static description of the tool.
    with gr.Tab("ℹ️ About & Instructions"):
        gr.Markdown("""
        ## 🎯 About This Tool
        
        This advanced sentiment analysis system uses a fine-tuned multilingual transformer model to analyze text in 23 languages.
        
        ### 🌟 Key Features
        
        - **Multilingual Support**: Analyze sentiment in 23 languages
        - **5-Point Scale**: Very Negative → Negative → Neutral → Positive → Very Positive
        - **Advanced Analytics**: Confidence scores, emotional intensity, language detection
        - **Batch Processing**: Analyze thousands of reviews via CSV upload
        - **Visual Analytics**: Interactive charts and comprehensive dashboards
        
        ### 🚀 Use Cases
        
        - **E-commerce**: Product reviews from global marketplaces
        - **Customer Support**: Analyze support tickets and feedback
        - **Social Media**: Monitor brand sentiment across languages
        - **Market Research**: Understand international customer opinions
        
        ### 🔧 Technical Details
        
        - **Base Model**: DistilBERT Multilingual
        - **Languages**: 23 languages
        - **Sentiment Scale**: 5-point (Very Negative to Very Positive)
        - **Processing**: Real-time analysis with batch capabilities
        """)

# Launch the application
# Only start the server when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(share=False, debug=True)