Spaces:

DevNumb
/

Costumerfeelings

Sleeping

App Files Files Community

DevNumb committed on Nov 24, 2025

Commit

fde3c7b

verified ·

1 Parent(s): f842ed2

Update app.py

Browse files

Files changed (1) hide show

app.py +181 -113

app.py CHANGED Viewed

@@ -8,18 +8,28 @@ from datetime import datetime
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 class AdvancedSentimentAnalyzer:
     def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
         self.model_name = model_name
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
-        self.classifier = pipeline(
-            "text-classification",
-            model=self.model,
-            tokenizer=self.tokenizer,
-            return_all_scores=True
-        )
         self.sentiment_map = {
             0: "Very Negative",
@@ -38,24 +48,29 @@ class AdvancedSentimentAnalyzer:
         }
         self.language_detection_keywords = {
-            'english': ['the', 'and', 'is', 'in', 'to'],
-            'spanish': ['el', 'la', 'de', 'que', 'y'],
-            'french': ['le', 'la', 'de', 'et', 'que'],
-            'german': ['der', 'die', 'das', 'und', 'zu'],
-            'italian': ['il', 'la', 'di', 'e', 'che'],
-            'portuguese': ['o', 'a', 'de', 'e', 'que'],
-            'dutch': ['de', 'het', 'en', 'van', 'te'],
-            'russian': ['и', 'в', 'не', 'на', 'я'],
-            'chinese': ['的', '是', '在', '了', '有'],
-            'japanese': ['の', 'に', 'は', 'を', 'た'],
-            'korean': ['이', '에', '는', '을', '다'],
-            'arabic': ['ال', 'في', 'من', 'على', 'أن'],
-            'hindi': ['की', 'से', 'है', 'और', 'के'],
-            'turkish': ['ve', 'bir', 'bu', 'ile', 'için']
         }
     def detect_language(self, text):
         """Simple language detection based on common words"""
         text_lower = text.lower()
         scores = {}
@@ -63,20 +78,57 @@ class AdvancedSentimentAnalyzer:
             score = sum(1 for keyword in keywords if keyword in text_lower)
             scores[lang] = score
-        detected_lang = max(scores, key=scores.get) if scores else 'unknown'
         return detected_lang.capitalize()
     def analyze_sentiment(self, text):
         """Advanced sentiment analysis with detailed metrics"""
         try:
-            # Get predictions
             predictions = self.classifier(text)[0]
-            # Convert to structured format
-            sentiment_scores = {
-                self.sentiment_map[i]: pred['score']
-                for i, pred in enumerate(predictions)
-            }
             # Determine dominant sentiment
             dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
@@ -109,6 +161,7 @@ class AdvancedSentimentAnalyzer:
             }
         except Exception as e:
             return {
                 'text': text,
                 'sentiment': 'Neutral',
@@ -119,68 +172,83 @@ class AdvancedSentimentAnalyzer:
                 'emotional_intensity': 0.0,
                 'error': str(e)
             }
     def batch_analyze(self, texts):
         """Analyze multiple texts"""
-        return [self.analyze_sentiment(text) for text in texts]
 # Initialize analyzer
 analyzer = AdvancedSentimentAnalyzer()
 def create_sentiment_chart(scores):
     """Create beautiful sentiment distribution chart"""
-    fig = go.Figure(data=[
-        go.Bar(
-            x=list(scores.keys()),
-            y=list(scores.values()),
-            marker_color=[analyzer.sentiment_colors[sent] for sent in scores.keys()],
-            text=[f'{score:.1%}' for score in scores.values()],
-            textposition='auto',
         )
-    ])
-    fig.update_layout(
-        title="Sentiment Distribution",
-        xaxis_title="Sentiment",
-        yaxis_title="Confidence Score",
-        template="plotly_white",
-        height=300
-    )
-    return fig
 def create_radar_chart(scores):
     """Create radar chart for sentiment analysis"""
-    fig = go.Figure(data=go.Scatterpolar(
-        r=list(scores.values()),
-        theta=list(scores.keys()),
-        fill='toself',
-        line=dict(color='#4ECDC4'),
-        marker=dict(size=8)
-    ))
-    fig.update_layout(
-        polar=dict(
-            radialaxis=dict(
-                visible=True,
-                range=[0, 1]
-            )),
-        showlegend=False,
-        template="plotly_white",
-        height=300
-    )
-    return fig
 def analyze_single_review(review_text):
     """Analyze single review with enhanced visualization"""
-    if not review_text.strip():
-        return "Please enter some text to analyze.", None, None
     result = analyzer.analyze_sentiment(review_text)
     # Create main output
-    sentiment_color = analyzer.sentiment_colors[result['sentiment']]
     output_html = f"""
     <div style="padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {sentiment_color}20, {sentiment_color}40); border-left: 5px solid {sentiment_color};">
@@ -226,12 +294,19 @@ def analyze_single_review(review_text):
 def analyze_csv_file(csv_file):
     """Analyze reviews from CSV file with advanced analytics"""
     try:
         df = pd.read_csv(csv_file.name)
         # Assume first column contains reviews
         review_column = df.columns[0]
         reviews = df[review_column].dropna().tolist()
         print(f"Analyzing {len(reviews)} reviews...")
         results = analyzer.batch_analyze(reviews)
@@ -270,13 +345,14 @@ def analyze_csv_file(csv_file):
             go.Pie(
                 labels=sentiment_counts.index,
                 values=sentiment_counts.values,
-                marker_colors=[analyzer.sentiment_colors[sent] for sent in sentiment_counts.index]
             ), 1, 1
         )
-        # Language pie chart
         fig.add_trace(
-            go.Pie(labels=language_distribution.index, values=language_distribution.values),
             1, 2
         )
@@ -294,28 +370,28 @@ def analyze_csv_file(csv_file):
         # Generate comprehensive summary
         summary = f"""
-        📊 **BATCH ANALYSIS COMPLETE**
         **Dataset Overview:**
-        - 📝 Total Reviews Analyzed: {len(results):,}
-        - 🌐 Languages Detected: {len(language_distribution)}
-        - ⏱️ Analysis Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
         **Sentiment Breakdown:**
-        🟢 Very Positive: {sentiment_counts.get('Very Positive', 0):,}
-        🟡 Positive: {sentiment_counts.get('Positive', 0):,}
-        ⚪ Neutral: {sentiment_counts.get('Neutral', 0):,}
-        🟠 Negative: {sentiment_counts.get('Negative', 0):,}
-        🔴 Very Negative: {sentiment_counts.get('Very Negative', 0):,}
         **Performance Metrics:**
-        - 📈 Average Confidence: {avg_confidence:.1%}
-        - 🎯 Average Sentiment Score: {avg_sentiment_score:.2f}
-        - 🏆 Most Common Language: {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}
         **Files Generated:**
-        - 💾 Results CSV: `{output_filename}`
-        - 📊 Analytics Dashboard: See chart below
         **Next Steps:**
         - Download the CSV for detailed analysis
@@ -326,19 +402,22 @@ def analyze_csv_file(csv_file):
         return summary, output_filename, fig
     except Exception as e:
-        return f"❌ Error processing file: {str(e)}", None, None
-# Create enhanced Gradio interface
 with gr.Blocks(
-    theme=gr.themes.Soft(),
     title="🌍 Multilingual Sentiment Analyzer",
     css="""
     .gradio-container {
         max-width: 1200px !important;
     }
-    .sentiment-positive { border-left: 4px solid #6BCF7F !important; }
-    .sentiment-negative { border-left: 4px solid #FF6B6B !important; }
-    .sentiment-neutral { border-left: 4px solid #FFD93D !important; }
     """
 ) as demo:
@@ -357,8 +436,7 @@ with gr.Blocks(
                 single_review = gr.Textbox(
                     label="Enter text in any supported language",
                     placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
-                    lines=4,
-                    elem_id="review-input"
                 )
                 analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
@@ -389,8 +467,7 @@ with gr.Blocks(
                 gr.Markdown("### 📤 Upload CSV File")
                 csv_upload = gr.File(
                     label="Upload CSV file with reviews",
-                    file_types=[".csv"],
-                    type="filepath"
                 )
                 gr.Markdown("""
                 **CSV Format Requirements:**
@@ -434,22 +511,13 @@ with gr.Blocks(
         - **Customer Support**: Analyze support tickets and feedback
         - **Social Media**: Monitor brand sentiment across languages
         - **Market Research**: Understand international customer opinions
-        ### 📊 Model Information
-        - **Base Model**: `distilbert-base-multilingual-cased`
-        - **Fine-tuned on**: Synthetic multilingual data
-        - **Languages**: 23 languages including major world languages
-        - **Accuracy**: State-of-the-art performance across languages
-        ### 🔧 Technical Details
-        The model uses a transformer architecture fine-tuned specifically for sentiment analysis across multiple languages and cultural contexts.
         """)
 if __name__ == "__main__":
     demo.launch(
-        share=True,
         server_name="0.0.0.0",
         show_error=True
     )

 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
+import json
 class AdvancedSentimentAnalyzer:
     def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
+        print("Loading model and tokenizer...")
         self.model_name = model_name
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
+            # Use the modern pipeline syntax
+            self.classifier = pipeline(
+                "text-classification",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                top_k=None  # This replaces return_all_scores=True
+            )
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            # Fallback to basic sentiment analysis
+            self.classifier = None
         self.sentiment_map = {
             0: "Very Negative",
         }
         self.language_detection_keywords = {
+            'english': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for'],
+            'spanish': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'por'],
+            'french': ['le', 'la', 'de', 'et', 'que', 'en', 'un', 'pour'],
+            'german': ['der', 'die', 'das', 'und', 'zu', 'in', 'den', 'mit'],
+            'italian': ['il', 'la', 'di', 'e', 'che', 'in', 'un', 'per'],
+            'portuguese': ['o', 'a', 'de', 'e', 'que', 'em', 'um', 'para'],
+            'dutch': ['de', 'het', 'en', 'van', 'te', 'in', 'een', 'voor'],
+            'russian': ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с'],
+            'chinese': ['的', '是', '在', '了', '有', '和', '为', '我'],
+            'japanese': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て'],
+            'korean': ['이', '에', '는', '을', '다', '가', '로', '고'],
+            'arabic': ['ال', 'في', 'من', 'على', 'أن', 'ما', 'هو', 'إلى'],
+            'hindi': ['की', 'से', 'है', 'और', 'के', 'में', 'यह', 'को'],
+            'turkish': ['ve', 'bir', 'bu', 'ile', 'için', 'ama', 'da', 'de']
         }
+        print("Model loaded successfully!")
     def detect_language(self, text):
         """Simple language detection based on common words"""
+        if not text or not isinstance(text, str):
+            return 'Unknown'
         text_lower = text.lower()
         scores = {}
             score = sum(1 for keyword in keywords if keyword in text_lower)
             scores[lang] = score
+        # Only return a language if we have reasonable confidence
+        detected_lang = max(scores, key=scores.get) if scores and max(scores.values()) > 0 else 'unknown'
         return detected_lang.capitalize()
     def analyze_sentiment(self, text):
         """Advanced sentiment analysis with detailed metrics"""
+        if not text or not text.strip():
+            return {
+                'text': text,
+                'sentiment': 'Neutral',
+                'confidence': 0.0,
+                'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
+                'sentiment_score': 0,
+                'language': 'Unknown',
+                'emotional_intensity': 0.0,
+                'error': 'No text provided'
+            }
         try:
+            # Get predictions using modern pipeline syntax
             predictions = self.classifier(text)[0]
+            # Convert to structured format - ensure proper mapping
+            sentiment_scores = {}
+            for pred in predictions:
+                label = pred['label']
+                score = pred['score']
+                # Map label to our sentiment scale
+                if 'very negative' in label.lower() or label == 'LABEL_0':
+                    sentiment_scores["Very Negative"] = score
+                elif 'negative' in label.lower() or label == 'LABEL_1':
+                    sentiment_scores["Negative"] = score
+                elif 'neutral' in label.lower() or label == 'LABEL_2':
+                    sentiment_scores["Neutral"] = score
+                elif 'positive' in label.lower() or label == 'LABEL_3':
+                    sentiment_scores["Positive"] = score
+                elif 'very positive' in label.lower() or label == 'LABEL_4':
+                    sentiment_scores["Very Positive"] = score
+                else:
+                    # Fallback: assign by order
+                    sentiment_keys = list(self.sentiment_map.values())
+                    for i, key in enumerate(sentiment_keys):
+                        if key not in sentiment_scores:
+                            sentiment_scores[key] = score
+                            break
+            # Ensure all sentiment categories are present
+            for sentiment in self.sentiment_map.values():
+                if sentiment not in sentiment_scores:
+                    sentiment_scores[sentiment] = 0.0
             # Determine dominant sentiment
             dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
             }
         except Exception as e:
+            print(f"Error in sentiment analysis: {e}")
             return {
                 'text': text,
                 'sentiment': 'Neutral',
                 'emotional_intensity': 0.0,
                 'error': str(e)
             }
     def batch_analyze(self, texts):
         """Analyze each text in order and return the list of results.

         A progress line is printed for every tenth item (indices 0, 10,
         20, ...). Each item is passed to ``self.analyze_sentiment`` and
         the returned values are collected in input order.
         """
         analyzed = []
         for position, entry in enumerate(texts):
             at_checkpoint = position % 10 == 0
             if at_checkpoint:
                 print(f"Processing {position}/{len(texts)}...")
             analyzed.append(self.analyze_sentiment(entry))
         return analyzed
 # Initialize analyzer
+print("Initializing sentiment analyzer...")
 analyzer = AdvancedSentimentAnalyzer()
 def create_sentiment_chart(scores):
     """Build a bar chart of per-sentiment scores.

     Args:
         scores: Mapping of sentiment label to score. Keys become the
             x-axis categories, values the bar heights; each value is
             also rendered on its bar as a percentage.

     Returns:
         The plotly figure, or ``None`` if building it raised (the error
         is printed and swallowed).
     """
     try:
         labels = list(scores.keys())
         values = list(scores.values())
         # Look colours up with the same '#FFD93D' default the other colour
         # lookups in this file use, so an unexpected label yields a bar in
         # the fallback colour instead of a KeyError that drops the chart.
         bar_colors = [
             analyzer.sentiment_colors.get(sent, '#FFD93D') for sent in labels
         ]
         fig = go.Figure(data=[
             go.Bar(
                 x=labels,
                 y=values,
                 marker_color=bar_colors,
                 text=[f'{score:.1%}' for score in values],
                 textposition='auto',
             )
         ])
         fig.update_layout(
             title="Sentiment Distribution",
             xaxis_title="Sentiment",
             yaxis_title="Confidence Score",
             template="plotly_white",
             height=300
         )
         return fig
     except Exception as e:
         # Best-effort: report the failure and signal "no chart" to the caller.
         print(f"Error creating chart: {e}")
         return None
 def create_radar_chart(scores):
     """Build a filled polar (radar) chart from a label-to-score mapping.

     Keys of ``scores`` are used as the angular categories and values as
     the radii; the radial axis is shown with a fixed 0-1 range. Returns
     the figure, or ``None`` after printing the error if anything raised.
     """
     try:
         trace = go.Scatterpolar(
             r=list(scores.values()),
             theta=list(scores.keys()),
             fill='toself',
             line={'color': '#4ECDC4'},
             marker={'size': 8},
         )
         radar = go.Figure(data=trace)
         radial_axis = {'visible': True, 'range': [0, 1]}
         radar.update_layout(
             polar={'radialaxis': radial_axis},
             showlegend=False,
             template="plotly_white",
             height=300,
         )
         return radar
     except Exception as e:
         print(f"Error creating radar chart: {e}")
         return None
 def analyze_single_review(review_text):
     """Analyze single review with enhanced visualization"""
+    if not review_text or not review_text.strip():
+        return "❌ Please enter some text to analyze.", None, None
+    print(f"Analyzing: {review_text[:100]}...")
     result = analyzer.analyze_sentiment(review_text)
     # Create main output
+    sentiment_color = analyzer.sentiment_colors.get(result['sentiment'], '#FFD93D')
     output_html = f"""
     <div style="padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {sentiment_color}20, {sentiment_color}40); border-left: 5px solid {sentiment_color};">
 def analyze_csv_file(csv_file):
     """Analyze reviews from CSV file with advanced analytics"""
     try:
+        if csv_file is None:
+            return "❌ Please upload a CSV file.", None, None
+        print("Reading CSV file...")
         df = pd.read_csv(csv_file.name)
         # Assume first column contains reviews
         review_column = df.columns[0]
         reviews = df[review_column].dropna().tolist()
+        if not reviews:
+            return "❌ No reviews found in the CSV file.", None, None
         print(f"Analyzing {len(reviews)} reviews...")
         results = analyzer.batch_analyze(reviews)
             go.Pie(
                 labels=sentiment_counts.index,
                 values=sentiment_counts.values,
+                marker_colors=[analyzer.sentiment_colors.get(sent, '#FFD93D') for sent in sentiment_counts.index]
             ), 1, 1
         )
+        # Language pie chart (top 10 languages)
+        top_languages = language_distribution.head(10)
         fig.add_trace(
+            go.Pie(labels=top_languages.index, values=top_languages.values),
             1, 2
         )
         # Generate comprehensive summary
         summary = f"""
+        ## 📊 BATCH ANALYSIS COMPLETE
         **Dataset Overview:**
+        - 📝 **Total Reviews Analyzed:** {len(results):,}
+        - 🌐 **Languages Detected:** {len(language_distribution)}
+        - ⏱️ **Analysis Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
         **Sentiment Breakdown:**
+        - 🟢 **Very Positive:** {sentiment_counts.get('Very Positive', 0):,}
+        - 🟡 **Positive:** {sentiment_counts.get('Positive', 0):,}
+        - ⚪ **Neutral:** {sentiment_counts.get('Neutral', 0):,}
+        - 🟠 **Negative:** {sentiment_counts.get('Negative', 0):,}
+        - 🔴 **Very Negative:** {sentiment_counts.get('Very Negative', 0):,}
         **Performance Metrics:**
+        - 📈 **Average Confidence:** {avg_confidence:.1%}
+        - 🎯 **Average Sentiment Score:** {avg_sentiment_score:.2f}
+        - 🏆 **Most Common Language:** {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}
         **Files Generated:**
+        - 💾 **Results CSV:** `{output_filename}`
+        - 📊 **Analytics Dashboard:** See chart below
         **Next Steps:**
         - Download the CSV for detailed analysis
         return summary, output_filename, fig
     except Exception as e:
+        error_msg = f"❌ Error processing file: {str(e)}"
+        print(error_msg)
+        return error_msg, None, None
+# Create Gradio interface with compatibility for Gradio 3.x
 with gr.Blocks(
     title="🌍 Multilingual Sentiment Analyzer",
     css="""
     .gradio-container {
         max-width: 1200px !important;
+        margin: 0 auto;
+    }
+    .container {
+        max-width: 1200px;
+        margin: 0 auto;
     }
     """
 ) as demo:
                 single_review = gr.Textbox(
                     label="Enter text in any supported language",
                     placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
+                    lines=4
                 )
                 analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                 gr.Markdown("### 📤 Upload CSV File")
                 csv_upload = gr.File(
                     label="Upload CSV file with reviews",
+                    file_types=[".csv"]
                 )
                 gr.Markdown("""
                 **CSV Format Requirements:**
         - **Customer Support**: Analyze support tickets and feedback
         - **Social Media**: Monitor brand sentiment across languages
         - **Market Research**: Understand international customer opinions
         """)
+# Launch the application
 if __name__ == "__main__":
     # Launch settings gathered in one mapping and passed as keyword arguments.
     launch_options = {
         "share": False,
         "server_name": "0.0.0.0",
         "debug": True,
         "show_error": True,
     }
     demo.launch(**launch_options)