sentiment-analysis2

Sleeping

App Files Files Community

entropy25 committed on Jul 24, 2025

Commit

3644b14

verified ·

1 Parent(s): 7f8f92d

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -33

app.py CHANGED Viewed

@@ -19,6 +19,7 @@ from contextlib import contextmanager
 import gc
 import pandas as pd
 from lime.lime_text import LimeTextExplainer
 @dataclass
 class Config:
@@ -156,10 +157,11 @@ class HistoryManager:
 # Core Analysis Engine
 class SentimentEngine:
-    """Streamlined sentiment analysis engine"""
     def __init__(self):
         self.model_manager = ModelManager()
         self.lime_explainer = LimeTextExplainer(class_names=['Negative', 'Positive'])
     def predict_proba(self, texts):
         """Prediction function for LIME"""
@@ -212,6 +214,37 @@ class SentimentEngine:
             logger.error(f"LIME extraction failed: {e}")
             return []
     def create_heatmap_html(self, text: str, word_scores: Dict[str, float]) -> str:
         """Create HTML heatmap visualization"""
         words = text.split()
@@ -244,20 +277,21 @@ class SentimentEngine:
         html_parts.append('</div>')
         return ''.join(html_parts)
-    @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'key_words': [], 'heatmap_html': ''})
     def analyze_single_advanced(self, text: str) -> Dict:
-        """Advanced single text analysis with LIME explanation"""
         if not text.strip():
             raise ValueError("Empty text")
         probs = self.predict_proba([text])[0]
         sentiment = "Positive" if probs[1] > probs[0] else "Negative"
-        # Extract key words using LIME
-        key_words = self.extract_key_words_lime(text)
-        # Create heatmap HTML
-        word_scores_dict = dict(key_words)
         heatmap_html = self.create_heatmap_html(text, word_scores_dict)
         return {
@@ -265,7 +299,8 @@ class SentimentEngine:
             'confidence': float(probs.max()),
             'pos_prob': float(probs[1]),
             'neg_prob': float(probs[0]),
-            'key_words': key_words,
             'heatmap_html': heatmap_html
         }
@@ -362,24 +397,54 @@ class PlotFactory:
     @staticmethod
     @handle_errors(default_return=None)
-    def create_keyword_chart(key_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[plt.Figure]:
-        """Create horizontal bar chart for key contributing words"""
-        if not key_words:
             return None
         with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
             ax = fig.add_subplot(111)
-            words = [word for word, score in key_words]
-            scores = [score for word, score in key_words]
             color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg']
             bars = ax.barh(range(len(words)), scores, color=color, alpha=0.7)
             ax.set_yticks(range(len(words)))
             ax.set_yticklabels(words)
-            ax.set_xlabel('Attention Weight')
-            ax.set_title(f'Top Contributing Words ({sentiment})', fontweight='bold')
             for i, (bar, score) in enumerate(zip(bars, scores)):
                 ax.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2.,
@@ -580,11 +645,11 @@ class SentimentApp:
         return result_text, prob_plot, gauge_plot, cloud_plot
-    @handle_errors(default_return=("Please enter text", None, None, None, None, None))
     def analyze_single_advanced(self, text: str, theme: str = 'default'):
-        """Advanced single text analysis with LIME explanation"""
         if not text.strip():
-            return "Please enter text", None, None, None, None, None
         result = self.engine.analyze_single_advanced(text)
@@ -595,18 +660,18 @@ class SentimentApp:
         })
         theme_ctx = ThemeContext(theme)
-        probs = np.array([result['neg_prob'], result['pos_prob']])
-        prob_plot = PlotFactory.create_sentiment_bars(probs, theme_ctx)
-        gauge_plot = PlotFactory.create_confidence_gauge(result['confidence'], result['sentiment'], theme_ctx)
-        cloud_plot = PlotFactory.create_wordcloud(text, result['sentiment'], theme_ctx)
-        keyword_plot = PlotFactory.create_keyword_chart(result['key_words'], result['sentiment'], theme_ctx)
-        key_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['key_words'][:5]])
         result_text = (f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})\n"
-                      f"Key Words: {key_words_str}")
-        return result_text, prob_plot, gauge_plot, cloud_plot, keyword_plot, result['heatmap_html']
     @handle_errors(default_return=None)
     def analyze_batch(self, reviews: str, progress=None):
@@ -726,16 +791,14 @@ def create_interface():
                     )
                 with gr.Column():
-                    adv_result_output = gr.Textbox(label="Analysis Result", lines=3)
-                    heatmap_output = gr.HTML(label="Word Importance Heatmap")
             with gr.Row():
-                adv_prob_plot = gr.Plot(label="Probabilities")
-                adv_gauge_plot = gr.Plot(label="Confidence")
             with gr.Row():
-                adv_wordcloud_plot = gr.Plot(label="Word Cloud")
-                keyword_plot = gr.Plot(label="Key Contributing Words")
         with gr.Tab("Batch Analysis"):
             with gr.Row():
@@ -778,7 +841,7 @@ def create_interface():
         adv_analyze_btn.click(
             app.analyze_single_advanced,
             inputs=[adv_text_input, adv_theme_selector],
-            outputs=[adv_result_output, adv_prob_plot, adv_gauge_plot, adv_wordcloud_plot, keyword_plot, heatmap_output]
         )
         # Event bindings for Batch Analysis

 import gc
 import pandas as pd
 from lime.lime_text import LimeTextExplainer
+import shap
 @dataclass
 class Config:
 # Core Analysis Engine
 class SentimentEngine:
+    """Streamlined sentiment analysis engine with LIME and SHAP"""
     def __init__(self):
         self.model_manager = ModelManager()
         self.lime_explainer = LimeTextExplainer(class_names=['Negative', 'Positive'])
+        self.shap_explainer = None
     def predict_proba(self, texts):
         """Prediction function for LIME"""
             logger.error(f"LIME extraction failed: {e}")
             return []
+    def extract_key_words_shap(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
+        """Advanced keyword extraction using SHAP"""
+        try:
+            # Initialize SHAP explainer if not already done
+            if self.shap_explainer is None:
+                self.shap_explainer = shap.Explainer(self.predict_proba, self.model_manager.tokenizer)
+            # Get SHAP values
+            shap_values = self.shap_explainer([text])
+            # Extract word importance
+            words = text.split()
+            if len(shap_values.values) > 0 and len(shap_values.values[0]) > 0:
+                # Get positive class SHAP values
+                pos_shap_values = shap_values.values[0][:, 1] if len(shap_values.values[0].shape) > 1 else shap_values.values[0]
+                word_scores = []
+                for i, word in enumerate(words[:len(pos_shap_values)]):
+                    clean_word = re.sub(r'[^\w]', '', word.lower())
+                    if len(clean_word) >= config.MIN_WORD_LENGTH:
+                        word_scores.append((clean_word, abs(float(pos_shap_values[i]))))
+                word_scores.sort(key=lambda x: x[1], reverse=True)
+                return word_scores[:top_k]
+            return []
+        except Exception as e:
+            logger.error(f"SHAP extraction failed: {e}")
+            return []
     def create_heatmap_html(self, text: str, word_scores: Dict[str, float]) -> str:
         """Create HTML heatmap visualization"""
         words = text.split()
         html_parts.append('</div>')
         return ''.join(html_parts)
+    @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'lime_words': [], 'shap_words': [], 'heatmap_html': ''})
     def analyze_single_advanced(self, text: str) -> Dict:
+        """Advanced single text analysis with LIME and SHAP explanation"""
         if not text.strip():
             raise ValueError("Empty text")
         probs = self.predict_proba([text])[0]
         sentiment = "Positive" if probs[1] > probs[0] else "Negative"
+        # Extract key words using both LIME and SHAP
+        lime_words = self.extract_key_words_lime(text)
+        shap_words = self.extract_key_words_shap(text)
+        # Create heatmap HTML using LIME results
+        word_scores_dict = dict(lime_words)
         heatmap_html = self.create_heatmap_html(text, word_scores_dict)
         return {
             'confidence': float(probs.max()),
             'pos_prob': float(probs[1]),
             'neg_prob': float(probs[0]),
+            'lime_words': lime_words,
+            'shap_words': shap_words,
             'heatmap_html': heatmap_html
         }
     @staticmethod
     @handle_errors(default_return=None)
+    def create_lime_keyword_chart(lime_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[plt.Figure]:
+        """Create horizontal bar chart for LIME key contributing words.
+
+        Args:
+            lime_words: (word, score) pairs, plotted in the order given.
+            sentiment: Predicted label; 'Positive' selects the theme's 'pos'
+                colour, any other value selects 'neg'.
+            theme: Theme whose ``colors`` mapping supplies the bar colour.
+
+        Returns:
+            The figure, or None when ``lime_words`` is empty.
+        """
+        if not lime_words:
             return None
         with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
             ax = fig.add_subplot(111)
+            words = [word for word, score in lime_words]
+            scores = [score for word, score in lime_words]
             color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg']
             bars = ax.barh(range(len(words)), scores, color=color, alpha=0.7)
             ax.set_yticks(range(len(words)))
             ax.set_yticklabels(words)
+            ax.set_xlabel('LIME Attention Weight')
+            ax.set_title(f'LIME: Top Contributing Words ({sentiment})', fontweight='bold')
+            # Print each score just past the end of its bar.
+            for i, (bar, score) in enumerate(zip(bars, scores)):
+                ax.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2.,
+                       f'{score:.3f}', ha='left', va='center', fontsize=9)
+            # Inverted y-axis puts the first entry at the top of the chart.
+            ax.invert_yaxis()
+            ax.grid(axis='x', alpha=0.3)
+            fig.tight_layout()
+            # NOTE(review): fig is returned from inside managed_figure; confirm
+            # the context manager does not close it before the caller renders it.
+            return fig
+    @staticmethod
+    @handle_errors(default_return=None)
+    def create_shap_keyword_chart(shap_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[plt.Figure]:
+        """Create horizontal bar chart for SHAP key contributing words"""
+        if not shap_words:
+            return None
+        with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
+            ax = fig.add_subplot(111)
+            words = [word for word, score in shap_words]
+            scores = [score for word, score in shap_words]
+            color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg']
+            bars = ax.barh(range(len(words)), scores, color=color, alpha=0.7)
+            ax.set_yticks(range(len(words)))
+            ax.set_yticklabels(words)
+            ax.set_xlabel('SHAP Value')
+            ax.set_title(f'SHAP: Top Contributing Words ({sentiment})', fontweight='bold')
             for i, (bar, score) in enumerate(zip(bars, scores)):
                 ax.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2.,
         return result_text, prob_plot, gauge_plot, cloud_plot
+    @handle_errors(default_return=("Please enter text", None, None, None))
     def analyze_single_advanced(self, text: str, theme: str = 'default'):
+        """Advanced single text analysis with LIME and SHAP explanation"""
         if not text.strip():
+            return "Please enter text", None, None, None
         result = self.engine.analyze_single_advanced(text)
         })
         theme_ctx = ThemeContext(theme)
+        lime_plot = PlotFactory.create_lime_keyword_chart(result['lime_words'], result['sentiment'], theme_ctx)
+        shap_plot = PlotFactory.create_shap_keyword_chart(result['shap_words'], result['sentiment'], theme_ctx)
+        lime_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['lime_words'][:5]])
+        shap_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['shap_words'][:5]])
         result_text = (f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})\n"
+                      f"LIME Key Words: {lime_words_str}\n"
+                      f"SHAP Key Words: {shap_words_str}")
+        return result_text, lime_plot, shap_plot, result['heatmap_html']
     @handle_errors(default_return=None)
     def analyze_batch(self, reviews: str, progress=None):
                     )
                 with gr.Column():
+                    adv_result_output = gr.Textbox(label="Analysis Result", lines=4)
             with gr.Row():
+                lime_plot = gr.Plot(label="LIME: Key Contributing Words")
+                shap_plot = gr.Plot(label="SHAP: Key Contributing Words")
             with gr.Row():
+                heatmap_output = gr.HTML(label="Word Importance Heatmap (LIME-based)")
         with gr.Tab("Batch Analysis"):
             with gr.Row():
         adv_analyze_btn.click(
             app.analyze_single_advanced,
             inputs=[adv_text_input, adv_theme_selector],
+            outputs=[adv_result_output, lime_plot, shap_plot, heatmap_output]
         )
         # Event bindings for Batch Analysis