Spaces:

zaid002
/

ai-echo-sentiment-analysis

Sleeping

App Files Files Community

zaid002 commited on Oct 6, 2025

Commit

b9fbeb2

verified ·

1 Parent(s): ce5f8c0

Create app.py

Browse files

Files changed (1) hide show

app.py +414 -0

app.py ADDED Viewed

	@@ -0,0 +1,414 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+import pickle
+import plotly.express as px
+import os
+# Download NLTK data
+@st.cache_resource
+def download_nltk_data():
+    nltk.download('punkt', quiet=True)
+    nltk.download('stopwords', quiet=True)
+    nltk.download('wordnet', quiet=True)
+download_nltk_data()
+class DataPreprocessor:
+    def __init__(self):
+        self.lemmatizer = WordNetLemmatizer()
+        self.stop_words = set(stopwords.words('english'))
+    def clean_text(self, text):
+        if text is None or text != text:  # Check for NaN
+            return ""
+        # Convert to lowercase
+        text = str(text).lower()
+        # Remove special characters and digits
+        text = re.sub(r'[^a-zA-Z\s]', '', text)
+        # Remove extra whitespace
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text
+    def tokenize_and_lemmatize(self, text):
+        tokens = word_tokenize(text)
+        tokens = [self.lemmatizer.lemmatize(token) for token in tokens
+                 if token not in self.stop_words and len(token) > 2]
+        return ' '.join(tokens)
+class SentimentAnalyzerApp:
+    def __init__(self):
+        self.preprocessor = DataPreprocessor()
+        self.model = None
+        self.vectorizer = None
+        self.df = None
+    def load_sample_data(self):
+        """Create sample data for demo purposes"""
+        sample_data = {
+            'date': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
+            'review': [
+                'This app is absolutely amazing and very helpful!',
+                'The application works okay but could be better.',
+                'I am very disappointed with the performance.',
+                'Excellent features and great user interface.',
+                'Not what I expected, needs improvement.'
+            ],
+            'rating': [5, 3, 1, 5, 2],
+            'platform': ['Web', 'Mobile', 'Web', 'Mobile', 'Web'],
+            'language': ['en', 'en', 'en', 'en', 'en'],
+            'location': ['USA', 'UK', 'Canada', 'Australia', 'India'],
+            'verified_purchase': ['Yes', 'No', 'Yes', 'Yes', 'No'],
+            'helpful_votes': [10, 2, 5, 8, 1]
+        }
+        self.df = pd.DataFrame(sample_data)
+        self.df['date'] = pd.to_datetime(self.df['date'])
+        # Create sentiment labels
+        def get_sentiment(rating):
+            if rating >= 4:
+                return 'Positive'
+            elif rating == 3:
+                return 'Neutral'
+            else:
+                return 'Negative'
+        self.df['sentiment'] = self.df['rating'].apply(get_sentiment)
+        return True
+    def load_model(self):
+        """Try to load model, but use simulated predictions if not available"""
+        try:
+            model_path = 'models/sentiment_model.pkl'
+            if os.path.exists(model_path):
+                with open(model_path, 'rb') as f:
+                    model_data = pickle.load(f)
+                self.model = model_data['model']
+                self.vectorizer = model_data['vectorizer']
+                return True
+            else:
+                st.info("🤖 Using simulated sentiment analysis for demo. Upload a trained model for accurate predictions.")
+                return False
+        except Exception as e:
+            st.warning(f"Model loading failed: {e}. Using simulated mode.")
+            return False
+    def predict_sentiment(self, text):
+        """Predict sentiment for new text"""
+        if self.model is not None and self.vectorizer is not None:
+            # Use actual model
+            cleaned_text = self.preprocessor.clean_text(text)
+            processed_text = self.preprocessor.tokenize_and_lemmatize(cleaned_text)
+            text_vector = self.vectorizer.transform([processed_text])
+            prediction = self.model.predict(text_vector)[0]
+            probability = self.model.predict_proba(text_vector)[0]
+            return prediction, dict(zip(self.model.classes_, probability))
+        else:
+            # Simulate prediction
+            positive_words = ['good', 'great', 'excellent', 'amazing', 'love', 'awesome', 'perfect', 'fantastic', 'wonderful', 'outstanding']
+            negative_words = ['bad', 'terrible', 'awful', 'hate', 'worst', 'disappointed', 'poor', 'horrible', 'waste', 'useless']
+            text_lower = text.lower()
+            positive_count = sum(1 for word in positive_words if word in text_lower)
+            negative_count = sum(1 for word in negative_words if word in text_lower)
+            if positive_count > negative_count:
+                prediction = "Positive"
+                confidence = min(0.8 + (positive_count * 0.05), 0.95)
+            elif negative_count > positive_count:
+                prediction = "Negative"
+                confidence = min(0.8 + (negative_count * 0.05), 0.95)
+            else:
+                prediction = "Neutral"
+                confidence = 0.6
+            # Simulate probabilities
+            if prediction == "Positive":
+                probabilities = {'Positive': confidence, 'Neutral': (1-confidence)/2, 'Negative': (1-confidence)/2}
+            elif prediction == "Negative":
+                probabilities = {'Positive': (1-confidence)/2, 'Neutral': (1-confidence)/2, 'Negative': confidence}
+            else:
+                probabilities = {'Positive': 0.2, 'Neutral': confidence, 'Negative': 0.2}
+            return prediction, probabilities
+    def run(self):
+        """Main application"""
+        st.set_page_config(
+            page_title="AI Echo - Sentiment Analysis",
+            page_icon="🤖",
+            layout="wide",
+            initial_sidebar_state="expanded"
+        )
+        # Custom CSS
+        st.markdown("""
+        <style>
+        .main-header {
+            font-size: 2.5rem;
+            color: #1f77b4;
+            text-align: center;
+            margin-bottom: 2rem;
+        }
+        .metric-card {
+            background-color: #f0f2f6;
+            padding: 1rem;
+            border-radius: 10px;
+            border-left: 4px solid #1f77b4;
+        }
+        </style>
+        """, unsafe_allow_html=True)
+        st.markdown('<h1 class="main-header">🤖 AI Echo: Sentiment Analysis</h1>', unsafe_allow_html=True)
+        st.markdown("### Customer Review Sentiment Analysis Dashboard")
+        # Initialize and load data
+        if 'data_loaded' not in st.session_state:
+            self.load_sample_data()
+            st.session_state.data_loaded = True
+        if 'model_loaded' not in st.session_state:
+            st.session_state.model_loaded = self.load_model()
+        # Sidebar
+        st.sidebar.title("Navigation")
+        page = st.sidebar.selectbox(
+            "Choose a page:",
+            ["📊 Overview", "🤖 Model Demo", "📈 Analysis", "💡 Insights"]
+        )
+        # Page routing
+        if page == "📊 Overview":
+            self.show_overview()
+        elif page == "🤖 Model Demo":
+            self.show_model_demo()
+        elif page == "📈 Analysis":
+            self.show_analysis()
+        else:
+            self.show_insights()
+    def show_overview(self):
+        """Overview page"""
+        st.header("📊 Project Overview")
+        # Key metrics
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            total_reviews = len(self.df)
+            st.metric("Total Reviews", total_reviews)
+        with col2:
+            avg_rating = self.df['rating'].mean()
+            st.metric("Average Rating", f"{avg_rating:.2f} ⭐")
+        with col3:
+            positive_pct = (self.df['sentiment'] == 'Positive').mean() * 100
+            st.metric("Positive Reviews", f"{positive_pct:.1f}%")
+        with col4:
+            helpful_reviews = self.df['helpful_votes'].sum()
+            st.metric("Total Helpful Votes", helpful_reviews)
+        st.markdown("---")
+        # Visualizations
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("Review Rating Distribution")
+            rating_counts = self.df['rating'].value_counts().sort_index()
+            fig = px.bar(rating_counts, x=rating_counts.index, y=rating_counts.values,
+                        labels={'x': 'Rating', 'y': 'Count'},
+                        title='Distribution of Ratings')
+            st.plotly_chart(fig, use_container_width=True)
+        with col2:
+            st.subheader("Sentiment Distribution")
+            sentiment_counts = self.df['sentiment'].value_counts()
+            fig = px.pie(values=sentiment_counts.values, names=sentiment_counts.index,
+                        title='Sentiment Distribution')
+            st.plotly_chart(fig, use_container_width=True)
+        st.info("💡 This is a demo with sample data. Upload your dataset to the 'data' folder for real analysis.")
+    def show_model_demo(self):
+        """Interactive model demo"""
+        st.header("🤖 Sentiment Analysis Demo")
+        st.markdown("""
+        Enter your own review text below to analyze its sentiment.
+        The model will predict whether the sentiment is **Positive**, **Neutral**, or **Negative**.
+        """)
+        # Text input
+        user_text = st.text_area(
+            "Enter your review text:",
+            height=150,
+            placeholder="Type your review here... Example: 'This app is amazing and very helpful!'",
+            value="I love this application! It's incredibly useful and well-designed."
+        )
+        if user_text:
+            with st.spinner("Analyzing sentiment..."):
+                prediction, probabilities = self.predict_sentiment(user_text)
+            # Display results
+            st.subheader("🎯 Prediction Results")
+            col1, col2 = st.columns([1, 2])
+            with col1:
+                sentiment_colors = {
+                    'Positive': '🟢',
+                    'Neutral': '🟡',
+                    'Negative': '🔴'
+                }
+                st.metric(
+                    "Predicted Sentiment",
+                    f"{sentiment_colors.get(prediction, '⚪')} {prediction}"
+                )
+            with col2:
+                st.subheader("Confidence Scores")
+                for sentiment, prob in probabilities.items():
+                    st.write(f"**{sentiment}**: {prob:.1%}")
+                    st.progress(prob)
+            if self.model is None:
+                st.info("🔬 Currently using simulated analysis. Upload a trained model file for more accurate predictions.")
+        # Example reviews
+        st.markdown("---")
+        st.subheader("💡 Try these examples:")
+        examples = [
+            "This app is absolutely fantastic! It helps me so much with my work.",
+            "The application is okay, but it could use some improvements.",
+            "I'm very disappointed with the performance and customer service.",
+            "Outstanding features and excellent user experience!",
+            "It's mediocre, nothing special about it."
+        ]
+        cols = st.columns(3)
+        for i, example in enumerate(examples):
+            with cols[i % 3]:
+                if st.button(f"'{example[:30]}...'", use_container_width=True):
+                    st.session_state.demo_text = example
+                    st.rerun()
+    def show_analysis(self):
+        """Analysis page"""
+        st.header("📈 Data Analysis")
+        # Platform analysis
+        st.subheader("Platform Comparison")
+        platform_counts = self.df['platform'].value_counts()
+        fig = px.bar(platform_counts, x=platform_counts.index, y=platform_counts.values,
+                    labels={'x': 'Platform', 'y': 'Number of Reviews'},
+                    title='Reviews by Platform')
+        st.plotly_chart(fig, use_container_width=True)
+        # Sentiment by platform
+        platform_sentiment = pd.crosstab(self.df['platform'], self.df['sentiment'], normalize='index') * 100
+        fig = px.bar(platform_sentiment, barmode='stack',
+                    title='Sentiment Distribution by Platform (%)')
+        st.plotly_chart(fig, use_container_width=True)
+        # Word clouds
+        st.subheader("📝 Word Clouds")
+        positive_text = ' '.join(self.df[self.df['sentiment'] == 'Positive']['review'])
+        negative_text = ' '.join(self.df[self.df['sentiment'] == 'Negative']['review'])
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown("**Positive Reviews**")
+            if positive_text.strip():
+                wordcloud = WordCloud(width=400, height=300, background_color='white').generate(positive_text)
+                fig, ax = plt.subplots(figsize=(10, 6))
+                ax.imshow(wordcloud, interpolation='bilinear')
+                ax.axis('off')
+                st.pyplot(fig)
+            else:
+                st.info("No positive reviews available")
+        with col2:
+            st.markdown("**Negative Reviews**")
+            if negative_text.strip():
+                wordcloud = WordCloud(width=400, height=300, background_color='white').generate(negative_text)
+                fig, ax = plt.subplots(figsize=(10, 6))
+                ax.imshow(wordcloud, interpolation='bilinear')
+                ax.axis('off')
+                st.pyplot(fig)
+            else:
+                st.info("No negative reviews available")
+    def show_insights(self):
+        """Insights page"""
+        st.header("💡 Business Insights & Recommendations")
+        # Key metrics
+        positive_pct = (self.df['sentiment'] == 'Positive').mean() * 100
+        avg_rating = self.df['rating'].mean()
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.metric("Overall Satisfaction", f"{positive_pct:.1f}%")
+        with col2:
+            st.metric("Average Rating", f"{avg_rating:.2f} ⭐")
+        with col3:
+            verified_ratio = (self.df['verified_purchase'] == 'Yes').mean() * 100
+            st.metric("Verified Reviews", f"{verified_ratio:.1f}%")
+        st.markdown("---")
+        # Recommendations
+        st.subheader("🎯 Actionable Recommendations")
+        recommendations = [
+            "**Monitor Negative Reviews**: Regularly analyze 1-2 star reviews for common issues and pain points",
+            "**Platform Optimization**: Ensure consistent user experience across all platforms (Web, Mobile, etc.)",
+            "**Feature Development**: Prioritize features frequently mentioned in positive reviews",
+            "**Customer Support**: Implement sentiment-based routing for support tickets",
+            "**Regional Strategy**: Analyze location-based sentiment for market-specific improvements",
+            "**Version Tracking**: Monitor sentiment changes across different application versions"
+        ]
+        for i, recommendation in enumerate(recommendations, 1):
+            st.markdown(f"{i}. {recommendation}")
+        st.markdown("---")
+        # Technical setup
+        st.subheader("🔧 Technical Setup")
+        st.info("""
+        **To use with your own data:**
+        1. Upload your CSV file to the `data/` folder
+        2. Train and save your model as `models/sentiment_model.pkl`
+        3. The app will automatically detect and use your files
+        **Current mode:** Using sample data with simulated sentiment analysis
+        """)
+# Run the app
+if __name__ == "__main__":
+    app = SentimentAnalyzerApp()
+    app.run()