Spaces:

RohitBh
/

Sentiment_Analysis

Sleeping

App Files Files Community

RohitBh committed on Apr 30, 2024

Commit

9e01c0a

verified ·

1 Parent(s): a94035e

Update app.py

Browse files

Files changed (1) hide show

app.py +428 -78

app.py CHANGED Viewed

@@ -4,110 +4,460 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from textblob import TextBlob
 from transformers import pipeline
 import matplotlib.pyplot as plt
 import os
 from wordcloud import WordCloud
-import gradio as gr
-# Function to analyze sentiment using the custom Hugging Face pipeline
-def analyze_sentiment_hf(text):
-    hf_pipeline = pipeline("sentiment-analysis")
     if len(text) > 512:
-        text = text[:511]
-    sentiment_result = hf_pipeline(text)
-    sentiment_label = sentiment_result[0]["label"]
-    if sentiment_label == "LABEL_1":
         return "Positive"
-    elif sentiment_label == "LABEL_0":
         return "Negative"
-    else:
         return "Neutral"
-# Function to analyze sentiment using VADER
-def analyze_sentiment_vader(text):
-    sentiment_analyzer = SentimentIntensityAnalyzer()
-    sentiment_score = sentiment_analyzer.polarity_scores(text)["compound"]
-    if sentiment_score > 0:
         return "Positive"
-    elif sentiment_score == 0:
         return "Neutral"
     else:
         return "Negative"
-# Function to analyze sentiment using TextBlob
-def analyze_sentiment_textblob(text):
-    sentiment_analysis = TextBlob(text)
-    score = sentiment_analysis.sentiment.polarity
-    if score > 0:
         return "Positive"
-    elif score == 0:
         return "Neutral"
     else:
         return "Negative"
-# Function to display DataFrame with sentiment
-def display_results_dataframe(data_frame):
-    st.write(data_frame)
-# Function to display a pie chart of sentiment distribution
-def create_pie_chart(data_frame, sentiment_column):
-    sentiment_distribution = data_frame[sentiment_column].value_counts()
     fig, ax = plt.subplots()
-    ax.pie(sentiment_distribution, labels=sentiment_distribution.index, autopct='%1.1f%%', startangle=90)
-    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
     st.pyplot(fig)
-# Function to display word cloud based on sentiment data
-def create_word_cloud(sentiment_data):
-    wordcloud_generator = WordCloud(width=800, height=400).generate(sentiment_data)
     fig, ax = plt.subplots(figsize=(10, 5))
-    ax.imshow(wordcloud_generator, interpolation='bilinear')
-    ax.axis('off')
     st.pyplot(fig)
-# Main UI setup
-st.set_page_config(page_title="Sentiment Analysis Tool", page_icon=":bar_chart:")
-st.title("Sentiment Analysis Tool")
-# Sidebar configuration for user input options
-st.sidebar.title("Analysis Options")
-input_type = st.sidebar.selectbox("Choose Input Type", ["Text Input", "CSV Upload"])
-model_choice = st.sidebar.selectbox("Choose Sentiment Analysis Model", ["Hugging Face", "VADER", "TextBlob"])
-display_type = st.sidebar.selectbox("Choose Display Type", ["DataFrame", "Pie Chart", "Word Cloud"])
-# Process input based on user choice
-if input_type == "Text Input":
-    user_text = st.text_input("Enter text for sentiment analysis:")
-    if st.button("Analyze Sentiment"):
-        if user_text:
-            # Analyzing sentiment based on selected model
-            if model_choice == "Hugging Face":
-                sentiment = analyze_sentiment_hf(user_text)
-            elif model_choice == "VADER":
-                sentiment = analyze_sentiment_vader(user_text)
             else:
-                sentiment = analyze_sentiment_textblob(user_text)
-            st.write("Detected Sentiment:", sentiment)
-        else:
-            st.warning("Please enter some text to analyze.")
-elif input_type == "CSV Upload":
-    uploaded_file = st.file_uploader("Upload CSV file for analysis", type="csv")
-    if st.button("Start Analysis"):
-        if uploaded_file is not None:
-            data_frame = pd.read_csv(uploaded_file)
-            # Assuming the CSV has a column named 'text' for analysis
-            if 'text' in data_frame.columns:
-                data_frame['Sentiment'] = data_frame['text'].apply(lambda x: analyze_sentiment_hf(x) if model_choice == "Hugging Face" else (analyze_sentiment_vader(x) if model_choice == "VADER" else analyze_sentiment_textblob(x)))
-                if display_type == "DataFrame":
-                    display_results_dataframe(data_frame)
-                elif display_type == "Pie Chart":
-                    create_pie_chart(data_frame, 'Sentiment')
-                elif display_type == "Word Cloud":
-                    combined_text = ' '.join(data_frame['text'])
-                    create_word_cloud(combined_text)
-            else:
-                st.error("The uploaded CSV file must contain a 'text' column.")
         else:
-            st.warning("Please upload a CSV file to proceed with analysis.")

 from textblob import TextBlob
 from transformers import pipeline
 import matplotlib.pyplot as plt
+import base64
 import os
 from wordcloud import WordCloud
# Hugging Face sentiment pipeline, built once at import time and shared by
# every call to analyze_hf_sentiment.
hf_sentiment_analyzer = pipeline(
    "sentiment-analysis", model="Dmyadav2001/Sentimental-Analysis"
)

# Raw model labels mapped to the names shown in the UI.
_HF_LABEL_NAMES = {
    "LABEL_1": "Positive",
    "LABEL_0": "Negative",
    "LABEL_2": "Neutral",
}


def analyze_hf_sentiment(text):
    """Classify *text* with the Hugging Face pipeline.

    Text longer than 512 characters is cut to its first 511 characters
    before being sent to the model. Returns "Positive", "Negative" or
    "Neutral"; any other model label yields None.
    """
    clipped = text[:511] if len(text) > 512 else text
    prediction = hf_sentiment_analyzer(clipped)[0]
    return _HF_LABEL_NAMES.get(prediction["label"])
# Shared VADER analyzer, created on first use. Constructing one loads the
# lexicon, so it must not be rebuilt for every review.
_vader_analyzer = None


def analyze_vader_sentiment(text):
    """Classify *text* with VADER's compound polarity score.

    Returns "Positive" when the compound score is above zero, "Neutral"
    when it is exactly zero, and "Negative" otherwise.
    """
    global _vader_analyzer
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()
    vader_score = _vader_analyzer.polarity_scores(text)["compound"]
    if vader_score > 0:
        return "Positive"
    if vader_score == 0:
        return "Neutral"
    return "Negative"
def analyze_textblob_sentiment(text):
    """Classify *text* by the sign of its TextBlob polarity.

    Returns "Positive" for polarity above zero, "Neutral" for exactly
    zero, and "Negative" otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity == 0:
        return "Neutral"
    return "Positive" if polarity > 0 else "Negative"
def display_dataframe(df):
    """Render *df* in the Streamlit page."""
    st.write(df)
def display_pie_chart(df, column):
    """Draw a pie chart of the value counts of ``df[column]``.

    A 'Download Pie Chart' button is shown below the chart; clicking it
    reveals a download button that serves the chart as a PNG image.
    """
    import io

    sentiment_counts = df[column].value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        sentiment_counts,
        labels=sentiment_counts.index,
        autopct="%1.1f%%",
        startangle=140,
    )
    ax.axis("equal")
    st.pyplot(fig)
    if st.button('Download Pie Chart'):
        # Render this exact figure in memory: no temp file is left in the
        # working directory and no file handle is left open.
        buffer = io.BytesIO()
        fig.savefig(buffer, format="png")
        st.download_button(
            label='Download Pie Chart Image',
            data=buffer.getvalue(),
            file_name='pie_chart.png',
            mime='image/png',
        )
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
def display_wordcloud(text_data):
    """Draw a word cloud generated from the string *text_data*.

    A 'Download Word Cloud' button is shown below the image; clicking it
    reveals a download button that serves the image as a PNG.
    """
    import io

    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(
        text_data
    )
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    if st.button('Download Word Cloud'):
        # Render this exact figure in memory: no temp file is left in the
        # working directory and no file handle is left open.
        buffer = io.BytesIO()
        fig.savefig(buffer, format="png")
        st.download_button(
            label='Download Word Cloud Image',
            data=buffer.getvalue(),
            file_name='word_cloud.png',
            mime='image/png',
        )
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
def download_csv(df):
    """Show a 'Download CSV File' link that serves *df* as a CSV.

    The CSV (without the index) is base64-encoded and embedded in the
    link as a data URI, so nothing is written to disk.
    """
    encoded = base64.b64encode(df.to_csv(index=False).encode()).decode()
    link = (
        '<a href="data:file/csv;base64,'
        + encoded
        + '" download="sentiment_analysis_results.csv">Download CSV File</a>'
    )
    st.markdown(link, unsafe_allow_html=True)
def count_reviews_with_keywords(df, keywords):
    """Count reviews mentioning each keyword, split by sentiment.

    Matching is a case-insensitive substring test of each keyword against
    the ``review_text`` column. Rows whose ``review_text`` is not a string
    (for example missing values) are skipped.

    Args:
        df: DataFrame with ``review_text`` and ``Sentiment`` columns.
        keywords: Iterable of keyword strings to look for.

    Returns:
        Dict mapping each keyword to
        ``{"Positive": int, "Negative": int, "Total": int}``. "Total" also
        includes reviews whose sentiment is neither Positive nor Negative.
    """
    keyword_counts = {
        keyword: {"Positive": 0, "Negative": 0, "Total": 0} for keyword in keywords
    }
    # Lowercase each keyword once instead of once per review.
    needles = [(keyword, keyword.lower()) for keyword in keywords]
    for review_text, sentiment in zip(df["review_text"], df["Sentiment"]):
        if not isinstance(review_text, str):
            continue
        haystack = review_text.lower()
        for keyword, needle in needles:
            if needle in haystack:
                counts = keyword_counts[keyword]
                counts["Total"] += 1
                if sentiment in ("Positive", "Negative"):
                    counts[sentiment] += 1
    return keyword_counts
# --- Page chrome -----------------------------------------------------------
# set_page_config is the first Streamlit call in this script.
st.set_page_config(page_title="SentimentAnalysis App", page_icon=":smiley:")
st.title("SentimentAnalysis App")

# --- Sidebar: input source, model and result view --------------------------
st.sidebar.title("Options")
input_option = st.sidebar.radio(
    "Select Input Option",
    ("Free Text", "CSV Files"),
)
selected_model = st.sidebar.radio(
    "Select Sentiment Analysis Model",
    ("VADER", "TextBlob", "Hugging Face"),
)
result_option = st.sidebar.radio(
    "Select Result Display Option",
    (
        "DataFrame",
        "Pie Chart",
        "Bar Chart",
        "Keyword Frequency",
        "WordCloud",
        "Comparative Sentiment Analysis",
    ),
)

# --- Progress widgets updated while CSV rows are analysed ------------------
progress_label = st.empty()
progress_bar = st.progress(0)
progress = 0

# --- On-disk store for analysed CSV files ----------------------------------
processed_directory = "processed_files"
os.makedirs(processed_directory, exist_ok=True)
processed_files = []
def get_processed_filenames():
    """Return the names of the regular files inside ``processed_directory``."""
    names = []
    for entry in os.listdir(processed_directory):
        if os.path.isfile(os.path.join(processed_directory, entry)):
            names.append(entry)
    return names
# Free-text mode: analyse whatever the user typed with the selected model.
if input_option == "Free Text":
    st.subheader("Enter review for sentiment analysis:")
    user_input = st.text_area("", "")
    if user_input:
        with st.spinner("Analyzing..."):
            analyzer = {
                "Hugging Face": analyze_hf_sentiment,
                "VADER": analyze_vader_sentiment,
                "TextBlob": analyze_textblob_sentiment,
            }.get(selected_model)
            if analyzer is not None:
                result = analyzer(user_input)
        st.write("Sentiment:", result)
    else:
        st.info("Enter some text above for sentiment analysis.")
# Keywords whose frequencies are charted in the "Keyword Frequency" view.
KEYWORD_FREQUENCY_TERMS = [
    "delivery", "shipping", "parcel", "package",
    "tracking", "shipment", "cargo", "freight",
    "automation", "automated", "robotic", "robots",
    "AI", "artificial intelligence", "machine learning", "chatbot",
    "virtual assistant", "customer support", "real-time", "instant",
    "live update", "status", "IoT", "internet of things",
    "connected devices", "smart technology", "blockchain", "ledger",
    "transparency", "security", "sustainability", "eco-friendly",
    "green logistics", "carbon footprint", "customer service", "support",
    "experience", "satisfaction", "data analytics", "big data",
    "analysis", "insights", "cloud computing", "cloud-based",
    "digital infrastructure", "storage", "5G", "connectivity",
    "network speed", "wireless", "drone", "aerial delivery",
    "UAV", "drone shipping", "augmented reality", "AR",
    "virtual reality", "VR", "3D printing", "additive manufacturing",
    "custom parts", "prototyping", "inventory management", "stock levels",
    "warehouse management", "storage solutions", "supply chain", "logistics",
    "supply network", "distribution", "eco-packaging", "sustainable materials",
    "recycling", "waste reduction", "digital platform", "e-commerce",
    "online shopping", "online order", "cybersecurity", "data protection",
    "privacy", "encryption", "predictive modeling", "forecasting",
    "demand planning", "trend analysis", "robotics", "automated vehicles",
    "self-driving cars", "logistics automation", "visibility",
    "supply chain visibility", "track and trace", "monitoring", "integration",
    "ERP", "supply chain integration", "software", "optimization",
    "efficiency", "process improvement", "lean logistics", "personalization",
    "customization", "tailored services", "personal touch", "ethical sourcing",
    "fair trade", "labor rights", "ethical business", "user experience",
    "UX", "customer journey", "service design",
]

# Keyword groups offered in the "Comparative Sentiment Analysis" view.
SUPPLY_CHAIN_AREAS = {
    'logistics': ['logistics', 'supply chain', 'cargo', 'shipment', 'freight', 'package', 'tracking'],
    'delivery': ['delivery', 'shipping', 'courier', 'postal', 'parcel'],
    'inventory': ['inventory', 'stock', 'storage', 'warehouse', 'security'],
    'customer service': ['customer service', 'support', 'helpdesk', 'service center', 'experience', 'refund'],
    'procurement': ['procurement', 'sourcing', 'purchasing', 'buying', 'order'],
    'distribution': ['distribution', 'supply network', 'distribution center'],
    'manufacturing': ['manufacturing', 'production', 'assembly', 'quality', 'defect'],
}

# Sidebar model choice -> column holding that model's predictions.
SENTIMENT_COLUMNS = {
    "VADER": "VADER Sentiment",
    "TextBlob": "TextBlob Sentiment",
    "Hugging Face": "HuggingFace Sentiment",
}


def _png_download_button(fig, label, file_name):
    """Serve *fig* as a PNG through a download button, then close *fig*."""
    import io

    # Render in memory: no temp file on disk and no unclosed file handle.
    buffer = io.BytesIO()
    fig.savefig(buffer, format="png")
    plt.close(fig)
    st.download_button(
        label=label, data=buffer.getvalue(), file_name=file_name, mime='image/png'
    )


# CSV mode: analyse uploaded files with all three models, store the results
# in processed_directory, then visualise whichever stored files are selected.
if input_option == "CSV Files":
    st.subheader("Select CSV files for sentiment analysis:")
    files = st.file_uploader(
        "Upload New File", type=["csv"], accept_multiple_files=True
    )
    for uploaded in files or []:
        # NOTE(review): some browsers report CSV uploads with a different
        # MIME type (e.g. "application/vnd.ms-excel"); confirm whether this
        # check should be relaxed, since the uploader already filters on
        # the ".csv" extension.
        if uploaded.type != "text/csv":
            st.warning(
                "Uploaded file is not a CSV file. Please upload a CSV file only."
            )
            continue
        df = pd.read_csv(uploaded)
        if "review_text" not in df.columns:
            st.warning(
                "Uploaded CSV file doesn't contain 'review_text' column. Please check the CSV file format."
            )
            continue
        # Missing or non-string cells would crash the analyzers, so they
        # are analysed as (possibly empty) strings.
        texts = df["review_text"].fillna("").astype(str)
        total_rows = len(texts)
        sentiments_v = []
        sentiments_tb = []
        sentiments_hf = []
        # The counter restarts for every file so the label never exceeds
        # that file's row count.
        for done, review_text in enumerate(texts, start=1):
            sentiments_v.append(analyze_vader_sentiment(review_text))
            sentiments_tb.append(analyze_textblob_sentiment(review_text))
            sentiments_hf.append(analyze_hf_sentiment(review_text))
            progress_label.text(f"{done}/{total_rows}")
            progress_bar.progress(done / total_rows)
        df["VADER Sentiment"] = sentiments_v
        df["TextBlob Sentiment"] = sentiments_tb
        df["HuggingFace Sentiment"] = sentiments_hf
        # Stored under the upload's base name with a "1" suffix.
        new_filename = os.path.splitext(uploaded.name)[0] + "1.csv"
        df.to_csv(os.path.join(processed_directory, new_filename), index=False)
        st.success(f"New file processed and saved as {new_filename}")

    processed_files = get_processed_filenames()
    selected_files = st.multiselect("Select from Processed Files", processed_files)
    if not files and not selected_files:
        st.info(
            "Upload a new CSV file or select from processed files above for sentiment analysis."
        )

    all_dfs = [
        pd.read_csv(os.path.join(processed_directory, file_name))
        for file_name in selected_files
    ]
    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)
        # Keep only the chosen model's column, exposed as "Sentiment".
        sentiment_column = SENTIMENT_COLUMNS[selected_model]
        other_columns = [
            name for name in SENTIMENT_COLUMNS.values() if name != sentiment_column
        ]
        combined_df = combined_df.drop(columns=other_columns).rename(
            columns={sentiment_column: "Sentiment"}
        )
        # Empty reviews come back from disk as NaN; drop them before any
        # string processing.
        review_texts = combined_df["review_text"].dropna().astype(str)

        if result_option == "DataFrame":
            st.subheader("Sentiment Analysis Results")
            display_dataframe(combined_df)
            download_csv(combined_df)
        elif result_option == "Pie Chart":
            st.subheader("Sentiment Distribution")
            display_pie_chart(combined_df, "Sentiment")
        elif result_option == "Bar Chart":
            sentiment_counts = combined_df["Sentiment"].value_counts()
            st.bar_chart(sentiment_counts)
            if st.button('Download Sentiment Counts Chart'):
                fig, ax = plt.subplots()
                sentiment_counts.plot(kind='bar', ax=ax)
                ax.set_xlabel('Sentiment')
                ax.set_ylabel('Count')
                ax.set_title('Sentiment Counts')
                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                fig.tight_layout()
                _png_download_button(
                    fig,
                    'Download Sentiment Counts Chart Image',
                    'sentiment_counts_chart.png',
                )
        elif result_option == "Keyword Frequency":
            st.subheader("Keyword Frequency")
            # NOTE(review): tokens are whitespace-split and matched exactly,
            # so multi-word keywords and words with different casing or
            # attached punctuation are never counted.
            text_data = " ".join(review_texts)
            keyword_frequency = (
                pd.Series(text_data.split()).value_counts().reset_index()
            )
            keyword_frequency.columns = ["Keyword", "Frequency"]
            filtered_keyword_frequency = keyword_frequency[
                keyword_frequency["Keyword"].isin(KEYWORD_FREQUENCY_TERMS)
            ]
            st.bar_chart(filtered_keyword_frequency.set_index("Keyword"))
            if st.button('Download Keyword Frequency Chart'):
                fig, ax = plt.subplots()
                filtered_keyword_frequency.plot(
                    kind='bar', x='Keyword', y='Frequency', ax=ax
                )
                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                fig.tight_layout()
                _png_download_button(
                    fig,
                    'Download Keyword Frequency Chart Image',
                    'keyword_frequency_chart.png',
                )
        elif result_option == "WordCloud":
            # Must match the sidebar option text "WordCloud" exactly.
            st.subheader("Word Cloud")
            display_wordcloud(" ".join(review_texts))
        elif result_option == "Comparative Sentiment Analysis":
            st.subheader("Comparative Sentiment Analysis")
            supply_chain_area = st.sidebar.radio(
                "Select Supply Chain Area",
                tuple(SUPPLY_CHAIN_AREAS),
            )
            # Positive/negative review counts per keyword of the chosen area.
            keyword_counts = count_reviews_with_keywords(
                combined_df.dropna(subset=["review_text"]),
                SUPPLY_CHAIN_AREAS[supply_chain_area],
            )
            df_counts = pd.DataFrame(keyword_counts).transpose()
            st.bar_chart(
                df_counts[["Positive", "Negative"]],
                use_container_width=True,
                height=500,
            )