# Hugging Face Spaces page residue (status banner "Spaces: Sleeping") removed;
# the application source begins below.
import base64
import io
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from textblob import TextBlob
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
# Function to perform sentiment analysis using Hugging Face model
hf_sentiment_analyzer = pipeline(
    "sentiment-analysis", "Dmyadav2001/Sentimental-Analysis"
)

# Raw model labels -> human-readable sentiment names.
_HF_LABEL_MAP = {"LABEL_1": "Positive", "LABEL_0": "Negative", "LABEL_2": "Neutral"}


def analyze_hf_sentiment(text):
    """Classify *text* with the Hugging Face model.

    Returns "Positive", "Negative" or "Neutral" (None for an unexpected
    label, matching the original implicit fall-through).

    NOTE(review): the model's input limit is 512 *tokens*; truncating to
    512 characters is a conservative approximation. The original checked
    ``len(text) > 512`` but truncated to 511 — an off-by-one against its
    own condition, fixed here.
    """
    if len(text) > 512:
        text = text[:512]
    result = hf_sentiment_analyzer(text)
    return _HF_LABEL_MAP.get(result[0]["label"])
# Function to perform sentiment analysis using VADER
def analyze_vader_sentiment(text):
    """Return "Positive", "Neutral" or "Negative" for *text* based on
    VADER's compound polarity score (> 0 positive, == 0 neutral).
    """
    # Reuse one analyzer instance: the original constructed a new
    # SentimentIntensityAnalyzer (reloading its lexicon) on every call,
    # which is wasteful inside the per-row CSV loop.
    analyzer = getattr(analyze_vader_sentiment, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_vader_sentiment._analyzer = analyzer
    compound = analyzer.polarity_scores(text)["compound"]
    if compound > 0:
        return "Positive"
    if compound == 0:
        return "Neutral"
    return "Negative"
# Function to perform sentiment analysis using TextBlob
def analyze_textblob_sentiment(text):
    """Classify *text* via TextBlob polarity: > 0 -> "Positive",
    == 0 -> "Neutral", otherwise "Negative".
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "Positive"
    return "Neutral" if polarity == 0 else "Negative"
# Function to display DataFrame with updated sentiment column
def display_dataframe(df):
    """Render *df* in the Streamlit page (thin wrapper around st.write)."""
    st.write(df)
# Function to display pie chart for sentiment distribution
def display_pie_chart(df, column):
    """Draw a pie chart of ``df[column]`` value counts and offer it as a
    PNG download.

    Fixes vs. original: the PNG is rendered to an in-memory buffer with
    ``fig.savefig`` instead of writing ``pie_chart.png`` to disk via an
    unclosed ``open(...).read()`` handle, and the download button is no
    longer nested inside an ``st.button`` click (that pattern makes the
    download vanish on the rerun triggered by the first click).
    """
    sentiment_counts = df[column].value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        sentiment_counts,
        labels=sentiment_counts.index,
        autopct="%1.1f%%",
        startangle=140,
    )
    ax.axis("equal")  # keep the pie circular
    st.pyplot(fig)
    # Add a download button
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    st.download_button(
        label="Download Pie Chart Image",
        data=buf.getvalue(),
        file_name="pie_chart.png",
        mime="image/png",
    )
    plt.close(fig)  # release the figure to avoid accumulating open figures
# Function to display word cloud
def display_wordcloud(text_data):
    """Generate and display a word cloud for *text_data* and offer it as
    a PNG download.

    Fixes vs. original: PNG is rendered in memory via ``fig.savefig``
    (the original wrote ``word_cloud.png`` to disk and leaked the file
    handle from ``open(...).read()``), and the download button is offered
    directly instead of being nested inside an ``st.button`` click, which
    loses the download on the rerun caused by the first click.
    """
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(
        text_data
    )
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    # Add a download button
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    st.download_button(
        label="Download Word Cloud Image",
        data=buf.getvalue(),
        file_name="word_cloud.png",
        mime="image/png",
    )
    plt.close(fig)  # release the figure to avoid accumulating open figures
# Function to download CSV file
def download_csv(df):
    """Offer *df* as a downloadable CSV via a base64 data-URI link."""
    encoded = base64.b64encode(df.to_csv(index=False).encode()).decode()  # B64 encoding
    link = (
        f'<a href="data:file/csv;base64,{encoded}" '
        'download="sentiment_analysis_results.csv">Download CSV File</a>'
    )
    st.markdown(link, unsafe_allow_html=True)
# Function to count occurrences of keywords and sentiment distribution
def count_reviews_with_keywords(df, keywords):
    """Count, per keyword, how many reviews mention it and how those
    reviews split by sentiment.

    A review "mentions" a keyword when the keyword is a case-insensitive
    substring of its ``review_text``. Returns a mapping
    ``{keyword: {"Positive": n, "Negative": n, "Total": n}}`` — neutral
    reviews contribute to "Total" only.
    """
    counts = {kw: {"Positive": 0, "Negative": 0, "Total": 0} for kw in keywords}
    for review_text, sentiment in zip(df["review_text"], df["Sentiment"]):
        lowered = review_text.lower()
        for kw in keywords:
            if kw.lower() in lowered:
                bucket = counts[kw]
                bucket["Total"] += 1
                if sentiment in ("Positive", "Negative"):
                    bucket[sentiment] += 1
    return counts
# Streamlit UI
st.set_page_config(page_title="SentimentAnalysis App", page_icon=":smiley:")
# st.title("SentimentAnalysis App")
text = "SentimentAnalysis App"
t = st.empty()
# "Typewriter" title animation: reveal one more character every 0.1 s.
for i in range(len(text) + 1):
    t.markdown("## %s" % text[0:i])
    time.sleep(0.1)
# Sidebar
st.sidebar.title("Options")
input_option = st.sidebar.radio("Select Input Option", ("Free Text", "CSV Files"))
selected_model = st.sidebar.radio(
    "Select Sentiment Analysis Model", ("VADER", "TextBlob", "Hugging Face")
)
result_option = st.sidebar.radio(
    "Select Result Display Option",
    ("DataFrame", "Pie Chart", "Bar Chart", "Keyword Frequency", "WordCloud", "Comparative Sentiment Analysis"),
)
# Main content
progress_label = st.empty()  # Define progress label
progress_bar = st.progress(0)
progress = 0  # rows processed so far (updated while analyzing uploaded CSVs)
# Directory path to store processed files
processed_directory = "processed_files"
# Ensure the directory exists, if not create it
os.makedirs(processed_directory, exist_ok=True)
# List to store processed filenames
processed_files = []
# Function to get filenames from the processed directory
def get_processed_filenames():
    """Return the names of all regular files in ``processed_directory``."""
    return [entry.name for entry in os.scandir(processed_directory) if entry.is_file()]
# Free-text mode: analyze a single review typed by the user with the
# model selected in the sidebar.
if input_option == "Free Text":
    st.subheader("Enter review for sentiment analysis:")
    user_input = st.text_area("", "")
    if not user_input:
        st.info("Enter some text above for sentiment analysis.")
    else:
        with st.spinner("Analyzing..."):
            # Dispatch to the analyzer chosen in the sidebar.
            if selected_model == "Hugging Face":
                result = analyze_hf_sentiment(user_input)
            elif selected_model == "VADER":
                result = analyze_vader_sentiment(user_input)
            elif selected_model == "TextBlob":
                result = analyze_textblob_sentiment(user_input)
        st.write("Sentiment:", result)
# CSV mode: upload CSVs of reviews, analyze them with all three models,
# persist the results, and visualize the sentiment of selected files.
if input_option == "CSV Files":
    st.subheader("Select CSV files for sentiment analysis:")
    # Uploading new file
    files = st.file_uploader(
        "Upload New File", type=["csv"], accept_multiple_files=True
    )
    if files:
        # Process uploaded new files
        for file in files:
            if file.type != "text/csv":
                st.warning(
                    "Uploaded file is not a CSV file. Please upload a CSV file only."
                )
            else:
                df = pd.read_csv(file)
                if "review_text" not in df.columns:
                    st.warning(
                        "Uploaded CSV file doesn't contain 'review_text' column. Please check the CSV file format."
                    )
                else:
                    total_rows = len(df)
                    sentiments_v = []
                    sentiments_tb = []
                    sentiments_hf = []
                    # Per-file counter: the original accumulated the global
                    # `progress` across files, so from the second file on the
                    # numerator exceeded that file's row count.
                    progress = 0
                    for review_text in df["review_text"]:
                        sentiments_v.append(analyze_vader_sentiment(review_text))
                        sentiments_tb.append(analyze_textblob_sentiment(review_text))
                        sentiments_hf.append(analyze_hf_sentiment(review_text))
                        progress += 1
                        progress_label.text(f"{progress}/{total_rows}")
                        progress_bar.progress(min(progress / total_rows, 1.0))
                    df["VADER Sentiment"] = sentiments_v
                    df["TextBlob Sentiment"] = sentiments_tb
                    df["HuggingFace Sentiment"] = sentiments_hf
                    # Save processed file with modified filename
                    new_filename = os.path.splitext(file.name)[0] + "1.csv"
                    df.to_csv(
                        os.path.join(processed_directory, new_filename), index=False
                    )
                    st.success(f"New file processed and saved as {new_filename}")
    # List of already processed files
    processed_files = get_processed_filenames()
    selected_files = st.multiselect("Select from Processed Files", processed_files)
    if not files and not selected_files:
        st.info(
            "Upload a new CSV file or select from processed files above for sentiment analysis."
        )
    all_dfs = []
    # Process already selected files
    for file_name in selected_files:
        all_dfs.append(pd.read_csv(os.path.join(processed_directory, file_name)))
    # Results
    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)
        # Keep only the selected model's column, renamed to "Sentiment";
        # drop the other two (table-driven instead of the original if/elif
        # chain of drop calls).
        model_columns = {
            "TextBlob": "TextBlob Sentiment",
            "VADER": "VADER Sentiment",
            "Hugging Face": "HuggingFace Sentiment",
        }
        result = model_columns[selected_model]
        combined_df.drop(
            columns=[col for col in model_columns.values() if col != result],
            inplace=True,
        )
        combined_df.rename(columns={result: "Sentiment"}, inplace=True)
        if result_option == "DataFrame":
            st.subheader("Sentiment Analysis Results")
            display_dataframe(combined_df)
            download_csv(combined_df)
        elif result_option == "Pie Chart":
            st.subheader("Sentiment Distribution")
            display_pie_chart(combined_df, "Sentiment")
        elif result_option == "Bar Chart":
            # Calculate value counts
            sentiment_counts = combined_df["Sentiment"].value_counts()
            # Display bar chart
            st.bar_chart(sentiment_counts)
            # Offer the PNG directly: nesting st.download_button inside an
            # st.button click (as the original did) loses the download on
            # the rerun triggered by the first click. Render in memory
            # instead of writing/leaking a file on disk.
            fig, ax = plt.subplots()
            sentiment_counts.plot(kind="bar", ax=ax)
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Count")
            ax.set_title("Sentiment Counts")
            plt.xticks(rotation=45, ha="right")
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            st.download_button(
                label="Download Sentiment Counts Chart Image",
                data=buf.getvalue(),
                file_name="sentiment_counts_chart.png",
                mime="image/png",
            )
            plt.close(fig)
        elif result_option == "Keyword Frequency":
            st.subheader("Keyword Frequency")
            # Logistics/technology keywords whose frequency we chart.
            keywords = [
                "delivery",
                "shipping",
                "parcel",
                "package",
                "tracking",
                "shipment",
                "cargo",
                "freight",
                "automation",
                "automated",
                "robotic",
                "robots",
                "AI",
                "artificial intelligence",
                "machine learning",
                "chatbot",
                "virtual assistant",
                "customer support",
                "real-time",
                "instant",
                "live update",
                "status",
                "IoT",
                "internet of things",
                "connected devices",
                "smart technology",
                "blockchain",
                "ledger",
                "transparency",
                "security",
                "sustainability",
                "eco-friendly",
                "green logistics",
                "carbon footprint",
                "customer service",
                "support",
                "experience",
                "satisfaction",
                "data analytics",
                "big data",
                "analysis",
                "insights",
                "cloud computing",
                "cloud-based",
                "digital infrastructure",
                "storage",
                "5G",
                "connectivity",
                "network speed",
                "wireless",
                "drone",
                "aerial delivery",
                "UAV",
                "drone shipping",
                "augmented reality",
                "AR",
                "virtual reality",
                "VR",
                "3D printing",
                "additive manufacturing",
                "custom parts",
                "prototyping",
                "inventory management",
                "stock levels",
                "warehouse management",
                "storage solutions",
                "supply chain",
                "logistics",
                "supply network",
                "distribution",
                "eco-packaging",
                "sustainable materials",
                "recycling",
                "waste reduction",
                "digital platform",
                "e-commerce",
                "online shopping",
                "online order",
                "cybersecurity",
                "data protection",
                "privacy",
                "encryption",
                "predictive modeling",
                "forecasting",
                "demand planning",
                "trend analysis",
                "robotics",
                "automated vehicles",
                "self-driving cars",
                "logistics automation",
                "visibility",
                "supply chain visibility",
                "track and trace",
                "monitoring",
                "integration",
                "ERP",
                "supply chain integration",
                "software",
                "optimization",
                "efficiency",
                "process improvement",
                "lean logistics",
                "personalization",
                "customization",
                "tailored services",
                "personal touch",
                "ethical sourcing",
                "fair trade",
                "labor rights",
                "ethical business",
                "user experience",
                "UX",
                "customer journey",
                "service design",
                "visibility",
            ]
            text_data = " ".join(combined_df["review_text"])
            keyword_frequency = (
                pd.Series(text_data.split()).value_counts().reset_index()
            )
            keyword_frequency.columns = ["Keyword", "Frequency"]
            # Filter keyword frequency for specific keywords
            filtered_keyword_frequency = keyword_frequency[
                keyword_frequency["Keyword"].isin(keywords)
            ]
            # Display bar chart for filtered keyword frequency
            st.bar_chart(filtered_keyword_frequency.set_index("Keyword"))
            # Direct in-memory PNG download (same button-nesting and
            # file-handle fixes as the bar chart above).
            fig, ax = plt.subplots()
            filtered_keyword_frequency.plot(kind="bar", x="Keyword", y="Frequency", ax=ax)
            plt.xticks(rotation=45, ha="right")
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            st.download_button(
                label="Download Keyword Frequency Chart Image",
                data=buf.getvalue(),
                file_name="keyword_frequency_chart.png",
                mime="image/png",
            )
            plt.close(fig)
        elif result_option == "WordCloud":
            # BUG FIX: the sidebar radio offers "WordCloud" (no space) but the
            # original compared against "Word Cloud", so this view was
            # unreachable and fell through to the comparative branch.
            st.subheader("Word Cloud")
            text_data = " ".join(combined_df["review_text"])
            display_wordcloud(text_data)
        else:
            # Comparative Sentiment Analysis
            st.subheader("Comparative Sentiment Analysis")
            supply_chain_areas = {
                'logistics': ['logistics', 'supply chain', 'cargo', 'shipment', 'freight', 'package', 'tracking'],
                'delivery': ['delivery', 'shipping', 'courier', 'postal', 'parcel'],
                # BUG FIX: 'security' previously contained a trailing smart
                # quote ('security’') and could never match a review.
                'inventory': ['inventory', 'stock', 'storage', 'warehouse', 'security'],
                'customer service': ['customer service', 'support', 'helpdesk', 'service center', 'experience', 'refund'],
                'procurement': ['procurement', 'sourcing', 'purchasing', 'buying', 'order'],
                'distribution': ['distribution', 'supply network', 'distribution center'],
                'manufacturing': ['manufacturing', 'production', 'assembly', 'quality', 'defect']
            }
            supply_chain_area = st.sidebar.radio(
                "Select Supply Chain Area",
                ("logistics", "delivery", "inventory", "customer service", "procurement", "distribution", "manufacturing"),
            )
            # Call the function to count occurrences of keywords and sentiment distribution
            keyword_counts = count_reviews_with_keywords(
                combined_df, supply_chain_areas[supply_chain_area]
            )
            # Convert keyword_counts to DataFrame
            df_counts = pd.DataFrame(keyword_counts).transpose()
            # Plot dual bar chart horizontally
            st.bar_chart(df_counts[["Positive", "Negative"]], use_container_width=True, height=500)