"""Streamlit app that labels text rows as POSITIVE/NEGATIVE with a pre-trained model."""

import asyncio

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
import torch
from datasets import Dataset  # noqa: F401 - not used in this script; import kept as-is
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Handle asyncio loop issues: if no event loop is running, run a no-op
# coroutine once.
# NOTE(review): asyncio.run() closes the loop it creates, so confirm that this
# workaround is still required for the deployed Streamlit/torch versions.
try:
    asyncio.get_running_loop()
except RuntimeError:  # No running event loop
    asyncio.run(asyncio.sleep(0))

# Default Hugging Face sentiment model.
MODEL_PATH = "distilbert-base-uncased-finetuned-sst-2-english"

# Bar colour per sentiment label, so a label keeps its colour regardless of
# the order in which value_counts() returns it.
SENTIMENT_COLORS = {"POSITIVE": "green", "NEGATIVE": "red"}
FALLBACK_COLOR = "blue"


@st.cache_resource
def _load_model_and_tokenizer(model_path):
    """Load the classifier and its tokenizer once and reuse them across reruns.

    Streamlit re-executes the script on every widget interaction; caching the
    loaded objects avoids re-reading the weights each time.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(MODEL_PATH)


def sentiment_analysis(text):
    """Classify one piece of text.

    Args:
        text: The string to classify. It is truncated to 512 tokens.

    Returns:
        A ``(label, confidence)`` tuple where ``label`` is ``"POSITIVE"`` when
        the highest-probability class index is 1 and ``"NEGATIVE"`` otherwise,
        and ``confidence`` is that class's softmax probability as a float.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # torch.max over dim=1 yields both the top probability and its class index.
    confidence, sentiment = torch.max(probabilities, dim=1)
    label = "POSITIVE" if sentiment.item() == 1 else "NEGATIVE"
    return (label, confidence.item())


def _render_analysis_tab():
    """Render the upload / analyze / plot / download workflow."""
    st.header("Analyze Sentiments")
    st.write("Upload a dataset to analyze sentiments of text data.")

    # File uploader
    data_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if data_file is None:
        st.write("Please upload a dataset to proceed.")
        return

    # Read the dataset; report unreadable files instead of crashing the app.
    try:
        data = pd.read_csv(data_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return

    # Display the dataset
    st.subheader("Dataset Preview")
    st.write(data.head())

    # Check for text column selection
    text_column = st.selectbox(
        "Select the column containing text for analysis:", data.columns
    )

    if not st.button("Analyze Sentiment"):
        return

    if data.empty:
        # Nothing to classify, and plotting an empty count series would fail.
        st.warning("The uploaded dataset has no rows to analyze.")
        return

    # Clean the text column: replace NaN values and ensure every input is a string.
    data[text_column] = data[text_column].fillna("").astype(str)

    # Perform sentiment analysis
    st.write("Analyzing sentiments...")
    results = [sentiment_analysis(text) for text in data[text_column]]
    data["Sentiment"] = [label for label, _ in results]
    data["Confidence"] = [confidence for _, confidence in results]

    # Display results
    st.subheader("Analysis Results")
    st.write(data[[text_column, "Sentiment", "Confidence"]])

    # Plot sentiment distribution
    st.subheader("Sentiment Distribution")
    sentiment_counts = data["Sentiment"].value_counts()
    bar_colors = [
        SENTIMENT_COLORS.get(label, FALLBACK_COLOR) for label in sentiment_counts.index
    ]
    fig, ax = plt.subplots()
    sentiment_counts.plot(kind="bar", ax=ax, color=bar_colors)
    ax.set_title("Sentiment Distribution")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Count")
    st.pyplot(fig)
    # Release the figure so figures do not accumulate across script reruns.
    plt.close(fig)

    # Option to download results
    st.subheader("Download Results")
    csv = data.to_csv(index=False)
    st.download_button(
        label="Download Sentiment Analysis Results",
        data=csv,
        file_name="sentiment_analysis_results.csv",
        mime="text/csv",
    )


def _render_sample_tab():
    """Render the sample dataset, its download button, and usage steps."""
    st.header("Sample Dataset")
    st.write("Download a sample dataset to try out the app.")

    # Provide a sample dataset for download
    sample_data = pd.DataFrame(
        {
            "Tweet": [
                "I love this product! It's amazing.",
                "This is the worst service I have ever received.",
                "I'm not sure how I feel about this.",
                "Absolutely fantastic experience!",
                "Terrible. Would not recommend.",
            ]
        }
    )
    st.write(sample_data)

    sample_csv = sample_data.to_csv(index=False)
    st.download_button(
        label="Download Sample Dataset",
        data=sample_csv,
        file_name="sample_twitter_dataset.csv",
        mime="text/csv",
    )

    st.write("Follow these steps:")
    # Built line by line so each numbered step starts at column 0 of the
    # markdown source and renders as a list item.
    st.markdown(
        "\n".join(
            [
                "1. Go to the **Analyze Sentiments** tab.",
                "2. Upload the sample dataset or your own dataset in CSV format.",
                "3. Select the column containing the text to analyze.",
                "4. Click **Analyze Sentiment** to view results and download them.",
            ]
        )
    )


# Streamlit app
st.title("Twitter Sentiment Analysis App")
st.write("Analyze sentiments in Twitter-like text data using a pre-trained model.")

# Tabs for navigation
tab1, tab2 = st.tabs(["Analyze Sentiments", "Sample Dataset"])

with tab1:
    _render_analysis_tab()

with tab2:
    _render_sample_tab()