# Twitter Sentiment Analysis — Streamlit app using a pre-trained SST-2 DistilBERT model.
import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import matplotlib.pyplot as plt
from datasets import Dataset
import asyncio
# Workaround: Streamlit runs scripts in a worker thread that may have no asyncio
# event loop, which can make libraries that probe for one raise at import/use
# time. Touch the loop machinery once so later lookups don't fail.
try:
    asyncio.get_running_loop()
except RuntimeError:  # No running event loop in this thread
    asyncio.run(asyncio.sleep(0))

# Pre-trained binary (POSITIVE/NEGATIVE) sentiment model, fine-tuned on SST-2.
MODEL_PATH = "distilbert-base-uncased-finetuned-sst-2-english"  # Default Hugging Face sentiment model


@st.cache_resource(show_spinner=False)
def _load_model_and_tokenizer():
    """Load the HF model and tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the multi-hundred-MB model would be re-loaded from
    disk on each rerun. `st.cache_resource` keeps one shared instance.
    """
    return (
        AutoModelForSequenceClassification.from_pretrained(MODEL_PATH),
        AutoTokenizer.from_pretrained(MODEL_PATH),
    )


model, tokenizer = _load_model_and_tokenizer()
# Define a sentiment analysis function
def sentiment_analysis(text):
    """Classify `text` as positive or negative sentiment.

    Args:
        text: Input string; truncated to the model's 512-token limit.

    Returns:
        tuple[str, float]: ("POSITIVE" or "NEGATIVE", softmax probability
        of the predicted class).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Inference only: disable autograd so no computation graph is built
    # (saves memory and time on every call).
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = torch.argmax(probabilities, dim=1).item()
    confidence = torch.max(probabilities, dim=1).values.item()
    # For this SST-2 head, class index 1 is the positive label.
    return ("POSITIVE" if sentiment == 1 else "NEGATIVE", confidence)
# Streamlit app
# Page chrome: title, intro blurb, and the two top-level navigation tabs.
st.title("Twitter Sentiment Analysis App")
st.write("Analyze sentiments in Twitter-like text data using a pre-trained model.")
# Tabs for navigation
tab1, tab2 = st.tabs(["Analyze Sentiments", "Sample Dataset"])
with tab1:
    st.header("Analyze Sentiments")
    st.write("Upload a dataset to analyze sentiments of text data.")

    # Let the user supply their own CSV of text rows.
    uploaded = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded is None:
        st.write("Please upload a dataset to proceed.")
    else:
        df = pd.read_csv(uploaded)

        st.subheader("Dataset Preview")
        st.write(df.head())

        # The user picks which column holds the text to classify.
        text_column = st.selectbox("Select the column containing text for analysis:", df.columns)
        if st.button("Analyze Sentiment"):
            # Normalize the chosen column: drop NaNs, coerce everything to str.
            df[text_column] = df[text_column].fillna("").astype(str)

            st.write("Analyzing sentiments...")
            # One (label, confidence) tuple per row.
            predictions = df[text_column].apply(sentiment_analysis)
            df['Sentiment'] = predictions.apply(lambda pair: pair[0])
            df['Confidence'] = predictions.apply(lambda pair: pair[1])

            st.subheader("Analysis Results")
            st.write(df[[text_column, 'Sentiment', 'Confidence']])

            # Bar chart of label frequencies.
            st.subheader("Sentiment Distribution")
            label_counts = df['Sentiment'].value_counts()
            fig, ax = plt.subplots()
            label_counts.plot(kind='bar', ax=ax, color=['green', 'blue', 'red'])
            ax.set_title("Sentiment Distribution")
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Count")
            st.pyplot(fig)

            # Offer the annotated dataset back as a CSV download.
            st.subheader("Download Results")
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download Sentiment Analysis Results",
                data=csv,
                file_name="sentiment_analysis_results.csv",
                mime="text/csv",
            )
with tab2:
    st.header("Sample Dataset")
    st.write("Download a sample dataset to try out the app.")

    # A tiny hand-written dataset so users can try the full flow immediately.
    example_tweets = [
        "I love this product! It's amazing.",
        "This is the worst service I have ever received.",
        "I'm not sure how I feel about this.",
        "Absolutely fantastic experience!",
        "Terrible. Would not recommend.",
    ]
    sample_data = pd.DataFrame({"Tweet": example_tweets})
    st.write(sample_data)

    st.download_button(
        label="Download Sample Dataset",
        data=sample_data.to_csv(index=False),
        file_name="sample_twitter_dataset.csv",
        mime="text/csv",
    )

    # Short usage walkthrough pointing back to the analysis tab.
    st.write("Follow these steps:")
    st.markdown("""
1. Go to the **Analyze Sentiments** tab.
2. Upload the sample dataset or your own dataset in CSV format.
3. Select the column containing the text to analyze.
4. Click **Analyze Sentiment** to view results and download them.
""")