|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
import torch |
|
|
import matplotlib.pyplot as plt |
|
|
from datasets import Dataset |
|
|
import asyncio |
|
|
|
|
|
|
|
|
def _event_loop_is_running():
    """Report whether an asyncio event loop is running in this thread."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises RuntimeError when no loop is running.
        return False
    return True


# With no loop running, drive a no-op coroutine to completion once.
# NOTE(review): presumably a workaround for a library that expects asyncio to
# have been exercised in this thread before it is imported or used -- confirm
# it is still required.
if not _event_loop_is_running():
    asyncio.run(asyncio.sleep(0))
|
|
|
|
|
|
|
|
# Hugging Face identifier of the pre-trained sentiment classifier.
MODEL_PATH = "distilbert-base-uncased-finetuned-sst-2-english"


@st.cache_resource
def _load_model_and_tokenizer(model_path):
    """Load the classifier and its tokenizer, cached across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the weights would be loaded again each
    time. ``st.cache_resource`` keeps a single shared instance instead.

    Args:
        model_path: Model identifier or local path passed to
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(MODEL_PATH)
|
|
|
|
|
|
|
|
def sentiment_analysis(text):
    """Classify a single piece of text as positive or negative.

    Args:
        text: The string to classify. The tokenizer truncates input to at
            most 512 tokens.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"POSITIVE"`` when the
        highest-scoring class index is 1 and ``"NEGATIVE"`` otherwise;
        ``confidence`` is the softmax probability of that class as a float.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Inference only: disable autograd so no computation graph is built and
    # kept alive for a backward pass that never happens.
    with torch.no_grad():
        logits = model(**inputs).logits

    probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # A single reduction yields both the winning probability and its index.
    confidence, sentiment = torch.max(probabilities, dim=1)

    return ("POSITIVE" if sentiment.item() == 1 else "NEGATIVE", confidence.item())
|
|
|
|
|
|
|
|
# Page heading and the one-line description shown above the tabs.
st.title("Twitter Sentiment Analysis App")
st.write("Analyze sentiments in Twitter-like text data using a pre-trained model.")

# The app has two tabs: the first runs the analysis on an uploaded CSV, the
# second offers a sample dataset and usage instructions.
_tab_labels = ["Analyze Sentiments", "Sample Dataset"]
tab1, tab2 = st.tabs(_tab_labels)
|
|
|
|
|
# Bar colors are keyed by label so a color always means the same sentiment,
# regardless of which label is most frequent in the uploaded data.
_SENTIMENT_COLORS = {"POSITIVE": "green", "NEGATIVE": "red"}
_FALLBACK_COLOR = "blue"

# session_state slot holding (source id, analyzed DataFrame) for the last run.
_RESULTS_KEY = "sentiment_results"


def _read_uploaded_csv(uploaded_file):
    """Parse the uploaded CSV, showing an error instead of crashing the app.

    Returns:
        The parsed DataFrame, or ``None`` when the file is empty, malformed
        or not decodable (an error message has then already been displayed).
    """
    try:
        return pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return None


def _run_analysis(data, text_column):
    """Return a copy of *data* with 'Sentiment' and 'Confidence' columns added."""
    analyzed = data.copy()
    # Missing cells become empty strings so every row can be tokenized.
    analyzed[text_column] = analyzed[text_column].fillna("").astype(str)
    predictions = [sentiment_analysis(text) for text in analyzed[text_column]]
    analyzed["Sentiment"] = [label for label, _ in predictions]
    analyzed["Confidence"] = [score for _, score in predictions]
    return analyzed


def _plot_sentiment_distribution(sentiment_counts):
    """Render the per-label counts as a bar chart in the Streamlit page."""
    fig, ax = plt.subplots()
    try:
        bar_colors = [
            _SENTIMENT_COLORS.get(label, _FALLBACK_COLOR)
            for label in sentiment_counts.index
        ]
        sentiment_counts.plot(kind="bar", ax=ax, color=bar_colors)
        ax.set_title("Sentiment Distribution")
        ax.set_xlabel("Sentiment")
        ax.set_ylabel("Count")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure registered until it is closed; without
        # this, each rerun of the script would leak one figure.
        plt.close(fig)


def _render_results(analyzed, text_column):
    """Show the results table, the distribution chart and the download button."""
    st.subheader("Analysis Results")
    st.write(analyzed[[text_column, "Sentiment", "Confidence"]])

    st.subheader("Sentiment Distribution")
    _plot_sentiment_distribution(analyzed["Sentiment"].value_counts())

    st.subheader("Download Results")
    st.download_button(
        label="Download Sentiment Analysis Results",
        data=analyzed.to_csv(index=False),
        file_name="sentiment_analysis_results.csv",
        mime="text/csv",
    )


with tab1:
    st.header("Analyze Sentiments")
    st.write("Upload a dataset to analyze sentiments of text data.")

    data_file = st.file_uploader("Upload your CSV file", type=["csv"])

    if data_file is None:
        st.write("Please upload a dataset to proceed.")
    else:
        data = _read_uploaded_csv(data_file)

        if data is not None:
            st.subheader("Dataset Preview")
            st.write(data.head())

            text_column = st.selectbox(
                "Select the column containing text for analysis:", data.columns
            )

            # Identifies what the stored results were computed from, so stale
            # results are not shown after the file or column changes.
            # NOTE: two different files with the same name and size are
            # treated as the same source.
            source_id = (data_file.name, data_file.size, text_column)

            if st.button("Analyze Sentiment"):
                if data.empty:
                    # A header-only CSV has nothing to classify or plot.
                    st.warning("The uploaded dataset has no rows to analyze.")
                    if _RESULTS_KEY in st.session_state:
                        del st.session_state[_RESULTS_KEY]
                else:
                    st.write("Analyzing sentiments...")
                    st.session_state[_RESULTS_KEY] = (
                        source_id,
                        _run_analysis(data, text_column),
                    )

            # st.button is only True on the rerun caused by its own click, so
            # results are kept in session_state; otherwise they would vanish
            # on the rerun triggered by pressing the download button.
            if _RESULTS_KEY in st.session_state:
                stored_source, stored_frame = st.session_state[_RESULTS_KEY]
                if stored_source == source_id:
                    _render_results(stored_frame, text_column)
|
|
|
|
|
with tab2:
    st.header("Sample Dataset")
    st.write("Download a sample dataset to try out the app.")

    # A handful of example tweets in a single "Tweet" column, matching the
    # CSV layout the analysis tab accepts.
    _example_tweets = [
        "I love this product! It's amazing.",
        "This is the worst service I have ever received.",
        "I'm not sure how I feel about this.",
        "Absolutely fantastic experience!",
        "Terrible. Would not recommend.",
    ]
    sample_data = pd.DataFrame({"Tweet": _example_tweets})
    st.write(sample_data)

    # Offer the same table as a CSV download (index column omitted).
    sample_csv = sample_data.to_csv(index=False)
    st.download_button(
        label="Download Sample Dataset",
        data=sample_csv,
        file_name="sample_twitter_dataset.csv",
        mime="text/csv",
    )

    # Step-by-step usage instructions.
    st.write("Follow these steps:")
    st.markdown("""
    1. Go to the **Analyze Sentiments** tab.
    2. Upload the sample dataset or your own dataset in CSV format.
    3. Select the column containing the text to analyze.
    4. Click **Analyze Sentiment** to view results and download them.
    """)
|
|
|