Spaces:

JEPHONETORRE
/

KaggleSentimentAnalyzer

Sleeping

44f9ee7 11 months ago

4.35 kB

	import streamlit as st
	import pandas as pd
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	import matplotlib.pyplot as plt
	from datasets import Dataset
	import asyncio

	def _prime_event_loop():
	    """Run a no-op coroutine once if this thread has no running event loop.

	    NOTE(review): presumably a workaround for event-loop errors seen when
	    the app starts under Streamlit -- confirm it is still required.
	    """
	    try:
	        asyncio.get_running_loop()
	    except RuntimeError:
	        # get_running_loop() raises RuntimeError when no loop is running.
	        asyncio.run(asyncio.sleep(0))


	# Handle asyncio loop issues
	_prime_event_loop()

	# Load pre-trained model and tokenizer
	MODEL_PATH = "distilbert-base-uncased-finetuned-sst-2-english"  # Default Hugging Face sentiment model


	@st.cache_resource
	def _load_model_and_tokenizer(model_path):
	    """Load the sequence-classification model and tokenizer for ``model_path``.

	    Streamlit re-executes the whole script on every widget interaction;
	    caching the loaded objects as a resource avoids calling
	    ``from_pretrained`` again on each rerun.

	    Args:
	        model_path: Model identifier or path passed to ``from_pretrained``.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    return (
	        AutoModelForSequenceClassification.from_pretrained(model_path),
	        AutoTokenizer.from_pretrained(model_path),
	    )


	model, tokenizer = _load_model_and_tokenizer(MODEL_PATH)

	# Define a sentiment analysis function
	def sentiment_analysis(text):
	    """Classify ``text`` with the module-level ``model`` and ``tokenizer``.

	    Args:
	        text: The text to classify. It is tokenized with truncation enabled
	            and a maximum length of 512 tokens.

	    Returns:
	        A ``(label, confidence)`` tuple. ``label`` is ``"POSITIVE"`` when the
	        most probable class index is 1 and ``"NEGATIVE"`` otherwise;
	        ``confidence`` is the softmax probability of that class as a float.
	    """
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

	    # Inference only: no gradients are needed, so skip autograd tracking.
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    probabilities = torch.nn.functional.softmax(logits, dim=-1)
	    # One reduction yields both the top probability and its class index.
	    top_probability, top_index = torch.max(probabilities, dim=1)
	    label = "POSITIVE" if top_index.item() == 1 else "NEGATIVE"
	    return (label, top_probability.item())

	# Page heading and one-line description of the app
	st.title("Twitter Sentiment Analysis App")
	st.write("Analyze sentiments in Twitter-like text data using a pre-trained model.")

	# One tab per label, in order: analysis workflow first, sample dataset second
	_tab_labels = ["Analyze Sentiments", "Sample Dataset"]
	tab1, tab2 = st.tabs(_tab_labels)

	with tab1:
	    st.header("Analyze Sentiments")
	    st.write("Upload a dataset to analyze sentiments of text data.")

	    # File uploader (CSV only)
	    data_file = st.file_uploader("Upload your CSV file", type=["csv"])

	    if data_file is None:
	        st.write("Please upload a dataset to proceed.")
	    else:
	        # Read the dataset
	        data = pd.read_csv(data_file)

	        # Show the first rows so the user can pick the right column
	        st.subheader("Dataset Preview")
	        st.write(data.head())

	        # Check for text column selection
	        text_column = st.selectbox("Select the column containing text for analysis:", data.columns)

	        if st.button("Analyze Sentiment"):
	            if data.empty:
	                # Nothing to classify or chart when the file has no rows.
	                st.warning("The uploaded dataset has no rows to analyze.")
	            else:
	                # Clean the text column: replace NaN with "" and cast
	                # every value to str before it reaches the tokenizer.
	                data[text_column] = data[text_column].fillna("").astype(str)

	                # Perform sentiment analysis; each result is a
	                # (label, confidence) pair.
	                st.write("Analyzing sentiments...")
	                results = data[text_column].apply(sentiment_analysis)
	                data['Sentiment'] = [label for label, _ in results]
	                data['Confidence'] = [score for _, score in results]

	                # Display results
	                st.subheader("Analysis Results")
	                st.write(data[[text_column, 'Sentiment', 'Confidence']])

	                # Plot sentiment distribution
	                st.subheader("Sentiment Distribution")
	                sentiment_counts = data['Sentiment'].value_counts()
	                # value_counts() orders labels by frequency, so colours are
	                # looked up per label rather than assigned by position.
	                palette = {"POSITIVE": "green", "NEGATIVE": "red"}
	                bar_colors = [palette.get(label, "blue") for label in sentiment_counts.index]
	                fig, ax = plt.subplots()
	                sentiment_counts.plot(kind='bar', ax=ax, color=bar_colors)
	                ax.set_title("Sentiment Distribution")
	                ax.set_xlabel("Sentiment")
	                ax.set_ylabel("Count")
	                st.pyplot(fig)
	                # Release the figure; pyplot otherwise keeps every figure
	                # created across reruns alive.
	                plt.close(fig)

	                # Option to download results
	                st.subheader("Download Results")
	                csv = data.to_csv(index=False)
	                st.download_button(
	                    label="Download Sentiment Analysis Results",
	                    data=csv,
	                    file_name="sentiment_analysis_results.csv",
	                    mime="text/csv",
	                )

	with tab2:
	    st.header("Sample Dataset")
	    st.write("Download a sample dataset to try out the app.")

	    # Five example tweets in a single "Tweet" column, shown on the page
	    # and offered as a CSV download below.
	    sample_tweets = [
	        "I love this product! It's amazing.",
	        "This is the worst service I have ever received.",
	        "I'm not sure how I feel about this.",
	        "Absolutely fantastic experience!",
	        "Terrible. Would not recommend.",
	    ]
	    sample_data = pd.DataFrame({"Tweet": sample_tweets})
	    st.write(sample_data)

	    # Serialize without the index so the file holds only the "Tweet" column.
	    sample_csv = sample_data.to_csv(index=False)
	    st.download_button(
	        label="Download Sample Dataset",
	        data=sample_csv,
	        file_name="sample_twitter_dataset.csv",
	        mime="text/csv",
	    )

	    # Usage instructions; the literal's lines stay at the block margin so
	    # its text is unchanged.
	    usage_steps = """
	1. Go to the Analyze Sentiments tab.
	2. Upload the sample dataset or your own dataset in CSV format.
	3. Select the column containing the text to analyze.
	4. Click Analyze Sentiment to view results and download them.
	"""
	    st.write("Follow these steps:")
	    st.markdown(usage_steps)