# Hugging Face Spaces page residue (author "reysarms", commit d23d393 "updated environment")
# — metadata from the hosting UI, not part of the application code.
import streamlit as st
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
def load_data():
    """Load the Kaggle Twitter-sentiment CSV and return tweet/sentiment pairs.

    Reads ``twitter_training.csv`` (no header row), labels the four columns,
    keeps only the ``tweet`` and ``sentiment`` columns, and drops rows with
    missing values.
    """
    frame = pd.read_csv(
        "twitter_training.csv",
        header=None,
        names=["id", "entity", "sentiment", "tweet"],  # file ships without a header
        encoding="utf-8",
    )
    # Only the text and its label matter downstream; discard incomplete rows.
    return frame[["tweet", "sentiment"]].dropna()
# Materialize the dataset at module scope so the steps below can use it.
# NOTE(review): Streamlit re-executes the whole script per interaction, so this
# presumably re-reads the CSV on every rerun — consider st.cache_data; confirm.
data = load_data()
# Preprocess text
def clean_text(text):
    """Normalize a tweet for TF-IDF: drop URLs, keep letters, lowercase.

    Non-letter characters are replaced with a space (not deleted) so that
    punctuation-separated words do not fuse into one token, e.g.
    "good,bad" becomes "good bad" rather than "goodbad".  Runs of
    whitespace (including gaps left by URL removal) are collapsed.
    """
    text = re.sub(r"http\S+|www\S+", "", text)  # strip URLs first
    # Substitute a space so adjacent words stay separate tokens.
    text = re.sub(r"[^a-zA-Z ]", " ", text)
    # split()/join collapses repeated spaces and trims the ends.
    return " ".join(text.lower().split())
# Normalize every tweet with clean_text so the vectorizer sees uniform text.
data['clean_text'] = data['tweet'].apply(clean_text)
# Encode the string sentiment labels as integers for the classifier.
# label_encoder is kept at module scope so predictions can be decoded later
# with inverse_transform.
label_encoder = LabelEncoder()
data['sentiment_encoded'] = label_encoder.fit_transform(data['sentiment'])
# Train Random Forest model
def train_model(df=None):
    """Fit a TF-IDF + RandomForest pipeline on cleaned tweets.

    Parameters
    ----------
    df : DataFrame, optional
        Must contain 'clean_text' and 'sentiment_encoded' columns.  Defaults
        to the module-level ``data`` (backward compatible with the original
        zero-argument call), but accepting it as a parameter lets callers
        train on any prepared dataset.

    Returns
    -------
    sklearn Pipeline of TfidfVectorizer -> RandomForestClassifier.
    """
    if df is None:
        df = data  # fall back to the globally loaded dataset
    # NOTE(review): the held-out split is never evaluated here — X_test/y_test
    # are unused; the split only shrinks the training set. Kept for identical
    # training behavior (same random_state => same split).
    X_train, X_test, y_train, y_test = train_test_split(
        df['clean_text'], df['sentiment_encoded'],
        test_size=0.2, random_state=42)
    pipeline = make_pipeline(
        TfidfVectorizer(),
        RandomForestClassifier(n_estimators=100, random_state=42),
    )
    pipeline.fit(X_train, y_train)
    return pipeline
# NOTE(review): Streamlit reruns this whole script on every widget interaction,
# so the forest is presumably retrained per click — consider wrapping in
# st.cache_resource; confirm against the deployed Streamlit version.
model = train_model()

# Streamlit UI
st.title("📢 Twitter Sentiment Analysis with Random Forest")
st.write("Enter a tweet to analyze its sentiment!")

# User input
tweet_input = st.text_area("Enter Tweet:")
if st.button("Analyze Sentiment"):
    # Guard: predicting on an empty/whitespace string yields a meaningless
    # result, so ask for input instead.
    if not tweet_input.strip():
        st.warning("Please enter a tweet before analyzing.")
    else:
        cleaned_tweet = clean_text(tweet_input)
        prediction = model.predict([cleaned_tweet])[0]
        # Decode the integer class back to its original sentiment string.
        sentiment_result = label_encoder.inverse_transform([prediction])[0]
        st.success(f"Predicted Sentiment: {sentiment_result}")

st.write("Dataset: [Twitter Entity Sentiment Analysis](https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis/data)")