Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.preprocessing import LabelEncoder | |
| # Load dataset from Kaggle dataset file | |
def load_data():
    """Load the tweet/sentiment pairs used for training.

    Reads ``twitter_training.csv`` (no header row) from the current working
    directory, names its four columns ``id``, ``entity``, ``sentiment`` and
    ``tweet``, keeps only ``tweet`` and ``sentiment``, and drops every row
    that has a missing value in either of them.

    Returns:
        pandas.DataFrame: columns ``["tweet", "sentiment"]``; the original
        row index is kept (rows removed by ``dropna`` leave gaps).
    """
    frame = pd.read_csv("twitter_training.csv", header=None, encoding='utf-8')
    frame.columns = ["id", "entity", "sentiment", "tweet"]  # Rename columns
    # Return a brand-new frame instead of calling dropna(inplace=True) on a
    # column selection: in-place edits on a derived frame are the classic
    # chained-assignment hazard in pandas and buy nothing here.
    return frame[["tweet", "sentiment"]].dropna()
# Streamlit re-executes this script from the top on every widget interaction.
# Routing the call through st.cache_data parses the CSV once per server
# process; each rerun then receives its own copy of the cached frame, so the
# column assignments made further down never touch the cached original.
data = st.cache_data(load_data)()
| # Preprocess text | |
def clean_text(text):
    """Normalise a tweet into lowercase words separated by single spaces.

    Steps, in order:
      1. drop URLs (anything starting with ``http`` or ``www`` up to the
         next whitespace);
      2. turn every whitespace character (tab, newline, ...) into a plain
         space so that words on different lines are not glued together when
         non-letters are stripped;
      3. remove every character that is not an ASCII letter or a space;
      4. lowercase and collapse runs of spaces, trimming both ends.

    Args:
        text: The raw tweet. ``None`` yields ``""``; any other non-string
            value is converted with ``str()`` first.

    Returns:
        str: the cleaned text, possibly empty.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        # re.sub raises TypeError on numbers; a numeric cell in the tweet
        # column should simply clean down to an empty string.
        text = str(text)
    text = re.sub(r"http\S+|www\S+", "", text)  # Remove URLs
    text = re.sub(r"\s", " ", text)  # Tabs/newlines become word separators
    text = re.sub(r"[^a-zA-Z ]", "", text)  # Keep only letters and spaces
    # split()/join collapses the space runs left behind by removed tokens.
    return " ".join(text.lower().split())
# Run every raw tweet through clean_text and keep the result next to it
data['clean_text'] = data['tweet'].map(clean_text)

# Encode labels: learn the sentiment classes, then store their integer codes
label_encoder = LabelEncoder()
label_encoder.fit(data['sentiment'])
data['sentiment_encoded'] = label_encoder.transform(data['sentiment'])
| # Train Random Forest model | |
@st.cache_resource
def train_model():
    """Fit the TF-IDF + random-forest sentiment classifier.

    Reads the module-level ``data`` frame, using ``data['clean_text']`` as
    input and ``data['sentiment_encoded']`` as target. A fixed 80/20 split
    (``random_state=42``) is taken and the pipeline is fitted on the 80%
    portion only.

    The function is wrapped in ``st.cache_resource`` because Streamlit
    re-executes the script on every interaction; without caching the forest
    would be retrained on each button click. The cached model is reused
    until the server process restarts or the cache is cleared.

    Returns:
        sklearn.pipeline.Pipeline: the fitted ``TfidfVectorizer`` ->
        ``RandomForestClassifier`` pipeline.
    """
    # The held-out 20% is not evaluated anywhere; the split is kept so the
    # training set stays exactly what it has always been.
    X_train, _X_test, y_train, _y_test = train_test_split(
        data['clean_text'], data['sentiment_encoded'], test_size=0.2, random_state=42)
    pipeline = make_pipeline(
        TfidfVectorizer(),
        RandomForestClassifier(n_estimators=100, random_state=42),
    )
    pipeline.fit(X_train, y_train)
    return pipeline
model = train_model()

# Streamlit UI
st.title("📢 Twitter Sentiment Analysis with Random Forest")
st.write("Enter a tweet to analyze its sentiment!")

# User input
tweet_input = st.text_area("Enter Tweet:")
if st.button("Analyze Sentiment"):
    cleaned_tweet = clean_text(tweet_input)
    if not cleaned_tweet:
        # Nothing survives cleaning (blank box, or only digits/symbols/URLs).
        # Predicting on an empty string would hand the forest an all-zero
        # feature vector and display an arbitrary label, so ask for input.
        st.warning("Please enter a tweet containing some text to analyze.")
    else:
        prediction = model.predict([cleaned_tweet])[0]
        # Map the integer class back to the original sentiment label.
        sentiment_result = label_encoder.inverse_transform([prediction])[0]
        st.success(f"Predicted Sentiment: {sentiment_result}")

st.write("Dataset: [Twitter Entity Sentiment Analysis](https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis/data)")