Spaces:

Pushp123
/

Twitter_Data_Sentimental_Analysis

Sleeping

File size: 7,667 Bytes


import gradio as gr
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
nltk.download('stopwords', quiet=True)
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn import metrics
from sklearn.multiclass import OneVsRestClassifier
from textblob import TextBlob
from wordcloud import WordCloud

# Load the raw tweets; the file is expected next to the working directory.
twitter = pd.read_csv("Twitter_Data.csv")

# Swap the numeric sentiment codes for readable class names.
twitter['category'] = twitter['category'].replace(
    {
        -1: 'negative',
        0: 'neutral',
        1: 'positive',
    }
)

# Discard rows that lack either the tweet text or its label; the frame is
# modified in place so `twitter` keeps referring to the same object.
twitter.dropna(
    subset=['clean_text', 'category'],
    inplace=True,
)



# Word clouds, one per sentiment class.
#
# The three classes used to be handled by three copy-pasted sections that each
# grew a string with `+=` inside a loop and rebuilt the stop-word set.  The
# shared logic now lives in one helper and the stop words are loaded once.
_WORDCLOUD_STOPWORDS = set(nltk.corpus.stopwords.words("english"))


def _build_wordcloud(category):
    """Return a WordCloud generated from every tweet labelled *category*.

    Args:
        category: Class name as stored in ``twitter['category']``
            ("positive", "neutral" or "negative").

    Returns:
        The ``WordCloud`` produced by ``generate`` on the concatenated tweets.
    """
    tweets = twitter[twitter['category'] == category]['clean_text']
    # Every tweet is prefixed with a single space, which yields exactly the
    # same corpus string as the former `text += f" {tweet}"` loop while
    # avoiding repeated re-allocation of an ever-growing string.
    corpus = "".join(f" {tweet}" for tweet in tweets)
    return WordCloud(
        width=3000,
        height=2000,
        background_color='black',
        stopwords=_WORDCLOUD_STOPWORDS,
    ).generate(corpus)


# Same order as before (positive, neutral, negative), so `wordcloud` ends up
# bound to the cloud of the negative class.
#
# Displaying the clouds is disabled.  To inspect one, render it inside the
# loop with matplotlib:
#     plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
#     plt.imshow(wordcloud, interpolation='bilinear')
#     plt.axis('off')
#     plt.tight_layout(pad=0)
#     plt.show()
for _category in ("positive", "neutral", "negative"):
    wordcloud = _build_wordcloud(_category)

#print(twitter['category'].value_counts())

# Number of tweets per sentiment class.
dist = twitter['category'].value_counts()


def distribution_plot(x, y, name):
    """Display a bar chart of the values *y* for the categories *x*.

    Args:
        x: Category labels placed on the horizontal axis.
        y: Bar heights, one per entry of *x*.
        name: Text used as the chart title.
    """
    _, axes = plt.subplots(figsize=(10, 6))
    sns.barplot(x=x, y=y, ax=axes)
    axes.set_title(name)
    plt.show()


distribution_plot(dist.index, dist.values, "Class Distribution Train")

def pol(x):
    """Return the TextBlob polarity score of the text *x*."""
    return TextBlob(x).sentiment.polarity


def sub(x):
    """Return the TextBlob subjectivity score of the text *x*."""
    return TextBlob(x).sentiment.subjectivity


# Analyse every tweet once and read both scores from the same result; applying
# `pol` and `sub` separately would build and analyse a TextBlob twice per tweet.
_sentiments = [TextBlob(tweet).sentiment for tweet in twitter['clean_text']]
twitter['polarity'] = [s.polarity for s in _sentiments]
twitter['subjectivity'] = [s.subjectivity for s in _sentiments]
# Disabled exploratory plots, kept as an inert string literal: histograms of
# the 'polarity' and 'subjectivity' columns.  Nothing inside the quotes runs.
'''
# Plot Polarity

plt.figure(figsize=(10,6))
plt.hist(twitter['polarity'], bins=20, color='skyblue', edgecolor='black')
plt.title("Distribution of Polarity")
plt.xlabel("Polarity")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()

# Plot Subjectivity

plt.figure(figsize=(10,6))
plt.hist(twitter['subjectivity'], bins=20, color='lightgreen', edgecolor='black')
plt.title("Distribution of Subjectivity")
plt.xlabel("Subjectivity")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
'''
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features over the cleaned tweets, with the vocabulary capped at
# 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

# Integer-encode the class names for the estimators.
y = twitter['category'].map(
    {
        'negative': 0,
        'neutral': 1,
        'positive': 2,
    }
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Logistic-regression model and its predictions on the held-out rows
# (`fit` returns the estimator itself, so the calls can be chained).
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = lr.predict(X_test)
# Disabled model-comparison code, kept as an inert string literal: metric
# printouts for the current `y_pred`, followed by RandomForest and linear-SVC
# experiments that print the same metrics.  Nothing inside the quotes runs.
'''
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

from sklearn.ensemble import RandomForestClassifier

classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

from sklearn.svm import SVC

classifier = SVC(kernel='linear', random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
'''
from sklearn.ensemble import AdaBoostClassifier
from sklearn .tree import DecisionTreeClassifier

# AdaBoost model (100 boosting rounds, fixed seed) trained on the TF-IDF
# features; this is the estimator the prediction function uses.
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = classifier.predict(X_test)

# Importing necessary libraries

import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn .tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc


# The TF-IDF matrix, label encoding, train/test split and AdaBoost model built
# earlier in this script use exactly the same data, parameters and seeds
# (max_features=5000, test_size=0.2, random_state=42, n_estimators=100).
# Rebuilding them here produced identical objects while repeating the most
# expensive work at startup, so the existing `vectorizer`, `X_test` and
# `classifier` are reused instead.

# Predict class probabilities on the test set (one column per class).
y_probs = classifier.predict_proba(X_test)
# Disabled ROC/metrics evaluation, kept as an inert string literal.  The
# snippet previously never filled `roc_auc`, so enabling it failed with a
# KeyError while building the legend labels; the AUC is now computed per class.
'''
# Calculate ROC curve and AUC for each class
fpr = {}
tpr = {}
roc_auc = {}
num_classes =3  # Number of classes (negative , neutral, positive)


for i in range(num_classes):
  fpr[i], tpr[i], _ =roc_curve(y_test == i, y_probs[:,i])
  roc_auc[i] = auc(fpr[i], tpr[i])


# Plot ROC curves
plt.figure()
for i in range (num_classes):
  plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")

plt.plot([0,1], [0,1], 'k--')  # Diagonal line
plt.xlim([0.0,1.0])
plt.ylim([0.0,1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curves for Multi-Class Classification")
plt.legend(loc='lower right')
plt.show()


# Evaluate the classifier
print("Accuracy:", accuracy_score(y_test, classifier.predict(X_test)))
print("F1 Score:", f1_score(y_test, classifier.predict(X_test), average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, classifier.predict(X_test)))
print("Confusion Matrix:\n", confusion_matrix(y_test, classifier.predict(X_test)))



'''





# Function to make predictions
def predict_sentiment(text):
    """Predict the sentiment of a single tweet.

    Args:
        text: Tweet text entered by the user. ``None`` and blank or
            whitespace-only strings are treated as missing input.

    Returns:
        "Negative", "Neutral" or "Positive" for real input, or the prompt
        "Please enter some text." when no text was supplied.
    """
    # Guard against a cleared/absent textbox value as well as blank input so
    # the callback never fails on `.strip()`.
    if text is None or not text.strip():
        return "Please enter some text."

    text_vector = vectorizer.transform([text])
    # Convert the predicted label to a plain int so the lookup below does not
    # depend on the numeric scalar type returned by the estimator.
    pred = int(classifier.predict(text_vector)[0])
    sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
    return sentiment_map[pred]

# Create Gradio UI
with gr.Blocks() as demo:
    # Page heading and short usage hint.
    gr.Markdown("## Twitter Sentiment Analyzer")
    gr.Markdown("Enter a tweet and get its predicted sentiment:")

    with gr.Row():
        input_text = gr.Textbox(lines=3, placeholder="Type your tweet here...", label="Tweet")

    output = gr.Textbox(label="Predicted Sentiment")

    # Clicking the button sends the textbox content to predict_sentiment and
    # writes the returned label into the output box.
    analyze_btn = gr.Button("Analyze Sentiment")
    analyze_btn.click(fn=predict_sentiment, inputs=input_text, outputs=output)

# Start the web server only when this file is executed as a script, so that
# importing the module (tests, tooling) does not launch a blocking server.
if __name__ == "__main__":
    demo.launch()