|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import seaborn as sns |
|
|
import matplotlib.pyplot as plt |
|
|
import nltk |
|
|
nltk.download('stopwords', quiet=True) |
|
|
from nltk.corpus import stopwords |
|
|
from nltk.stem.porter import PorterStemmer |
|
|
from sklearn import metrics |
|
|
from sklearn.multiclass import OneVsRestClassifier |
|
|
from textblob import TextBlob |
|
|
from wordcloud import WordCloud |
|
|
|
|
|
# Load the labelled tweet dataset and normalise it for the pipeline below.
# Expected columns: 'clean_text' (tweet body) and 'category' (-1/0/1 code).
label_names = {-1: 'negative', 0: 'neutral', 1: 'positive'}

twitter = pd.read_csv("Twitter_Data.csv")

# Map the numeric sentiment codes onto human-readable labels.
twitter['category'] = twitter['category'].replace(label_names)

# Rows missing either the text or the label are useless for training.
twitter = twitter.dropna(subset=['clean_text', 'category'])
|
|
|
|
|
|
|
|
|
|
|
# Word cloud for tweets labelled "positive".
# " ".join(...) builds the corpus in a single pass; the previous
# `text += f" {tweet}"` loop was quadratic in the number of tweets.
text = " ".join(twitter[twitter['category'] == "positive"]['clean_text'])

wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)

# Uncomment to render the cloud:
# fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.tight_layout(pad=0)
# plt.show()

# Release the (potentially very large) concatenated corpus.
del text
|
|
|
|
|
# Word cloud for tweets labelled "neutral".
# " ".join(...) builds the corpus in a single pass; the previous
# `text += f" {tweet}"` loop was quadratic in the number of tweets.
text = " ".join(twitter[twitter['category'] == "neutral"]['clean_text'])

wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)

# Uncomment to render the cloud:
# fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.tight_layout(pad=0)
# plt.show()

# Release the (potentially very large) concatenated corpus.
del text
|
|
|
|
|
# Word cloud for tweets labelled "negative".
# " ".join(...) builds the corpus in a single pass; the previous
# `text += f" {tweet}"` loop was quadratic in the number of tweets.
text = " ".join(twitter[twitter['category'] == "negative"]['clean_text'])

wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)

# Uncomment to render the cloud:
# fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.tight_layout(pad=0)
# plt.show()

# Release the (potentially very large) concatenated corpus.
del text
|
|
|
|
|
|
|
|
|
|
|
# How many training examples fall into each sentiment class?
dist = twitter['category'].value_counts()


def distribution_plot(x, y, name):
    """Render a bar chart of *y* against *x*, titled *name*, and show it."""
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=x, y=y, ax=ax)
    ax.set_title(name)
    plt.show()


distribution_plot(x=dist.index, y=dist.values, name="Class Distribution Train")
|
|
|
|
|
# Lexicon-based sentiment features from TextBlob.
# Defined with `def` rather than `lambda` assignment (PEP 8 E731); the
# names `pol` / `sub` are kept so any other callers keep working.
def pol(x):
    """TextBlob polarity of *x*, in [-1.0, 1.0] (negative .. positive)."""
    return TextBlob(x).sentiment.polarity


def sub(x):
    """TextBlob subjectivity of *x*, in [0.0, 1.0] (objective .. subjective)."""
    return TextBlob(x).sentiment.subjectivity


twitter['polarity'] = twitter['clean_text'].apply(pol)
twitter['subjectivity'] = twitter['clean_text'].apply(sub)

# (A bare `twitter` expression — notebook-style display residue with no
# effect in a script — was removed here.)

# Uncomment to plot the polarity distribution:
# plt.figure(figsize=(10, 6))
# plt.hist(twitter['polarity'], bins=20, color='skyblue', edgecolor='black')
# plt.title("Distribution of Polarity")
# plt.xlabel("Polarity")
# plt.ylabel("Frequency")
# plt.grid(True)
# plt.show()

# Uncomment to plot the subjectivity distribution:
# plt.figure(figsize=(10, 6))
# plt.hist(twitter['subjectivity'], bins=20, color='lightgreen', edgecolor='black')
# plt.title("Distribution of Subjectivity")
# plt.xlabel("Subjectivity")
# plt.ylabel("Frequency")
# plt.grid(True)
# plt.show()
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
|
|
|
# Vectorise the cleaned tweets into TF-IDF features (top 5000 terms).
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

# Encode the string labels as integers for scikit-learn.
y = twitter['category'].map({'negative': 0, 'neutral': 1, 'positive': 2})

# Hold out 20% of the data for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Baseline model: logistic regression on the TF-IDF features.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
|
|
# The disabled experiments below were previously kept inside a no-op
# triple-quoted string literal (evaluated and discarded at runtime);
# converted to real comments.

# Evaluation of the Logistic Regression baseline (disabled):
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Random Forest experiment (disabled):
# from sklearn.ensemble import RandomForestClassifier
# classifier = RandomForestClassifier(n_estimators=100, random_state=42)
# classifier.fit(X_train, y_train)
# y_pred = classifier.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Linear SVM experiment (disabled):
# from sklearn.svm import SVC
# classifier = SVC(kernel='linear', random_state=42)
# classifier.fit(X_train, y_train)
# y_pred = classifier.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier  # imported for experimentation; currently unused

# Boosted ensemble of shallow trees on the same TF-IDF split.
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

# (A bare `AdaBoostClassifier` expression statement — a no-op — was
# removed here.)
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.ensemble import AdaBoostClassifier |
|
|
from sklearn .tree import DecisionTreeClassifier |
|
|
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc |
|
|
|
|
|
|
|
|
# Re-fit the TF-IDF features and rebuild the train/test split so this
# section stands on its own (it repeats the earlier preprocessing).
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

y = twitter['category'].map({'negative': 0, 'neutral': 1, 'positive': 2})

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# AdaBoost model; `vectorizer` and `classifier` from this section are the
# objects used by the Gradio app further down.
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Per-class probability estimates on the held-out set (for ROC analysis).
y_probs = classifier.predict_proba(X_test)
|
|
# ROC analysis (disabled). Previously stored in a no-op triple-quoted
# string literal; converted to comments. NOTE(review): the original code
# declared `roc_auc = {}` but never filled it, so the plot labels would
# raise KeyError if re-enabled — the AUC computation is added below.
#
# fpr, tpr, roc_auc = {}, {}, {}
# num_classes = 3  # negative, neutral, positive
#
# for i in range(num_classes):
#     # One-vs-rest ROC curve for class i.
#     fpr[i], tpr[i], _ = roc_curve(y_test == i, y_probs[:, i])
#     roc_auc[i] = auc(fpr[i], tpr[i])  # was missing in the original
#
# plt.figure()
# for i in range(num_classes):
#     plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")
#
# plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel("False Positive Rate")
# plt.ylabel("True Positive Rate")
# plt.title("ROC Curves for Multi-Class Classification")
# plt.legend(loc='lower right')
# plt.show()
#
# Final evaluation of the AdaBoost model (disabled):
# y_pred = classifier.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_sentiment(text):
    """Classify one tweet with the trained TF-IDF + AdaBoost model.

    Returns "Negative", "Neutral" or "Positive", or a prompt string when
    the input is blank. Relies on the module-level `vectorizer` and
    `classifier` fitted above.
    """
    if not text.strip():
        return "Please enter some text."

    # Class indices 0/1/2 map positionally onto these labels.
    labels = ("Negative", "Neutral", "Positive")
    features = vectorizer.transform([text])
    return labels[classifier.predict(features)[0]]
|
|
|
|
|
|
|
|
# Gradio front-end: a single text box wired to predict_sentiment.
# NOTE: component creation order defines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("## Twitter Sentiment Analyzer")
    gr.Markdown("Enter a tweet and get its predicted sentiment:")

    with gr.Row():
        # Multi-line input for the tweet text.
        input_text = gr.Textbox(lines=3, placeholder="Type your tweet here...", label="Tweet")

    # Box displaying the model's verdict.
    output = gr.Textbox(label="Predicted Sentiment")

    analyze_btn = gr.Button("Analyze Sentiment")
    # Clicking the button routes the textbox contents through predict_sentiment.
    analyze_btn.click(fn=predict_sentiment, inputs=input_text, outputs=output)

# Start the local web server (blocks until interrupted).
demo.launch()
|
|
|