# app.py — Twitter sentiment analysis demo (Hugging Face Space, commit 1711fde)
import gradio as gr
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
nltk.download('stopwords', quiet=True)
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn import metrics
from sklearn.multiclass import OneVsRestClassifier
from textblob import TextBlob
from wordcloud import WordCloud
# Load the labelled tweets and translate the numeric sentiment codes to names.
twitter = pd.read_csv("Twitter_Data.csv")
twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
# Both the text and the label are required downstream; drop incomplete rows.
twitter.dropna(subset=['clean_text', 'category'], inplace=True)

# Word cloud of positive tweets. A single join is O(n) in total text length,
# unlike repeated `text += ...` in a loop, which is quadratic.
text = " ".join(twitter.loc[twitter['category'] == "positive", 'clean_text'])
wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)
'''
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
plt.imshow(wordcloud, interpolation= 'bilinear')
plt.axis('off')
plt.tight_layout(pad=0)
plt.show()
'''
del text  # release the large concatenated corpus
# Word cloud of neutral tweets. Built with one O(n) join rather than a
# quadratic `+=` accumulation loop.
text = " ".join(twitter.loc[twitter['category'] == "neutral", 'clean_text'])
wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)
'''
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
plt.imshow(wordcloud, interpolation= 'bilinear')
plt.axis('off')
plt.tight_layout(pad=0)
plt.show()
'''
del text  # release the large concatenated corpus
# Word cloud of negative tweets. Built with one O(n) join rather than a
# quadratic `+=` accumulation loop.
text = " ".join(twitter.loc[twitter['category'] == "negative", 'clean_text'])
wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)
'''
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
plt.imshow(wordcloud, interpolation= 'bilinear')
plt.axis('off')
plt.tight_layout(pad=0)
plt.show()
'''
del text  # release the large concatenated corpus
# Per-class tweet counts (previously also printed for debugging).
dist = twitter['category'].value_counts()


def distribution_plot(x, y, name):
    """Draw a bar chart of *y* over *x* with title *name* and show it."""
    plt.figure(figsize=(10, 6))
    ax = sns.barplot(x=x, y=y)
    ax.set_title(name)
    plt.show()


distribution_plot(x=dist.index, y=dist.values, name="Class Distribution Train")
# TextBlob sentiment features: polarity is in [-1, 1], subjectivity in [0, 1].
# (The original bound lambdas to names — PEP 8 E731 — and ended with a bare
# `twitter` expression, a notebook-display leftover that is a no-op in a
# script; both cleaned up here.)
twitter['polarity'] = twitter['clean_text'].apply(lambda t: TextBlob(t).sentiment.polarity)
twitter['subjectivity'] = twitter['clean_text'].apply(lambda t: TextBlob(t).sentiment.subjectivity)
'''
# Plot Polarity
plt.figure(figsize=(10,6))
plt.hist(twitter['polarity'], bins=20, color='skyblue', edgecolor='black')
plt.title("Distribution of Polarity")
plt.xlabel("Polarity")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
# Plot Subjectivity
plt.figure(figsize=(10,6))
plt.hist(twitter['subjectivity'], bins=20, color='lightgreen', edgecolor='black')
plt.title("Distribution of Subjectivity")
plt.xlabel("Subjectivity")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
'''
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features over the cleaned tweets, capped at the 5000 strongest terms.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

# Numeric encoding of the sentiment labels.
label_codes = {'negative': 0, 'neutral': 1, 'positive': 2}
y = twitter['category'].map(label_codes)

# Reproducible 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Baseline model: logistic regression on the sparse TF-IDF matrix.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
# NOTE(review): disabled experiment code kept as a string — LogReg metrics plus
# RandomForest and linear-SVC alternatives. It only runs if re-enabled; the
# mid-string imports and `classifier` rebinding would take effect then.
'''
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
from sklearn.svm import SVC
classifier = SVC(kernel='linear', random_state=42)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
'''
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Train an AdaBoost ensemble (default decision-stump base estimator) on the
# TF-IDF features. (A stray bare `AdaBoostClassifier` expression — a no-op
# notebook leftover — was removed from the end of this section.)
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
# Rebuild features/labels, re-split, and retrain AdaBoost to obtain per-class
# probabilities.
# NOTE(review): identical settings to the sections above (max_features=5000,
# test_size=0.2, random_state=42), so this refit reproduces the same model;
# the seven duplicate imports that were here (numpy, matplotlib, sklearn)
# are already imported earlier in this file and have been removed.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

# Encode target label (category) into numeric values.
y = twitter['category'].map({'negative': 0, 'neutral': 1, 'positive': 2})

# Split the dataset into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Predict per-class probabilities on the test set (consumed only by the
# disabled ROC-plotting block below, but cheap to keep).
y_probs = classifier.predict_proba(X_test)
# NOTE(review): disabled ROC-plotting block. If re-enabled it will raise a
# KeyError: `roc_auc` is declared but never populated (e.g. with
# roc_auc[i] = auc(fpr[i], tpr[i])) before `roc_auc[i]` is read in the label.
'''
# Calculate ROC curve and AUC for each class
fpr = {}
tpr = {}
roc_auc = {}
num_classes =3 # Number of classes (negative , neutral, positive)
for i in range(num_classes):
fpr[i], tpr[i], _ =roc_curve(y_test == i, y_probs[:,i])
# Plot ROC curves
plt.figure()
for i in range (num_classes):
plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")
plt.plot([0,1], [0,1], 'k--') # Diagonal line
plt.xlim([0.0,1.0])
plt.ylim([0.0,1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curves for Multi-Class Classification")
plt.legend(loc='lower right')
plt.show()
# Evaluate the classifier
print("Accuracy:", accuracy_score(y_test, classifier.predict(X_test)))
print("F1 Score:", f1_score(y_test, classifier.predict(X_test), average = 'weighted'))
print("Classification Report:\n", classification_report(y_test, classifier.predict(X_test)))
print("Confusion Matrix:\n", confusion_matrix(y_test, classifier.predict(X_test)))
'''
# Prediction entry point used by the Gradio UI.
def predict_sentiment(text):
    """Classify *text* with the trained model and return its sentiment label."""
    if not text.strip():
        return "Please enter some text."
    labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
    features = vectorizer.transform([text])
    prediction = classifier.predict(features)[0]
    return labels[prediction]
# ---- Gradio UI: textbox in, predicted sentiment out ----
with gr.Blocks() as demo:
    gr.Markdown("## Twitter Sentiment Analyzer")
    gr.Markdown("Enter a tweet and get its predicted sentiment:")

    with gr.Row():
        input_text = gr.Textbox(lines=3, placeholder="Type your tweet here...", label="Tweet")
        output = gr.Textbox(label="Predicted Sentiment")

    analyze_btn = gr.Button("Analyze Sentiment")
    # Wire the button to the predictor defined above.
    analyze_btn.click(fn=predict_sentiment, inputs=input_text, outputs=output)

demo.launch()