"""Twitter sentiment analyzer.

Loads the labelled tweets from ``Twitter_Data.csv``, plots the class
distribution, adds TextBlob polarity/subjectivity columns, trains an AdaBoost
classifier on TF-IDF features and serves the model through a Gradio UI.

The exploratory plots and model comparisons that used to be disabled inside
string literals are kept as helper functions and run only when
``RUN_DIAGNOSTICS`` is set to ``True``.
"""

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn import metrics
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    auc,
    classification_report,
    confusion_matrix,
    f1_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from textblob import TextBlob
from wordcloud import WordCloud

nltk.download('stopwords', quiet=True)

# Set to True to also render the word clouds and histograms, fit the baseline
# models and print/plot the evaluation reports. Off by default because these
# steps are slow and were disabled in the original script.
RUN_DIAGNOSTICS = False

# Raw numeric labels in the CSV -> readable category names.
RAW_LABEL_NAMES = {-1: 'negative', 0: 'neutral', 1: 'positive'}
# Category names -> integer class ids used to train the classifier.
LABEL_TO_ID = {'negative': 0, 'neutral': 1, 'positive': 2}
# Integer class ids -> text shown in the UI.
ID_TO_SENTIMENT = {0: "Negative", 1: "Neutral", 2: "Positive"}


def build_wordcloud(frame, category):
    """Return a WordCloud built from every ``clean_text`` of one category.

    Args:
        frame: DataFrame with ``category`` and ``clean_text`` columns.
        category: Category name to select (e.g. ``"positive"``).

    Returns:
        The generated ``WordCloud`` object.
    """
    tweets = frame.loc[frame['category'] == category, 'clean_text']
    # join() is linear; the previous `text += ...` loop re-copied the string
    # on every iteration.
    text = " ".join(str(tweet) for tweet in tweets)
    return WordCloud(
        width=3000,
        height=2000,
        background_color='black',
        stopwords=set(stopwords.words("english")),
    ).generate(text)


def show_wordcloud(cloud):
    """Display a word cloud full-bleed on a black figure."""
    plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout(pad=0)
    plt.show()


def distribution_plot(x, y, name):
    """Show a bar plot of ``y`` against ``x`` titled ``name``."""
    plt.figure(figsize=(10, 6))
    sns.barplot(x=x, y=y)
    plt.title(name)
    plt.show()


def plot_histogram(values, title, xlabel, color):
    """Show a 20-bin histogram of ``values`` with the given title and color."""
    plt.figure(figsize=(10, 6))
    plt.hist(values, bins=20, color=color, edgecolor='black')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()


def report_metrics(y_true, y_pred):
    """Print accuracy, weighted F1, classification report and confusion matrix."""
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("F1 Score:", f1_score(y_true, y_pred, average='weighted'))
    print("Classification Report:\n", classification_report(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))


def plot_roc_curves(y_true, y_probs, num_classes=3):
    """Plot one-vs-rest ROC curves, one per class id in ``range(num_classes)``.

    Args:
        y_true: True integer class ids.
        y_probs: Array of predicted probabilities; column ``i`` is used as the
            score for class id ``i``.
        num_classes: Number of classes to plot.
    """
    plt.figure()
    for i in range(num_classes):
        fpr, tpr, _ = roc_curve(y_true == i, y_probs[:, i])
        # The disabled original read roc_auc[i] without ever computing it,
        # which would have raised KeyError; compute the AUC here instead.
        plt.plot(fpr, tpr, label=f"Class {i} (AUC = {auc(fpr, tpr):.2f})")
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line (chance level)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curves for Multi-Class Classification")
    plt.legend(loc='lower right')
    plt.show()


def predict_sentiment(text):
    """Classify one tweet with the trained model.

    Args:
        text: Raw tweet text from the UI.

    Returns:
        ``"Negative"``, ``"Neutral"`` or ``"Positive"``, or a prompt asking
        for input when ``text`` is ``None``, empty or only whitespace.
    """
    if text is None or not text.strip():
        return "Please enter some text."
    text_vector = vectorizer.transform([text])
    pred = classifier.predict(text_vector)[0]
    return ID_TO_SENTIMENT[int(pred)]


# --- Load and clean the data -------------------------------------------------
twitter = pd.read_csv("Twitter_Data.csv")
twitter['category'] = twitter['category'].replace(RAW_LABEL_NAMES)
twitter.dropna(subset=['clean_text', 'category'], inplace=True)

if RUN_DIAGNOSTICS:
    for label in ('positive', 'neutral', 'negative'):
        show_wordcloud(build_wordcloud(twitter, label))

# --- Class distribution ------------------------------------------------------
dist = twitter['category'].value_counts()
distribution_plot(x=dist.index, y=dist.values, name="Class Distribution Train")

# --- TextBlob sentiment columns ----------------------------------------------
# Parse each tweet once and read both scores from the same result instead of
# building a TextBlob twice per tweet.
sentiments = [TextBlob(tweet).sentiment for tweet in twitter['clean_text']]
twitter['polarity'] = [score.polarity for score in sentiments]
twitter['subjectivity'] = [score.subjectivity for score in sentiments]

if RUN_DIAGNOSTICS:
    plot_histogram(twitter['polarity'], "Distribution of Polarity",
                   "Polarity", 'skyblue')
    plot_histogram(twitter['subjectivity'], "Distribution of Subjectivity",
                   "Subjectivity", 'lightgreen')

# --- Features, split and model -----------------------------------------------
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

# Encode target label (category) into numeric values
y = twitter['category'].map(LABEL_TO_ID)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The vectorizer, split and AdaBoost model were previously built twice with
# identical parameters; they are now built once.
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

if RUN_DIAGNOSTICS:
    # Baseline models, fitted only for comparison with the served classifier.
    baselines = (
        LogisticRegression(max_iter=1000),
        RandomForestClassifier(n_estimators=100, random_state=42),
        SVC(kernel='linear', random_state=42),
    )
    for baseline in baselines:
        baseline.fit(X_train, y_train)
        report_metrics(y_test, baseline.predict(X_test))

    # Evaluate the served classifier
    plot_roc_curves(y_test, classifier.predict_proba(X_test))
    report_metrics(y_test, classifier.predict(X_test))

# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Twitter Sentiment Analyzer")
    gr.Markdown("Enter a tweet and get its predicted sentiment:")

    with gr.Row():
        input_text = gr.Textbox(lines=3, placeholder="Type your tweet here...", label="Tweet")
        output = gr.Textbox(label="Predicted Sentiment")

    analyze_btn = gr.Button("Analyze Sentiment")
    analyze_btn.click(fn=predict_sentiment, inputs=input_text, outputs=output)

demo.launch()