Spaces:

Pushp123
/

Twitter_Data_Sentimental_Analysis

Sleeping

App Files Files Community

Pushp123 committed on Apr 10, 2025

Commit

ccc96e3

verified ·

1 Parent(s): 54f4a40

Create app.py

Browse files

Files changed (1) hide show

app.py +272 -0

app.py ADDED Viewed

	@@ -0,0 +1,272 @@

+import gradio as gr
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import nltk
+nltk.download('stopwords', quiet=True)
+from nltk.corpus import stopwords
+from nltk.stem.porter import PorterStemmer
+from sklearn import metrics
+from sklearn.multiclass import OneVsRestClassifier
+from textblob import TextBlob
+from wordcloud import WordCloud
+twitter = pd.read_csv("/content/Twitter_Data.csv")
+twitter.head(5)
+twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
+twitter.head()
+twitter.info()
+twitter.isna().sum()
+twitter.dropna(subset=['clean_text','category'] , inplace=True)
+twitter.isna().sum()
+text = ''
+for tweet in twitter[twitter['category'] == "positive"]['clean_text']:
+  text += f" {tweet}"
+wordcloud = WordCloud(
+width = 3000, height = 2000, background_color = 'black',
+stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
+fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
+plt.imshow(wordcloud, interpolation= 'bilinear')
+plt.axis('off')
+plt.tight_layout(pad=0)
+plt.show()
+del text
+text = ''
+for tweet in twitter[twitter['category'] == "neutral"]['clean_text']:
+  text += f" {tweet}"
+wordcloud = WordCloud(
+width = 3000, height = 2000, background_color = 'black',
+stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
+fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
+plt.imshow(wordcloud, interpolation= 'bilinear')
+plt.axis('off')
+plt.tight_layout(pad=0)
+plt.show()
+del text
+text = ''
+for tweet in twitter[twitter['category'] == "negative"]['clean_text']:
+  text += f" {tweet}"
+wordcloud = WordCloud(
+width = 3000, height = 2000, background_color = 'black',
+stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
+fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
+plt.imshow(wordcloud, interpolation= 'bilinear')
+plt.axis('off')
+plt.tight_layout(pad=0)
+plt.show()
+del text
+print(twitter['category'].value_counts())
+dist = twitter['category'].value_counts()
+def distribution_plot(x, y, name):
+  plt.figure(figsize=(10, 6))
+  sns.barplot(x=x, y=y)
+  plt.title(name)
+  plt.show()
+distribution_plot(x=dist.index, y=dist.values, name="Class Distribution Train")
+pol = lambda x: TextBlob(x).sentiment.polarity
+sub = lambda x: TextBlob(x).sentiment.subjectivity
+twitter['polarity'] = twitter['clean_text'].apply(pol)
+twitter['subjectivity'] = twitter['clean_text'].apply(sub)
+twitter
+# Plot Polarity
+plt.figure(figsize=(10,6))
+plt.hist(twitter['polarity'], bins=20, color='skyblue', edgecolor='black')
+plt.title("Distribution of Polarity")
+plt.xlabel("Polarity")
+plt.ylabel("Frequency")
+plt.grid(True)
+plt.show()
+# Plot Subjectivity
+plt.figure(figsize=(10,6))
+plt.hist(twitter['subjectivity'], bins=20, color='lightgreen', edgecolor='black')
+plt.title("Distribution of Subjectivity")
+plt.xlabel("Subjectivity")
+plt.ylabel("Frequency")
+plt.grid(True)
+plt.show()
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
+from sklearn.feature_extraction.text import TfidfVectorizer
+vectorizer = TfidfVectorizer(max_features=5000)
+X = vectorizer.fit_transform(twitter['clean_text'])
+y = twitter['category'].map({'negative':0, 'neutral':1, 'positive':2})
+X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+lr = LogisticRegression(max_iter=1000)
+lr.fit(X_train,y_train)
+y_pred = lr.predict(X_test)
+print("Accuracy:", accuracy_score(y_test, y_pred))
+print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
+print("Classification Report:\n", classification_report(y_test, y_pred))
+print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+from sklearn.ensemble import RandomForestClassifier
+classifier = RandomForestClassifier(n_estimators=100, random_state=42)
+classifier.fit(X_train, y_train)
+y_pred = classifier.predict(X_test)
+print("Accuracy:", accuracy_score(y_test, y_pred))
+print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
+print("Classification Report:\n", classification_report(y_test, y_pred))
+print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+from sklearn.svm import SVC
+classifier = SVC(kernel='linear', random_state=42)
+classifier.fit(X_train, y_train)
+y_pred = classifier.predict(X_test)
+print("Accuracy:", accuracy_score(y_test, y_pred))
+print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
+print("Classification Report:\n", classification_report(y_test, y_pred))
+print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn .tree import DecisionTreeClassifier
+classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
+classifier.fit(X_train, y_train)
+y_pred = classifier.predict(X_test)
+AdaBoostClassifier
+# Importing necessary libraries
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn .tree import DecisionTreeClassifier
+from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
+vectorizer =  TfidfVectorizer(max_features=5000)
+X = vectorizer.fit_transform(twitter['clean_text'])
+# Encode target label (category) into numeric values
+y = twitter['category'].map({'negative':0, 'neutral':1, 'positive':2})
+# Split the dataset into train and test sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
+classifier.fit(X_train, y_train)
+# Predict probabilities on the test set
+y_probs = classifier.predict_proba(X_test)
+# Calculate ROC curve and AUC for each class
+fpr = {}
+tpr = {}
+roc_auc = {}
+num_classes =3  # Number of classes (negative , neutral, positive)
+for i in range(num_classes):
+  fpr[i], tpr[i], _ =roc_curve(y_test == i, y_probs[:,i])
+# Plot ROC curves
+plt.figure()
+for i in range (num_classes):
+  plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")
+plt.plot([0,1], [0,1], 'k--')  # Diagonal line
+plt.xlim([0.0,1.0])
+plt.ylim([0.0,1.05])
+plt.xlabel("False Positive Rate")
+plt.ylabel("True Positive Rate")
+plt.title("ROC Curves for Multi-Class Classification")
+plt.legend(loc='lower right')
+plt.show()
+# Evaluate the classifier
+print("Accuracy:", accuracy_score(y_test, classifier.predict(X_test)))
+print("F1 Score:", f1_score(y_test, classifier.predict(X_test), average = 'weighted'))
+print("Classification Report:\n", classification_report(y_test, classifier.predict(X_test)))
+print("Confusion Matrix:\n", confusion_matrix(y_test, classifier.predict(X_test)))
+# Function to make predictions
+def predict_sentiment(text):
+    if not text.strip():
+        return "Please enter some text."
+    text_vector = vectorizer.transform([text])
+    pred = classifier.predict(text_vector)[0]
+    sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
+    return sentiment_map[pred]
+# Create Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("## Twitter Sentiment Analyzer")
+    gr.Markdown("Enter a tweet and get its predicted sentiment:")
+    with gr.Row():
+        input_text = gr.Textbox(lines=3, placeholder="Type your tweet here...", label="Tweet")
+    output = gr.Textbox(label="Predicted Sentiment")
+    analyze_btn = gr.Button("Analyze Sentiment")
+    analyze_btn.click(fn=predict_sentiment, inputs=input_text, outputs=output)
+demo.launch()