Spaces:

Pushp123
/

Twitter_Data_Sentimental_Analysis

Sleeping

App Files Files Community

Pushp123 committed on Apr 10, 2025

Commit

8335989

verified ·

1 Parent(s): c395d07

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -24

app.py CHANGED Viewed

@@ -14,20 +14,15 @@ from textblob import TextBlob
 from wordcloud import WordCloud
 twitter = pd.read_csv("Twitter_Data.csv")
-twitter.head(5)
-twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
-twitter.head()
-twitter.info()
-twitter.isna().sum()
 twitter.dropna(subset=['clean_text','category'] , inplace=True)
-twitter.isna().sum()
 text = ''
@@ -37,14 +32,14 @@ for tweet in twitter[twitter['category'] == "positive"]['clean_text']:
 wordcloud = WordCloud(
 width = 3000, height = 2000, background_color = 'black',
 stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
 fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
 plt.imshow(wordcloud, interpolation= 'bilinear')
 plt.axis('off')
 plt.tight_layout(pad=0)
 plt.show()
 del text
 text = ''
@@ -55,14 +50,14 @@ for tweet in twitter[twitter['category'] == "neutral"]['clean_text']:
 wordcloud = WordCloud(
 width = 3000, height = 2000, background_color = 'black',
 stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
 fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
 plt.imshow(wordcloud, interpolation= 'bilinear')
 plt.axis('off')
 plt.tight_layout(pad=0)
 plt.show()
 del text
 text = ''
@@ -73,17 +68,17 @@ for tweet in twitter[twitter['category'] == "negative"]['clean_text']:
 wordcloud = WordCloud(
 width = 3000, height = 2000, background_color = 'black',
 stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
 fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
 plt.imshow(wordcloud, interpolation= 'bilinear')
 plt.axis('off')
 plt.tight_layout(pad=0)
 plt.show()
 del text
-print(twitter['category'].value_counts())
 dist = twitter['category'].value_counts()
 def distribution_plot(x, y, name):
@@ -100,7 +95,7 @@ sub = lambda x: TextBlob(x).sentiment.subjectivity
 twitter['polarity'] = twitter['clean_text'].apply(pol)
 twitter['subjectivity'] = twitter['clean_text'].apply(sub)
 twitter
 # Plot Polarity
 plt.figure(figsize=(10,6))
@@ -120,7 +115,7 @@ plt.xlabel("Subjectivity")
 plt.ylabel("Frequency")
 plt.grid(True)
 plt.show()
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
@@ -138,36 +133,36 @@ X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_sta
 lr = LogisticRegression(max_iter=1000)
 lr.fit(X_train,y_train)
 y_pred = lr.predict(X_test)
 print("Accuracy:", accuracy_score(y_test, y_pred))
 print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
 print("Classification Report:\n", classification_report(y_test, y_pred))
 print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
 from sklearn.ensemble import RandomForestClassifier
 classifier = RandomForestClassifier(n_estimators=100, random_state=42)
 classifier.fit(X_train, y_train)
 y_pred = classifier.predict(X_test)
 print("Accuracy:", accuracy_score(y_test, y_pred))
 print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
 print("Classification Report:\n", classification_report(y_test, y_pred))
 print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
 from sklearn.svm import SVC
 classifier = SVC(kernel='linear', random_state=42)
 classifier.fit(X_train, y_train)
 y_pred = classifier.predict(X_test)
 print("Accuracy:", accuracy_score(y_test, y_pred))
 print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
 print("Classification Report:\n", classification_report(y_test, y_pred))
 print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn .tree import DecisionTreeClassifier
@@ -205,7 +200,7 @@ classifier.fit(X_train, y_train)
 # Predict probabilities on the test set
 y_probs = classifier.predict_proba(X_test)
 # Calculate ROC curve and AUC for each class
 fpr = {}
 tpr = {}
@@ -240,7 +235,7 @@ print("Confusion Matrix:\n", confusion_matrix(y_test, classifier.predict(X_test)

 from wordcloud import WordCloud
 twitter = pd.read_csv("Twitter_Data.csv")
+twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
 twitter.dropna(subset=['clean_text','category'] , inplace=True)
 text = ''
 wordcloud = WordCloud(
 width = 3000, height = 2000, background_color = 'black',
 stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
+'''
 fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
 plt.imshow(wordcloud, interpolation= 'bilinear')
 plt.axis('off')
 plt.tight_layout(pad=0)
 plt.show()
+'''
 del text
 text = ''
 wordcloud = WordCloud(
 width = 3000, height = 2000, background_color = 'black',
 stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
+'''
 fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
 plt.imshow(wordcloud, interpolation= 'bilinear')
 plt.axis('off')
 plt.tight_layout(pad=0)
 plt.show()
+'''
 del text
 text = ''
 wordcloud = WordCloud(
 width = 3000, height = 2000, background_color = 'black',
 stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
+'''
 fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
 plt.imshow(wordcloud, interpolation= 'bilinear')
 plt.axis('off')
 plt.tight_layout(pad=0)
 plt.show()
+'''
 del text
+#print(twitter['category'].value_counts())
 dist = twitter['category'].value_counts()
 def distribution_plot(x, y, name):
 twitter['polarity'] = twitter['clean_text'].apply(pol)
 twitter['subjectivity'] = twitter['clean_text'].apply(sub)
 twitter
+'''
 # Plot Polarity
 plt.figure(figsize=(10,6))
 plt.ylabel("Frequency")
 plt.grid(True)
 plt.show()
+'''
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
 lr = LogisticRegression(max_iter=1000)
 lr.fit(X_train,y_train)
 y_pred = lr.predict(X_test)
+'''
 print("Accuracy:", accuracy_score(y_test, y_pred))
 print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
 print("Classification Report:\n", classification_report(y_test, y_pred))
 print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+'''
 from sklearn.ensemble import RandomForestClassifier
 classifier = RandomForestClassifier(n_estimators=100, random_state=42)
 classifier.fit(X_train, y_train)
 y_pred = classifier.predict(X_test)
+'''
 print("Accuracy:", accuracy_score(y_test, y_pred))
 print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
 print("Classification Report:\n", classification_report(y_test, y_pred))
 print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+'''
 from sklearn.svm import SVC
 classifier = SVC(kernel='linear', random_state=42)
 classifier.fit(X_train, y_train)
 y_pred = classifier.predict(X_test)
+'''
 print("Accuracy:", accuracy_score(y_test, y_pred))
 print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
 print("Classification Report:\n", classification_report(y_test, y_pred))
 print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+'''
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn .tree import DecisionTreeClassifier
 # Predict probabilities on the test set
 y_probs = classifier.predict_proba(X_test)
+'''
 # Calculate ROC curve and AUC for each class
 fpr = {}
 tpr = {}
+'''