Update app.py
Browse files
app.py
CHANGED
|
@@ -14,20 +14,15 @@ from textblob import TextBlob
|
|
| 14 |
from wordcloud import WordCloud
|
| 15 |
|
| 16 |
twitter = pd.read_csv("Twitter_Data.csv")
|
| 17 |
-
twitter.head(5)
|
| 18 |
-
|
| 19 |
-
twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
|
| 20 |
|
| 21 |
|
| 22 |
-
twitter.
|
| 23 |
|
| 24 |
-
twitter.info()
|
| 25 |
|
| 26 |
-
twitter.isna().sum()
|
| 27 |
|
| 28 |
twitter.dropna(subset=['clean_text','category'] , inplace=True)
|
| 29 |
|
| 30 |
-
|
| 31 |
|
| 32 |
text = ''
|
| 33 |
|
|
@@ -37,14 +32,14 @@ for tweet in twitter[twitter['category'] == "positive"]['clean_text']:
|
|
| 37 |
wordcloud = WordCloud(
|
| 38 |
width = 3000, height = 2000, background_color = 'black',
|
| 39 |
stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
|
| 40 |
-
|
| 41 |
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
|
| 42 |
|
| 43 |
plt.imshow(wordcloud, interpolation= 'bilinear')
|
| 44 |
plt.axis('off')
|
| 45 |
plt.tight_layout(pad=0)
|
| 46 |
plt.show()
|
| 47 |
-
|
| 48 |
del text
|
| 49 |
|
| 50 |
text = ''
|
|
@@ -55,14 +50,14 @@ for tweet in twitter[twitter['category'] == "neutral"]['clean_text']:
|
|
| 55 |
wordcloud = WordCloud(
|
| 56 |
width = 3000, height = 2000, background_color = 'black',
|
| 57 |
stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
|
| 58 |
-
|
| 59 |
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
|
| 60 |
|
| 61 |
plt.imshow(wordcloud, interpolation= 'bilinear')
|
| 62 |
plt.axis('off')
|
| 63 |
plt.tight_layout(pad=0)
|
| 64 |
plt.show()
|
| 65 |
-
|
| 66 |
del text
|
| 67 |
|
| 68 |
text = ''
|
|
@@ -73,17 +68,17 @@ for tweet in twitter[twitter['category'] == "negative"]['clean_text']:
|
|
| 73 |
wordcloud = WordCloud(
|
| 74 |
width = 3000, height = 2000, background_color = 'black',
|
| 75 |
stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
|
| 76 |
-
|
| 77 |
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
|
| 78 |
|
| 79 |
plt.imshow(wordcloud, interpolation= 'bilinear')
|
| 80 |
plt.axis('off')
|
| 81 |
plt.tight_layout(pad=0)
|
| 82 |
plt.show()
|
| 83 |
-
|
| 84 |
del text
|
| 85 |
|
| 86 |
-
print(twitter['category'].value_counts())
|
| 87 |
|
| 88 |
dist = twitter['category'].value_counts()
|
| 89 |
def distribution_plot(x, y, name):
|
|
@@ -100,7 +95,7 @@ sub = lambda x: TextBlob(x).sentiment.subjectivity
|
|
| 100 |
twitter['polarity'] = twitter['clean_text'].apply(pol)
|
| 101 |
twitter['subjectivity'] = twitter['clean_text'].apply(sub)
|
| 102 |
twitter
|
| 103 |
-
|
| 104 |
# Plot Polarity
|
| 105 |
|
| 106 |
plt.figure(figsize=(10,6))
|
|
@@ -120,7 +115,7 @@ plt.xlabel("Subjectivity")
|
|
| 120 |
plt.ylabel("Frequency")
|
| 121 |
plt.grid(True)
|
| 122 |
plt.show()
|
| 123 |
-
|
| 124 |
from sklearn.linear_model import LogisticRegression
|
| 125 |
from sklearn.model_selection import train_test_split
|
| 126 |
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
|
|
@@ -138,36 +133,36 @@ X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_sta
|
|
| 138 |
lr = LogisticRegression(max_iter=1000)
|
| 139 |
lr.fit(X_train,y_train)
|
| 140 |
y_pred = lr.predict(X_test)
|
| 141 |
-
|
| 142 |
print("Accuracy:", accuracy_score(y_test, y_pred))
|
| 143 |
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
|
| 144 |
print("Classification Report:\n", classification_report(y_test, y_pred))
|
| 145 |
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
| 146 |
-
|
| 147 |
from sklearn.ensemble import RandomForestClassifier
|
| 148 |
|
| 149 |
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
|
| 150 |
classifier.fit(X_train, y_train)
|
| 151 |
|
| 152 |
y_pred = classifier.predict(X_test)
|
| 153 |
-
|
| 154 |
print("Accuracy:", accuracy_score(y_test, y_pred))
|
| 155 |
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
|
| 156 |
print("Classification Report:\n", classification_report(y_test, y_pred))
|
| 157 |
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
| 158 |
-
|
| 159 |
from sklearn.svm import SVC
|
| 160 |
|
| 161 |
classifier = SVC(kernel='linear', random_state=42)
|
| 162 |
classifier.fit(X_train, y_train)
|
| 163 |
|
| 164 |
y_pred = classifier.predict(X_test)
|
| 165 |
-
|
| 166 |
print("Accuracy:", accuracy_score(y_test, y_pred))
|
| 167 |
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
|
| 168 |
print("Classification Report:\n", classification_report(y_test, y_pred))
|
| 169 |
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
| 170 |
-
|
| 171 |
from sklearn.ensemble import AdaBoostClassifier
|
| 172 |
from sklearn .tree import DecisionTreeClassifier
|
| 173 |
|
|
@@ -205,7 +200,7 @@ classifier.fit(X_train, y_train)
|
|
| 205 |
|
| 206 |
# Predict probabilities on the test set
|
| 207 |
y_probs = classifier.predict_proba(X_test)
|
| 208 |
-
|
| 209 |
# Calculate ROC curve and AUC for each class
|
| 210 |
fpr = {}
|
| 211 |
tpr = {}
|
|
@@ -240,7 +235,7 @@ print("Confusion Matrix:\n", confusion_matrix(y_test, classifier.predict(X_test)
|
|
| 240 |
|
| 241 |
|
| 242 |
|
| 243 |
-
|
| 244 |
|
| 245 |
|
| 246 |
|
|
|
|
| 14 |
from wordcloud import WordCloud
|
| 15 |
|
| 16 |
twitter = pd.read_csv("Twitter_Data.csv")
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
+
twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
|
| 20 |
|
|
|
|
| 21 |
|
|
|
|
| 22 |
|
| 23 |
twitter.dropna(subset=['clean_text','category'] , inplace=True)
|
| 24 |
|
| 25 |
+
|
| 26 |
|
| 27 |
text = ''
|
| 28 |
|
|
|
|
| 32 |
wordcloud = WordCloud(
|
| 33 |
width = 3000, height = 2000, background_color = 'black',
|
| 34 |
stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
|
| 35 |
+
'''
|
| 36 |
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
|
| 37 |
|
| 38 |
plt.imshow(wordcloud, interpolation= 'bilinear')
|
| 39 |
plt.axis('off')
|
| 40 |
plt.tight_layout(pad=0)
|
| 41 |
plt.show()
|
| 42 |
+
'''
|
| 43 |
del text
|
| 44 |
|
| 45 |
text = ''
|
|
|
|
| 50 |
wordcloud = WordCloud(
|
| 51 |
width = 3000, height = 2000, background_color = 'black',
|
| 52 |
stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
|
| 53 |
+
'''
|
| 54 |
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
|
| 55 |
|
| 56 |
plt.imshow(wordcloud, interpolation= 'bilinear')
|
| 57 |
plt.axis('off')
|
| 58 |
plt.tight_layout(pad=0)
|
| 59 |
plt.show()
|
| 60 |
+
'''
|
| 61 |
del text
|
| 62 |
|
| 63 |
text = ''
|
|
|
|
| 68 |
wordcloud = WordCloud(
|
| 69 |
width = 3000, height = 2000, background_color = 'black',
|
| 70 |
stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
|
| 71 |
+
'''
|
| 72 |
fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
|
| 73 |
|
| 74 |
plt.imshow(wordcloud, interpolation= 'bilinear')
|
| 75 |
plt.axis('off')
|
| 76 |
plt.tight_layout(pad=0)
|
| 77 |
plt.show()
|
| 78 |
+
'''
|
| 79 |
del text
|
| 80 |
|
| 81 |
+
#print(twitter['category'].value_counts())
|
| 82 |
|
| 83 |
dist = twitter['category'].value_counts()
|
| 84 |
def distribution_plot(x, y, name):
|
|
|
|
| 95 |
twitter['polarity'] = twitter['clean_text'].apply(pol)
|
| 96 |
twitter['subjectivity'] = twitter['clean_text'].apply(sub)
|
| 97 |
twitter
|
| 98 |
+
'''
|
| 99 |
# Plot Polarity
|
| 100 |
|
| 101 |
plt.figure(figsize=(10,6))
|
|
|
|
| 115 |
plt.ylabel("Frequency")
|
| 116 |
plt.grid(True)
|
| 117 |
plt.show()
|
| 118 |
+
'''
|
| 119 |
from sklearn.linear_model import LogisticRegression
|
| 120 |
from sklearn.model_selection import train_test_split
|
| 121 |
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
|
|
|
|
| 133 |
lr = LogisticRegression(max_iter=1000)
|
| 134 |
lr.fit(X_train,y_train)
|
| 135 |
y_pred = lr.predict(X_test)
|
| 136 |
+
'''
|
| 137 |
print("Accuracy:", accuracy_score(y_test, y_pred))
|
| 138 |
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
|
| 139 |
print("Classification Report:\n", classification_report(y_test, y_pred))
|
| 140 |
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
| 141 |
+
'''
|
| 142 |
from sklearn.ensemble import RandomForestClassifier
|
| 143 |
|
| 144 |
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
|
| 145 |
classifier.fit(X_train, y_train)
|
| 146 |
|
| 147 |
y_pred = classifier.predict(X_test)
|
| 148 |
+
'''
|
| 149 |
print("Accuracy:", accuracy_score(y_test, y_pred))
|
| 150 |
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
|
| 151 |
print("Classification Report:\n", classification_report(y_test, y_pred))
|
| 152 |
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
| 153 |
+
'''
|
| 154 |
from sklearn.svm import SVC
|
| 155 |
|
| 156 |
classifier = SVC(kernel='linear', random_state=42)
|
| 157 |
classifier.fit(X_train, y_train)
|
| 158 |
|
| 159 |
y_pred = classifier.predict(X_test)
|
| 160 |
+
'''
|
| 161 |
print("Accuracy:", accuracy_score(y_test, y_pred))
|
| 162 |
print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
|
| 163 |
print("Classification Report:\n", classification_report(y_test, y_pred))
|
| 164 |
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
| 165 |
+
'''
|
| 166 |
from sklearn.ensemble import AdaBoostClassifier
|
| 167 |
from sklearn .tree import DecisionTreeClassifier
|
| 168 |
|
|
|
|
| 200 |
|
| 201 |
# Predict probabilities on the test set
|
| 202 |
y_probs = classifier.predict_proba(X_test)
|
| 203 |
+
'''
|
| 204 |
# Calculate ROC curve and AUC for each class
|
| 205 |
fpr = {}
|
| 206 |
tpr = {}
|
|
|
|
| 235 |
|
| 236 |
|
| 237 |
|
| 238 |
+
'''
|
| 239 |
|
| 240 |
|
| 241 |
|