Pushp123 committed on
Commit
8335989
·
verified ·
1 Parent(s): c395d07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -24
app.py CHANGED
@@ -14,20 +14,15 @@ from textblob import TextBlob
14
  from wordcloud import WordCloud
15
 
16
  twitter = pd.read_csv("Twitter_Data.csv")
17
- twitter.head(5)
18
-
19
- twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
20
 
21
 
22
- twitter.head()
23
 
24
- twitter.info()
25
 
26
- twitter.isna().sum()
27
 
28
  twitter.dropna(subset=['clean_text','category'] , inplace=True)
29
 
30
- twitter.isna().sum()
31
 
32
  text = ''
33
 
@@ -37,14 +32,14 @@ for tweet in twitter[twitter['category'] == "positive"]['clean_text']:
37
  wordcloud = WordCloud(
38
  width = 3000, height = 2000, background_color = 'black',
39
  stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
40
-
41
  fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
42
 
43
  plt.imshow(wordcloud, interpolation= 'bilinear')
44
  plt.axis('off')
45
  plt.tight_layout(pad=0)
46
  plt.show()
47
-
48
  del text
49
 
50
  text = ''
@@ -55,14 +50,14 @@ for tweet in twitter[twitter['category'] == "neutral"]['clean_text']:
55
  wordcloud = WordCloud(
56
  width = 3000, height = 2000, background_color = 'black',
57
  stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
58
-
59
  fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
60
 
61
  plt.imshow(wordcloud, interpolation= 'bilinear')
62
  plt.axis('off')
63
  plt.tight_layout(pad=0)
64
  plt.show()
65
-
66
  del text
67
 
68
  text = ''
@@ -73,17 +68,17 @@ for tweet in twitter[twitter['category'] == "negative"]['clean_text']:
73
  wordcloud = WordCloud(
74
  width = 3000, height = 2000, background_color = 'black',
75
  stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
76
-
77
  fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
78
 
79
  plt.imshow(wordcloud, interpolation= 'bilinear')
80
  plt.axis('off')
81
  plt.tight_layout(pad=0)
82
  plt.show()
83
-
84
  del text
85
 
86
- print(twitter['category'].value_counts())
87
 
88
  dist = twitter['category'].value_counts()
89
  def distribution_plot(x, y, name):
@@ -100,7 +95,7 @@ sub = lambda x: TextBlob(x).sentiment.subjectivity
100
  twitter['polarity'] = twitter['clean_text'].apply(pol)
101
  twitter['subjectivity'] = twitter['clean_text'].apply(sub)
102
  twitter
103
-
104
  # Plot Polarity
105
 
106
  plt.figure(figsize=(10,6))
@@ -120,7 +115,7 @@ plt.xlabel("Subjectivity")
120
  plt.ylabel("Frequency")
121
  plt.grid(True)
122
  plt.show()
123
-
124
  from sklearn.linear_model import LogisticRegression
125
  from sklearn.model_selection import train_test_split
126
  from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
@@ -138,36 +133,36 @@ X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_sta
138
  lr = LogisticRegression(max_iter=1000)
139
  lr.fit(X_train,y_train)
140
  y_pred = lr.predict(X_test)
141
-
142
  print("Accuracy:", accuracy_score(y_test, y_pred))
143
  print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
144
  print("Classification Report:\n", classification_report(y_test, y_pred))
145
  print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
146
-
147
  from sklearn.ensemble import RandomForestClassifier
148
 
149
  classifier = RandomForestClassifier(n_estimators=100, random_state=42)
150
  classifier.fit(X_train, y_train)
151
 
152
  y_pred = classifier.predict(X_test)
153
-
154
  print("Accuracy:", accuracy_score(y_test, y_pred))
155
  print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
156
  print("Classification Report:\n", classification_report(y_test, y_pred))
157
  print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
158
-
159
  from sklearn.svm import SVC
160
 
161
  classifier = SVC(kernel='linear', random_state=42)
162
  classifier.fit(X_train, y_train)
163
 
164
  y_pred = classifier.predict(X_test)
165
-
166
  print("Accuracy:", accuracy_score(y_test, y_pred))
167
  print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
168
  print("Classification Report:\n", classification_report(y_test, y_pred))
169
  print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
170
-
171
  from sklearn.ensemble import AdaBoostClassifier
172
  from sklearn .tree import DecisionTreeClassifier
173
 
@@ -205,7 +200,7 @@ classifier.fit(X_train, y_train)
205
 
206
  # Predict probabilities on the test set
207
  y_probs = classifier.predict_proba(X_test)
208
-
209
  # Calculate ROC curve and AUC for each class
210
  fpr = {}
211
  tpr = {}
@@ -240,7 +235,7 @@ print("Confusion Matrix:\n", confusion_matrix(y_test, classifier.predict(X_test)
240
 
241
 
242
 
243
-
244
 
245
 
246
 
 
14
  from wordcloud import WordCloud
15
 
16
  twitter = pd.read_csv("Twitter_Data.csv")
 
 
 
17
 
18
 
19
+ twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
20
 
 
21
 
 
22
 
23
  twitter.dropna(subset=['clean_text','category'] , inplace=True)
24
 
25
+
26
 
27
  text = ''
28
 
 
32
  wordcloud = WordCloud(
33
  width = 3000, height = 2000, background_color = 'black',
34
  stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
35
+ '''
36
  fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
37
 
38
  plt.imshow(wordcloud, interpolation= 'bilinear')
39
  plt.axis('off')
40
  plt.tight_layout(pad=0)
41
  plt.show()
42
+ '''
43
  del text
44
 
45
  text = ''
 
50
  wordcloud = WordCloud(
51
  width = 3000, height = 2000, background_color = 'black',
52
  stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
53
+ '''
54
  fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
55
 
56
  plt.imshow(wordcloud, interpolation= 'bilinear')
57
  plt.axis('off')
58
  plt.tight_layout(pad=0)
59
  plt.show()
60
+ '''
61
  del text
62
 
63
  text = ''
 
68
  wordcloud = WordCloud(
69
  width = 3000, height = 2000, background_color = 'black',
70
  stopwords = set(nltk.corpus.stopwords.words("english"))).generate(text)
71
+ '''
72
  fig = plt.figure(figsize=(40,30), facecolor = 'k', edgecolor = 'k')
73
 
74
  plt.imshow(wordcloud, interpolation= 'bilinear')
75
  plt.axis('off')
76
  plt.tight_layout(pad=0)
77
  plt.show()
78
+ '''
79
  del text
80
 
81
+ #print(twitter['category'].value_counts())
82
 
83
  dist = twitter['category'].value_counts()
84
  def distribution_plot(x, y, name):
 
95
  twitter['polarity'] = twitter['clean_text'].apply(pol)
96
  twitter['subjectivity'] = twitter['clean_text'].apply(sub)
97
  twitter
98
+ '''
99
  # Plot Polarity
100
 
101
  plt.figure(figsize=(10,6))
 
115
  plt.ylabel("Frequency")
116
  plt.grid(True)
117
  plt.show()
118
+ '''
119
  from sklearn.linear_model import LogisticRegression
120
  from sklearn.model_selection import train_test_split
121
  from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
 
133
  lr = LogisticRegression(max_iter=1000)
134
  lr.fit(X_train,y_train)
135
  y_pred = lr.predict(X_test)
136
+ '''
137
  print("Accuracy:", accuracy_score(y_test, y_pred))
138
  print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
139
  print("Classification Report:\n", classification_report(y_test, y_pred))
140
  print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
141
+ '''
142
  from sklearn.ensemble import RandomForestClassifier
143
 
144
  classifier = RandomForestClassifier(n_estimators=100, random_state=42)
145
  classifier.fit(X_train, y_train)
146
 
147
  y_pred = classifier.predict(X_test)
148
+ '''
149
  print("Accuracy:", accuracy_score(y_test, y_pred))
150
  print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
151
  print("Classification Report:\n", classification_report(y_test, y_pred))
152
  print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
153
+ '''
154
  from sklearn.svm import SVC
155
 
156
  classifier = SVC(kernel='linear', random_state=42)
157
  classifier.fit(X_train, y_train)
158
 
159
  y_pred = classifier.predict(X_test)
160
+ '''
161
  print("Accuracy:", accuracy_score(y_test, y_pred))
162
  print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
163
  print("Classification Report:\n", classification_report(y_test, y_pred))
164
  print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
165
+ '''
166
  from sklearn.ensemble import AdaBoostClassifier
167
  from sklearn .tree import DecisionTreeClassifier
168
 
 
200
 
201
  # Predict probabilities on the test set
202
  y_probs = classifier.predict_proba(X_test)
203
+ '''
204
  # Calculate ROC curve and AUC for each class
205
  fpr = {}
206
  tpr = {}
 
235
 
236
 
237
 
238
+ '''
239
 
240
 
241