Pushp123 committed on
Commit
ccc96e3
·
verified ·
1 Parent(s): 54f4a40

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -0
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import numpy as np
4
+ import pandas as pd
5
+ import seaborn as sns
6
+ import matplotlib.pyplot as plt
7
+ import nltk
8
+ nltk.download('stopwords', quiet=True)
9
+ from nltk.corpus import stopwords
10
+ from nltk.stem.porter import PorterStemmer
11
+ from sklearn import metrics
12
+ from sklearn.multiclass import OneVsRestClassifier
13
+ from textblob import TextBlob
14
+ from wordcloud import WordCloud
15
+
16
+ twitter = pd.read_csv("/content/Twitter_Data.csv")
17
+ twitter.head(5)
18
+
19
+ twitter['category'] = twitter['category'].replace({-1: 'negative', 0: 'neutral', 1: 'positive'})
20
+
21
+
22
+ twitter.head()
23
+
24
+ twitter.info()
25
+
26
+ twitter.isna().sum()
27
+
28
+ twitter.dropna(subset=['clean_text','category'] , inplace=True)
29
+
30
+ twitter.isna().sum()
31
+
32
# English stop words are identical for every cloud, so load them only once
# instead of once per category.
ENGLISH_STOPWORDS = set(nltk.corpus.stopwords.words("english"))


def _show_category_wordcloud(category):
    """Display a word cloud built from all tweets labelled ``category``.

    Replaces three copy-pasted sections that differed only in the category
    string.

    Args:
        category: Value of the ``category`` column to select, e.g.
            ``"positive"``, ``"neutral"`` or ``"negative"``.
    """
    tweets = twitter.loc[twitter['category'] == category, 'clean_text']

    # str.join assembles the corpus in a single pass; the original grew the
    # string with ``+=`` inside a loop, which can degrade to quadratic time.
    text = " ".join(str(tweet) for tweet in tweets)

    wordcloud = WordCloud(
        width=3000,
        height=2000,
        background_color='black',
        stopwords=ENGLISH_STOPWORDS,
    ).generate(text)

    plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout(pad=0)
    plt.show()


# Same order as the original script: positive, neutral, negative.
for category in ("positive", "neutral", "negative"):
    _show_category_wordcloud(category)
85
+
86
+ print(twitter['category'].value_counts())
87
+
88
+ dist = twitter['category'].value_counts()
89
def distribution_plot(x, y, name):
    """Show a bar chart of ``y`` against ``x`` with ``name`` as the title.

    Args:
        x: Values for the horizontal axis (bar labels).
        y: Bar heights, one per entry of ``x``.
        name: Title drawn above the chart.
    """
    # Create the 10x6 figure and its axes explicitly, then draw onto them.
    _, axes = plt.subplots(figsize=(10, 6))
    sns.barplot(x=x, y=y, ax=axes)
    axes.set_title(name)
    plt.show()
94
+
95
+ distribution_plot(x=dist.index, y=dist.values, name="Class Distribution Train")
96
+
97
+ pol = lambda x: TextBlob(x).sentiment.polarity
98
+ sub = lambda x: TextBlob(x).sentiment.subjectivity
99
+
100
+ twitter['polarity'] = twitter['clean_text'].apply(pol)
101
+ twitter['subjectivity'] = twitter['clean_text'].apply(sub)
102
+ twitter
103
+
104
+ # Plot Polarity
105
+
106
+ plt.figure(figsize=(10,6))
107
+ plt.hist(twitter['polarity'], bins=20, color='skyblue', edgecolor='black')
108
+ plt.title("Distribution of Polarity")
109
+ plt.xlabel("Polarity")
110
+ plt.ylabel("Frequency")
111
+ plt.grid(True)
112
+ plt.show()
113
+
114
+ # Plot Subjectivity
115
+
116
+ plt.figure(figsize=(10,6))
117
+ plt.hist(twitter['subjectivity'], bins=20, color='lightgreen', edgecolor='black')
118
+ plt.title("Distribution of Subjectivity")
119
+ plt.xlabel("Subjectivity")
120
+ plt.ylabel("Frequency")
121
+ plt.grid(True)
122
+ plt.show()
123
+
124
+ from sklearn.linear_model import LogisticRegression
125
+ from sklearn.model_selection import train_test_split
126
+ from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
127
+ from sklearn.feature_extraction.text import TfidfVectorizer
128
+
129
+ vectorizer = TfidfVectorizer(max_features=5000)
130
+
131
+ X = vectorizer.fit_transform(twitter['clean_text'])
132
+
133
+ y = twitter['category'].map({'negative':0, 'neutral':1, 'positive':2})
134
+
135
+ X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)
136
+
137
+
138
+ lr = LogisticRegression(max_iter=1000)
139
+ lr.fit(X_train,y_train)
140
+ y_pred = lr.predict(X_test)
141
+
142
+ print("Accuracy:", accuracy_score(y_test, y_pred))
143
+ print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
144
+ print("Classification Report:\n", classification_report(y_test, y_pred))
145
+ print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
146
+
147
+ from sklearn.ensemble import RandomForestClassifier
148
+
149
+ classifier = RandomForestClassifier(n_estimators=100, random_state=42)
150
+ classifier.fit(X_train, y_train)
151
+
152
+ y_pred = classifier.predict(X_test)
153
+
154
+ print("Accuracy:", accuracy_score(y_test, y_pred))
155
+ print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
156
+ print("Classification Report:\n", classification_report(y_test, y_pred))
157
+ print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
158
+
159
+ from sklearn.svm import SVC
160
+
161
+ classifier = SVC(kernel='linear', random_state=42)
162
+ classifier.fit(X_train, y_train)
163
+
164
+ y_pred = classifier.predict(X_test)
165
+
166
+ print("Accuracy:", accuracy_score(y_test, y_pred))
167
+ print("F1 Score:", f1_score(y_test, y_pred, average = 'weighted'))
168
+ print("Classification Report:\n", classification_report(y_test, y_pred))
169
+ print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
170
+
171
+ from sklearn.ensemble import AdaBoostClassifier
172
+ from sklearn .tree import DecisionTreeClassifier
173
+
174
+ classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
175
+ classifier.fit(X_train, y_train)
176
+
177
+ y_pred = classifier.predict(X_test)
178
+
179
+ AdaBoostClassifier
180
+
181
+ # Importing necessary libraries
182
+
183
+ import numpy as np
184
+ import matplotlib.pyplot as plt
185
+ from sklearn.feature_extraction.text import TfidfVectorizer
186
+ from sklearn.model_selection import train_test_split
187
+ from sklearn.ensemble import AdaBoostClassifier
188
+ from sklearn .tree import DecisionTreeClassifier
189
+ from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc
190
+
191
+
192
+ vectorizer = TfidfVectorizer(max_features=5000)
193
+ X = vectorizer.fit_transform(twitter['clean_text'])
194
+
195
+ # Encode target label (category) into numeric values
196
+ y = twitter['category'].map({'negative':0, 'neutral':1, 'positive':2})
197
+
198
+ # Split the dataset into train and test sets
199
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
200
+
201
+
202
+
203
+ classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
204
+ classifier.fit(X_train, y_train)
205
+
206
+ # Predict probabilities on the test set
207
+ y_probs = classifier.predict_proba(X_test)
208
+
209
# Calculate the one-vs-rest ROC curve and AUC for each class
fpr = {}
tpr = {}
roc_auc = {}
num_classes = 3  # Number of classes (negative, neutral, positive)

for i in range(num_classes):
    # Binarize the target: class i is the positive label, everything else
    # is negative; column i of y_probs is the score for class i.
    fpr[i], tpr[i], _ = roc_curve(y_test == i, y_probs[:, i])
    # Area under this class's curve.  The original left roc_auc empty, so
    # the legend label below failed with KeyError on roc_auc[i].
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curves
plt.figure()
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")

plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line (chance level)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curves for Multi-Class Classification")
plt.legend(loc='lower right')
plt.show()
233
+
234
+
235
# Evaluate the classifier.
# Predict once and reuse the labels for every metric; the original called
# classifier.predict(X_test) separately for each of the four reports.
y_pred = classifier.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
249
+ # Function to make predictions
250
def predict_sentiment(text):
    """Predict the sentiment label for a single tweet.

    The tweet is vectorized with the module-level ``vectorizer`` and
    classified with the module-level ``classifier``.

    Args:
        text: Raw tweet text. May be ``None`` or blank.

    Returns:
        ``"Negative"``, ``"Neutral"`` or ``"Positive"``, or the prompt
        ``"Please enter some text."`` when there is nothing to classify.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError instead of returning the prompt.
    if text is None or not text.strip():
        return "Please enter some text."

    text_vector = vectorizer.transform([text])
    pred = classifier.predict(text_vector)[0]

    # Inverse of the numeric encoding used for the training target.
    sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
    return sentiment_map[int(pred)]
258
+
259
# Build the Gradio interface: a tweet input, a result box and a button that
# runs predict_sentiment on the input.
# NOTE(review): indentation was lost in the source; nesting below follows the
# blank-line grouping (only the input textbox sits inside the Row) — confirm.
with gr.Blocks() as demo:
    gr.Markdown("## Twitter Sentiment Analyzer")
    gr.Markdown("Enter a tweet and get its predicted sentiment:")

    with gr.Row():
        input_text = gr.Textbox(
            lines=3,
            placeholder="Type your tweet here...",
            label="Tweet",
        )

    output = gr.Textbox(label="Predicted Sentiment")

    analyze_btn = gr.Button("Analyze Sentiment")
    # Wire the button: the textbox value goes in, the predicted label comes out.
    analyze_btn.click(
        fn=predict_sentiment,
        inputs=input_text,
        outputs=output,
    )

demo.launch()