seek007 committed on
Commit
8432f36
·
verified ·
1 Parent(s): 7b48159

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """FA20-BCS-OO1 final app.ipynb
3
+
4
+ Automatically generated by Colab
5
+ """
6
+
7
+ # !pip install emoji gradio
8
+
9
+
10
+
11
+ import joblib, pickle, pandas as pd, numpy as np
12
+ import gradio as gr
13
+ from TweetNormalizer import normalizeTweet
14
+ import seaborn as sns
15
+ import matplotlib.pyplot as plt
16
+
17
+ from transformers import pipeline
18
+ # seek007/taskA-DeBERTa-bweet-1.2.5
19
+ # seek007/taskA-DeBERTa-large-1.0.0
20
+ # seek007/taskA-DeBERTa-bweet-1.1.0
21
# Task A pipeline (sarcastic vs. non-sarcastic); the model repository also
# supplies the tokenizer.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer="seek007/taskA-DeBERTa-large-1.0.0",
)
22
+
23
+ # pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')
24
+
25
+
26
+
27
+ import numpy as np
28
+
29
def predict(text=None, fil=None):
    """Label a single tweet and/or a file of tweets as sarcastic or not.

    Args:
        text: Tweet typed by the user. ``None`` and blank/whitespace-only
            strings are treated as "no text".
        fil: Optional upload. Either an object exposing the file path as
            ``.name`` or a plain path string. Must be ``.csv``, ``.xls`` or
            ``.xlsx`` and contain a ``tweet`` column.

    Returns:
        A ``(sentiment, df, fig)`` tuple:

        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for ``text``,
          ``None`` when only a file was supplied, or a help message when
          neither input was supplied.
        * ``df`` -- the uploaded table with a predicted ``label`` column, a
          one-row table for text-only input, or ``None``.
        * ``fig`` -- pie chart of the predicted label distribution for an
          uploaded file, otherwise ``None``.

    Raises:
        ValueError: if the upload has an unsupported extension or lacks a
            ``tweet`` column.
    """
    has_text = text is not None and bool(str(text).strip())

    # Bail out before touching the model when there is nothing to classify.
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both file-like uploads (``.name``) and plain path strings.
        path = getattr(fil, "name", fil)
        lowered = str(path).lower()
        if lowered.endswith(".csv"):
            df = pd.read_csv(path)
        elif lowered.endswith((".xlsx", ".xls")):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        if "tweet" not in df.columns:
            raise ValueError("The uploaded file must contain a 'tweet' column.")

        # Empty cells are read as NaN; coerce them to "" so normalisation
        # always receives a string.
        tweets = [normalizeTweet(t) for t in df["tweet"].fillna("").astype(str)]
        raw_labels = [item["label"] for item in pipe.predict(tweets)]

        # The single-text path below accepts both raw ids and readable names,
        # so normalise the batch labels the same way before counting them.
        readable = {
            "LABEL_0": "non_sarcastic",
            "LABEL_1": "sarcastic",
        }
        df["label"] = [readable.get(lbl, lbl) for lbl in raw_labels]

        # The input may or may not carry a 'sarcastic' column (presumably the
        # ground-truth label -- confirm); hide it only when it is present.
        df.drop(columns="sarcastic", errors="ignore", inplace=True)

        sarcastic_count = int((df["label"] == "sarcastic").sum())
        non_sarcastic_count = int((df["label"] == "non_sarcastic").sum())

        # A pie chart of two zeros is undefined, so skip the plot in that case.
        if sarcastic_count + non_sarcastic_count > 0:
            sns.set_style("whitegrid")
            fig, ax = plt.subplots()
            ax.pie(
                [sarcastic_count, non_sarcastic_count],
                explode=(0.1, 0),  # pull out the first (sarcastic) slice
                labels=["Sarcastic", "Non-Sarcastic"],
                autopct="%1.1f%%",
                shadow=True,
                startangle=140,
            )
            ax.axis("equal")  # equal aspect ratio keeps the pie circular
            ax.set_title("Sarcastic vs Non-Sarcastic Tweets")

    if has_text:
        prediction = pipe.predict([normalizeTweet(text)])[0]
        print(prediction)
        is_sarcastic = prediction["label"] in ("LABEL_1", "sarcastic")
        sentiment = "Sarcastic" if is_sarcastic else "Non Sarcastic"
        if df is None:
            df = pd.DataFrame([{"tweet": text, "label": sentiment}])

    return sentiment, df, fig
92
+
93
+
94
+
95
+
96
+
97
# Task A tab: text box + file upload in; label, result table and plot out.
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=["Tweets", "Labels"], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)
100
+
101
+ # demo.launch(debug=True)
102
+
103
file_path = gr.File(label="Upload a File")
label = gr.Label(num_top_classes=3, label="Top 3 Labels")

# `classify` is not defined anywhere in this file, so referencing it directly
# raised NameError at import time and prevented the app from starting. Build
# this interface only when such a callable actually exists at this point;
# otherwise leave `classification` as None.
_classify_fn = globals().get("classify")
if callable(_classify_fn):
    classification = gr.Interface(
        _classify_fn,
        inputs=[gr.Text(label="Input"), file_path],
        outputs=[
            label,
            gr.DataFrame(headers=["Tweets", "Label", "Score"], wrap=True),
            gr.Plot(label="Sarcasm classifier"),
        ],
        title="Sarcasm Classifier",
    )
else:
    classification = None
106
+
107
+ # classification.launch(debug=True)
108
+
109
+ from transformers import pipeline
110
# Task B pipeline used by classifyB; the model repository also supplies the
# tokenizer.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)
111
+
112
def classifyB(text=None, fil=None):
    """Assign a task-B category to a single tweet and/or a file of tweets.

    Args:
        text: Tweet typed by the user. ``None`` and blank/whitespace-only
            strings are treated as "no text".
        fil: Optional upload. Either an object exposing the file path as
            ``.name`` or a plain path string. Must be ``.csv``, ``.xls`` or
            ``.xlsx`` and contain a ``tweet`` column.

    Returns:
        A ``(sentiment, df, fig)`` tuple:

        * ``sentiment`` -- the label ``pipe2`` predicts for ``text``, ``None``
          when only a file was supplied, or a help message when neither
          input was supplied.
        * ``df`` -- the uploaded table with ``label`` and ``score`` columns
          added, a one-row table for text-only input, or ``None``.
        * ``fig`` -- count plot of the predicted labels for an uploaded
          file, otherwise ``None``.

    Raises:
        ValueError: if the upload has an unsupported extension or lacks a
            ``tweet`` column.
    """
    has_text = text is not None and bool(str(text).strip())

    # Same guard and message as `predict`: never classify an empty string.
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both file-like uploads (``.name``) and plain path strings.
        path = getattr(fil, "name", fil)
        lowered = str(path).lower()
        if lowered.endswith(".csv"):
            df = pd.read_csv(path)
        elif lowered.endswith((".xlsx", ".xls")):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        if "tweet" not in df.columns:
            raise ValueError("The uploaded file must contain a 'tweet' column.")

        # Empty cells are read as NaN; coerce them to "" so normalisation
        # always receives a string.
        tweets = [normalizeTweet(t) for t in df["tweet"].fillna("").astype(str)]
        results = pipe2(tweets)

        # Assign the columns rather than concatenating a second frame, so an
        # upload that already has a 'label' column does not end up with two
        # (which would make the count plot's x='label' ambiguous).
        df["label"] = [item["label"] for item in results]
        df["score"] = [item["score"] for item in results]

        fig = plt.figure()
        sns.countplot(x="label", data=df, palette="viridis")
        plt.title("Result: Count Plot")
        plt.xlabel("label")
        plt.ylabel("Count")

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        print(prediction["label"])
        sentiment = prediction["label"]
        if df is None:
            # Mirror `predict`: show the single result in the table as well.
            df = pd.DataFrame(
                [{"tweet": text, "label": sentiment, "score": prediction["score"]}]
            )

    return sentiment, df, fig
170
+
171
# Task B tab: same inputs as the task A tab, wired to classifyB.
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classificationB = gr.Interface(
    fn=classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=["Tweets", "Label", "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
    theme="dark",
)
174
+
175
# Combine both interfaces into one tabbed app and start serving it.
main = gr.TabbedInterface(
    interface_list=[demo, classificationB],
    tab_names=["Analysizer", "Classifier"],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)