seek007 committed on
Commit
5c77412
·
verified ·
1 Parent(s): 6d8b322

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -36
app.py CHANGED
@@ -1,47 +1,52 @@
1
  # -*- coding: utf-8 -*-
2
- """FA20-BCS-OO1 final app.ipynb
3
-
4
- Automatically generated by Colab
5
  """
6
 
7
- # !pip install emoji gradio
 
8
 
9
 
 
 
10
 
11
- import joblib, pickle, pandas as pd, numpy as np
 
12
  import gradio as gr
13
  from TweetNormalizer import normalizeTweet
14
  import seaborn as sns
15
  import matplotlib.pyplot as plt
16
-
17
  from transformers import pipeline
18
- # seek007/taskA-DeBERTa-bweet-1.2.5
19
- # seek007/taskA-DeBERTa-large-1.0.0
20
- # seek007/taskA-DeBERTa-bweet-1.1.0
 
21
  pipe= pipeline(model="seek007/taskA-DeBERTa-large-1.0.0",tokenizer='seek007/taskA-DeBERTa-large-1.0.0')
22
 
23
  # pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')
24
 
25
 
26
 
27
- import numpy as np
28
-
29
  def predict(text=None , fil=None):
30
- # Preprocess the text
31
- preprocessed_text = normalizeTweet(text)
32
  sentiment =None
33
  df=None
34
  fig=None
 
 
 
 
 
 
 
35
  if fil:
36
  if fil.name.endswith('.csv'):
37
- df = pd.read_csv(fil.name, header=None)
38
  elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
39
- df = pd.read_excel(fil.name, header=None)
40
  else:
41
  raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
42
 
43
 
44
- lst = list(df[0])
45
  m =[normalizeTweet(i) for i in lst]
46
 
47
  d = pd.DataFrame(pipe.predict(m))
@@ -49,7 +54,7 @@ def predict(text=None , fil=None):
49
 
50
 
51
 
52
- sarcastic_count = np.sum(df.label =='sarcastic')
53
  non_sarcastic_count = np.sum(df.label =='non_sarcastic')
54
 
55
  labels = ['Sarcastic', 'Non-Sarcastic']
@@ -62,17 +67,17 @@ def predict(text=None , fil=None):
62
  ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
63
 
64
  plt.title('Sarcastic vs Non-Sarcastic Tweets')
65
-
 
66
 
67
- if text !="" or fil !=None:
68
  prediction = pipe.predict([preprocessed_text])[0]
69
  print(prediction)
70
 
71
- sentiment = "Sarcastic" if (prediction['label'] == 'LABEL_1' or prediction['label'] =='sarcastic') else "Non Sarcastic"
72
  if fil == None:
73
  df= pd.DataFrame([{'tweet':text, 'label':sentiment}])
74
- else:
75
- return "Either enter text or upload .csv or .xlsx file.!" , df, fig
76
 
77
  return sentiment, df, fig
78
 
@@ -82,32 +87,40 @@ def predict(text=None , fil=None):
82
 
83
  file_path =gr.File(label="Upload a File")
84
  output = gr.Label(num_top_classes=2, label="Predicted Labels")
85
- demo = gr.Interface(fn=predict, inputs=[gr.Text(label="Input"),file_path], outputs=[output, gr.DataFrame(headers =['Tweets', 'Labels'], wrap=True), gr.Plot(label="Sarcasm Predictor")], title="Sarcasm Predictor")
86
 
87
  # demo.launch(debug=True)
88
 
89
 
90
- from transformers import pipeline
91
  pipe2 = pipeline(model="seek007/taskB-bertweet-base-trainer-1.0.0", tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0")
92
 
 
 
93
  def classifyB(text=None , fil=None):
 
 
 
 
 
 
 
 
94
  # Preprocess the text
95
  preprocessed_text = normalizeTweet(text)
96
- sentiment =None
97
- df=None
98
- fig=None
99
 
100
  labels = ['sarcasm', 'irony','Staire', 'understatement','overstatement', 'rhetorical question']
101
 
102
  if fil:
103
  if fil.name.endswith('.csv'):
104
- df = pd.read_csv(fil.name, header=None)
105
  elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
106
- df = pd.read_excel(fil.name, header=None)
107
  else:
108
  raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
109
 
110
- lst = list(df[0])
111
  m =[normalizeTweet(i) for i in lst]
112
  d = pipe2(m)
113
 
@@ -129,25 +142,27 @@ def classifyB(text=None , fil=None):
129
  plt.title('Result: Count Plot') # Add a title to the plot
130
  plt.xlabel('label') # Add label for the x-axis
131
  plt.ylabel('Count')
132
- if text == None:
133
  sentiment = df['label'][0]
134
 
135
  # Perform sentiment prediction
136
- if text != None:
137
  prediction = pipe2([preprocessed_text])[0]
138
  # print(prediction["label"])
139
  labels = prediction['label']
140
- # scores = prediction['score']
141
  sentiment = labels
142
-
 
 
143
  return sentiment, df, fig
144
 
145
 
146
 
147
  file_path =gr.File(label="Upload a File")
148
  label = gr.Label( label="Labels")
149
- classificationB = gr.Interface(classifyB, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier") #,theme= 'darkhuggingface'
150
 
151
- main = gr.TabbedInterface([demo, classificationB],['Analysizer', 'Classifier'], title="Sarcasm Predictor: An Optimized Sentiment Analysis system" )
152
 
153
  main.launch(share=True)
 
1
  # -*- coding: utf-8 -*-
 
 
 
2
  """
3
 
4
+ Developed by Abdul S.
5
+ FA20-BCS-OO1 final app.ipynb
6
 
7
 
8
+ Automatically generated by Colab
9
+ """
10
 
11
+ import pandas as pd
12
+ import numpy as np
13
  import gradio as gr
14
  from TweetNormalizer import normalizeTweet
15
  import seaborn as sns
16
  import matplotlib.pyplot as plt
 
17
  from transformers import pipeline
18
+
19
+ # Set pandas display option to show only 2 decimal places
20
+ pd.set_option('display.float_format', '{:.2f}'.format)
21
+
22
  pipe= pipeline(model="seek007/taskA-DeBERTa-large-1.0.0",tokenizer='seek007/taskA-DeBERTa-large-1.0.0')
23
 
24
  # pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')
25
 
26
 
27
 
28
+ #
 
29
  def predict(text=None , fil=None):
 
 
30
  sentiment =None
31
  df=None
32
  fig=None
33
+
34
+ if text == None and fil == None:
35
+ return "Either enter text or upload .csv or .xlsx file.!" , df, fig
36
+
37
+ # Preprocess the text
38
+ preprocessed_text = normalizeTweet(text)
39
+
40
  if fil:
41
  if fil.name.endswith('.csv'):
42
+ df = pd.read_csv(fil.name, header=None , names=['tweet'], usecols=[0])
43
  elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
44
+ df = pd.read_excel(fil.name, header=None, names=['tweet'], usecols=[0])
45
  else:
46
  raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
47
 
48
 
49
+ lst = list(df.tweet)
50
  m =[normalizeTweet(i) for i in lst]
51
 
52
  d = pd.DataFrame(pipe.predict(m))
 
54
 
55
 
56
 
57
+ sarcastic_count = np.sum(df.label == 'sarcastic')
58
  non_sarcastic_count = np.sum(df.label =='non_sarcastic')
59
 
60
  labels = ['Sarcastic', 'Non-Sarcastic']
 
67
  ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
68
 
69
  plt.title('Sarcastic vs Non-Sarcastic Tweets')
70
+ if text == None:
71
+ sentiment = df['label'][0]
72
 
73
+ if text != "":
74
  prediction = pipe.predict([preprocessed_text])[0]
75
  print(prediction)
76
 
77
+ sentiment = "Sarcastic" if prediction['label'] == 'sarcastic' else "Non Sarcastic"
78
  if fil == None:
79
  df= pd.DataFrame([{'tweet':text, 'label':sentiment}])
80
+
 
81
 
82
  return sentiment, df, fig
83
 
 
87
 
88
  file_path =gr.File(label="Upload a File")
89
  output = gr.Label(num_top_classes=2, label="Predicted Labels")
90
+ detector = gr.Interface(fn=predict, inputs=[gr.Text(label="Input"),file_path], outputs=[output, gr.DataFrame(headers =['Tweets', 'Labels'], wrap=True), gr.Plot(label="Sarcasm Predictor")], title="Sarcasm Predictor")
91
 
92
  # demo.launch(debug=True)
93
 
94
 
95
+ # load classifier pipeline
96
  pipe2 = pipeline(model="seek007/taskB-bertweet-base-trainer-1.0.0", tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0")
97
 
98
+
99
+ # classifier
100
  def classifyB(text=None , fil=None):
101
+ sentiment = None
102
+ df = None
103
+ fig = None
104
+
105
+ if text is None and fil is None:
106
+ return "Either enter text or upload .csv or .xlsx file.!" , df, fig
107
+
108
+
109
  # Preprocess the text
110
  preprocessed_text = normalizeTweet(text)
111
+
 
 
112
 
113
  labels = ['sarcasm', 'irony','Staire', 'understatement','overstatement', 'rhetorical question']
114
 
115
  if fil:
116
  if fil.name.endswith('.csv'):
117
+ df = pd.read_csv(fil.name, header=None, names=['tweet'], usecols=[0])
118
  elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
119
+ df = pd.read_excel(fil.name, header=None, names=['tweet'], usecols=[0])
120
  else:
121
  raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
122
 
123
+ lst = list(df['tweet'])
124
  m =[normalizeTweet(i) for i in lst]
125
  d = pipe2(m)
126
 
 
142
  plt.title('Result: Count Plot') # Add a title to the plot
143
  plt.xlabel('label') # Add label for the x-axis
144
  plt.ylabel('Count')
145
+ if text is None:
146
  sentiment = df['label'][0]
147
 
148
  # Perform sentiment prediction
149
+ if text:
150
  prediction = pipe2([preprocessed_text])[0]
151
  # print(prediction["label"])
152
  labels = prediction['label']
153
+ scores = prediction['score']
154
  sentiment = labels
155
+ if fil is None:
156
+ df= pd.DataFrame([{'tweet':text, 'label':sentiment, "score": scores}])
157
+
158
  return sentiment, df, fig
159
 
160
 
161
 
162
  file_path =gr.File(label="Upload a File")
163
  label = gr.Label( label="Labels")
164
+ classifier = gr.Interface(classifyB, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier") #,theme= 'darkhuggingface'
165
 
166
+ main = gr.TabbedInterface([detector, classifier],['Analysizer', 'Classifier'], title="Sarcasm Predictor: An Optimized Sentiment Analysis system" )
167
 
168
  main.launch(share=True)