Spaces:

seek007
/

external

Sleeping

App Files Files Community

seek007 committed on Jun 25, 2024

Commit

8432f36

verified ·

1 Parent(s): 7b48159

Create app.py

Browse files

Files changed (1) hide show

app.py +177 -0

app.py ADDED Viewed

	@@ -0,0 +1,177 @@

+# -*- coding: utf-8 -*-
+"""FA20-BCS-OO1 final app.ipynb
+Automatically generated by Colab
+"""
+# !pip install emoji gradio
+import joblib, pickle, pandas as pd, numpy as np
+import gradio as gr
+from TweetNormalizer import normalizeTweet
+import seaborn as sns
+import matplotlib.pyplot as plt
+from transformers import pipeline
+#  seek007/taskA-DeBERTa-bweet-1.2.5
+# seek007/taskA-DeBERTa-large-1.0.0
+# seek007/taskA-DeBERTa-bweet-1.1.0
+pipe= pipeline(model="seek007/taskA-DeBERTa-large-1.0.0",tokenizer='seek007/taskA-DeBERTa-large-1.0.0')
+# pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')
+import numpy as np
+def predict(text=None , fil=None):
+    # Preprocess the text
+    preprocessed_text = normalizeTweet(text)
+    sentiment =None
+    df=None
+    fig=None
+    if fil:
+      if fil.name.endswith('.csv'):
+          df = pd.read_csv(fil.name)
+      elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
+          df = pd.read_excel(fil.name)
+      else:
+          raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
+      # df= df.sample(20)
+      lst = list(df.tweet)
+      m =[normalizeTweet(i) for i in lst]
+      # m = [truncate_string(i) for i in m]
+      d = pd.DataFrame(pipe.predict(m))
+      df['label'] = d['label']
+      # print(df.sample(5))
+      df.drop('sarcastic', axis=1, inplace=True)
+      # print(df.sample(5))
+      mapping = {
+          'LABEL_0': 'non_sarcastic',
+          'LABEL_1': 'sarcastic'
+      }
+      # df['label']=df['label'].map(mapping)
+      sarcastic_count = np.sum(df.label =='sarcastic')
+      non_sarcastic_count = np.sum(df.label =='non_sarcastic')
+      labels = ['Sarcastic', 'Non-Sarcastic']
+      sizes = [sarcastic_count, non_sarcastic_count]
+      colors = ['gold', 'lightblue']
+      explode = (0.1, 0)  # explode 1st slice
+      sns.set_style("whitegrid")
+      fig, ax = plt.subplots()
+      ax.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=True, startangle=140)  #, colors=colors
+      ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
+      plt.title('Sarcastic vs Non-Sarcastic Tweets')
+      # fig = plt.figure()  #figsize=(8, 6)
+      # sns.countplot(x='label', data=df, palette='viridis')
+      # plt.title('Result: Count Plot')  # Add a title to the plot
+      # plt.xlabel('label')  # Add label for the x-axis
+      # plt.ylabel('Count')
+     # Perform sentiment prediction
+    if text !="" or fil !=None:
+      prediction = pipe.predict([preprocessed_text])[0]
+      print(prediction)
+      # sentiment = {p['label']: p['score'] for p in prediction}
+      # sentiment['']
+      # print(sentiment)
+      sentiment = "Sarcastic" if (prediction['label'] == 'LABEL_1' or prediction['label'] =='sarcastic') else "Non Sarcastic"
+      if fil == None:
+        df= pd.DataFrame([{'tweet':text, 'label':sentiment}])
+    else:
+      return "Either enter text or upload .csv or .xlsx file.!"  , df, fig
+    return sentiment, df, fig
+file_path =gr.File(label="Upload a File")
+output = gr.Label(num_top_classes=2, label="Predicted Labels")
+demo = gr.Interface(fn=predict, inputs=[gr.Text(label="Input"),file_path], outputs=[output, gr.DataFrame(headers =['Tweets', 'Labels'], wrap=True), gr.Plot(label="Sarcasm Predictor")], title="Sarcasm Predictor")
+# demo.launch(debug=True)
+file_path =gr.File(label="Upload a File")
+label = gr.Label(num_top_classes=3, label="Top 3 Labels")
+classification = gr.Interface(classify, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier")
+# classification.launch(debug=True)
+from transformers import pipeline
+pipe2 = pipeline(model="seek007/taskB-bertweet-base-trainer-1.0.0", tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0")
+def classifyB(text=None , fil=None):
+    # Preprocess the text
+    preprocessed_text = normalizeTweet(text)
+    sentiment =None
+    df=None
+    fig=None
+    labels = ['sarcasm', 'irony','Staire', 'understatement','overstatement', 'rhetorical question']
+    if fil:
+      if fil.name.endswith('.csv'):
+          df = pd.read_csv(fil.name)
+      elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
+          df = pd.read_excel(fil.name)
+      else:
+          raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
+      lst = list(df.tweet)
+      m =[normalizeTweet(i) for i in lst]
+      # m = [truncate_string(i) for i in m]
+      d = pipe2(m)
+      structured_data = []
+      # Iterate over the list of dictionaries and convert each to a structured dictionary
+      for item in d:
+          labels = item['label']
+          scores = item['score']
+          structured_data.append({ "label": labels, "score": scores})
+      # Convert the list of dictionaries to a DataFrame
+      df1 = pd.DataFrame(structured_data)
+      df = pd.concat([df, df1], axis=1)
+      # df["labels"] = d['labels']
+      # print("df: ",df.head())
+      # return df.head()
+      fig = plt.figure()  #figsize=(8, 6)
+      sns.countplot(x='label', data=df, palette='viridis')
+      plt.title('Result: Count Plot')  # Add a title to the plot
+      plt.xlabel('label')  # Add label for the x-axis
+      plt.ylabel('Count')
+     # Perform sentiment prediction
+    if text !=None or fil !=None:
+      prediction = pipe2([preprocessed_text])[0]
+      print(prediction["label"])
+      labels = prediction['label']
+      scores = prediction['score']
+      # Combine labels and scores, and sort by score in descending order
+      # Extract top 3 labels and their scores
+      sentiment = labels
+    return sentiment, df, fig
+file_path =gr.File(label="Upload a File")
+label = gr.Label( label="Labels")
+classificationB = gr.Interface(classifyB, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier",theme= 'dark')
+main = gr.TabbedInterface([demo, classificationB],['Analysizer', 'Classifier'], title="Sarcasm Predictor: An Optimized Sentiment Analysis system" )
+main.launch(share=True)