Spaces:

HMPhuoc
/

toxic_detect

Sleeping

File size: 5,358 Bytes

d32c4be
 
 
 
 
 
 
 
 
dbcd187
 
d32c4be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ce5bd5
d32c4be
 
 
 
 
 
5ce5bd5
d32c4be
 
 
 
5ce5bd5
d32c4be
 
 
b5d5483
 
 
 
 
 
ae10921
b5d5483
 
 
 
 
d32c4be
 
 
 
 
 
 
 
 
b5d5483
 
d32c4be
 
 
b5d5483
 
5ce5bd5
b5d5483
 
 
d32c4be
 
7c202ad
 
 
 
 
 
 
 
 
d32c4be
 
 
 
7c202ad
 
 
 
 
 
 
 
 
d32c4be
 
b5d5483
 
 
d32c4be
b5d5483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14d2c3a
d32c4be
 
 
 
 
 
 
 
 
 
14d2c3a
d32c4be

import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px

# Load the pickled tokenizer stored next to this script.
# NOTE(review): pickle.load can execute arbitrary code from the file it
# reads; presumably tokenizer.pkl is a trusted project artifact - confirm.
fp = Path(__file__).parent / 'tokenizer.pkl'
with fp.open("rb") as f:
    tokenizer = pickle.load(f)

# Load the LSTM model (compiled on load) from the same directory.
fp = Path(__file__).parent / 'lstm_model.h5'
LSTM_model = tf.keras.models.load_model(fp, compile=True)

# Load the GRU model from the same directory via the keras.models import.
fp = Path(__file__).parent / 'gru_model.h5'
GRU_model = load_model(fp)


def tokenizer_pad(tokenizer, comment_text, max_length=200):
    """Convert one comment into a padded sequence batch of size one.

    Args:
        tokenizer: Object exposing ``texts_to_sequences``.
        comment_text: The single text to encode; it is wrapped in a list so
            the tokenizer receives a one-element batch.
        max_length: Target sequence length passed to ``pad_sequences`` as
            ``maxlen`` (default 200).

    Returns:
        Whatever ``pad_sequences`` returns for the encoded batch, using
        post-padding and post-truncation.
    """
    encoded_batch = tokenizer.texts_to_sequences([comment_text])
    return pad_sequences(
        sequences=encoded_batch,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )

def LSTM_predict(x):
    """Score a single text with the LSTM model.

    The text is encoded with ``tokenizer_pad`` (a batch of one), passed to
    ``LSTM_model.predict``, and the first row of the prediction is taken.

    Args:
        x: The text to score.

    Returns:
        A list with each value of that row rounded to two decimals.
    """
    batch = tokenizer_pad(tokenizer=tokenizer, comment_text=x)
    first_row = LSTM_model.predict(batch)[0]
    return [round(score, 2) for score in first_row]

def GRU_predict(x):
    """Score a single text with the GRU model.

    The text is encoded with ``tokenizer_pad`` (a batch of one), passed to
    ``GRU_model.predict``, and the first row of the prediction is taken.

    Args:
        x: The text to score.

    Returns:
        A list with each value of that row rounded to two decimals.
    """
    batch = tokenizer_pad(tokenizer=tokenizer, comment_text=x)
    first_row = GRU_model.predict(batch)[0]
    return [round(score, 2) for score in first_row]

def plot(result):
    """Build a bar chart of the scores, one bar per label.

    Args:
        result: Sequence of scores, one per entry of the fixed label list
            below and in the same order.

    Returns:
        The figure returned by ``plotly.express.bar`` with the y axis range
        fixed to [0, 1] and bars coloured by label.
    """
    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']

    # Build the frame in one step instead of mutating an empty DataFrame.
    data = pd.DataFrame({'Nhãn': label, 'Điểm': result})

    return px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])

def _band_message(score, messages):
    """Return the entry of `messages` for the band that `score` falls in.

    The bands are [0, 0.1), [0.1, 0.5), [0.5, 0.8) and [0.8, +inf);
    `messages` holds one string per band, in that order. A score that is
    not >= 0 (negative or NaN) matches no band and yields an empty string.
    """
    if not score >= 0:
        return ''
    for upper_bound, message in zip((0.1, 0.5, 0.8), messages):
        if score < upper_bound:
            return message
    return messages[-1]


def judge(x):
    """Score a text with both models and summarise the averaged result.

    Args:
        x: The text to evaluate.

    Returns:
        A 3-tuple ``(return_result, p, another_result)``:
        - ``return_result``: text showing the rounded LSTM and GRU scores;
        - ``p``: the bar chart produced by ``plot`` for the averaged scores;
        - ``another_result``: text with the averaged scores, one assessment
          line per label and an overall conclusion based on the highest
          averaged score.
    """
    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']

    result_lstm = np.round(LSTM_predict(x), 2)
    result_gru = np.round(GRU_predict(x), 2)

    # Average element by element (rather than on whole arrays) so the scalar
    # arithmetic, and therefore the rounding, stays the same as before. Only
    # as many scores as there are labels are used.
    count = len(label)
    averaged = [
        (lstm_score + gru_score) / 2
        for lstm_score, gru_score in zip(result_lstm[:count], result_gru[:count])
    ]
    final_result = np.round(averaged, 2)

    return_result = (
        'Result'
        '\nMô hình LSTM\n'
        f"{result_lstm}\n"
        '\nMô hình GRU\n'
        f"{result_gru}\n"
    )

    # One template per band; "{}" receives the label name.
    label_messages = (
        "Tính {} là không có\n",
        "Tính {} ở mức không rõ ràng, không thể xác định chính xác\n",
        "Tính {} ở mức rõ ràng, cần xem xét\n",
        "Tính {} ở mức nghiêm trọng, yêu cầu chấn chỉnh\n",
    )
    # Overall conclusion per band, chosen by the highest averaged score.
    conclusion_messages = (
        "Ngôn ngữ phù hợp mọi lứa tuổi.\n",
        "Ngôn ngữ cần được kiểm tra lại.\n",
        "Ngôn ngữ không phù hợp, cần xem xét lại.\n",
        "Ngôn ngữ vi phạm tiêu chuẩn cộng đồng nghiêm trọng, yêu cầu chấn chỉnh.\n",
    )

    parts = ["\nTổng quan kết quả trung bình:\n", f"{final_result}\n"]
    parts.extend(
        _band_message(score, label_messages).format(name)
        for name, score in zip(label, final_result)
    )
    parts.append("\nKết luận:\n")
    parts.append(_band_message(max(final_result), conclusion_messages))
    another_result = "".join(parts)

    p = plot(final_result)
    return (return_result, p, another_result)

if __name__ == "__main__":
    # Serve `judge` through a Gradio interface: one text box in, and
    # text / plot / text out (matching the 3-tuple that `judge` returns).
    # An earlier console loop (prompt, clear screen, call judge) was
    # replaced by this web UI.
    text_input = gr.Textbox(lines=2, placeholder='Please write something', label="Input Text")
    interface = gr.Interface(fn=judge, inputs=text_input, outputs=['text', 'plot', 'text'])
    interface.launch()