File size: 1,568 Bytes
ac47f8e
 
 
 
 
eefec60
 
 
4e92e71
 
 
 
 
05f7045
4e92e71
ed80502
ac47f8e
 
 
 
 
 
 
 
 
 
 
 
 
 
4e92e71
ac47f8e
 
 
 
 
 
 
 
 
4e92e71
ac47f8e
 
 
 
 
 
 
 
a876f7e
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import keras
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import json

# --- Load the fitted Keras tokenizer from its JSON export ---
tokenizer_path = "tokenizer.json"
with open(tokenizer_path, "r", encoding="utf-8") as token_file:
    tokenizer = tokenizer_from_json(token_file.read())

# --- Load the trained classifier from the local HDF5 checkpoint ---
model_path = "toxic_classifier.h5"
model = tf.keras.models.load_model(model_path)

# Mapping from model output index to its human-readable category name.
# Index order must match the class order the classifier was trained with.
label_map = dict(enumerate([
    "Child Sexual Exploitation",
    "Elections",
    "Non-Violent Crimes",
    "Safe",
    "Sex-Related Crimes",
    "Suicide & Self-Harm",
    "Unknown S-Type",
    "Violent Crimes",
    "Unsafe",
]))

# Inference function
def classify_toxic(query, image_desc):
    """Classify a (query, image description) pair into a toxic category.

    The two texts are concatenated, tokenized with the module-level
    tokenizer, padded/truncated to a fixed length of 150 tokens, and run
    through the loaded model; the argmax class index is then mapped to
    its human-readable label via ``label_map`` ("Unknown" if the index
    is not in the map).
    """
    max_len = 150
    combined = query + " " + image_desc
    sequences = tokenizer.texts_to_sequences([combined])
    padded = pad_sequences(
        sequences, maxlen=max_len, padding='post', truncating='post'
    )
    probabilities = model.predict(padded)
    best_index = np.argmax(probabilities, axis=1)[0]
    return label_map.get(best_index, "Unknown")

# --- Gradio UI: two free-text inputs, one text output ---
demo_inputs = [
    gr.Textbox(label="Query"),
    gr.Textbox(label="Image Description"),
]

iface = gr.Interface(
    fn=classify_toxic,
    inputs=demo_inputs,
    outputs=gr.Textbox(label="Predicted Toxic Category"),
    title="Toxic Category Classifier",
    description="Enter a query and image description to classify the prompt into one of the toxic categories",
)

if __name__ == "__main__":
    # Launch the local Gradio server only when run as a script.
    iface.launch()