NightPrince committed on
Commit
eb0dc26
·
verified ·
1 Parent(s): 175924e

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +21 -18
pipeline.py CHANGED
@@ -1,33 +1,36 @@
1
  import numpy as np
 
2
  from tensorflow.keras.preprocessing.sequence import pad_sequences
3
  from tensorflow.keras.preprocessing.text import tokenizer_from_json
4
- import tensorflow as tf
5
  import json
6
  import os
7
 
8
class ToxicPipeline:
    """Callable wrapper that classifies a text paired with an image description.

    The caller supplies an already-loaded model; the tokenizer is restored
    from a JSON file written by a Keras ``Tokenizer``.
    """

    def __init__(self, model, tokenizer_path="tokenizer.json", max_len=150, label_map=None):
        """Store the model and settings and restore the tokenizer from disk.

        Args:
            model: Object exposing ``predict``; it is called on the padded
                sequence batch.
            tokenizer_path: Path of the tokenizer JSON file.
            max_len: Length every token sequence is padded/truncated to.
            label_map: Optional mapping from predicted class index to a
                label; when falsy the raw index is returned.
        """
        self.model = model
        with open(tokenizer_path, "r", encoding="utf-8") as handle:
            serialized_tokenizer = handle.read()
        self.tokenizer = tokenizer_from_json(serialized_tokenizer)
        self.max_len = max_len
        self.label_map = label_map

    def __call__(self, text, image_desc):
        """Return the predicted class for ``text`` joined with ``image_desc``.

        The two strings are concatenated with a single space, tokenized,
        padded at the end to ``max_len`` and passed to the model.  The index
        of the highest value along axis 1 of the prediction is the result,
        translated through ``label_map`` when one was provided.
        """
        combined = text + " " + image_desc
        sequences = self.tokenizer.texts_to_sequences([combined])
        batch = pad_sequences(
            sequences,
            maxlen=self.max_len,
            padding='post',
            truncating='post',
        )
        probabilities = self.model.predict(batch)
        best_per_row = np.argmax(probabilities, axis=1)
        class_index = int(best_per_row[0])
        # Guard clause: without a (non-empty) label map, hand back the index.
        if not self.label_map:
            return class_index
        return self.label_map.get(class_index, class_index)
26
-
27
- # Example usage (for README):
28
- # from huggingface_hub import from_pretrained_keras
29
- # from pipeline import ToxicPipeline
30
- # model = from_pretrained_keras("NightPrince/Toxic_Classification")
31
- # pipeline = ToxicPipeline(model, tokenizer_path="data/tokenizer.json", label_map={0: "toxic", 1: "not toxic", ...})
32
- # result = pipeline("This is a dangerous post", "Knife shown in the image")
33
- # print(result)
 
1
  import numpy as np
2
+ import tensorflow as tf
3
  from tensorflow.keras.preprocessing.sequence import pad_sequences
4
  from tensorflow.keras.preprocessing.text import tokenizer_from_json
 
5
  import json
6
  import os
7
 
8
class Pipeline:
    """Self-loading classifier for a text paired with an image description.

    On construction the tokenizer (``tokenizer.json``), the model and, when
    the file exists, a label map (``label_map.json``) are read from one
    directory.  Calling the instance with a dict classifies a single example
    and returns the predicted label together with its score.
    """

    def __init__(self, model_dir=".", max_len=150):
        """Load the tokenizer, the model and the optional label map.

        Args:
            model_dir: Directory containing ``tokenizer.json``, the saved
                model and optionally ``label_map.json``.  Defaults to the
                current working directory.
            max_len: Length every token sequence is padded/truncated to.
        """
        # Load tokenizer
        tokenizer_path = os.path.join(model_dir, "tokenizer.json")
        with open(tokenizer_path, "r", encoding="utf-8") as f:
            tokenizer_json = f.read()
        self.tokenizer = tokenizer_from_json(tokenizer_json)
        self.max_len = max_len

        # Load model (SavedModel format)
        self.model = tf.keras.models.load_model(model_dir)

        # Optionally, load label map if you have one
        self.label_map = None
        label_map_path = os.path.join(model_dir, "label_map.json")
        if os.path.exists(label_map_path):
            with open(label_map_path, "r", encoding="utf-8") as f:
                self.label_map = json.load(f)

    def __call__(self, inputs):
        """Classify one example.

        Args:
            inputs: Mapping with optional keys ``'text'`` and
                ``'image_desc'``.  A missing key or a ``None`` value is
                treated as an empty string.

        Returns:
            A dict ``{"label": ..., "score": ...}``.  ``label`` is the
            predicted class index, or the label-map entry for it when a label
            map is loaded and has one; ``score`` is the largest value in the
            model's prediction, as a Python float.
        """
        # `or ""` also covers keys that are present but set to None, which
        # would otherwise make the concatenation below raise TypeError.
        text = inputs.get("text") or ""
        image_desc = inputs.get("image_desc") or ""
        input_text = text + " " + image_desc

        seq = self.tokenizer.texts_to_sequences([input_text])
        padded = pad_sequences(seq, maxlen=self.max_len, padding='post', truncating='post')
        pred_probs = self.model.predict(padded)

        pred_label = int(np.argmax(pred_probs, axis=1)[0])
        score = float(np.max(pred_probs))

        label = pred_label
        if self.label_map:
            # Keys are looked up as strings because JSON object keys are
            # always strings; fall back to the raw index when unmapped.
            label = self.label_map.get(str(pred_label), pred_label)
        return {"label": label, "score": score}