| from transformers import Pipeline |
| from tensorflow.keras.models import load_model |
| from tensorflow.keras.preprocessing.text import tokenizer_from_json |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
| import numpy as np |
| import json |
|
|
class NewsClassifierPipeline(Pipeline):
    """Classify a piece of text as ``"foxnews"`` or ``"nbc"`` with a Keras model.

    The Keras model and the Keras tokenizer are loaded from fixed relative
    paths at construction time. The pipeline follows the standard
    ``Pipeline`` flow: ``preprocess`` -> ``_forward`` -> ``postprocess``.

    The result is a one-element list holding a dict with a ``"label"`` and
    the ``"score"`` (confidence) of that label.

    NOTE(review): the base ``Pipeline`` is designed around transformers
    models. Depending on the installed transformers version, its constructor
    may expect attributes (such as ``config``) that a plain Keras model does
    not expose -- confirm against the version in use.
    """

    # Locations of the serialized artifacts, relative to the working directory.
    MODEL_PATH = './news_classifier.h5'
    TOKENIZER_PATH = './tokenizer.json'

    # Every input is padded/truncated to this many token ids.
    MAX_SEQUENCE_LENGTH = 128

    # The first model output above this threshold selects POSITIVE_LABEL.
    DECISION_THRESHOLD = 0.5
    POSITIVE_LABEL = "foxnews"
    NEGATIVE_LABEL = "nbc"

    def __init__(self, **kwargs):
        """Load the model and tokenizer, then initialise the base pipeline.

        Args:
            **kwargs: Optional extra keyword arguments forwarded to
                ``Pipeline.__init__``. ``framework`` defaults to ``"tf"``.
        """
        # Load the artifacts first: the base constructor requires the model.
        keras_model = load_model(self.MODEL_PATH)

        with open(self.TOKENIZER_PATH, 'r', encoding='utf-8') as f:
            tokenizer_data = json.load(f)
        # tokenizer_from_json() parses a JSON *string*. If the file stores the
        # tokenizer config as a plain JSON object, json.load() yields a dict,
        # so serialize it back before handing it over.
        if not isinstance(tokenizer_data, str):
            tokenizer_data = json.dumps(tokenizer_data)
        keras_tokenizer = tokenizer_from_json(tokenizer_data)

        # Naming the framework explicitly avoids framework auto-detection,
        # which is written for transformers models.
        kwargs.setdefault("framework", "tf")
        super().__init__(model=keras_model, **kwargs)

        # Assigned after the base constructor so these attributes always hold
        # the Keras objects, whatever the base class stored.
        self.model = keras_model
        self.tokenizer = keras_tokenizer

    def _sanitize_parameters(self, **kwargs):
        """Split call-time keyword arguments between the three stages.

        This pipeline has no tunable parameters, so the preprocess, forward
        and postprocess parameter dicts are all empty.
        """
        return {}, {}, {}

    def preprocess(self, inputs):
        """Tokenize and pad a single input text.

        Args:
            inputs: The text to classify.

        Returns:
            A dict whose ``"input_ids"`` entry is the padded id array with
            one row of length ``MAX_SEQUENCE_LENGTH``.
        """
        sequences = self.tokenizer.texts_to_sequences([inputs])
        padded = pad_sequences(sequences, maxlen=self.MAX_SEQUENCE_LENGTH)
        # A mapping (rather than a bare array) lets the base class attach
        # extra entries to the model inputs without failing.
        return {"input_ids": padded}

    def _forward(self, inputs):
        """Run the model and turn its first output into a label and score.

        Args:
            inputs: Normally the dict produced by ``preprocess``. A raw text
                string or an already padded array is accepted as well, so
                direct calls keep working.

        Returns:
            A one-element list ``[{"label": ..., "score": ...}]``.
        """
        # The pipeline has already run preprocess() before this stage; only
        # raw text passed in directly still needs it.
        if isinstance(inputs, str):
            inputs = self.preprocess(inputs)
        padded = inputs["input_ids"] if isinstance(inputs, dict) else inputs

        predictions = self.model.predict(padded)
        scores = predictions[0]
        if scores[0] > self.DECISION_THRESHOLD:
            label = self.POSITIVE_LABEL
            confidence = scores[0]
        else:
            label = self.NEGATIVE_LABEL
            # Confidence of the negative class is the complement.
            confidence = 1 - scores[0]
        return [{"label": label, "score": float(confidence)}]

    def postprocess(self, model_outputs):
        """Return the forward-stage output unchanged."""
        return model_outputs
|
|