| from transformers import Pipeline | |
| import tensorflow as tf | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| import json | |
| import os | |
def load_tokenizer(tokenizer_path):
    """Read the tokenizer JSON file and return its parsed content.

    Args:
        tokenizer_path: Path to the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (typically a dict, or a
        str when the file holds a JSON-encoded string).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # locale encoding, which can mis-decode non-ASCII vocabulary entries.
    with open(tokenizer_path, 'r', encoding='utf-8') as f:
        return json.load(f)
class NewsClassificationPipeline(Pipeline):
    """Label news text as ``"foxnews"`` or ``"nbc"`` using a bundled Keras model.

    The model file (``news_classifier.h5``) and the tokenizer file
    (``tokenizer.json``) are both loaded from the directory that contains
    this module.
    """

    def __init__(self, model=None, tokenizer=None, **kwargs):
        """Load the bundled model and tokenizer from this module's directory.

        Args:
            model: Accepted for signature compatibility; not used. The model
                is always loaded from ``news_classifier.h5``.
            tokenizer: Accepted for signature compatibility; not used. The
                tokenizer is always rebuilt from ``tokenizer.json``.
            **kwargs: Forwarded unchanged to ``Pipeline.__init__``.
        """
        # NOTE(review): `model` and `tokenizer` are not forwarded to the base
        # class -- confirm the installed transformers version accepts that.
        super().__init__(**kwargs)

        base_dir = os.path.dirname(__file__)
        self.model = tf.keras.models.load_model(
            os.path.join(base_dir, 'news_classifier.h5')
        )
        self.tokenizer_config = load_tokenizer(
            os.path.join(base_dir, 'tokenizer.json')
        )

        # __call__ needs an object exposing texts_to_sequences(); the parsed
        # JSON alone does not provide it, so rebuild the tokenizer here.
        # Assumes tokenizer.json was produced from a Keras
        # Tokenizer.to_json() dump -- TODO confirm.
        serialized = self.tokenizer_config
        if not isinstance(serialized, str):
            serialized = json.dumps(serialized)
        self.tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
            serialized
        )

    def __call__(self, texts, **kwargs):
        """Classify one text or a collection of texts.

        Args:
            texts: A single string, or an iterable of strings.
            **kwargs: Accepted for signature compatibility; not used.

        Returns:
            For a single string, one ``{"label": ..., "score": ...}`` dict.
            Otherwise a list of such dicts, one per input text (an empty list
            for empty input).
        """
        # Record the input kind *before* wrapping it in a list; checking
        # after the rebinding would never see a str.
        single_input = isinstance(texts, str)
        batch = [texts] if single_input else list(texts)
        if not batch:
            # Nothing to classify; skip tokenising and predicting entirely.
            return []

        sequences = self.tokenizer.texts_to_sequences(batch)
        padded = pad_sequences(sequences, maxlen=128)
        predictions = self.model.predict(padded)

        results = []
        for pred in predictions:
            # Presumably pred[0] is the probability of "foxnews" from a
            # single-unit output -- verify against the trained model.
            positive = pred[0]
            if positive > 0.5:
                label, score = "foxnews", float(positive)
            else:
                label, score = "nbc", float(1 - positive)
            results.append({"label": label, "score": score})

        return results[0] if single_input else results