Spaces:
Sleeping
Sleeping
| import os | |
| import torch | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, TextClassificationPipeline | |
# Hugging Face model identifier; can be overridden via the HF_MODEL
# environment variable.
HF_MODEL = os.environ.get("HF_MODEL", "IMSyPP/hate_speech_multilingual")

# Human-readable names for the raw class ids emitted by the classifier.
# The ordering mirrors the model card of the original model.
LABEL_NAMES = dict(
    LABEL_0="Appropriate",
    LABEL_1="Inappropriate",
    LABEL_2="Offensive",
    LABEL_3="Violent",
)

# Process-wide pipeline instance; stays None until the first call to
# _get_pipeline() builds it.
_pipeline = None
def _get_pipeline():
    """Return the shared text-classification pipeline, building it on first use.

    The first call loads the tokenizer and the sequence-classification model
    named by ``HF_MODEL`` and stores the resulting pipeline in the
    module-level ``_pipeline``; later calls return that stored instance.
    """
    global _pipeline
    if _pipeline is None:
        _pipeline = TextClassificationPipeline(
            tokenizer=AutoTokenizer.from_pretrained(HF_MODEL),
            model=AutoModelForSequenceClassification.from_pretrained(HF_MODEL),
            top_k=None,
            task="sentiment_analysis",
            function_to_apply="softmax",
            # The pipeline expects -1 for CPU and a non-negative index for a
            # CUDA device; use the first GPU whenever one is available.
            device=0 if torch.cuda.is_available() else -1,
        )
    return _pipeline
def _normalize_scores(raw_output):
    """Turn raw text-classification output into a ranked list of class scores.

    ``raw_output`` may be a flat list of ``{"label": ..., "score": ...}``
    dicts, or a nested list whose first element is such a list (only that
    first element is used). Entries that are not dicts, or that have a
    missing/empty label, are skipped; a missing score counts as 0.0.

    Returns:
        A list of ``{"label": str, "display_label": str, "score": float}``
        dicts ordered from highest to lowest score.

    Raises:
        ValueError: if ``raw_output`` is not a non-empty list, or if no
            usable entries remain after filtering.
    """
    if not (isinstance(raw_output, list) and raw_output):
        raise ValueError("Unexpected model response format")

    # Unwrap the nested form ([[{...}, ...]]) down to the inner list of dicts.
    first = raw_output[0]
    candidates = first if isinstance(first, list) else raw_output

    ranked = []
    for entry in candidates:
        if not isinstance(entry, dict):
            continue
        name = entry.get("label")
        value = float(entry.get("score", 0.0))
        if not name:
            continue
        # Labels without a known display name fall back to a title-cased
        # version of the raw id (e.g. "LABEL_7" -> "Label 7").
        fallback = name.replace("_", " ").title()
        ranked.append(
            {
                "label": name,
                "display_label": LABEL_NAMES.get(name, fallback),
                "score": value,
            }
        )

    if not ranked:
        raise ValueError("Model returned no class scores")

    ranked.sort(key=lambda entry: entry["score"], reverse=True)
    return ranked
def predict(text):
    """Classify ``text`` with the local model and report the winning class.

    Args:
        text: Input handed to the text-classification pipeline.

    Returns:
        A dict with:
            ``model``: the model identifier (``HF_MODEL``),
            ``label``: raw label of the highest-scoring class,
            ``display_label``: human-readable name of that class,
            ``probability``: score of that class,
            ``classes``: every class score, highest first.

    Raises:
        RuntimeError: if loading the pipeline or running inference fails;
            the original exception is chained as the cause.
        ValueError: propagated from ``_normalize_scores`` when the pipeline
            output has an unexpected shape or contains no usable scores.
    """
    try:
        classifier = _get_pipeline()
        # Truncate to the tokenizer's maximum length: without this, input
        # that tokenizes to more positions than the model supports fails
        # inside the forward pass instead of being classified.
        response_data = classifier(text, truncation=True)
    except Exception as err:
        raise RuntimeError(f"Local model inference failed: {err}") from err

    classes = _normalize_scores(response_data)
    top = classes[0]
    return {
        "model": HF_MODEL,
        "label": top["label"],
        "display_label": top["display_label"],
        "probability": top["score"],
        "classes": classes,
    }