Spaces:
Running
Running
| """ | |
| Content Moderation API | |
| Generated on: 2025-10-03 04:10:32 | |
| """ | |
| import joblib | |
| import json | |
| import re | |
| from typing import Dict, List, Tuple | |
class ContentModeratorAPI:
    """Moderate text using a configurable list of blocked words.

    A model is loaded next to the configuration, but the ML branch of
    ``moderate`` is still a placeholder: only the blocked-word rules
    influence the result, and ``confidence`` is always ``0.0``.
    """

    def __init__(self, model_path: str, config_path: str):
        """Load the model and the moderation configuration.

        Args:
            model_path: Path of the model file handed to ``joblib.load``.
            config_path: Path of a JSON file providing the keys
                ``blocked_words``, ``sensitivity_threshold`` and
                ``embedding_type``.

        Raises:
            KeyError: If one of the required configuration keys is missing.
        """
        # SECURITY: joblib.load unpickles arbitrary objects and can execute
        # code while doing so; only point it at model files you trust.
        self.model = joblib.load(model_path)

        # JSON is UTF-8; being explicit keeps non-ASCII blocked words intact
        # regardless of the platform's default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        # Drop empty entries: an empty pattern would match any text and
        # flag everything as inappropriate.
        self.blocked_words = [word for word in config['blocked_words'] if word]
        self.sensitivity_threshold = config['sensitivity_threshold']
        self.model_type = config['embedding_type']

        # One compiled pattern per blocked word, in the same order.
        self.patterns = self._compile_patterns(self.blocked_words)

    @staticmethod
    def _compile_patterns(words: List[str]) -> List[re.Pattern]:
        """Compile one case-insensitive whole-word pattern per blocked word."""
        # Lookarounds are used instead of \b so that terms which start or
        # end with a non-word character (e.g. "c++") still match, while
        # substrings of longer words (e.g. "ass" in "class") do not.
        return [
            re.compile(r'(?<!\w)' + re.escape(word) + r'(?!\w)', re.IGNORECASE)
            for word in words
        ]

    def detect_blocked_words(self, text: str) -> List[str]:
        """Return the blocked words that occur in ``text``.

        Matching is case-insensitive and on whole words only. The result
        follows the order of ``self.blocked_words``.
        """
        return [
            word
            for word, pattern in zip(self.blocked_words, self.patterns)
            if pattern.search(text)
        ]

    def censor_text(self, text: str, replacement: str = "***") -> str:
        """Return ``text`` with every blocked word replaced.

        Args:
            text: The text to censor.
            replacement: Substitute passed to ``re.sub``; note that ``re``
                interprets backslash escapes in it.
        """
        censored = text
        for pattern in self.patterns:
            censored = pattern.sub(replacement, censored)
        return censored

    def moderate(self, text: str) -> Dict:
        """Moderate a single text.

        Returns:
            A dict with the keys ``text`` (the input), ``is_inappropriate``,
            ``blocked_words`` (the matches found), ``censored_text`` (the
            input unchanged when nothing was flagged) and ``confidence``.
        """
        # Rule-based detection
        blocked_words = self.detect_blocked_words(text)
        rule_inappropriate = bool(blocked_words)

        # ML-based detection: not wired up yet, so these defaults are what
        # is always reported.
        ml_confidence = 0.0
        ml_inappropriate = False
        if self.model_type == 'tfidf':
            # TODO: needs the vectorizer loaded separately
            pass
        elif self.model_type == 'sentence_transformer':
            # TODO: needs the embedder loaded separately
            pass

        # Combine predictions
        is_inappropriate = rule_inappropriate or ml_inappropriate
        censored_text = self.censor_text(text) if is_inappropriate else text
        return {
            "text": text,
            "is_inappropriate": is_inappropriate,
            "blocked_words": blocked_words,
            "censored_text": censored_text,
            "confidence": ml_confidence,
        }

    def moderate_batch(self, texts: List[str]) -> List[Dict]:
        """Moderate each text in ``texts`` and return the results in order."""
        return [self.moderate(text) for text in texts]
| # Usage example: | |
| # moderator = ContentModeratorAPI('models/model.pkl', 'models/config.json') | |
| # result = moderator.moderate("Test text") | |
| # print(result) | |