"""
Content Moderation API
Generated on: 2025-10-03 07:26:24
"""

import json
import re
from typing import Dict, List, Tuple


class ContentModeratorAPI:
    """Moderate text with a configurable blocked-word list.

    A serialized model is loaded at construction time, but the ML stage of
    :meth:`moderate` is not wired up in this file (it needs a vectorizer or
    embedder that is not loaded here), so verdicts are currently driven by
    the blocked-word rules only.

    Attributes:
        model: Object returned by ``joblib.load(model_path)``.
        blocked_words: Non-empty blocked words read from the configuration.
        sensitivity_threshold: Value of ``sensitivity_threshold`` from the
            configuration; stored but not used by any method in this file.
        model_type: Value of ``embedding_type`` from the configuration.
        patterns: Compiled regexes, one per entry of ``blocked_words`` and
            in the same order.
    """

    def __init__(self, model_path: str, config_path: str) -> None:
        """Load the model and the JSON configuration.

        Args:
            model_path: Path of the joblib-serialized model.
            config_path: Path of a JSON file providing the keys
                ``blocked_words``, ``sensitivity_threshold`` and
                ``embedding_type``.

        Raises:
            OSError: If either file cannot be opened.
            KeyError: If a required configuration key is missing.
            json.JSONDecodeError: If the configuration is not valid JSON.
        """
        # Imported here because this is the only place joblib is needed;
        # the rule-based helpers stay importable without it.
        import joblib

        # NOTE(security): joblib.load unpickles arbitrary objects and can
        # execute code; only pass model files from a trusted source.
        self.model = joblib.load(model_path)

        # Explicit encoding: the platform default is not always UTF-8.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        # Drop empty entries: an empty pattern would match arbitrary text.
        self.blocked_words = [word for word in config['blocked_words'] if word]
        self.sensitivity_threshold = config['sensitivity_threshold']
        self.model_type = config['embedding_type']

        self.patterns = self._compile_patterns(self.blocked_words)

    @staticmethod
    def _compile_patterns(words: List[str]) -> List["re.Pattern"]:
        """Compile one case-insensitive whole-word regex per blocked word.

        Lookarounds are used instead of ``\\b`` so that entries which start
        or end with punctuation (for example ``"@$$"``) still match, while
        a blocked word embedded in a longer word ("ass" in "class") does
        not.
        """
        return [
            re.compile(r'(?<!\w)' + re.escape(word) + r'(?!\w)', re.IGNORECASE)
            for word in words
        ]

    def detect_blocked_words(self, text: str) -> List[str]:
        """Return the blocked words found in ``text``.

        Words are returned as spelled in the configuration, in
        configuration order, each at most once.
        """
        return [
            word
            for word, pattern in zip(self.blocked_words, self.patterns)
            if pattern.search(text)
        ]

    def censor_text(self, text: str, replacement: str = "***") -> str:
        """Return ``text`` with every blocked-word match replaced.

        Args:
            text: Text to censor.
            replacement: String inserted verbatim for each match.
        """
        censored = text
        for pattern in self.patterns:
            # A callable keeps ``replacement`` literal; a plain string would
            # be parsed as a regex template and choke on backslashes.
            censored = pattern.sub(lambda _match: replacement, censored)
        return censored

    def moderate(self, text: str) -> Dict:
        """Moderate a single text.

        Returns:
            A dict with the keys ``text`` (the input), ``is_inappropriate``,
            ``blocked_words`` (the matches), ``censored_text`` (the input
            itself when nothing was flagged) and ``confidence`` (the ML
            confidence, currently always ``0.0``).
        """
        # Rule-based detection
        blocked_words = self.detect_blocked_words(text)
        rule_inappropriate = bool(blocked_words)

        # ML-based detection is not implemented: both the 'tfidf' and the
        # 'sentence_transformer' model types need a vectorizer/embedder that
        # this class does not load, so the ML verdict is always negative.
        ml_confidence = 0.0
        ml_inappropriate = False

        # Combine predictions
        is_inappropriate = rule_inappropriate or ml_inappropriate
        censored_text = self.censor_text(text) if is_inappropriate else text

        return {
            "text": text,
            "is_inappropriate": is_inappropriate,
            "blocked_words": blocked_words,
            "censored_text": censored_text,
            "confidence": ml_confidence,
        }

    def moderate_batch(self, texts: List[str]) -> List[Dict]:
        """Moderate each text in ``texts`` and return the results in order."""
        return [self.moderate(text) for text in texts]


# Usage example:
# moderator = ContentModeratorAPI('models/model.pkl', 'models/config.json')
# result = moderator.moderate("Test text")
# print(result)