Spaces:
Running
Running
| """ | |
| Pre-submission prompt validation to catch common issues before generation | |
| """ | |
| import re | |
| from typing import Dict, List, Tuple | |
# Names of well-known real people, stored lowercase.
# This is an illustrative sample, not an exhaustive list.
COMMON_PUBLIC_FIGURES = [
    # Politicians
    "donald trump",
    "joe biden",
    "barack obama",
    "kamala harris",
    "vladimir putin",
    "xi jinping",
    "narendra modi",
    "boris johnson",
    # Tech CEOs
    "elon musk",
    "jeff bezos",
    "mark zuckerberg",
    "bill gates",
    "steve jobs",
    "tim cook",
    "sundar pichai",
    "satya nadella",
    # Celebrities
    "taylor swift",
    "beyonce",
    "kim kardashian",
    "kanye west",
    "dwayne johnson",
    "tom cruise",
    "leonardo dicaprio",
    # Athletes
    "lebron james",
    "cristiano ronaldo",
    "lionel messi",
    "serena williams",
    "tiger woods",
    "michael jordan",
]
# Fictional characters and brand names, stored lowercase.
COPYRIGHTED_TERMS = [
    # Characters
    "spider-man",
    "spiderman",
    "batman",
    "superman",
    "iron man",
    "mickey mouse",
    "harry potter",
    "darth vader",
    # Brands
    "nike",
    "adidas",
    "apple",
    "google",
    "microsoft",
    "coca-cola",
    "pepsi",
    "mcdonalds",
    "starbucks",
    "amazon",
]
def validate_prompt_content(text: str) -> Tuple[bool, List[str]]:
    """
    Validate prompt content for potential content policy violations.

    Public-figure names and copyrighted terms are matched case-insensitively
    as whole words (a trailing plural "s" is tolerated), so "apple" is
    reported for "Apple" or "apples" but not for "pineapple".

    Args:
        text: The prompt text to check.

    Returns:
        Tuple of (is_valid, list_of_warnings). is_valid is True only when
        no warnings were produced.
    """
    warnings: List[str] = []
    text_lower = text.lower()

    def _mentioned(terms: List[str]) -> List[str]:
        # A plain substring test flags terms embedded in unrelated words
        # ("pineapple", "googled"). Require that the term is not preceded by
        # a word character and is followed by at most a plural "s".
        return [
            term
            for term in terms
            if re.search(
                r"(?<!\w)" + re.escape(term) + r"(?=s?(?!\w))", text_lower
            )
        ]

    # Check for public figures
    found_figures = _mentioned(COMMON_PUBLIC_FIGURES)
    if found_figures:
        warnings.append(
            f"⚠️ Detected public figure(s): {', '.join(found_figures)}. "
            "Consider using generic descriptions instead (e.g., 'a business executive' instead of specific names)."
        )

    # Check for copyrighted terms
    found_copyrighted = _mentioned(COPYRIGHTED_TERMS)
    if found_copyrighted:
        warnings.append(
            f"⚠️ Detected copyrighted term(s): {', '.join(found_copyrighted)}. "
            "Consider using generic alternatives to avoid content policy issues."
        )

    # Check for potentially sensitive content.
    # NOTE(review): the first two patterns match the exact words only (not
    # e.g. "killing"), while the third also accepts suffixes -- confirm that
    # this difference is intended.
    sensitive_patterns = [
        (r'\b(kill|murder|death|blood|violence)\b', "violent content"),
        (r'\b(naked|nude|sex|sexual)\b', "explicit content"),
        (r'\b(hate|racist|discriminat)\w*\b', "discriminatory language"),
    ]
    for pattern, content_type in sensitive_patterns:
        if re.search(pattern, text_lower):
            warnings.append(f"⚠️ Potentially sensitive {content_type} detected. Review for content policy compliance.")

    return not warnings, warnings
def sanitize_prompt_content(text: str) -> str:
    """
    Automatically sanitize prompt content by replacing problematic terms.

    This is a basic implementation - the AI-powered fix is more sophisticated.
    Each known term is replaced case-insensitively, but only where it stands
    as its own word (a trailing plural "s" is tolerated and left in place).
    Words that merely contain a term, such as "pineapple" or "googled", are
    not modified. Terms without an entry in the replacement table are left
    untouched.

    Args:
        text: The prompt text to sanitize.

    Returns:
        Sanitized text
    """
    # Generic stand-ins for specific names, characters and brands.
    replacements = {
        # Politicians
        "donald trump": "a business executive",
        "joe biden": "a senior politician",
        # Tech executives
        "elon musk": "a tech entrepreneur",
        "jeff bezos": "a business mogul",
        "mark zuckerberg": "a tech founder",
        "bill gates": "a technology pioneer",
        # Celebrities
        "taylor swift": "a popular singer",
        "beyonce": "a renowned performer",
        "kim kardashian": "a media personality",
        # Athletes
        "lebron james": "a professional basketball player",
        "cristiano ronaldo": "a soccer star",
        "lionel messi": "a soccer champion",
        # Characters
        "spider-man": "a superhero",
        "spiderman": "a superhero",
        "batman": "a crime fighter",
        "superman": "a hero with superpowers",
        "harry potter": "a young wizard",
        # Brands
        "nike": "athletic",
        "adidas": "sportswear",
        "apple": "tech",
        "google": "a search engine",
        "starbucks": "a coffee shop",
        "mcdonalds": "a restaurant",
    }

    sanitized = text
    for term, replacement in replacements.items():
        # Anchor the term so it is not rewritten inside a longer word: no
        # word character may precede it, and only an optional plural "s"
        # may follow before the word ends. The lookahead keeps that "s"
        # out of the match, so it survives the substitution.
        pattern = re.compile(
            r"(?<!\w)" + re.escape(term) + r"(?=s?(?!\w))",
            re.IGNORECASE,
        )
        sanitized = pattern.sub(replacement, sanitized)
    return sanitized
def get_content_guidance() -> Dict[str, List[str]]:
    """
    Build the guidance shown to users about what to keep out of prompts.

    Returns:
        A newly created dictionary mapping each guidance category to its
        list of advice strings.
    """
    guidance: Dict[str, List[str]] = {}

    # Real people
    guidance["avoid_public_figures"] = [
        "Real politicians, celebrities, athletes, or public figures",
        "Use generic roles instead: 'a business executive', 'a singer', 'an athlete'",
    ]

    # Characters and brands
    guidance["avoid_copyrighted"] = [
        "Trademarked characters (Spider-Man, Mickey Mouse, etc.)",
        "Brand names (Nike, Apple, Starbucks, etc.)",
        "Use generic alternatives: 'a superhero', 'athletic shoes', 'a coffee shop'",
    ]

    # Sensitive subject matter
    guidance["avoid_sensitive"] = [
        "Violence, gore, or disturbing imagery",
        "Explicit or sexual content",
        "Hate speech or discriminatory language",
        "Dangerous or illegal activities",
    ]

    # General recommendations
    guidance["best_practices"] = [
        "Use generic, descriptive language",
        "Focus on actions, emotions, and settings rather than specific identities",
        "Keep content family-friendly and brand-safe",
        "Test with shorter scripts first to validate content compliance",
    ]

    return guidance