Video_AdGenesis_App / api /prompt_validator.py
sushilideaclan01's picture
Enhance prompt validation and safety features
82a1419
"""
Pre-submission prompt validation to catch common issues before generation
"""
import re
from typing import Dict, List, Tuple
# Lower-case names of well-known real people, used for prompt screening.
# This is a sample set for illustration, not an exhaustive registry.
COMMON_PUBLIC_FIGURES = [
    # --- Politicians ---
    "donald trump",
    "joe biden",
    "barack obama",
    "kamala harris",
    "vladimir putin",
    "xi jinping",
    "narendra modi",
    "boris johnson",
    # --- Tech CEOs ---
    "elon musk",
    "jeff bezos",
    "mark zuckerberg",
    "bill gates",
    "steve jobs",
    "tim cook",
    "sundar pichai",
    "satya nadella",
    # --- Celebrities ---
    "taylor swift",
    "beyonce",
    "kim kardashian",
    "kanye west",
    "dwayne johnson",
    "tom cruise",
    "leonardo dicaprio",
    # --- Athletes ---
    "lebron james",
    "cristiano ronaldo",
    "lionel messi",
    "serena williams",
    "tiger woods",
    "michael jordan",
]
# Lower-case names of protected fictional characters and commercial brands,
# used for prompt screening.
COPYRIGHTED_TERMS = [
    # --- Characters ---
    "spider-man",
    "spiderman",
    "batman",
    "superman",
    "iron man",
    "mickey mouse",
    "harry potter",
    "darth vader",
    # --- Brands ---
    "nike",
    "adidas",
    "apple",
    "google",
    "microsoft",
    "coca-cola",
    "pepsi",
    "mcdonalds",
    "starbucks",
    "amazon",
]
def validate_prompt_content(text: str) -> Tuple[bool, List[str]]:
    """
    Validate prompt content for potential content policy violations.

    The text is lower-cased and checked against ``COMMON_PUBLIC_FIGURES``,
    ``COPYRIGHTED_TERMS`` and a small set of sensitive-content patterns.
    List entries are matched as whole words/phrases (a trailing plural "s"
    is tolerated), so "apple" is detected in "an Apple laptop" or "apples"
    but not inside "pineapple".

    Args:
        text: Prompt text to inspect.

    Returns:
        Tuple of (is_valid, list_of_warnings). ``is_valid`` is True only
        when no warning was produced.
    """
    warnings = []
    text_lower = text.lower()

    def _mentions(term: str) -> bool:
        # A plain substring test would flag unrelated words that merely
        # contain a term ("pineapple" -> "apple"). Require a non-word
        # character (or the string edge) on both sides, allowing an
        # optional plural "s" after the term.
        pattern = rf"(?<!\w){re.escape(term)}(?=s?(?!\w))"
        return re.search(pattern, text_lower) is not None

    # Check for public figures
    found_figures = [name for name in COMMON_PUBLIC_FIGURES if _mentions(name)]
    if found_figures:
        warnings.append(
            f"⚠️ Detected public figure(s): {', '.join(found_figures)}. "
            "Consider using generic descriptions instead (e.g., 'a business executive' instead of specific names)."
        )

    # Check for copyrighted terms
    found_copyrighted = [term for term in COPYRIGHTED_TERMS if _mentions(term)]
    if found_copyrighted:
        warnings.append(
            f"⚠️ Detected copyrighted term(s): {', '.join(found_copyrighted)}. "
            "Consider using generic alternatives to avoid content policy issues."
        )

    # Check for potentially sensitive content; each category yields at most
    # one warning regardless of how many words match.
    sensitive_patterns = [
        (r'\b(kill|murder|death|blood|violence)\b', "violent content"),
        (r'\b(naked|nude|sex|sexual)\b', "explicit content"),
        (r'\b(hate|racist|discriminat)\w*\b', "discriminatory language"),
    ]
    for pattern, content_type in sensitive_patterns:
        if re.search(pattern, text_lower):
            warnings.append(f"⚠️ Potentially sensitive {content_type} detected. Review for content policy compliance.")

    is_valid = not warnings
    return is_valid, warnings
def sanitize_prompt_content(text: str) -> str:
    """
    Automatically sanitize prompt content by replacing problematic terms.
    This is a basic implementation - the AI-powered fix is more sophisticated.

    Known public figures, characters and brands are replaced,
    case-insensitively, by generic descriptions. Terms are only replaced
    when they stand as whole words/phrases (optionally followed by a plural
    "s", which is left in place), so unrelated words that merely contain a
    term, such as "pineapple" or "googled", are not corrupted.

    Args:
        text: Prompt text to sanitize.

    Returns:
        Sanitized text
    """
    sanitized = text

    # Replace common public figures with generic terms
    replacements = {
        # Politicians
        "donald trump": "a business executive",
        "joe biden": "a senior politician",
        "elon musk": "a tech entrepreneur",
        "jeff bezos": "a business mogul",
        "mark zuckerberg": "a tech founder",
        "bill gates": "a technology pioneer",
        # Celebrities
        "taylor swift": "a popular singer",
        "beyonce": "a renowned performer",
        "kim kardashian": "a media personality",
        # Athletes
        "lebron james": "a professional basketball player",
        "cristiano ronaldo": "a soccer star",
        "lionel messi": "a soccer champion",
        # Characters
        "spider-man": "a superhero",
        "spiderman": "a superhero",
        "batman": "a crime fighter",
        "superman": "a hero with superpowers",
        "harry potter": "a young wizard",
        # Brands
        "nike": "athletic",
        "adidas": "sportswear",
        "apple": "tech",
        "google": "a search engine",
        "starbucks": "a coffee shop",
        "mcdonalds": "a restaurant",
    }

    for term, replacement in replacements.items():
        # Case-insensitive, whole-word replacement. The lookbehind rejects a
        # term that starts mid-word; the lookahead requires the term to be
        # followed by a non-word character, the end of the text, or a plural
        # "s" that itself ends the word (the "s" is not consumed).
        pattern = re.compile(
            rf"(?<!\w){re.escape(term)}(?=s?(?!\w))",
            re.IGNORECASE,
        )
        # A callable replacement keeps the text literal (no backslash or
        # group-reference processing of the replacement string).
        sanitized = pattern.sub(lambda _match, _text=replacement: _text, sanitized)

    return sanitized
def get_content_guidance() -> Dict[str, List[str]]:
    """
    Describe the kinds of content that prompts should steer clear of.

    A fresh dictionary is built on every call.

    Returns:
        Mapping from a guidance category name to its list of human-readable
        tips, in the order: public figures, copyrighted material, sensitive
        content, best practices.
    """
    guidance: Dict[str, List[str]] = {}

    # Real, identifiable people
    guidance["avoid_public_figures"] = [
        "Real politicians, celebrities, athletes, or public figures",
        "Use generic roles instead: 'a business executive', 'a singer', 'an athlete'",
    ]

    # Protected characters and brand names
    guidance["avoid_copyrighted"] = [
        "Trademarked characters (Spider-Man, Mickey Mouse, etc.)",
        "Brand names (Nike, Apple, Starbucks, etc.)",
        "Use generic alternatives: 'a superhero', 'athletic shoes', 'a coffee shop'",
    ]

    # Subject matter likely to conflict with content policy
    guidance["avoid_sensitive"] = [
        "Violence, gore, or disturbing imagery",
        "Explicit or sexual content",
        "Hate speech or discriminatory language",
        "Dangerous or illegal activities",
    ]

    # Positive recommendations for prompt authors
    guidance["best_practices"] = [
        "Use generic, descriptive language",
        "Focus on actions, emotions, and settings rather than specific identities",
        "Keep content family-friendly and brand-safe",
        "Test with shorter scripts first to validate content compliance",
    ]

    return guidance