# Spaces: userIdc2024 / Video_AdGenesis_App (Running)
# File size: 5,563 Bytes
# 82a1419

"""
Pre-submission prompt validation to catch common issues before generation
"""

import re
from typing import Dict, List, Tuple

# Lowercase names of well-known real people, grouped by category.
# This is an illustrative sample, not a complete registry.
COMMON_PUBLIC_FIGURES = [
    # --- Politicians ---
    "donald trump",
    "joe biden",
    "barack obama",
    "kamala harris",
    "vladimir putin",
    "xi jinping",
    "narendra modi",
    "boris johnson",
    # --- Tech CEOs ---
    "elon musk",
    "jeff bezos",
    "mark zuckerberg",
    "bill gates",
    "steve jobs",
    "tim cook",
    "sundar pichai",
    "satya nadella",
    # --- Celebrities ---
    "taylor swift",
    "beyonce",
    "kim kardashian",
    "kanye west",
    "dwayne johnson",
    "tom cruise",
    "leonardo dicaprio",
    # --- Athletes ---
    "lebron james",
    "cristiano ronaldo",
    "lionel messi",
    "serena williams",
    "tiger woods",
    "michael jordan",
]

# Lowercase names of protected fictional characters and commercial brands.
COPYRIGHTED_TERMS = [
    # --- Fictional characters ---
    "spider-man",
    "spiderman",
    "batman",
    "superman",
    "iron man",
    "mickey mouse",
    "harry potter",
    "darth vader",
    # --- Brand names ---
    "nike",
    "adidas",
    "apple",
    "google",
    "microsoft",
    "coca-cola",
    "pepsi",
    "mcdonalds",
    "starbucks",
    "amazon",
]


def _find_whole_terms(terms: List[str], haystack: str) -> List[str]:
    """Return the entries of *terms* that occur in *haystack* as whole terms.

    A term only counts when it is not glued to another word character on
    either side, so "apple" is found in "an apple logo" but not in
    "pineapple". Lookarounds are used instead of ``\\b`` so the check stays
    correct for terms containing punctuation (e.g. "spider-man").
    """
    return [
        term
        for term in terms
        if re.search(rf"(?<!\w){re.escape(term)}(?!\w)", haystack)
    ]


def validate_prompt_content(text: str) -> Tuple[bool, List[str]]:
    """
    Validate prompt content for potential content policy violations.

    The text is lowercased and checked for three things: entries of
    COMMON_PUBLIC_FIGURES, entries of COPYRIGHTED_TERMS, and a small set of
    sensitive-content keyword patterns. Names and terms are matched as whole
    terms, so unrelated words that merely contain one (e.g. "pineapple"
    containing "apple") do not raise a warning.

    Args:
        text: The prompt to check.

    Returns:
        Tuple of (is_valid, list_of_warnings). ``is_valid`` is True only
        when no warning was produced.
    """
    warnings: List[str] = []
    text_lower = text.lower()

    # Check for public figures
    found_figures = _find_whole_terms(COMMON_PUBLIC_FIGURES, text_lower)
    if found_figures:
        warnings.append(
            f"⚠️ Detected public figure(s): {', '.join(found_figures)}. "
            "Consider using generic descriptions instead (e.g., 'a business executive' instead of specific names)."
        )

    # Check for copyrighted terms
    found_copyrighted = _find_whole_terms(COPYRIGHTED_TERMS, text_lower)
    if found_copyrighted:
        warnings.append(
            f"⚠️ Detected copyrighted term(s): {', '.join(found_copyrighted)}. "
            "Consider using generic alternatives to avoid content policy issues."
        )

    # Check for potentially sensitive content; each category yields at most
    # one warning no matter how many of its keywords appear.
    sensitive_patterns = [
        (r'\b(kill|murder|death|blood|violence)\b', "violent content"),
        (r'\b(naked|nude|sex|sexual)\b', "explicit content"),
        (r'\b(hate|racist|discriminat)\w*\b', "discriminatory language"),
    ]

    for pattern, content_type in sensitive_patterns:
        if re.search(pattern, text_lower):
            warnings.append(f"⚠️ Potentially sensitive {content_type} detected. Review for content policy compliance.")

    return not warnings, warnings


# Generic stand-ins for specific people, characters and brands.
# Keys are lowercase; matching against the prompt is case-insensitive.
_GENERIC_REPLACEMENTS = {
    # Politicians
    "donald trump": "a business executive",
    "joe biden": "a senior politician",
    "elon musk": "a tech entrepreneur",
    "jeff bezos": "a business mogul",
    "mark zuckerberg": "a tech founder",
    "bill gates": "a technology pioneer",
    # Celebrities
    "taylor swift": "a popular singer",
    "beyonce": "a renowned performer",
    "kim kardashian": "a media personality",
    # Athletes
    "lebron james": "a professional basketball player",
    "cristiano ronaldo": "a soccer star",
    "lionel messi": "a soccer champion",
    # Characters
    "spider-man": "a superhero",
    "spiderman": "a superhero",
    "batman": "a crime fighter",
    "superman": "a hero with superpowers",
    "harry potter": "a young wizard",
    # Brands
    "nike": "athletic",
    "adidas": "sportswear",
    "apple": "tech",
    "google": "a search engine",
    "starbucks": "a coffee shop",
    "mcdonalds": "a restaurant",
}

# (term, replacement) pairs ordered longest term first, so that if one term
# is ever a prefix of another the longer one wins inside the alternation.
_REPLACEMENT_PAIRS = sorted(
    _GENERIC_REPLACEMENTS.items(),
    key=lambda pair: len(pair[0]),
    reverse=True,
)

# One alternation with a capture group per term. The lookarounds require the
# term to stand alone (not embedded in a longer word such as "pineapple").
_REPLACEMENT_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(f"({re.escape(term)})" for term, _ in _REPLACEMENT_PAIRS)
    + r")(?!\w)",
    re.IGNORECASE,
)


def sanitize_prompt_content(text: str) -> str:
    """
    Automatically sanitize prompt content by replacing problematic terms.
    This is a basic implementation - the AI-powered fix is more sophisticated.

    Known names, characters and brands are swapped for generic descriptions
    in a single case-insensitive pass. Only whole terms are replaced, so
    words that merely contain a term (e.g. "pineapple") are left intact, and
    text inserted by one replacement is never rewritten by another.

    Args:
        text: The prompt to sanitize.

    Returns:
        Sanitized text
    """
    # The index of the capture group that matched identifies the term, which
    # avoids any case-mapping round trip on the matched text.
    return _REPLACEMENT_PATTERN.sub(
        lambda match: _REPLACEMENT_PAIRS[match.lastindex - 1][1],
        text,
    )


def get_content_guidance() -> Dict[str, List[str]]:
    """
    Build the static guidance shown to users about what prompts should avoid.

    Returns:
        A freshly built dictionary mapping each guidance category
        to its list of human-readable tips.
    """
    guidance: Dict[str, List[str]] = {}

    # Real, identifiable people.
    guidance["avoid_public_figures"] = [
        "Real politicians, celebrities, athletes, or public figures",
        "Use generic roles instead: 'a business executive', 'a singer', 'an athlete'",
    ]

    # Protected characters and brand names.
    guidance["avoid_copyrighted"] = [
        "Trademarked characters (Spider-Man, Mickey Mouse, etc.)",
        "Brand names (Nike, Apple, Starbucks, etc.)",
        "Use generic alternatives: 'a superhero', 'athletic shoes', 'a coffee shop'",
    ]

    # Content likely to trip safety filters.
    guidance["avoid_sensitive"] = [
        "Violence, gore, or disturbing imagery",
        "Explicit or sexual content",
        "Hate speech or discriminatory language",
        "Dangerous or illegal activities",
    ]

    # Positive recommendations.
    guidance["best_practices"] = [
        "Use generic, descriptive language",
        "Focus on actions, emotions, and settings rather than specific identities",
        "Keep content family-friendly and brand-safe",
        "Test with shorter scripts first to validate content compliance",
    ]

    return guidance