Spaces:

userIdc2024
/

Video_AdGenesis_App

Running

App Files Files Community

Video_AdGenesis_App / api /prompt_validator.py

sushilideaclan01

Enhance prompt validation and safety features

82a1419 9 days ago

raw

history blame contribute delete

5.56 kB

	"""
	Pre-submission prompt validation to catch common issues before generation
	"""

	import re
	from typing import Dict, List, Tuple

	# Common public figure names (not exhaustive, just examples).
	# Entries are lowercase because prompts are lowercased before matching.
	COMMON_PUBLIC_FIGURES = [
	    # Politicians
	    "donald trump", "joe biden", "barack obama", "kamala harris",
	    "vladimir putin", "xi jinping", "narendra modi", "boris johnson",
	    # Tech CEOs
	    "elon musk", "jeff bezos", "mark zuckerberg", "bill gates", "steve jobs",
	    "tim cook", "sundar pichai", "satya nadella",
	    # Celebrities
	    "taylor swift", "beyonce", "kim kardashian", "kanye west",
	    "dwayne johnson", "tom cruise", "leonardo dicaprio",
	    # Athletes
	    "lebron james", "cristiano ronaldo", "lionel messi", "serena williams",
	    "tiger woods", "michael jordan",
	]

	# Copyrighted characters and brands (lowercase, matched like the names above).
	COPYRIGHTED_TERMS = [
	    # Characters
	    "spider-man", "spiderman", "batman", "superman", "iron man",
	    "mickey mouse", "harry potter", "darth vader",
	    # Brands
	    "nike", "adidas", "apple", "google", "microsoft", "coca-cola",
	    "pepsi", "mcdonalds", "starbucks", "amazon",
	]

	# Sensitive-content checks as (compiled pattern, label) pairs, compiled once at
	# import time. The alternation bar must be a bare "|": an escaped "\|" matches
	# a literal pipe character and would make every check silently never fire.
	_SENSITIVE_PATTERNS = [
	    (re.compile(r"\b(kill|murder|death|blood|violence)\b"), "violent content"),
	    (re.compile(r"\b(naked|nude|sex|sexual)\b"), "explicit content"),
	    (re.compile(r"\b(hate|racist|discriminat)\w*\b"), "discriminatory language"),
	]


	def _find_terms(text_lower: str, terms: List[str]) -> List[str]:
	    """Return the entries of *terms* that occur in *text_lower* as whole words."""
	    # Lookarounds instead of a plain substring test so that, for example,
	    # "pineapple" is not reported as the brand "apple".
	    return [
	        term
	        for term in terms
	        if re.search(rf"(?<!\w){re.escape(term)}(?!\w)", text_lower)
	    ]


	def validate_prompt_content(text: str) -> Tuple[bool, List[str]]:
	    """
	    Validate prompt content for potential content policy violations.

	    The text is lowercased and checked for whole-word occurrences of the
	    names in COMMON_PUBLIC_FIGURES and the terms in COPYRIGHTED_TERMS, then
	    against the sensitive-content patterns (violent, explicit,
	    discriminatory). Each category that matches adds one warning.

	    Args:
	        text: The prompt to check. Empty or None input is treated as valid.

	    Returns:
	        Tuple of (is_valid, list_of_warnings); is_valid is True only when
	        the list of warnings is empty.
	    """
	    if not text:
	        return True, []

	    warnings = []
	    text_lower = text.lower()

	    # Check for public figures
	    found_figures = _find_terms(text_lower, COMMON_PUBLIC_FIGURES)
	    if found_figures:
	        warnings.append(
	            f"⚠️ Detected public figure(s): {', '.join(found_figures)}. "
	            "Consider using generic descriptions instead (e.g., 'a business executive' instead of specific names)."
	        )

	    # Check for copyrighted terms
	    found_copyrighted = _find_terms(text_lower, COPYRIGHTED_TERMS)
	    if found_copyrighted:
	        warnings.append(
	            f"⚠️ Detected copyrighted term(s): {', '.join(found_copyrighted)}. "
	            "Consider using generic alternatives to avoid content policy issues."
	        )

	    # Check for potentially sensitive content
	    for pattern, content_type in _SENSITIVE_PATTERNS:
	        if pattern.search(text_lower):
	            warnings.append(f"⚠️ Potentially sensitive {content_type} detected. Review for content policy compliance.")

	    return not warnings, warnings


	# Generic stand-ins for named people, characters, and brands (keys lowercase).
	_SANITIZE_REPLACEMENTS = {
	    # Politicians
	    "donald trump": "a business executive",
	    "joe biden": "a senior politician",
	    # Tech CEOs
	    "elon musk": "a tech entrepreneur",
	    "jeff bezos": "a business mogul",
	    "mark zuckerberg": "a tech founder",
	    "bill gates": "a technology pioneer",
	    # Celebrities
	    "taylor swift": "a popular singer",
	    "beyonce": "a renowned performer",
	    "kim kardashian": "a media personality",
	    # Athletes
	    "lebron james": "a professional basketball player",
	    "cristiano ronaldo": "a soccer star",
	    "lionel messi": "a soccer champion",
	    # Characters
	    "spider-man": "a superhero",
	    "spiderman": "a superhero",
	    "batman": "a crime fighter",
	    "superman": "a hero with superpowers",
	    "harry potter": "a young wizard",
	    # Brands
	    "nike": "athletic",
	    "adidas": "sportswear",
	    "apple": "tech",
	    "google": "a search engine",
	    "starbucks": "a coffee shop",
	    "mcdonalds": "a restaurant",
	}

	# One case-insensitive, whole-word pattern per term, compiled once at import
	# time. The lookarounds keep a term from matching inside a longer word, so
	# "pineapple" is left alone instead of becoming "pinetech".
	_SANITIZE_RULES = [
	    (re.compile(rf"(?<!\w){re.escape(term)}(?!\w)", re.IGNORECASE), replacement)
	    for term, replacement in _SANITIZE_REPLACEMENTS.items()
	]


	def sanitize_prompt_content(text: str) -> str:
	    """
	    Automatically sanitize prompt content by replacing problematic terms.
	    This is a basic implementation - the AI-powered fix is more sophisticated.

	    Each known name, character, or brand is replaced, case-insensitively and
	    only where it appears as a whole word, with a generic description. Terms
	    are processed in the order they are listed in the replacement table.

	    Args:
	        text: The prompt to sanitize. Empty or None input yields "".

	    Returns:
	        Sanitized text
	    """
	    if not text:
	        return ""

	    sanitized = text
	    for pattern, replacement in _SANITIZE_RULES:
	        sanitized = pattern.sub(replacement, sanitized)
	    return sanitized


	def get_content_guidance() -> Dict[str, List[str]]:
	    """
	    Describe what prompt authors should steer clear of, grouped by category.

	    Returns:
	        A freshly built dictionary mapping each guidance category to its
	        list of hints, in the order: public figures, copyrighted material,
	        sensitive content, best practices.
	    """
	    guidance: Dict[str, List[str]] = {}

	    # Real people
	    guidance["avoid_public_figures"] = [
	        "Real politicians, celebrities, athletes, or public figures",
	        "Use generic roles instead: 'a business executive', 'a singer', 'an athlete'",
	    ]

	    # Characters and brands
	    guidance["avoid_copyrighted"] = [
	        "Trademarked characters (Spider-Man, Mickey Mouse, etc.)",
	        "Brand names (Nike, Apple, Starbucks, etc.)",
	        "Use generic alternatives: 'a superhero', 'athletic shoes', 'a coffee shop'",
	    ]

	    # Sensitive subject matter
	    guidance["avoid_sensitive"] = [
	        "Violence, gore, or disturbing imagery",
	        "Explicit or sexual content",
	        "Hate speech or discriminatory language",
	        "Dangerous or illegal activities",
	    ]

	    # Positive recommendations
	    guidance["best_practices"] = [
	        "Use generic, descriptive language",
	        "Focus on actions, emotions, and settings rather than specific identities",
	        "Keep content family-friendly and brand-safe",
	        "Test with shorter scripts first to validate content compliance",
	    ]

	    return guidance