vindhaus
committed on
Commit
·
c4b197e
1
Parent(s):
83a5179
Initial release of Worldly - v1
Browse files- worldly-v1.py +115 -0
worldly-v1.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import logging
|
| 3 |
+
|
| 4 |
+
# Ethnicities that may be inserted into a prompt. select_random_ethnicity()
# draws from this list, and contains_specified_term() also treats every entry
# as a sign that the prompt already names an ethnicity.
ethnicity_map = [
    "European American", "English", "Irish", "Italian", "German", "Polish",
    "French", "Scottish", "Scandinavian", "Eastern European", "Jewish",
    "Middle Eastern", "African-American", "Afro-Caribbean", "African",
    "Afro-Latinx", "Mexican", "Puerto Rican", "Cuban", "Dominican",
    "Salvadoran", "Guatemalan", "Colombian", "Venezuelan", "Nicaraguan",
    "Honduran", "Argentinian", "Chilean", "Peruvian", "Ecuadorian",
    "Panamanian", "Bolivian", "Costa Rican", "Chinese", "Japanese",
    "Korean", "Vietnamese", "Filipino", "Thai", "Cambodian", "Laotian",
    "Burmese", "Malaysian", "Indonesian", "Indian", "Pakistani",
    "Bangladeshi", "Sri Lankan", "Nepalese", "Bhutanese", "Maldivian",
    "Native Hawaiian", "Samoan", "Tongan", "Chamorro", "Fijian",
    "Tahitian", "Palauan", "Marshallese", "Cherokee", "Navajo", "Sioux",
    "Chippewa", "Apache", "Blackfeet", "Choctaw", "Inuit", "Yupik",
    "Aleut", "Egyptian", "Moroccan", "Algerian", "Tunisian", "Iraqi",
    "Syrian", "Palestinian", "Jordanian", "Kurdish", "Turkish",
    "Mixed-race", "Afro-Latino", "Eurasian", "Mestizo", "Armenian",
    "Assyrian", "Chaldean", "Somali Bantu", "Hmong", "Tibetan", "Aboriginal",
]

# Generic person words (singular and plural) that modify_prompt() prefixes
# with a randomly selected ethnicity.
person_terms = [
    "person", "people", "man", "woman", "child", "boy", "girl",
    "men", "women", "children", "boys", "girls",
]

# Every term whose presence means the prompt already specifies a demographic:
# all of ethnicity_map plus broader labels that are not in it. "Middle Eastern"
# is deliberately not repeated here because ethnicity_map already provides it.
term_check = ethnicity_map + [
    "White", "Black", "European", "Asian", "Latino", "Hispanic",
    "Native American", "South American", "East Asian",
    "South Asian", "Southeast Asian", "Pacific Islander",
]
|
| 37 |
+
|
| 38 |
+
def contains_specified_term(prompt):
    """
    Check whether the prompt already mentions an ethnicity or related term.

    Matching is a case-insensitive substring test, so a term also matches
    when it appears inside a longer word (e.g. "Thai" matches "Thailand").

    Args:
        prompt (str): The input prompt to check.

    Returns:
        bool: True if any entry of term_check or ethnicity_map occurs in the
        prompt, False otherwise.
    """
    # Lower-case the prompt once rather than once per candidate term.
    lowered_prompt = prompt.lower()

    # term_check is built from ethnicity_map, so concatenating the two lists
    # repeats every ethnicity. dict.fromkeys drops the repeats while keeping
    # first-seen order, so the first match that gets logged is unchanged.
    for term in dict.fromkeys(term_check + ethnicity_map):
        if term.lower() in lowered_prompt:
            logging.debug("Specified term '%s' found in prompt: %s", term, prompt)
            return True
    return False
|
| 54 |
+
|
| 55 |
+
def select_random_ethnicity():
    """
    Pick one entry from ethnicity_map at random.

    Returns:
        str: The selected ethnicity (e.g., "European American", "African").

    Raises:
        IndexError: If ethnicity_map is empty (raised by random.choice).
    """
    ethnicity = random.choice(ethnicity_map)
    logging.debug("Selected ethnicity: %s", ethnicity)
    return ethnicity
|
| 65 |
+
|
| 66 |
+
def modify_prompt_for_group(prompt, group_size=3):
    """
    Replace the phrase "group of people" with a mix of distinct ethnicities.

    Prompts that already mention an ethnicity or related term (as decided by
    contains_specified_term) are returned unchanged. Otherwise every
    occurrence of "group of people", in any letter case, is replaced by a
    comma-separated list such as "Korean person, Irish person, Navajo person".

    Args:
        prompt (str): The input prompt that contains group terms.
        group_size (int): How many ethnicities to list. Defaults to 3 and is
            capped at the number of entries in ethnicity_map.

    Returns:
        str: The modified prompt, or the original prompt when it already
        specifies a term or when no ethnicity could be selected.
    """
    # Local import: the module header only imports random and logging.
    import re

    if contains_specified_term(prompt):
        return prompt

    # random.sample draws without replacement, so the same ethnicity cannot
    # appear twice in one group; the clamp keeps the request within the
    # population size (sample raises ValueError otherwise).
    count = max(0, min(group_size, len(ethnicity_map)))
    mixed_ethnicities = [
        f"{ethnicity} person"
        for ethnicity in random.sample(ethnicity_map, count)
    ]
    logging.debug("Generated mixed ethnicities: %s", mixed_ethnicities)

    if not mixed_ethnicities:
        # Nothing to insert; leave the phrase alone instead of deleting it.
        return prompt

    replacement = ", ".join(mixed_ethnicities)
    # A callable replacement inserts the text verbatim, so no character in it
    # is ever interpreted as a regex backreference.
    return re.sub(
        r"group of people",
        lambda _match: replacement,
        prompt,
        flags=re.IGNORECASE,
    )
|
| 88 |
+
|
| 89 |
+
def modify_prompt(prompt):
    """
    Prefix generic person words in the prompt with a random ethnicity.

    Prompts that already mention an ethnicity or related term (as decided by
    contains_specified_term) are returned unchanged. Otherwise each
    whitespace-separated word that matches an entry of person_terms gets its
    own randomly selected ethnicity placed in front of it. The match ignores
    letter case and any punctuation attached to the word, so "Man" and
    "woman," are both recognised; the punctuation is kept in the output.

    Note that the prompt is split on whitespace and re-joined with single
    spaces, so runs of whitespace (including newlines) are collapsed.

    Args:
        prompt (str): The input prompt to modify.

    Returns:
        str: The modified prompt with ethnicities inserted.
    """
    # Local import: the module header only imports random and logging.
    import string

    logging.debug("Modifying prompt: %s", prompt)

    if contains_specified_term(prompt):
        logging.debug("Specified terms detected. No modification applied: %s", prompt)
        return prompt

    # Build the lookup once per call; a set gives constant-time membership.
    known_terms = {term.lower() for term in person_terms}

    words = prompt.split()
    for i, word in enumerate(words):
        # Compare on the bare word so "people." or "(man)" still match.
        core = word.strip(string.punctuation)
        if core.lower() not in known_terms:
            continue

        selected_ethnicity = select_random_ethnicity()
        # Insert the ethnicity right before the bare word, keeping any
        # leading punctuation (e.g. an opening quote or bracket) in front.
        start = word.find(core)
        words[i] = f"{word[:start]}{selected_ethnicity} {word[start:]}"
        logging.debug("Replaced '%s' with '%s'", word, words[i])

    modified_prompt = ' '.join(words)
    logging.debug("Modified prompt: %s", modified_prompt)
    return modified_prompt
|