# soci2/src/soci/agents/generator.py
# RayMelius's picture
# Fix agent clustering: valid house slots, even street routing, wider crowd spread
# ff7a12f
"""Procedural persona generator — creates unique personas for scaling beyond YAML."""
from __future__ import annotations
import random
from typing import TYPE_CHECKING
from soci.agents.persona import Persona
if TYPE_CHECKING:
from soci.world.city import City
# --- Name pools (diverse) ---
# First names that _pick_name() draws from when gender == "male".
# Element order is significant for reproducibility: random.choice() indexes
# into the list, so reordering changes which names a seeded run produces.
FIRST_NAMES_MALE = [
"James", "Marcus", "Omar", "Kai", "Devon", "Theo", "Frank", "George", "Sam",
"Liam", "Noah", "Ethan", "Lucas", "Mason", "Logan", "Aiden", "Jackson", "Caleb",
"Owen", "Carter", "Jayden", "Dylan", "Gabriel", "Anthony", "Isaac", "Adrian",
"Mateo", "Ryan", "Leo", "Sebastian", "Jaxon", "Dominic", "Nathan", "Ezra",
"Ravi", "Hiroshi", "Dmitri", "Kwame", "Alejandro", "Tariq", "Jian", "Nikolai",
"Emeka", "Yousef", "Andrei", "Kofi", "Rafael", "Jin", "Arjun", "Tomás",
"Bryce", "Malcolm", "Rohan", "Declan", "Felix", "Miles", "Hugo", "Jasper",
"Elliot", "Wesley", "Damian", "Silas", "Tristan", "Vincent", "Abel", "Cyrus",
"Kenneth", "Curtis", "Derek", "Troy", "Mitchell", "Grant", "Russell", "Brent",
"Daryl", "Reginald", "Cecil", "Wallace", "Clifford", "Howard", "Vernon", "Earl",
"Cedric", "Marvin", "Desmond", "Ruben", "Terrence", "Darius", "Lamar", "Winston",
"Trevor", "Patrick", "Cody", "Brett", "Lance", "Reed", "Clark", "Blake",
]
# First names that _pick_name() draws from when gender == "female".
# "Riley" also appears in FIRST_NAMES_NB; uniqueness is enforced on the full
# "First Last" string, so the overlap does not produce duplicate personas.
FIRST_NAMES_FEMALE = [
"Elena", "Lila", "Zoe", "Helen", "Alice", "Diana", "Priya", "Nina", "Rosa",
"Yuki", "Emma", "Olivia", "Ava", "Sophia", "Isabella", "Mia", "Charlotte",
"Amelia", "Harper", "Evelyn", "Abigail", "Ella", "Scarlett", "Grace", "Lily",
"Aria", "Riley", "Nora", "Zoey", "Penelope", "Layla", "Chloe", "Victoria",
"Aisha", "Mei", "Fatima", "Anya", "Sakura", "Ingrid", "Carmen", "Leila",
"Nalini", "Chioma", "Esmeralda", "Suki", "Tatiana", "Amara", "Ximena", "Hana",
"Iris", "Jade", "Stella", "Violet", "Luna", "Ivy", "Hazel", "Aurora",
"Savannah", "Audrey", "Brooklyn", "Bella", "Claire", "Lucy", "Skylar", "Paisley",
"Clara", "Margot", "Fiona", "Wren", "Elise", "Daphne", "Celeste", "Lydia",
"Bea", "Greta", "Tessa", "June", "Pearl", "Opal", "Vera", "Ruth",
"Dorothy", "Mabel", "Agnes", "Edith", "Gladys", "Mildred", "Bernice", "Lucille",
"Tamara", "Simone", "Rochelle", "Denise", "Monica", "Bianca", "Giselle", "Naomi",
]
# Gender-neutral first names. _pick_name() uses this pool for every gender
# value other than "male" or "female" (in practice "nonbinary").
FIRST_NAMES_NB = [
"Alex", "Jordan", "Taylor", "Morgan", "Casey", "Quinn", "Avery", "Riley",
"Dakota", "Skyler", "Sage", "Rowan", "Finley", "Emery", "River", "Hayden",
]
# Surnames shared by all genders; _pick_name() pairs one with a first name
# to build the "First Last" string whose uniqueness it tracks.
LAST_NAMES = [
"Chen", "Rodriguez", "Patel", "Kim", "Garcia", "Williams", "Johnson", "Brown",
"Davis", "Wilson", "Moore", "Taylor", "Anderson", "Thomas", "Jackson", "White",
"Harris", "Martin", "Thompson", "Lee", "Walker", "Hall", "Allen", "Young",
"Hernandez", "King", "Wright", "Lopez", "Hill", "Scott", "Green", "Adams",
"Baker", "Gonzalez", "Nelson", "Carter", "Mitchell", "Perez", "Roberts", "Turner",
"Phillips", "Campbell", "Parker", "Evans", "Edwards", "Collins", "Stewart", "Sanchez",
"Morris", "Rogers", "Reed", "Cook", "Morgan", "Bell", "Murphy", "Bailey",
"Okafor", "Nakamura", "Petrov", "Johansson", "Müller", "Dubois", "Rossi", "Silva",
"Tanaka", "Singh", "Ali", "Sato", "Ivanov", "Larsson", "Kowalski", "Novak",
"O'Brien", "Brennan", "Reeves", "Holt", "Vasquez", "Santiago", "Delgado", "Moreno",
"Fischer", "Wagner", "Becker", "Meyer", "Weber", "Hoffman", "Schultz", "Lang",
"Stone", "Fox", "Cross", "Lane", "Rush", "Day", "Snow", "Frost",
"Wolfe", "Marsh", "Banks", "Hope", "Wise", "Chase", "Steele", "Drake",
"Blair", "Hale", "Vega", "Luna", "Rios", "Campos", "Soto", "Reyes",
"Mendez", "Ortiz", "Flores", "Ramos", "Cruz", "Gutierrez", "Vargas", "Medina",
"Choi", "Park", "Yoon", "Han", "Lim", "Kwon", "Cho", "Jang",
"Wang", "Liu", "Zhang", "Li", "Yang", "Huang", "Zhou", "Wu",
"Gupta", "Sharma", "Kumar", "Verma", "Joshi", "Mehta", "Shah", "Reddy",
"Osei", "Mensah", "Asante", "Boateng", "Amoah", "Opoku", "Owusu", "Adjei",
]
# (title, work_location_id) pairs sampled uniformly by _pick_occupation() for
# adults that were not already classified as retired or college student.
# The second element is looked up in the city's locations by
# _assign_locations(); None means the persona has no workplace and stays home.
OCCUPATIONS = [
# White collar → office, office_tower
("software engineer", "office"), ("accountant", "office"), ("marketing manager", "office"),
("architect", "office"), ("data analyst", "office"), ("project manager", "office"),
("graphic designer", "office"), ("lawyer", "office_tower"), ("consultant", "office_tower"),
("financial advisor", "office_tower"),
# Blue collar → factory
("mechanic", "factory"), ("electrician", "factory"), ("plumber", "factory"),
("construction worker", "factory"),
# Service / hospitality (evening shifts) → bar, restaurant, cafe
("bartender", "bar"), ("chef", "restaurant"), ("waiter", "restaurant"),
("barista", "cafe"),
# Creative → office
("writer", "office"), ("musician", "office"), ("photographer", "office"),
("artist", "office"),
# Education → school
("teacher", "school"), ("professor", "school"), ("tutor", "school"),
# Health / fitness → hospital, gym
("nurse", "hospital"), ("personal trainer", "gym"), ("therapist", "hospital"),
# Student / retired — these entries mean the uniform draw can also yield
# "college student" or "retired" for any adult age.
("college student", "school"), ("retired", None),
]
# Candidate personal values. _pick_values() draws 2-4 of them without
# replacement, boosting the weight of some entries based on trait scores.
VALUES_POOL = [
"family", "career", "honesty", "creativity", "adventure", "community",
"independence", "knowledge", "health", "tradition", "justice", "compassion",
"wealth", "spirituality", "loyalty", "ambition", "simplicity", "humor",
"freedom", "respect",
]
# Candidate behavioural quirks. _pick_quirks() samples 1-3 distinct entries
# uniformly; no trait weighting is applied.
QUIRKS_POOL = [
"always carries a book", "hums while walking", "talks to plants",
"obsessed with coffee", "compulsive note-taker", "never remembers names",
"always early", "chronic over-sharer", "apologizes too much",
"uses old-fashioned slang", "collects random things", "doodles during conversations",
"quotes movies constantly", "always has snacks", "fidgets with keys",
"checks phone compulsively", "whistles off-key", "gives unsolicited advice",
"afraid of pigeons", "tells the same stories", "makes up words",
"eats loudly", "gestures wildly when talking", "has strong opinions about weather",
"always wears a hat",
]
# Occupation categories for schedule variation
# Only RETIRED_OCCUPATIONS is read in the visible part of this module (by
# _assign_locations); the other sets are presumably consumed by scheduling
# code elsewhere — TODO confirm against importers.
# Titles here must match the strings produced by _pick_occupation().
EVENING_SHIFT_JOBS = {"bartender", "chef", "waiter", "barista"}
STUDENT_OCCUPATIONS = {"college student", "elementary student", "middle school student", "high school student"}
RETIRED_OCCUPATIONS = {"retired"}
PHYSICAL_JOBS = {"mechanic", "electrician", "plumber", "construction worker", "personal trainer"}
def _pick_gender() -> str:
    """Draw a gender label from a fixed weighted distribution.

    One uniform draw in [0, 1) is bucketed: below 0.47 is "male", from 0.47
    up to (but excluding) 0.94 is "female", and the remainder is "nonbinary".
    """
    roll = random.random()
    # Bounds are cumulative, so the first bound that exceeds the roll wins.
    for upper_bound, label in ((0.47, "male"), (0.94, "female")):
        if roll < upper_bound:
            return label
    return "nonbinary"
def _pick_name(gender: str, used_names: set[str]) -> str:
    """Pick a full name that is not yet in ``used_names`` and record it.

    Args:
        gender: "male" or "female" selects the matching first-name pool; any
            other value uses the gender-neutral pool.
        used_names: Names already handed out. Mutated in place: the returned
            name is added before returning.

    Returns:
        A "First Last" string. If 100 random draws all collide with existing
        names, the last drawn name is returned with a disambiguating suffix
        ("Jr" first, then " 2", " 3", ...) that is guaranteed to be unused.
    """
    if gender == "male":
        first_names = FIRST_NAMES_MALE
    elif gender == "female":
        first_names = FIRST_NAMES_FEMALE
    else:
        first_names = FIRST_NAMES_NB

    for _ in range(100):
        first = random.choice(first_names)
        last = random.choice(LAST_NAMES)
        full = f"{first} {last}"
        if full not in used_names:
            used_names.add(full)
            return full

    # Fallback: every attempt collided. Keep the historical "Jr" suffix as the
    # first choice, but verify it too — otherwise repeated fallbacks would
    # return the same "… Jr" name and break the uniqueness guarantee.
    candidate = f"{full} Jr"
    suffix = 2
    while candidate in used_names:
        candidate = f"{full} {suffix}"
        suffix += 1
    used_names.add(candidate)
    return candidate
def _pick_occupation(age: int) -> tuple[str, str | None]:
    """Choose an occupation suited to ``age``.

    Returns:
        A ``(title, work_location_id)`` pair. Minors always get a student
        title at "school"; the location is ``None`` for retirees.
    """
    # School brackets in ascending order; the first one that fits decides.
    school_brackets = (
        (11, "elementary student"),
        (14, "middle school student"),
        (17, "high school student"),
    )
    for oldest_age, title in school_brackets:
        if age <= oldest_age:
            return title, "school"

    # 70% of people aged 65+ are retired.
    if age >= 65 and random.random() < 0.7:
        return "retired", None

    # 60% of 18-22 year olds are in college.
    if 18 <= age <= 22 and random.random() < 0.6:
        return "college student", "school"

    # NOTE(review): OCCUPATIONS itself lists "college student" and "retired",
    # so this uniform draw can still return either for any adult age.
    return random.choice(OCCUPATIONS)
def _generate_traits() -> dict[str, int]:
    """Roll Big Five trait scores with a mild conscientiousness/neuroticism link.

    Openness, conscientiousness, extraversion and agreeableness are each an
    independent uniform integer in 2..9. Neuroticism starts from the same
    kind of roll and is then nudged against conscientiousness before being
    clamped to 1..10.
    """
    independent = ("openness", "conscientiousness", "extraversion", "agreeableness")
    profile = {trait: random.randint(2, 9) for trait in independent}

    raw_neuroticism = random.randint(2, 9)
    # Floor division: conscientiousness 2-4 -> -1, 5-7 -> 0, 8-9 -> +1, so
    # low conscientiousness raises neuroticism by one and high lowers it.
    shift = (profile["conscientiousness"] - 5) // 3
    profile["neuroticism"] = min(10, max(1, raw_neuroticism - shift))
    return profile
def _pick_values(traits: dict[str, int]) -> list[str]:
    """Draw 2-4 distinct values from VALUES_POOL, biased by personality.

    Every value starts at weight 1.0; a handful are boosted when the listed
    trait clears its threshold. Values are then drawn one at a time without
    replacement, in proportion to their weights.
    """
    target = random.randint(2, 4)

    # value -> (trait test, boosted weight). Tests are evaluated lazily so a
    # trait is only read when its value is actually present in the pool.
    boost_rules = {
        "career": (lambda t: t["conscientiousness"] >= 7, 2.0),
        "creativity": (lambda t: t["openness"] >= 7, 2.0),
        "community": (lambda t: t["agreeableness"] >= 7, 2.0),
        "adventure": (lambda t: t["openness"] >= 7, 2.0),
        "independence": (lambda t: t["extraversion"] <= 4, 1.5),
        "health": (lambda t: t["conscientiousness"] >= 6, 1.5),
    }

    def weight_of(value: str) -> float:
        rule = boost_rules.get(value)
        if rule is not None and rule[0](traits):
            return rule[1]
        return 1.0

    candidates = list(dict.fromkeys(VALUES_POOL))
    odds = [weight_of(value) for value in candidates]

    picked: list[str] = []
    while candidates and len(picked) < target:
        winner = random.choices(candidates, weights=odds, k=1)[0]
        position = candidates.index(winner)
        # Remove the winner and its weight together so the two lists stay aligned.
        del candidates[position]
        del odds[position]
        picked.append(winner)
    return picked
def _pick_quirks() -> list[str]:
    """Return between one and three distinct quirks chosen uniformly from QUIRKS_POOL."""
    how_many = random.randint(1, 3)
    return random.sample(QUIRKS_POOL, how_many)
def _communication_style(extraversion: int, agreeableness: int) -> str:
    """Map extraversion and agreeableness scores to a communication-style label.

    Each score is bucketed as high, low or mid, and the pair of buckets
    selects the label. Note the low cut-offs differ: extraversion is "low"
    at 3 or below, agreeableness at 4 or below.
    """
    if extraversion >= 7:
        energy = "high"
    elif extraversion <= 3:
        energy = "low"
    else:
        energy = "mid"

    if agreeableness >= 7:
        warmth = "high"
    elif agreeableness <= 4:
        warmth = "low"
    else:
        warmth = "mid"

    labels = {
        ("high", "high"): "warm and chatty",
        ("high", "low"): "loud and blunt",
        ("low", "high"): "quiet and polite",
        ("low", "low"): "terse and reserved",
        ("high", "mid"): "talkative and expressive",
        ("low", "mid"): "quiet and thoughtful",
        ("mid", "high"): "friendly and considerate",
        ("mid", "low"): "direct and no-nonsense",
        ("mid", "mid"): "neutral",
    }
    return labels[(energy, warmth)]
def _indefinite_article(term: int | str) -> str:
    """Return "an" if ``term`` is read aloud with a leading vowel sound, else "a".

    Integers are judged by how the number is spoken (8, 11, 18, 80-89, ...).
    Strings are judged by their first letter, a heuristic that is adequate
    for the occupation titles this module generates.
    """
    if isinstance(term, int):
        vowel_sound = term in (11, 18) or str(term).startswith("8")
    else:
        # Set membership (not substring test) so an empty string yields "a".
        vowel_sound = term[:1].lower() in {"a", "e", "i", "o", "u"}
    return "an" if vowel_sound else "a"


def _generate_background(name: str, age: int, occupation: str, traits: dict[str, int]) -> str:
    """Generate a 2-3 sentence background blurb for a persona.

    Args:
        name: Full name; only the first whitespace-separated token is used.
        age: Age in years; selects the life-stage sentence.
        occupation: Occupation title; selects the second sentence.
        traits: Big Five scores keyed by "openness", "conscientiousness",
            "extraversion", "agreeableness" and "neuroticism".

    Returns:
        "<life stage>. <occupation sentence>" followed, when any trait is
        notably high or low, by one sentence with up to two personality
        flavours in random order.
    """
    first = name.split()[0]

    # Age-based life stage
    if age <= 11:
        stage = f"{first} is {age} years old and attends Soci Elementary School"
    elif age <= 14:
        stage = f"{first} is {age} years old and is in middle school"
    elif age <= 17:
        stage = f"{first} is {age} years old and is a high schooler at Soci School"
    else:
        if age <= 22:
            phase = "finding their way in life"
        elif age <= 35:
            phase = "building their career"
        elif age <= 55:
            phase = "well-established in the community"
        elif age <= 65:
            phase = "approaching the later chapters of life"
        else:
            phase = "enjoying their golden years"
        # "an 18-year-old" / "an 80-year-old", but "a 30-year-old".
        stage = f"{first} is {_indefinite_article(age)} {age}-year-old {phase}"

    # Occupation context
    if occupation == "elementary student":
        job_part = "They love recess, have strong opinions about their favourite subjects, and make friends easily."
    elif occupation in ("middle school student", "high school student"):
        job_part = "They're navigating homework, friendships, and figuring out who they are."
    elif occupation == "retired":
        job_part = "After decades of work, they now fill their days with hobbies and neighborhood life."
    elif occupation == "college student":
        subjects = random.choice([
            "literature", "engineering", "biology", "business", "art history",
            "computer science", "psychology", "nursing", "philosophy", "music",
        ])
        job_part = f"They're studying {subjects} and juggling classes with a social life."
    else:
        # "an accountant" / "an electrician", but "a chef".
        job_part = (
            f"They work as {_indefinite_article(occupation)} {occupation} "
            "and take pride in what they do."
        )

    # Personality flavor. The order of this table is the order candidates are
    # offered to random.sample(), so keep it stable for seeded runs.
    flavor_rules = (
        (traits["openness"] >= 7, "loves trying new things"),
        (traits["conscientiousness"] >= 7, "keeps a tight schedule"),
        (traits["extraversion"] >= 7, "lights up every room they enter"),
        (traits["agreeableness"] >= 7, "is always ready to lend a hand"),
        (traits["neuroticism"] >= 7, "tends to overthink things"),
        (traits["extraversion"] <= 3, "prefers a quiet evening at home"),
        (traits["conscientiousness"] <= 3, "goes with the flow"),
    )
    flavors = [text for applies, text in flavor_rules if applies]

    personality_part = ""
    if flavors:
        picked = random.sample(flavors, k=min(2, len(flavors)))
        personality_part = f" {first} {' and '.join(picked)}."

    return f"{stage}. {job_part}{personality_part}"
def _llm_temperature(openness: int) -> float:
    """Convert an openness score into an LLM sampling temperature.

    The mapping is linear: 0.5 at openness 0, rising by 0.04 per point
    (about 0.9 at openness 10).
    """
    base, span = 0.5, 0.4
    fraction = openness / 10.0
    return base + fraction * span
def _assign_locations(
    occupation: str,
    work_location_id: str | None,
    residential_ids: list[str],
    city_locations: dict,
    res_index: int,
) -> tuple[str, str]:
    """Resolve a persona's home and workplace location ids.

    Homes are handed out round-robin: ``res_index`` wraps around
    ``residential_ids``, which must be non-empty.

    Returns:
        ``(home_id, work_id)``. ``work_id`` equals ``home_id`` for retirees,
        for personas without a workplace, and when no work-zone location
        exists to fall back on.
    """
    home_id = residential_ids[res_index % len(residential_ids)]

    # No workplace at all: the persona spends the work day at home.
    if occupation in RETIRED_OCCUPATIONS or work_location_id is None:
        return home_id, home_id

    # Preferred workplace exists in this city.
    if work_location_id in city_locations:
        return home_id, work_location_id

    # Preferred workplace is missing: fall back to any location zoned "work".
    fallback_pool = [
        loc_id for loc_id, place in city_locations.items() if place.zone == "work"
    ]
    if not fallback_pool:
        return home_id, home_id
    return home_id, random.choice(fallback_pool)
def generate_personas(count: int, city: City) -> list[Persona]:
    """Build ``count`` procedurally generated personas housed in ``city``.

    Homes are assigned round-robin over the generated houses (ids starting
    with "house_gen_") so that named homes stay free for YAML personas. If
    the city has no generated houses, every residential location is used.

    Raises:
        ValueError: If the city has no residential locations at all.
    """
    all_residential = [
        loc_id
        for loc_id, place in city.locations.items()
        if place.zone == "residential"
    ]
    generated_houses = [
        loc_id for loc_id in all_residential if loc_id.startswith("house_gen_")
    ]
    # Fallback to every residential location, e.g. a standalone run without
    # YAML personas where no generated houses were laid out.
    residential_ids = generated_houses or all_residential
    if not residential_ids:
        raise ValueError("City has no residential locations — cannot assign homes.")

    used_names: set[str] = set()
    personas: list[Persona] = []

    # The order of the random draws below determines what a seeded run
    # produces, so keep the statements in this sequence.
    for index in range(count):
        gender = _pick_gender()
        name = _pick_name(gender, used_names)

        # Roughly one persona in five is a minor (8-17); the rest are 18-75.
        is_minor = random.random() < 0.20
        age = random.randint(8, 17) if is_minor else random.randint(18, 75)

        occupation, work_location_id = _pick_occupation(age)
        traits = _generate_traits()
        values = _pick_values(traits)
        quirks = _pick_quirks()
        comm_style = _communication_style(traits["extraversion"], traits["agreeableness"])
        background = _generate_background(name, age, occupation, traits)
        temperature = _llm_temperature(traits["openness"])

        home_id, work_id = _assign_locations(
            occupation,
            work_location_id,
            residential_ids,
            city.locations,
            index,
        )

        personas.append(
            Persona(
                id=f"gen_{index + 1:03d}",
                name=name,
                age=age,
                occupation=occupation,
                gender=gender,
                background=background,
                values=values,
                quirks=quirks,
                communication_style=comm_style,
                home_location=home_id,
                work_location=work_id,
                llm_temperature=round(temperature, 2),
                # The five trait keys match Persona's keyword names one-to-one.
                openness=traits["openness"],
                conscientiousness=traits["conscientiousness"],
                extraversion=traits["extraversion"],
                agreeableness=traits["agreeableness"],
                neuroticism=traits["neuroticism"],
            )
        )

    return personas