# soci2 / test_simulation.py
# Author: RayMelius
# Commit 492d303: Redesign city: individual houses, real streets, async Ollama client
"""Offline integration test — runs the full simulation loop with a mock LLM.
This test validates the entire pipeline without requiring an API key.
Run: python test_simulation.py
"""
from __future__ import annotations
import asyncio
import json
import random
import sys
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
sys.path.insert(0, str(Path(__file__).parent / "src"))
from soci.world.city import City
from soci.world.clock import SimClock
from soci.world.events import EventSystem
from soci.agents.persona import load_personas, Persona
from soci.agents.agent import Agent, AgentAction, AgentState
from soci.agents.memory import MemoryStream, MemoryType
from soci.agents.needs import NeedsState
from soci.agents.relationships import RelationshipGraph, Relationship
from soci.actions.registry import resolve_action, ActionType
from soci.actions.movement import execute_move, get_best_location_for_need
from soci.actions.activities import execute_activity
from soci.actions.social import should_initiate_conversation, pick_conversation_partner
from soci.engine.entropy import EntropyManager
from soci.engine.scheduler import prioritize_agents, should_skip_llm
from soci.engine.simulation import Simulation
from soci.persistence.database import Database
class MockLLM:
    """Mock LLM that returns plausible canned responses without calling any API.

    ``complete`` always returns the same sentence. ``complete_json`` picks a
    response shape by searching the lower-cased user message for marker
    phrases and falls back to ``{"status": "ok"}`` when none matches.
    Every call to either method increments ``usage.total_calls``.
    """

    # Candidate values for the "what do you do next" response.
    _ACTIONS = ["work", "eat", "relax", "wander", "move", "exercise"]
    _MOVE_TARGETS = ["cafe", "park", "house_elena", "office", "grocery"]
    _ACTION_DETAILS = {
        "move": "heading somewhere new",
        "work": "focusing on a project",
        "eat": "having a quick meal",
        "relax": "taking it easy",
        "wander": "strolling around",
        "exercise": "doing some stretches",
    }

    def __init__(self):
        # MagicMock tolerates access to any usage attribute not set explicitly below.
        self.usage = MagicMock()
        self.usage.total_calls = 0
        self.usage.total_input_tokens = 0
        self.usage.total_output_tokens = 0
        self.usage.estimated_cost_usd = 0.0
        self.usage.calls_by_model = {}
        # Build the summary on demand so it reflects the current call count
        # instead of a string frozen at construction time.
        self.usage.summary.side_effect = self._usage_summary

    def _usage_summary(self, *args, **kwargs) -> str:
        """Return a one-line usage summary; arguments are accepted and ignored."""
        return f"Mock LLM: {self.usage.total_calls} calls, $0.00"

    async def complete(self, system, user_message, model=None, temperature=0.7, max_tokens=1024) -> str:
        """Count the call and return a fixed free-text reply (all arguments are ignored)."""
        self.usage.total_calls += 1
        return "I'm thinking about my day."

    async def complete_json(self, system, user_message, model=None, temperature=0.7, max_tokens=1024) -> dict:
        """Count the call and return a canned dict chosen from ``user_message``.

        Only ``user_message`` influences the result; the marker phrases are
        tested in a fixed order and the first match wins.
        """
        self.usage.total_calls += 1
        # Detect what kind of prompt this is and return appropriate mock data
        msg = user_message.lower()

        if "plan your day" in msg:
            return {
                "plan": [
                    "Wake up and have breakfast at home",
                    "Go to work at the office",
                    "Have lunch at the cafe",
                    "Continue working",
                    "Go to the park for a walk",
                    "Have dinner",
                    "Relax at home",
                ],
                "reasoning": "A balanced day with work and leisure.",
            }

        if "what do you do next" in msg:
            action = random.choice(self._ACTIONS)
            # The destination is drawn for every action (not only "move") so the
            # random stream is consumed identically whatever action was picked.
            destination = random.choice(self._MOVE_TARGETS)
            return {
                "action": action,
                "target": destination if action == "move" else "",
                "detail": self._ACTION_DETAILS.get(action, "doing something"),
                "duration": random.randint(1, 3),
                "reasoning": "Felt like it.",
            }

        if "how important" in msg:
            return {
                "importance": random.randint(3, 8),
                "reaction": "Interesting, I'll remember that.",
            }

        if "reflect" in msg:
            return {
                "reflections": [
                    "I notice I've been spending a lot of time at work lately.",
                    "The neighborhood feels alive today.",
                ],
                "mood_shift": random.uniform(-0.1, 0.2),
                "reasoning": "Just thinking about things.",
            }

        if "start a conversation" in msg or "you decide to start" in msg:
            return {
                "message": "Hey, how's it going?",
                "inner_thought": "I should catch up with them.",
                "topic": "daily life",
            }

        if "says:" in msg:
            return {
                "message": "Yeah, things are good. How about you?",
                "inner_thought": "Nice to chat.",
                "sentiment_delta": 0.05,
                "trust_delta": 0.02,
            }

        return {"status": "ok"}
def _config_path(filename) -> str:
    """Return the path of *filename* inside the ``config`` directory next to this file."""
    return str(Path(__file__).parent / "config" / filename)


def _check_clock() -> None:
    """Tick the clock through a full day and verify roll-over and serialization."""
    clock = SimClock(tick_minutes=15, hour=6, minute=0)
    for _ in range(96):  # 96 ticks x 15 minutes = one full day
        clock.tick()
    assert clock.day == 2, f"Expected day 2, got {clock.day}"
    assert clock.hour == 6, f"Expected hour 6, got {clock.hour}"
    restored_clock = SimClock.from_dict(clock.to_dict())
    assert restored_clock.day == clock.day
    print(" PASS: Clock ticks correctly for a full day, serialization works")


def _check_city() -> None:
    """Load the city and verify connectivity, agent placement and movement."""
    city = City.from_yaml(_config_path("city.yaml"))
    assert len(city.locations) == 20
    # Test connectivity
    cafe = city.get_location("cafe")
    assert cafe is not None
    assert "street_north" in cafe.connected_to
    assert len(city.get_connected("cafe")) > 0
    # Test agent placement and movement
    city.place_agent("test_agent", "cafe")
    assert "test_agent" in city.get_agents_at("cafe")
    city.move_agent("test_agent", "cafe", "office")
    assert "test_agent" not in city.get_agents_at("cafe")
    assert "test_agent" in city.get_agents_at("office")
    assert city.find_agent("test_agent") == "office"
    city.locations["office"].remove_occupant("test_agent")
    print(" PASS: City loads, connections work, movement works")


def _check_personas() -> None:
    """Load the personas and verify their count, diversity and system prompt."""
    personas = load_personas(_config_path("personas.yaml"))
    assert len(personas) == 20
    # Check diversity
    ages = [p.age for p in personas]
    assert min(ages) <= 20, "Should have young people"
    assert max(ages) >= 60, "Should have older people"
    occupations = {p.occupation for p in personas}
    assert len(occupations) >= 15, "Should have diverse occupations"
    # Test system prompt
    prompt = personas[0].system_prompt()
    assert personas[0].name in prompt
    assert "personality" in prompt.lower()
    print(f" PASS: {len(personas)} personas loaded, ages {min(ages)}-{max(ages)}, {len(occupations)} occupations")


def _check_needs() -> None:
    """Verify that needs decay over ticks and can be satisfied."""
    needs = NeedsState()
    initial_hunger = needs.hunger
    for _ in range(20):
        needs.tick()
    assert needs.hunger < initial_hunger, "Hunger should decay"
    assert needs.energy < 1.0, "Energy should decay"
    needs.satisfy("hunger", 0.5)
    assert needs.hunger > 0.0, "Hunger should be partially satisfied"
    # Smoke test only: the value is not inspected, the access just must not raise.
    _ = needs.urgent_needs
    desc = needs.describe()
    assert isinstance(desc, str)
    print(f" PASS: Needs decay ({desc}), satisfaction works")


def _check_memory() -> None:
    """Verify memory storage, retrieval, the reflection trigger and serialization."""
    mem = MemoryStream()
    for i in range(30):
        # Timestamp string advances 15 minutes per entry, starting at 06:00.
        mem.add(
            i,
            1,
            f"{6+i//4:02d}:{(i%4)*15:02d}",
            MemoryType.OBSERVATION,
            f"Event {i}",
            importance=random.randint(1, 10),
        )
    assert len(mem.memories) == 30
    retrieved = mem.retrieve(30, top_k=5)
    assert len(retrieved) == 5
    recent = mem.get_recent(3)
    assert len(recent) == 3
    assert recent[-1].content == "Event 29"
    # Test reflection trigger
    mem._importance_accumulator = 100
    assert mem.should_reflect()
    mem.reset_reflection_accumulator()
    assert not mem.should_reflect()
    # Test serialization
    restored_mem = MemoryStream.from_dict(mem.to_dict())
    assert len(restored_mem.memories) == 30
    print(" PASS: Memory storage, retrieval, reflection trigger, serialization")


def _check_relationships() -> None:
    """Verify that relationships form, update after an interaction and serialize."""
    graph = RelationshipGraph()
    rel = graph.get_or_create("elena", "Elena Vasquez")
    assert rel.familiarity == 0.0
    rel.update_after_interaction(tick=10, sentiment_delta=0.1, trust_delta=0.05, note="Had coffee together")
    assert rel.familiarity > 0.0
    assert rel.sentiment > 0.5
    assert len(rel.notes) == 1
    assert len(graph.get_closest(5)) == 1
    assert "Elena" in rel.describe()
    # Serialization
    restored_graph = RelationshipGraph.from_dict(graph.to_dict())
    assert restored_graph.get("elena") is not None
    print(" PASS: Relationships form, track sentiment/trust, serialize")


def _check_agent() -> None:
    """Verify the agent action lifecycle, needs ticking, observations and serialization."""
    personas = load_personas(_config_path("personas.yaml"))
    agent = Agent(personas[0])  # Elena
    assert agent.name == "Elena Vasquez"
    assert agent.location == "house_elena"
    assert agent.state == AgentState.IDLE
    # Test action
    action = AgentAction(type="work", detail="coding", duration_ticks=3, needs_satisfied={"purpose": 0.3})
    agent.start_action(action)
    assert agent.is_busy
    assert agent.state == AgentState.WORKING
    for _ in range(3):
        agent.tick_action()
    assert not agent.is_busy
    assert agent.state == AgentState.IDLE
    # Test mood + needs interaction (smoke test: must not raise)
    for _ in range(10):
        agent.tick_needs()
    # Test observation
    agent.add_observation(0, 1, "06:00", "Saw a cat in the park", importance=4)
    assert len(agent.memory.memories) == 1
    # Serialization
    restored_agent = Agent.from_dict(agent.to_dict())
    assert restored_agent.name == agent.name
    assert len(restored_agent.memory.memories) == 1
    print(" PASS: Agent actions, needs, mood, memory, serialization")


def _check_action_resolution() -> None:
    """Verify that valid raw actions resolve and unknown ones fall back to wander."""
    personas = load_personas(_config_path("personas.yaml"))
    city = City.from_yaml(_config_path("city.yaml"))
    agent = Agent(personas[0])
    city.place_agent(agent.id, agent.location)
    raw = {"action": "move", "target": "cafe", "detail": "heading to cafe", "duration": 1}
    resolved = resolve_action(raw, agent, city)
    assert resolved.type == "move"
    assert resolved.target == "cafe"
    # Invalid action falls back to wander
    raw_bad = {"action": "fly", "target": "moon"}
    resolved_bad = resolve_action(raw_bad, agent, city)
    assert resolved_bad.type == "wander"
    print(" PASS: Valid actions resolve, invalid actions fall back to wander")


def _check_movement() -> None:
    """Verify that a move action relocates the agent and a need yields a suggestion."""
    personas = load_personas(_config_path("personas.yaml"))
    clock = SimClock()
    agent = Agent(personas[0])
    city = City.from_yaml(_config_path("city.yaml"))
    city.place_agent(agent.id, "house_elena")
    move_action = AgentAction(type="move", target="cafe", detail="walking to cafe")
    desc = execute_move(agent, move_action, city, clock)
    assert "cafe" in desc.lower() or "Daily Grind" in desc
    assert agent.location == "cafe"
    # Test location suggestion
    suggested = get_best_location_for_need(agent, "hunger", city)
    assert suggested is not None
    print(f" PASS: Movement works, need-based suggestion: {suggested}")


def _check_events_and_entropy() -> None:
    """Verify that forced events fire and that entropy flags a repeated action history."""
    personas = load_personas(_config_path("personas.yaml"))
    events = EventSystem(event_chance_per_tick=1.0)  # Force events
    new_events = events.tick(["cafe", "park", "office"])
    assert len(events.active_events) > 0 or len(new_events) > 0
    assert "Weather" in events.get_world_description()
    entropy = EntropyManager()
    agents_list = [Agent(p) for p in personas[:5]]
    # Simulate repetitive behavior
    entropy._action_history["elena"] = ["work"] * 15
    assert entropy._is_stuck_in_loop("elena")
    conflicts = entropy.get_conflict_catalysts(agents_list)
    print(f" PASS: Events fire, entropy detects loops, {len(conflicts)} potential conflicts found")


async def _check_simulation_loop() -> tuple:
    """Run ten simulation ticks against the mock LLM.

    Returns:
        ``(sim, mock_llm)`` so that later stages can reuse the warmed-up state.
    """
    mock_llm = MockLLM()
    city = City.from_yaml(_config_path("city.yaml"))
    clock = SimClock(tick_minutes=15, hour=6, minute=0)
    sim = Simulation(city=city, clock=clock, llm=mock_llm)
    sim.load_agents_from_yaml(_config_path("personas.yaml"))
    # Limit to 5 agents for speed
    sim.agents = {aid: sim.agents[aid] for aid in list(sim.agents)[:5]}
    events_collected = []
    sim.on_event = events_collected.append
    # Run 10 ticks
    for _ in range(10):
        await sim.tick()
    assert sim.clock.total_ticks == 10
    assert len(events_collected) > 0
    # Every agent must have recorded something; checked before PASS is reported.
    for agent in sim.agents.values():
        assert len(agent.memory.memories) > 0, f"{agent.name} should have memories"
    print(f" PASS: 10 ticks completed, {len(events_collected)} events, "
          f"{mock_llm.usage.total_calls} LLM calls")
    return sim, mock_llm


def _check_state_roundtrip(sim, mock_llm) -> None:
    """Serialize *sim* to JSON, restore it and compare agents and tick count."""
    state_json = json.dumps(sim.to_dict())
    assert len(state_json) > 1000, "State should be substantial"
    restored = Simulation.from_dict(json.loads(state_json), mock_llm)
    assert len(restored.agents) == len(sim.agents)
    assert restored.clock.total_ticks == sim.clock.total_ticks
    for aid in sim.agents:
        assert aid in restored.agents
        assert restored.agents[aid].name == sim.agents[aid].name
    print(f" PASS: Full state serialized ({len(state_json):,} bytes) and restored")


def _print_simulation_stats(sim, mock_llm) -> None:
    """Print the clock, weather, LLM call count and a per-agent status line."""
    print("\nSimulation state:")
    print(f" Clock: {sim.clock.datetime_str}")
    print(f" Weather: {sim.events.weather.value}")
    print(f" Mock LLM calls: {mock_llm.usage.total_calls}")
    print("\nAgent status after 10 ticks:")
    for agent in sim.agents.values():
        loc = sim.city.get_location(agent.location)
        # Fall back to the raw location id when the city does not know it.
        loc_name = loc.name if loc else agent.location
        print(f" {agent.name}: {agent.state.value} at {loc_name} "
              f"(mood={agent.mood:.2f}, memories={len(agent.memory.memories)})")


async def _run_check(index, total, title, check, *args) -> tuple:
    """Run one numbered check and report whether it passed.

    Prints the ``[index/total]`` banner, calls ``check(*args)`` (awaiting the
    result when it is a coroutine) and traps any ``Exception`` so the
    remaining checks still run.

    Returns:
        ``(True, result)`` on success, ``(False, None)`` on failure.
    """
    import traceback  # local import: only the failure path needs it

    print(f"\n[{index}/{total}] {title}...")
    try:
        result = check(*args)
        if asyncio.iscoroutine(result):
            result = await result
    except Exception as exc:  # runner boundary: record the failure and keep going
        traceback.print_exc()
        print(f" FAIL: {type(exc).__name__}: {exc}")
        return False, None
    return True, result


async def run_tests():
    """Run every offline check against the mock LLM and print a report.

    Each check runs in isolation, so one failure is counted and reported
    without aborting the remaining checks. The state-serialization check
    reuses the simulation built by the simulation-loop check and is counted
    as failed when that simulation is unavailable.

    Returns:
        ``True`` when every check passed, ``False`` otherwise.
    """
    print("=" * 60)
    print("SOCI — OFFLINE INTEGRATION TEST")
    print("=" * 60)

    standalone_checks = [
        ("Clock system", _check_clock),
        ("City system", _check_city),
        ("Persona system", _check_personas),
        ("Needs system", _check_needs),
        ("Memory system", _check_memory),
        ("Relationship system", _check_relationships),
        ("Agent system", _check_agent),
        ("Action resolution", _check_action_resolution),
        ("Movement system", _check_movement),
        ("Events and entropy", _check_events_and_entropy),
    ]
    # Two more stages follow the standalone ones: simulation loop and state roundtrip.
    total = len(standalone_checks) + 2
    errors = 0

    for index, (title, check) in enumerate(standalone_checks, start=1):
        passed, _ = await _run_check(index, total, title, check)
        if not passed:
            errors += 1

    passed, sim_state = await _run_check(
        total - 1, total, "Full simulation loop (mock LLM)", _check_simulation_loop
    )
    if not passed:
        errors += 1

    if sim_state is None:
        # Without a simulation there is nothing to serialize.
        print(f"\n[{total}/{total}] Full state serialization...")
        print(" FAIL: skipped because the simulation loop check did not complete")
        errors += 1
    else:
        passed, _ = await _run_check(
            total, total, "Full state serialization", _check_state_roundtrip, *sim_state
        )
        if not passed:
            errors += 1

    # --- Summary ---
    print("\n" + "=" * 60)
    if errors == 0:
        print(f"ALL {total} TESTS PASSED")
    else:
        print(f"{errors} TEST(S) FAILED")
    print("=" * 60)

    # Print some interesting stats
    if sim_state is not None:
        _print_simulation_stats(*sim_state)
    return errors == 0
if __name__ == "__main__":
    # Drive the async suite to completion, then map its boolean verdict onto
    # the process exit status (0 = all passed, 1 = at least one failure).
    all_passed = asyncio.run(run_tests())
    exit_code = 0 if all_passed else 1
    sys.exit(exit_code)