"""Offline integration test — runs the full simulation loop with a mock LLM. This test validates the entire pipeline without requiring an API key. Run: python test_simulation.py """ from __future__ import annotations import asyncio import json import random import sys from pathlib import Path from unittest.mock import AsyncMock, MagicMock sys.path.insert(0, str(Path(__file__).parent / "src")) from soci.world.city import City from soci.world.clock import SimClock from soci.world.events import EventSystem from soci.agents.persona import load_personas, Persona from soci.agents.agent import Agent, AgentAction, AgentState from soci.agents.memory import MemoryStream, MemoryType from soci.agents.needs import NeedsState from soci.agents.relationships import RelationshipGraph, Relationship from soci.actions.registry import resolve_action, ActionType from soci.actions.movement import execute_move, get_best_location_for_need from soci.actions.activities import execute_activity from soci.actions.social import should_initiate_conversation, pick_conversation_partner from soci.engine.entropy import EntropyManager from soci.engine.scheduler import prioritize_agents, should_skip_llm from soci.engine.simulation import Simulation from soci.persistence.database import Database class MockLLM: """Mock LLM that returns plausible JSON responses without calling the API.""" def __init__(self): self.usage = MagicMock() self.usage.total_calls = 0 self.usage.total_input_tokens = 0 self.usage.total_output_tokens = 0 self.usage.estimated_cost_usd = 0.0 self.usage.calls_by_model = {} self.usage.summary.return_value = "Mock LLM: 0 calls, $0.00" async def complete(self, system, user_message, model=None, temperature=0.7, max_tokens=1024): self.usage.total_calls += 1 return "I'm thinking about my day." async def complete_json(self, system, user_message, model=None, temperature=0.7, max_tokens=1024): self.usage.total_calls += 1 # Detect what kind of prompt this is and return appropriate mock data msg = user_message.lower() if "plan your day" in msg: return { "plan": [ "Wake up and have breakfast at home", "Go to work at the office", "Have lunch at the cafe", "Continue working", "Go to the park for a walk", "Have dinner", "Relax at home", ], "reasoning": "A balanced day with work and leisure." } if "what do you do next" in msg: actions = ["work", "eat", "relax", "wander", "move", "exercise"] action = random.choice(actions) targets = { "move": random.choice(["cafe", "park", "house_elena", "office", "grocery"]), "work": "", "eat": "", "relax": "", "wander": "", "exercise": "", } details = { "move": "heading somewhere new", "work": "focusing on a project", "eat": "having a quick meal", "relax": "taking it easy", "wander": "strolling around", "exercise": "doing some stretches", } return { "action": action, "target": targets.get(action, ""), "detail": details.get(action, "doing something"), "duration": random.randint(1, 3), "reasoning": "Felt like it." } if "how important" in msg: return { "importance": random.randint(3, 8), "reaction": "Interesting, I'll remember that." } if "reflect" in msg: return { "reflections": [ "I notice I've been spending a lot of time at work lately.", "The neighborhood feels alive today." ], "mood_shift": random.uniform(-0.1, 0.2), "reasoning": "Just thinking about things." } if "start a conversation" in msg or "you decide to start" in msg: return { "message": "Hey, how's it going?", "inner_thought": "I should catch up with them.", "topic": "daily life" } if "says:" in msg: return { "message": "Yeah, things are good. How about you?", "inner_thought": "Nice to chat.", "sentiment_delta": 0.05, "trust_delta": 0.02 } return {"status": "ok"} async def run_tests(): print("=" * 60) print("SOCI — OFFLINE INTEGRATION TEST") print("=" * 60) errors = 0 # --- Test 1: Clock --- print("\n[1/12] Clock system...") clock = SimClock(tick_minutes=15, hour=6, minute=0) for _ in range(96): # Full day clock.tick() assert clock.day == 2, f"Expected day 2, got {clock.day}" assert clock.hour == 6, f"Expected hour 6, got {clock.hour}" clock_dict = clock.to_dict() restored_clock = SimClock.from_dict(clock_dict) assert restored_clock.day == clock.day print(" PASS: Clock ticks correctly for a full day, serialization works") # --- Test 2: City --- print("\n[2/12] City system...") city = City.from_yaml("config/city.yaml") assert len(city.locations) == 20 # Test connectivity cafe = city.get_location("cafe") assert cafe is not None assert "street_north" in cafe.connected_to connected = city.get_connected("cafe") assert len(connected) > 0 # Test agent placement and movement city.place_agent("test_agent", "cafe") assert "test_agent" in city.get_agents_at("cafe") city.move_agent("test_agent", "cafe", "office") assert "test_agent" not in city.get_agents_at("cafe") assert "test_agent" in city.get_agents_at("office") assert city.find_agent("test_agent") == "office" city.locations["office"].remove_occupant("test_agent") print(" PASS: City loads, connections work, movement works") # --- Test 3: Personas --- print("\n[3/12] Persona system...") personas = load_personas("config/personas.yaml") assert len(personas) == 20 # Check diversity ages = [p.age for p in personas] assert min(ages) <= 20, "Should have young people" assert max(ages) >= 60, "Should have older people" occupations = set(p.occupation for p in personas) assert len(occupations) >= 15, "Should have diverse occupations" # Test system prompt prompt = personas[0].system_prompt() assert personas[0].name in prompt assert "personality" in prompt.lower() or "PERSONALITY" in prompt print(f" PASS: 20 personas loaded, ages {min(ages)}-{max(ages)}, {len(occupations)} occupations") # --- Test 4: Needs --- print("\n[4/12] Needs system...") needs = NeedsState() initial_hunger = needs.hunger for _ in range(20): needs.tick() assert needs.hunger < initial_hunger, "Hunger should decay" assert needs.energy < 1.0, "Energy should decay" needs.satisfy("hunger", 0.5) assert needs.hunger > 0.0, "Hunger should be partially satisfied" urgent = needs.urgent_needs desc = needs.describe() assert isinstance(desc, str) print(f" PASS: Needs decay ({desc}), satisfaction works") # --- Test 5: Memory --- print("\n[5/12] Memory system...") mem = MemoryStream() for i in range(30): mem.add(i, 1, f"{6+i//4:02d}:{(i%4)*15:02d}", MemoryType.OBSERVATION, f"Event {i}", importance=random.randint(1, 10)) assert len(mem.memories) == 30 retrieved = mem.retrieve(30, top_k=5) assert len(retrieved) == 5 recent = mem.get_recent(3) assert len(recent) == 3 assert recent[-1].content == "Event 29" # Test reflection trigger mem._importance_accumulator = 100 assert mem.should_reflect() mem.reset_reflection_accumulator() assert not mem.should_reflect() # Test serialization mem_dict = mem.to_dict() restored_mem = MemoryStream.from_dict(mem_dict) assert len(restored_mem.memories) == 30 print(" PASS: Memory storage, retrieval, reflection trigger, serialization") # --- Test 6: Relationships --- print("\n[6/12] Relationship system...") graph = RelationshipGraph() rel = graph.get_or_create("elena", "Elena Vasquez") assert rel.familiarity == 0.0 rel.update_after_interaction(tick=10, sentiment_delta=0.1, trust_delta=0.05, note="Had coffee together") assert rel.familiarity > 0.0 assert rel.sentiment > 0.5 assert len(rel.notes) == 1 closest = graph.get_closest(5) assert len(closest) == 1 desc = rel.describe() assert "Elena" in desc # Serialization g_dict = graph.to_dict() restored_g = RelationshipGraph.from_dict(g_dict) assert restored_g.get("elena") is not None print(" PASS: Relationships form, track sentiment/trust, serialize") # --- Test 7: Agent --- print("\n[7/12] Agent system...") persona = personas[0] # Elena agent = Agent(persona) assert agent.name == "Elena Vasquez" assert agent.location == "house_elena" assert agent.state == AgentState.IDLE # Test action action = AgentAction(type="work", detail="coding", duration_ticks=3, needs_satisfied={"purpose": 0.3}) agent.start_action(action) assert agent.is_busy assert agent.state == AgentState.WORKING for _ in range(3): agent.tick_action() assert not agent.is_busy assert agent.state == AgentState.IDLE # Test mood + needs interaction for _ in range(10): agent.tick_needs() # Test observation agent.add_observation(0, 1, "06:00", "Saw a cat in the park", importance=4) assert len(agent.memory.memories) == 1 # Serialization a_dict = agent.to_dict() restored_a = Agent.from_dict(a_dict) assert restored_a.name == agent.name assert len(restored_a.memory.memories) == 1 print(" PASS: Agent actions, needs, mood, memory, serialization") # --- Test 8: Action resolution --- print("\n[8/12] Action resolution...") city2 = City.from_yaml("config/city.yaml") agent2 = Agent(personas[0]) city2.place_agent(agent2.id, agent2.location) raw = {"action": "move", "target": "cafe", "detail": "heading to cafe", "duration": 1} resolved = resolve_action(raw, agent2, city2) assert resolved.type == "move" assert resolved.target == "cafe" # Invalid action falls back to wander raw_bad = {"action": "fly", "target": "moon"} resolved_bad = resolve_action(raw_bad, agent2, city2) assert resolved_bad.type == "wander" print(" PASS: Valid actions resolve, invalid actions fall back to wander") # --- Test 9: Movement --- print("\n[9/12] Movement system...") clock2 = SimClock() agent3 = Agent(personas[0]) city3 = City.from_yaml("config/city.yaml") city3.place_agent(agent3.id, "house_elena") move_action = AgentAction(type="move", target="cafe", detail="walking to cafe") desc = execute_move(agent3, move_action, city3, clock2) assert "cafe" in desc.lower() or "Daily Grind" in desc assert agent3.location == "cafe" # Test location suggestion suggested = get_best_location_for_need(agent3, "hunger", city3) assert suggested is not None print(f" PASS: Movement works, need-based suggestion: {suggested}") # --- Test 10: Events & Entropy --- print("\n[10/12] Events and entropy...") events = EventSystem(event_chance_per_tick=1.0) # Force events new = events.tick(["cafe", "park", "office"]) assert len(events.active_events) > 0 or len(new) > 0 world_desc = events.get_world_description() assert "Weather" in world_desc entropy = EntropyManager() agents_list = [Agent(p) for p in personas[:5]] # Simulate repetitive behavior entropy._action_history["elena"] = ["work"] * 15 assert entropy._is_stuck_in_loop("elena") conflicts = entropy.get_conflict_catalysts(agents_list) print(f" PASS: Events fire, entropy detects loops, {len(conflicts)} potential conflicts found") # --- Test 11: Full simulation loop (mock LLM) --- print("\n[11/12] Full simulation loop (mock LLM)...") mock_llm = MockLLM() city4 = City.from_yaml("config/city.yaml") clock4 = SimClock(tick_minutes=15, hour=6, minute=0) sim = Simulation(city=city4, clock=clock4, llm=mock_llm) sim.load_agents_from_yaml("config/personas.yaml") # Limit to 5 agents for speed agent_ids = list(sim.agents.keys())[:5] sim.agents = {aid: sim.agents[aid] for aid in agent_ids} events_collected = [] sim.on_event = lambda msg: events_collected.append(msg) # Run 10 ticks for _ in range(10): await sim.tick() assert sim.clock.total_ticks == 10 assert len(events_collected) > 0 print(f" PASS: 10 ticks completed, {len(events_collected)} events, " f"{mock_llm.usage.total_calls} LLM calls") # Check agents moved, have memories, etc. for aid, agent in sim.agents.items(): assert len(agent.memory.memories) > 0, f"{agent.name} should have memories" # --- Test 12: State serialization roundtrip --- print("\n[12/12] Full state serialization...") state = sim.to_dict() state_json = json.dumps(state) assert len(state_json) > 1000, "State should be substantial" restored_state = json.loads(state_json) sim2 = Simulation.from_dict(restored_state, mock_llm) assert len(sim2.agents) == len(sim.agents) assert sim2.clock.total_ticks == sim.clock.total_ticks for aid in sim.agents: assert aid in sim2.agents assert sim2.agents[aid].name == sim.agents[aid].name print(f" PASS: Full state serialized ({len(state_json):,} bytes) and restored") # --- Summary --- print("\n" + "=" * 60) if errors == 0: print("ALL 12 TESTS PASSED") else: print(f"{errors} TEST(S) FAILED") print("=" * 60) # Print some interesting stats print(f"\nSimulation state:") print(f" Clock: {sim.clock.datetime_str}") print(f" Weather: {sim.events.weather.value}") print(f" Mock LLM calls: {mock_llm.usage.total_calls}") print(f"\nAgent status after 10 ticks:") for aid, agent in sim.agents.items(): loc = sim.city.get_location(agent.location) loc_name = loc.name if loc else agent.location print(f" {agent.name}: {agent.state.value} at {loc_name} " f"(mood={agent.mood:.2f}, memories={len(agent.memory.memories)})") return errors == 0 if __name__ == "__main__": success = asyncio.run(run_tests()) sys.exit(0 if success else 1)