Spaces:

RayMelius
/

soci2

Running

App Files Files Community

soci2 / test_simulation.py

RayMelius

Redesign city: individual houses, real streets, async Ollama client

492d303 22 days ago

raw

history blame contribute delete

14.9 kB

	"""Offline integration test — runs the full simulation loop with a mock LLM.

	This test validates the entire pipeline without requiring an API key.
	Run: python test_simulation.py
	"""

	from __future__ import annotations

	import asyncio
	import json
	import random
	import sys
	from pathlib import Path
	from unittest.mock import AsyncMock, MagicMock

	sys.path.insert(0, str(Path(__file__).parent / "src"))

	from soci.world.city import City
	from soci.world.clock import SimClock
	from soci.world.events import EventSystem
	from soci.agents.persona import load_personas, Persona
	from soci.agents.agent import Agent, AgentAction, AgentState
	from soci.agents.memory import MemoryStream, MemoryType
	from soci.agents.needs import NeedsState
	from soci.agents.relationships import RelationshipGraph, Relationship
	from soci.actions.registry import resolve_action, ActionType
	from soci.actions.movement import execute_move, get_best_location_for_need
	from soci.actions.activities import execute_activity
	from soci.actions.social import should_initiate_conversation, pick_conversation_partner
	from soci.engine.entropy import EntropyManager
	from soci.engine.scheduler import prioritize_agents, should_skip_llm
	from soci.engine.simulation import Simulation
	from soci.persistence.database import Database


	class MockLLM:
	    """Mock LLM that returns plausible canned responses without calling any API.

	    The object mimics the two coroutine entry points used by the simulation
	    (``complete`` and ``complete_json``) and exposes a ``usage`` object with
	    the counters and ``summary()`` method read by callers.

	    Args:
	        seed: Optional seed for the randomised parts of the responses. When
	            given, the mock draws from its own ``random.Random(seed)`` so two
	            mocks built with the same seed answer identically. When omitted,
	            the module-level ``random`` functions are used, exactly as before.
	    """

	    def __init__(self, seed=None):
	        # Fall back to the shared ``random`` module when unseeded so callers
	        # that seed it globally keep control over the mock's choices.
	        self._rng = random if seed is None else random.Random(seed)

	        self.usage = MagicMock()
	        self.usage.total_calls = 0
	        self.usage.total_input_tokens = 0
	        self.usage.total_output_tokens = 0
	        self.usage.estimated_cost_usd = 0.0
	        self.usage.calls_by_model = {}
	        # Build the summary on demand so it reports the live call count
	        # instead of a string frozen at construction time.
	        self.usage.summary.side_effect = self._usage_summary

	    def _usage_summary(self, *args, **kwargs):
	        """Return a one-line usage summary reflecting the current call count."""
	        return f"Mock LLM: {self.usage.total_calls} calls, $0.00"

	    async def complete(self, system, user_message, model=None, temperature=0.7, max_tokens=1024):
	        """Count the call and return a fixed free-text reply."""
	        self.usage.total_calls += 1
	        return "I'm thinking about my day."

	    async def complete_json(self, system, user_message, model=None, temperature=0.7, max_tokens=1024):
	        """Count the call and return a canned dict chosen from the prompt text.

	        The prompt kind is detected by case-insensitive substring matching on
	        ``user_message``; the first matching rule wins. Prompts that match no
	        rule get ``{"status": "ok"}``.
	        """
	        self.usage.total_calls += 1
	        rng = self._rng

	        # Detect what kind of prompt this is and return appropriate mock data.
	        msg = user_message.lower()

	        if "plan your day" in msg:
	            return {
	                "plan": [
	                    "Wake up and have breakfast at home",
	                    "Go to work at the office",
	                    "Have lunch at the cafe",
	                    "Continue working",
	                    "Go to the park for a walk",
	                    "Have dinner",
	                    "Relax at home",
	                ],
	                "reasoning": "A balanced day with work and leisure.",
	            }

	        if "what do you do next" in msg:
	            action = rng.choice(["work", "eat", "relax", "wander", "move", "exercise"])
	            # Drawn on every call (not only for "move") so the random stream
	            # is consumed identically whichever action was picked.
	            move_target = rng.choice(["cafe", "park", "house_elena", "office", "grocery"])
	            details = {
	                "move": "heading somewhere new",
	                "work": "focusing on a project",
	                "eat": "having a quick meal",
	                "relax": "taking it easy",
	                "wander": "strolling around",
	                "exercise": "doing some stretches",
	            }
	            return {
	                "action": action,
	                "target": move_target if action == "move" else "",
	                "detail": details.get(action, "doing something"),
	                "duration": rng.randint(1, 3),
	                "reasoning": "Felt like it.",
	            }

	        if "how important" in msg:
	            return {
	                "importance": rng.randint(3, 8),
	                "reaction": "Interesting, I'll remember that.",
	            }

	        if "reflect" in msg:
	            return {
	                "reflections": [
	                    "I notice I've been spending a lot of time at work lately.",
	                    "The neighborhood feels alive today.",
	                ],
	                "mood_shift": rng.uniform(-0.1, 0.2),
	                "reasoning": "Just thinking about things.",
	            }

	        if "start a conversation" in msg or "you decide to start" in msg:
	            return {
	                "message": "Hey, how's it going?",
	                "inner_thought": "I should catch up with them.",
	                "topic": "daily life",
	            }

	        if "says:" in msg:
	            return {
	                "message": "Yeah, things are good. How about you?",
	                "inner_thought": "Nice to chat.",
	                "sentiment_delta": 0.05,
	                "trust_delta": 0.02,
	            }

	        return {"status": "ok"}


	# Config files are looked up next to this script, mirroring how the "src"
	# directory is located for sys.path, so the test does not depend on the
	# current working directory.
	_CONFIG_DIR = Path(__file__).resolve().parent / "config"
	_CITY_YAML = str(_CONFIG_DIR / "city.yaml")
	_PERSONAS_YAML = str(_CONFIG_DIR / "personas.yaml")


	def _personas(ctx: dict):
	    """Return the persona list, loading it once and caching it in ``ctx``."""
	    if "personas" not in ctx:
	        ctx["personas"] = load_personas(_PERSONAS_YAML)
	    return ctx["personas"]


	def _check_clock(ctx: dict) -> str:
	    """Tick the clock through one full day and round-trip it through a dict."""
	    clock = SimClock(tick_minutes=15, hour=6, minute=0)
	    for _ in range(96):  # 96 ticks of 15 minutes = one full day
	        clock.tick()
	    assert clock.day == 2, f"Expected day 2, got {clock.day}"
	    assert clock.hour == 6, f"Expected hour 6, got {clock.hour}"
	    restored_clock = SimClock.from_dict(clock.to_dict())
	    assert restored_clock.day == clock.day
	    return "Clock ticks correctly for a full day, serialization works"


	def _check_city(ctx: dict) -> str:
	    """Load the city and exercise connectivity, placement and movement."""
	    city = City.from_yaml(_CITY_YAML)
	    assert len(city.locations) == 20
	    # Connectivity
	    cafe = city.get_location("cafe")
	    assert cafe is not None
	    assert "street_north" in cafe.connected_to
	    assert len(city.get_connected("cafe")) > 0
	    # Agent placement and movement
	    city.place_agent("test_agent", "cafe")
	    assert "test_agent" in city.get_agents_at("cafe")
	    city.move_agent("test_agent", "cafe", "office")
	    assert "test_agent" not in city.get_agents_at("cafe")
	    assert "test_agent" in city.get_agents_at("office")
	    assert city.find_agent("test_agent") == "office"
	    city.locations["office"].remove_occupant("test_agent")
	    return "City loads, connections work, movement works"


	def _check_personas(ctx: dict) -> str:
	    """Load the personas and check their diversity and system prompt."""
	    personas = _personas(ctx)
	    assert len(personas) == 20
	    # Diversity
	    ages = [p.age for p in personas]
	    youngest, oldest = min(ages), max(ages)
	    assert youngest <= 20, "Should have young people"
	    assert oldest >= 60, "Should have older people"
	    occupations = {p.occupation for p in personas}
	    assert len(occupations) >= 15, "Should have diverse occupations"
	    # System prompt
	    prompt = personas[0].system_prompt()
	    assert personas[0].name in prompt
	    assert "personality" in prompt.lower()
	    return f"20 personas loaded, ages {youngest}-{oldest}, {len(occupations)} occupations"


	def _check_needs(ctx: dict) -> str:
	    """Check that needs decay over ticks and can be satisfied."""
	    needs = NeedsState()
	    initial_hunger = needs.hunger
	    for _ in range(20):
	        needs.tick()
	    assert needs.hunger < initial_hunger, "Hunger should decay"
	    assert needs.energy < 1.0, "Energy should decay"
	    needs.satisfy("hunger", 0.5)
	    assert needs.hunger > 0.0, "Hunger should be partially satisfied"
	    # Only exercised for errors; the value itself is not asserted.
	    _ = needs.urgent_needs
	    desc = needs.describe()
	    assert isinstance(desc, str)
	    return f"Needs decay ({desc}), satisfaction works"


	def _check_memory(ctx: dict) -> str:
	    """Check memory storage, retrieval, reflection trigger and serialization."""
	    mem = MemoryStream()
	    for i in range(30):
	        mem.add(i, 1, f"{6+i//4:02d}:{(i%4)*15:02d}",
	                MemoryType.OBSERVATION, f"Event {i}", importance=random.randint(1, 10))
	    assert len(mem.memories) == 30
	    assert len(mem.retrieve(30, top_k=5)) == 5
	    recent = mem.get_recent(3)
	    assert len(recent) == 3
	    assert recent[-1].content == "Event 29"
	    # Reflection trigger: force the accumulator high, then reset it.
	    mem._importance_accumulator = 100
	    assert mem.should_reflect()
	    mem.reset_reflection_accumulator()
	    assert not mem.should_reflect()
	    # Serialization
	    restored_mem = MemoryStream.from_dict(mem.to_dict())
	    assert len(restored_mem.memories) == 30
	    return "Memory storage, retrieval, reflection trigger, serialization"


	def _check_relationships(ctx: dict) -> str:
	    """Check that relationships form, update after interaction and serialize."""
	    graph = RelationshipGraph()
	    rel = graph.get_or_create("elena", "Elena Vasquez")
	    assert rel.familiarity == 0.0
	    rel.update_after_interaction(tick=10, sentiment_delta=0.1, trust_delta=0.05, note="Had coffee together")
	    assert rel.familiarity > 0.0
	    assert rel.sentiment > 0.5
	    assert len(rel.notes) == 1
	    assert len(graph.get_closest(5)) == 1
	    assert "Elena" in rel.describe()
	    # Serialization
	    restored_g = RelationshipGraph.from_dict(graph.to_dict())
	    assert restored_g.get("elena") is not None
	    return "Relationships form, track sentiment/trust, serialize"


	def _check_agent(ctx: dict) -> str:
	    """Check agent action lifecycle, needs ticking, observation and serialization."""
	    agent = Agent(_personas(ctx)[0])  # Elena
	    assert agent.name == "Elena Vasquez"
	    assert agent.location == "house_elena"
	    assert agent.state == AgentState.IDLE
	    # Action lifecycle: busy for exactly duration_ticks ticks.
	    action = AgentAction(type="work", detail="coding", duration_ticks=3, needs_satisfied={"purpose": 0.3})
	    agent.start_action(action)
	    assert agent.is_busy
	    assert agent.state == AgentState.WORKING
	    for _ in range(3):
	        agent.tick_action()
	    assert not agent.is_busy
	    assert agent.state == AgentState.IDLE
	    # Mood + needs interaction (exercised for errors only)
	    for _ in range(10):
	        agent.tick_needs()
	    # Observation
	    agent.add_observation(0, 1, "06:00", "Saw a cat in the park", importance=4)
	    assert len(agent.memory.memories) == 1
	    # Serialization
	    restored_a = Agent.from_dict(agent.to_dict())
	    assert restored_a.name == agent.name
	    assert len(restored_a.memory.memories) == 1
	    return "Agent actions, needs, mood, memory, serialization"


	def _check_action_resolution(ctx: dict) -> str:
	    """Check that valid raw actions resolve and invalid ones fall back to wander."""
	    city = City.from_yaml(_CITY_YAML)
	    agent = Agent(_personas(ctx)[0])
	    city.place_agent(agent.id, agent.location)
	    raw = {"action": "move", "target": "cafe", "detail": "heading to cafe", "duration": 1}
	    resolved = resolve_action(raw, agent, city)
	    assert resolved.type == "move"
	    assert resolved.target == "cafe"
	    # Invalid action falls back to wander
	    resolved_bad = resolve_action({"action": "fly", "target": "moon"}, agent, city)
	    assert resolved_bad.type == "wander"
	    return "Valid actions resolve, invalid actions fall back to wander"


	def _check_movement(ctx: dict) -> str:
	    """Check that a move action relocates the agent and a location is suggested for a need."""
	    clock = SimClock()
	    agent = Agent(_personas(ctx)[0])
	    city = City.from_yaml(_CITY_YAML)
	    city.place_agent(agent.id, "house_elena")
	    move_action = AgentAction(type="move", target="cafe", detail="walking to cafe")
	    desc = execute_move(agent, move_action, city, clock)
	    assert "cafe" in desc.lower() or "Daily Grind" in desc
	    assert agent.location == "cafe"
	    # Location suggestion
	    suggested = get_best_location_for_need(agent, "hunger", city)
	    assert suggested is not None
	    return f"Movement works, need-based suggestion: {suggested}"


	def _check_events_and_entropy(ctx: dict) -> str:
	    """Check that events fire and that the entropy manager detects action loops."""
	    events = EventSystem(event_chance_per_tick=1.0)  # Force events
	    new = events.tick(["cafe", "park", "office"])
	    assert len(events.active_events) > 0 or len(new) > 0
	    assert "Weather" in events.get_world_description()
	    entropy = EntropyManager()
	    agents_list = [Agent(p) for p in _personas(ctx)[:5]]
	    # Simulate repetitive behavior
	    entropy._action_history["elena"] = ["work"] * 15
	    assert entropy._is_stuck_in_loop("elena")
	    conflicts = entropy.get_conflict_catalysts(agents_list)
	    return f"Events fire, entropy detects loops, {len(conflicts)} potential conflicts found"


	async def _check_simulation_loop(ctx: dict) -> str:
	    """Run 10 simulation ticks with the mock LLM and publish the result in ``ctx``."""
	    mock_llm = MockLLM()
	    city = City.from_yaml(_CITY_YAML)
	    clock = SimClock(tick_minutes=15, hour=6, minute=0)
	    sim = Simulation(city=city, clock=clock, llm=mock_llm)
	    sim.load_agents_from_yaml(_PERSONAS_YAML)

	    # Limit to 5 agents for speed
	    kept_ids = list(sim.agents)[:5]
	    sim.agents = {aid: sim.agents[aid] for aid in kept_ids}

	    events_collected = []
	    sim.on_event = lambda msg: events_collected.append(msg)

	    for _ in range(10):
	        await sim.tick()

	    assert sim.clock.total_ticks == 10
	    assert len(events_collected) > 0
	    for agent in sim.agents.values():
	        assert len(agent.memory.memories) > 0, f"{agent.name} should have memories"

	    # Shared with the serialization check and the final stats report.
	    ctx["sim"] = sim
	    ctx["mock_llm"] = mock_llm
	    return (f"10 ticks completed, {len(events_collected)} events, "
	            f"{mock_llm.usage.total_calls} LLM calls")


	def _check_state_roundtrip(ctx: dict) -> str:
	    """Serialize the simulated world to JSON and restore it."""
	    sim = ctx.get("sim")
	    if sim is None:
	        raise RuntimeError("no simulation available: the full simulation loop check did not complete")
	    state_json = json.dumps(sim.to_dict())
	    assert len(state_json) > 1000, "State should be substantial"
	    sim2 = Simulation.from_dict(json.loads(state_json), ctx["mock_llm"])
	    assert len(sim2.agents) == len(sim.agents)
	    assert sim2.clock.total_ticks == sim.clock.total_ticks
	    for aid in sim.agents:
	        assert aid in sim2.agents
	        assert sim2.agents[aid].name == sim.agents[aid].name
	    return f"Full state serialized ({len(state_json):,} bytes) and restored"


	def _print_simulation_stats(sim, mock_llm) -> None:
	    """Print the clock, weather, LLM call count and per-agent status."""
	    print("\nSimulation state:")
	    print(f" Clock: {sim.clock.datetime_str}")
	    print(f" Weather: {sim.events.weather.value}")
	    print(f" Mock LLM calls: {mock_llm.usage.total_calls}")
	    print("\nAgent status after 10 ticks:")
	    for agent in sim.agents.values():
	        loc = sim.city.get_location(agent.location)
	        loc_name = loc.name if loc else agent.location
	        print(f" {agent.name}: {agent.state.value} at {loc_name} "
	              f"(mood={agent.mood:.2f}, memories={len(agent.memory.memories)})")


	async def run_tests():
	    """Run every offline check in order and report the outcome.

	    Each check runs in isolation: a failure (assertion or any other
	    exception) is printed with its traceback and counted, and the remaining
	    checks still run. Checks that need state produced by an earlier check
	    fail with an explanatory error when that state is missing.

	    Returns:
	        True when every check passed, False otherwise.
	    """
	    import traceback

	    print("=" * 60)
	    print("SOCI — OFFLINE INTEGRATION TEST")
	    print("=" * 60)

	    checks = [
	        ("Clock system", _check_clock),
	        ("City system", _check_city),
	        ("Persona system", _check_personas),
	        ("Needs system", _check_needs),
	        ("Memory system", _check_memory),
	        ("Relationship system", _check_relationships),
	        ("Agent system", _check_agent),
	        ("Action resolution", _check_action_resolution),
	        ("Movement system", _check_movement),
	        ("Events and entropy", _check_events_and_entropy),
	        ("Full simulation loop (mock LLM)", _check_simulation_loop),
	        ("Full state serialization", _check_state_roundtrip),
	    ]
	    total = len(checks)
	    ctx = {}  # state shared between checks (personas, sim, mock_llm)
	    errors = 0

	    for index, (title, check) in enumerate(checks, start=1):
	        print(f"\n[{index}/{total}] {title}...")
	        try:
	            outcome = check(ctx)
	            if asyncio.iscoroutine(outcome):
	                outcome = await outcome
	        except Exception as exc:
	            # Runner boundary: record the failure and keep going so one
	            # broken subsystem does not hide the state of the others.
	            errors += 1
	            print(f" FAIL: {type(exc).__name__}: {exc}")
	            print(traceback.format_exc())
	        else:
	            print(f" PASS: {outcome}")

	    # --- Summary ---
	    print("\n" + "=" * 60)
	    if errors == 0:
	        print(f"ALL {total} TESTS PASSED")
	    else:
	        print(f"{errors} TEST(S) FAILED")
	    print("=" * 60)

	    # Stats are only available when the simulation loop check completed.
	    if "sim" in ctx:
	        _print_simulation_stats(ctx["sim"], ctx["mock_llm"])

	    return errors == 0


	if __name__ == "__main__":
	    # Process exit status: 0 when run_tests() reports success, 1 otherwise.
	    sys.exit(int(not asyncio.run(run_tests())))