Spaces:

RayMelius
/

soci2

Running

RayMelius Claude Sonnet 4.6 committed on 20 days ago

Commit

708397c

1 Parent(s): 7033b26

Add GitHub state persistence and reduce default agent count to 50

- On startup: fetch state/autosave.json from GitHub (GITHUB_TOKEN + GITHUB_REPO env vars)
and write locally so load_simulation() can restore state after Render redeploy
- On shutdown: push autosave.json back to GitHub (preserves state across deploys)
- Periodic GitHub push every 96 ticks (~1 sim-day) as extra safety net
- Default SOCI_AGENTS reduced from 100 → 50 to lower Groq LLM hit rate

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show

src/soci/api/server.py +103 -2

src/soci/api/server.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import asyncio
 import logging
 import os
 import sys
@@ -10,6 +11,7 @@ from contextlib import asynccontextmanager
 from pathlib import Path
 from typing import Optional
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
@@ -91,9 +93,12 @@ async def simulation_loop(sim: Simulation, db: Database, tick_delay: float = 2.0
             await sim.tick()
-            # Auto-save every 24 ticks
             if sim.clock.total_ticks % 24 == 0:
                 await save_simulation(sim, db, "autosave")
             # At high speeds, skip the delay entirely
             delay = tick_delay * _sim_speed
@@ -110,6 +115,96 @@ async def simulation_loop(sim: Simulation, db: Database, tick_delay: float = 2.0
             await asyncio.sleep(5)  # Wait before retrying
 def _choose_provider() -> str:
     """Let the user choose an LLM provider on startup.
@@ -173,6 +268,10 @@ async def lifespan(app: FastAPI):
     await db.connect()
     _database = db
     # Try to resume
     sim = await load_simulation(db, llm)
     if sim is None:
@@ -183,7 +282,7 @@ async def lifespan(app: FastAPI):
         sim = Simulation(city=city, clock=clock, llm=llm)
         sim.load_agents_from_yaml(str(config_dir / "personas.yaml"))
         # Scale to target agent count with procedural generation
-        target_agents = int(os.environ.get("SOCI_AGENTS", "100"))
         if len(sim.agents) < target_agents:
             sim.generate_agents(target_agents - len(sim.agents))
         logger.info(f"Created new simulation with {len(sim.agents)} agents")
@@ -206,6 +305,8 @@ async def lifespan(app: FastAPI):
         except asyncio.CancelledError:
             pass
     await save_simulation(sim, db, "shutdown_save")
     await db.close()
     logger.info("Soci API server stopped.")

 from __future__ import annotations
 import asyncio
+import base64
 import logging
 import os
 import sys
 from pathlib import Path
 from typing import Optional
+import httpx
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
             await sim.tick()
+            # Auto-save every 24 ticks (~6 sim-hours); push to GitHub every 96 ticks (~1 sim-day)
             if sim.clock.total_ticks % 24 == 0:
                 await save_simulation(sim, db, "autosave")
+                if sim.clock.total_ticks % 96 == 0:
+                    data_dir_bg = Path(os.environ.get("SOCI_DATA_DIR", "data"))
+                    asyncio.create_task(save_state_to_github(data_dir_bg))
             # At high speeds, skip the delay entirely
             delay = tick_delay * _sim_speed
             await asyncio.sleep(5)  # Wait before retrying
+async def load_state_from_github(data_dir: Path) -> bool:
+    """Fetch autosave.json from GitHub and write it locally so load_simulation() can find it.
+    Env vars:
+        GITHUB_TOKEN  — personal access token with repo read/write
+        GITHUB_REPO   — "owner/repo" e.g. "alice/soci"
+        GITHUB_STATE_FILE — path inside repo (default: "state/autosave.json")
+    """
+    token = os.environ.get("GITHUB_TOKEN", "")
+    repo = os.environ.get("GITHUB_REPO", "")
+    if not token or not repo:
+        return False
+    path = os.environ.get("GITHUB_STATE_FILE", "state/autosave.json")
+    try:
+        async with httpx.AsyncClient() as client:
+            resp = await client.get(
+                f"https://api.github.com/repos/{repo}/contents/{path}",
+                headers={
+                    "Authorization": f"token {token}",
+                    "Accept": "application/vnd.github.v3+json",
+                },
+                timeout=30.0,
+            )
+            if resp.status_code == 404:
+                logger.info("No GitHub state file found — starting fresh")
+                return False
+            resp.raise_for_status()
+            content = base64.b64decode(resp.json()["content"]).decode("utf-8")
+            local_path = data_dir / "snapshots" / "autosave.json"
+            local_path.parent.mkdir(parents=True, exist_ok=True)
+            local_path.write_text(content, encoding="utf-8")
+            logger.info(f"Loaded state from GitHub ({len(content):,} bytes)")
+            return True
+    except Exception as e:
+        logger.warning(f"Could not load state from GitHub: {e}")
+        return False
+async def save_state_to_github(data_dir: Path) -> bool:
+    """Push autosave.json to GitHub for durable cross-deploy persistence."""
+    token = os.environ.get("GITHUB_TOKEN", "")
+    repo = os.environ.get("GITHUB_REPO", "")
+    if not token or not repo:
+        return False
+    path = os.environ.get("GITHUB_STATE_FILE", "state/autosave.json")
+    local_path = data_dir / "snapshots" / "autosave.json"
+    if not local_path.exists():
+        logger.warning("No autosave.json to push to GitHub")
+        return False
+    try:
+        content_bytes = local_path.read_bytes()
+        encoded = base64.b64encode(content_bytes).decode("ascii")
+        async with httpx.AsyncClient() as client:
+            # Fetch current SHA (needed to update an existing file)
+            sha: Optional[str] = None
+            get_resp = await client.get(
+                f"https://api.github.com/repos/{repo}/contents/{path}",
+                headers={
+                    "Authorization": f"token {token}",
+                    "Accept": "application/vnd.github.v3+json",
+                },
+                timeout=30.0,
+            )
+            if get_resp.status_code == 200:
+                sha = get_resp.json().get("sha")
+            body: dict = {
+                "message": "chore: update simulation state [skip ci]",
+                "content": encoded,
+            }
+            if sha:
+                body["sha"] = sha
+            put_resp = await client.put(
+                f"https://api.github.com/repos/{repo}/contents/{path}",
+                headers={
+                    "Authorization": f"token {token}",
+                    "Accept": "application/vnd.github.v3+json",
+                },
+                json=body,
+                timeout=60.0,
+            )
+            put_resp.raise_for_status()
+            logger.info(f"Saved state to GitHub ({len(content_bytes):,} bytes)")
+            return True
+    except Exception as e:
+        logger.warning(f"Could not save state to GitHub: {e}")
+        return False
 def _choose_provider() -> str:
     """Let the user choose an LLM provider on startup.
     await db.connect()
     _database = db
+    # Pull saved state from GitHub before trying to load locally
+    data_dir = Path(os.environ.get("SOCI_DATA_DIR", "data"))
+    await load_state_from_github(data_dir)
     # Try to resume
     sim = await load_simulation(db, llm)
     if sim is None:
         sim = Simulation(city=city, clock=clock, llm=llm)
         sim.load_agents_from_yaml(str(config_dir / "personas.yaml"))
         # Scale to target agent count with procedural generation
+        target_agents = int(os.environ.get("SOCI_AGENTS", "50"))
         if len(sim.agents) < target_agents:
             sim.generate_agents(target_agents - len(sim.agents))
         logger.info(f"Created new simulation with {len(sim.agents)} agents")
         except asyncio.CancelledError:
             pass
     await save_simulation(sim, db, "shutdown_save")
+    # Push state to GitHub so it survives the next redeploy
+    await save_state_to_github(data_dir)
     await db.close()
     logger.info("Soci API server stopped.")