RayMelius Claude Sonnet 4.6 committed on
Commit
adb6d19
·
1 Parent(s): 29d9da4

Add Gemini LLM support, fix back-view direction, scripted conversation fallbacks

Browse files

- Add GeminiClient using OpenAI-compatible AI Studio endpoint (free tier:
15 RPM / 1M tokens/day on gemini-2.0-flash, set GEMINI_API_KEY to use)
- Auto-detect provider order: Claude → Groq → Gemini → Ollama
- Fix agent back-view: move direction tracking from drawPerson() to animate()
using pixel delta — reliable for all path angles, no waypoint guessing
- Add scripted fallback dialogue so conversations animate in the UI even
when no LLM is configured

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

src/soci/actions/conversation.py CHANGED
@@ -3,6 +3,7 @@
3
  from __future__ import annotations
4
 
5
  import logging
 
6
  from dataclasses import dataclass, field
7
  from typing import Optional, TYPE_CHECKING
8
 
@@ -114,9 +115,17 @@ async def initiate_conversation(
114
  max_tokens=512,
115
  )
116
 
117
- # LLM unavailable β€” skip conversation entirely
118
  if not result:
119
- return None
 
 
 
 
 
 
 
 
120
 
121
  message = result.get("message", f"Hey, {target.name}.")
122
  topic = result.get("topic", "small talk")
@@ -187,10 +196,17 @@ async def continue_conversation(
187
  max_tokens=512,
188
  )
189
 
190
- # LLM unavailable (rate-limited / circuit breaker) β€” end conversation cleanly
191
  if not result:
192
- conversation.is_active = False
193
- return last_turn
 
 
 
 
 
 
 
194
 
195
  message = result.get("message", "Hmm, interesting.")
196
 
 
3
  from __future__ import annotations
4
 
5
  import logging
6
+ import random
7
  from dataclasses import dataclass, field
8
  from typing import Optional, TYPE_CHECKING
9
 
 
115
  max_tokens=512,
116
  )
117
 
118
+ # LLM unavailable β€” use scripted fallback so conversations still animate in the UI
119
  if not result:
120
+ starters = [
121
+ {"message": f"Hey {target.name}, how's it going?", "topic": "greeting", "inner_thought": "Making small talk."},
122
+ {"message": f"Oh, {target.name}! Didn't expect to run into you here.", "topic": "chance meeting", "inner_thought": "Good to see a familiar face."},
123
+ {"message": "What have you been up to lately?", "topic": "small talk", "inner_thought": "Curious about their day."},
124
+ {"message": "Lovely weather today, isn't it?", "topic": "weather", "inner_thought": "Breaking the ice."},
125
+ {"message": f"Hi {target.name}! Have you heard any news lately?", "topic": "news", "inner_thought": "Looking for something to talk about."},
126
+ {"message": "I was just thinking about grabbing something to eat. You?", "topic": "food", "inner_thought": "Maybe we can go together."},
127
+ ]
128
+ result = random.choice(starters)
129
 
130
  message = result.get("message", f"Hey, {target.name}.")
131
  topic = result.get("topic", "small talk")
 
196
  max_tokens=512,
197
  )
198
 
199
+ # LLM unavailable β€” scripted response keeps conversation alive in the UI
200
  if not result:
201
+ replies = [
202
+ {"message": "Ha, yeah, I was just thinking the same thing!", "inner_thought": "Go with the flow.", "sentiment_delta": 0.05, "trust_delta": 0.02},
203
+ {"message": "Not too bad, honestly. Just keeping busy.", "inner_thought": "Keep it light.", "sentiment_delta": 0.03, "trust_delta": 0.01},
204
+ {"message": "Interesting! Tell me more.", "inner_thought": "Show some curiosity.", "sentiment_delta": 0.04, "trust_delta": 0.02},
205
+ {"message": "Yeah, it's been that kind of day.", "inner_thought": "Relate to them.", "sentiment_delta": 0.02, "trust_delta": 0.01},
206
+ {"message": "I hear you. Things have been a bit hectic on my end too.", "inner_thought": "Empathize.", "sentiment_delta": 0.04, "trust_delta": 0.03},
207
+ {"message": "Good point. I hadn't thought of it that way.", "inner_thought": "Give them credit.", "sentiment_delta": 0.05, "trust_delta": 0.03},
208
+ ]
209
+ result = random.choice(replies)
210
 
211
  message = result.get("message", "Hmm, interesting.")
212
 
src/soci/engine/llm.py CHANGED
@@ -18,6 +18,7 @@ logger = logging.getLogger(__name__)
18
  PROVIDER_CLAUDE = "claude"
19
  PROVIDER_OLLAMA = "ollama"
20
  PROVIDER_GROQ = "groq"
 
21
 
22
  # Claude model IDs
23
  MODEL_SONNET = "claude-sonnet-4-5-20250929"
@@ -35,6 +36,10 @@ MODEL_GROQ_LLAMA_8B = "llama-3.1-8b-instant"
35
  MODEL_GROQ_LLAMA_70B = "llama-3.3-70b-versatile"
36
  MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
37
 
 
 
 
 
38
  # Approximate cost per 1M tokens (USD) β€” Ollama is free, Groq is very cheap
39
  COST_PER_1M = {
40
  MODEL_SONNET: {"input": 3.0, "output": 15.0},
@@ -583,6 +588,192 @@ class GroqClient:
583
  return mapping.get(model, model)
584
 
585
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
  # ============================================================
587
  # Factory β€” create the right client based on config
588
  # ============================================================
@@ -605,11 +796,13 @@ def create_llm_client(
605
  provider = os.environ.get("LLM_PROVIDER", "").lower()
606
 
607
  if not provider:
608
- # Auto-detect: Claude β†’ Groq β†’ Ollama
609
  if os.environ.get("ANTHROPIC_API_KEY"):
610
  provider = PROVIDER_CLAUDE
611
  elif os.environ.get("GROQ_API_KEY"):
612
  provider = PROVIDER_GROQ
 
 
613
  else:
614
  provider = PROVIDER_OLLAMA
615
 
@@ -619,11 +812,14 @@ def create_llm_client(
619
  elif provider == PROVIDER_GROQ:
620
  default_model = model or os.environ.get("GROQ_MODEL", MODEL_GROQ_LLAMA_8B)
621
  return GroqClient(default_model=default_model)
 
 
 
622
  elif provider == PROVIDER_OLLAMA:
623
  default_model = model or os.environ.get("OLLAMA_MODEL", MODEL_LLAMA)
624
  return OllamaClient(base_url=ollama_url, default_model=default_model)
625
  else:
626
- raise ValueError(f"Unknown LLM provider: {provider}. Use 'claude', 'groq', or 'ollama'.")
627
 
628
 
629
  # --- Prompt Templates ---
 
18
  PROVIDER_CLAUDE = "claude"
19
  PROVIDER_OLLAMA = "ollama"
20
  PROVIDER_GROQ = "groq"
21
+ PROVIDER_GEMINI = "gemini"
22
 
23
  # Claude model IDs
24
  MODEL_SONNET = "claude-sonnet-4-5-20250929"
 
36
  MODEL_GROQ_LLAMA_70B = "llama-3.3-70b-versatile"
37
  MODEL_GROQ_MIXTRAL = "mixtral-8x7b-32768"
38
 
39
+ # Google Gemini model IDs (free tier via AI Studio)
40
+ MODEL_GEMINI_FLASH = "gemini-2.0-flash"
41
+ MODEL_GEMINI_PRO = "gemini-1.5-pro"
42
+
43
  # Approximate cost per 1M tokens (USD) β€” Ollama is free, Groq is very cheap
44
  COST_PER_1M = {
45
  MODEL_SONNET: {"input": 3.0, "output": 15.0},
 
588
  return mapping.get(model, model)
589
 
590
 
591
# ============================================================
# Google Gemini Client (free tier via OpenAI-compatible endpoint)
# ============================================================


class GeminiClient:
    """Google Gemini via the OpenAI-compatible AI Studio endpoint.

    Free tier (no credit card):
      - gemini-2.0-flash: 15 RPM, 1 M tokens/day — plenty for a simulation.
      - Get a free key at https://aistudio.google.com/apikey
    Uses the OpenAI-compatible endpoint so no extra SDK is needed.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        default_model: str = MODEL_GEMINI_FLASH,
        max_retries: int = 3,
        max_rpm: int = 14,  # stay under the 15 RPM free-tier limit
    ) -> None:
        """Create a Gemini client.

        Args:
            api_key: AI Studio key; falls back to the GEMINI_API_KEY env var.
            default_model: model used when a call does not name one.
            max_retries: attempts per request before giving up.
            max_rpm: client-side pacing limit (requests per minute).

        Raises:
            ValueError: if no API key is given or found in the environment.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY", "")
        if not self.api_key:
            raise ValueError(
                "GEMINI_API_KEY not set. "
                "Get a free key at https://aistudio.google.com/apikey"
            )
        self.default_model = default_model
        self.max_retries = max_retries
        self.usage = LLMUsage()
        self.provider = PROVIDER_GEMINI
        self._http = httpx.AsyncClient(
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            timeout=60.0,
        )
        # Client-side pacing: enforce a minimum interval between requests.
        self._min_request_interval = 60.0 / max_rpm
        self._last_request_time: float = 0.0
        self._rate_lock = asyncio.Lock()
        # Circuit breaker: monotonic deadline; requests are skipped until then.
        self._rate_limited_until: float = 0.0

    def _is_quota_exhausted(self) -> bool:
        """True while the daily-quota circuit breaker is tripped."""
        return time.monotonic() < self._rate_limited_until

    async def _wait_for_rate_limit(self) -> None:
        """Sleep as needed (holding the lock, so callers serialize) to honor max_rpm."""
        async with self._rate_lock:
            now = time.monotonic()
            elapsed = now - self._last_request_time
            if elapsed < self._min_request_interval:
                await asyncio.sleep(self._min_request_interval - elapsed)
            self._last_request_time = time.monotonic()

    def _map_model(self, model: str) -> str:
        """Map Claude/Groq model names to Gemini equivalents."""
        mapping = {
            MODEL_SONNET: self.default_model,
            MODEL_HAIKU: self.default_model,
            MODEL_GROQ_LLAMA_8B: MODEL_GEMINI_FLASH,
        }
        return mapping.get(model, model)

    async def _request(
        self,
        payload: dict,
        *,
        status_label: str,
        error_label: str,
        transform: Callable[[str], Any],
    ) -> Any:
        """POST one chat-completions payload with pacing, retries and 429 handling.

        Shared engine for complete()/complete_json(), which previously
        duplicated this loop verbatim. ``transform`` is applied to the
        response text *inside* the retry loop, so a malformed response is
        retried exactly like any other error (matching the old behavior).

        Returns transform(text), or None when every attempt failed — callers
        map None to their empty result ("" or {}).
        """
        model = payload["model"]
        for attempt in range(self.max_retries):
            last_attempt = attempt == self.max_retries - 1
            try:
                await self._wait_for_rate_limit()
                resp = await self._http.post("chat/completions", json=payload)
                resp.raise_for_status()
                data = resp.json()
                usage = data.get("usage", {})
                self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
                return transform(data["choices"][0]["message"]["content"])
            except httpx.HTTPStatusError as e:
                if e.response.status_code == 429:
                    try:
                        wait = float(e.response.headers.get("retry-after", "5"))
                    except (ValueError, TypeError):
                        wait = 5.0
                    if wait > 30:
                        # A long retry-after means the daily quota is gone —
                        # trip the breaker so later calls fail fast.
                        self._rate_limited_until = time.monotonic() + wait
                        logger.warning(f"Gemini quota exhausted for {wait:.0f}s")
                        return None
                    logger.warning(f"Gemini rate limited, waiting {wait}s")
                    # Fix: previously slept even on the final attempt, delaying
                    # the inevitable empty return for no reason.
                    if not last_attempt:
                        await asyncio.sleep(wait)
                else:
                    logger.error(f"Gemini {status_label}: {e.response.status_code}")
                    if last_attempt:
                        return None
                    await asyncio.sleep(1)
            except Exception as e:
                logger.error(f"Gemini {error_label}: {e}")
                if last_attempt:
                    return None
                await asyncio.sleep(1)
        return None

    async def complete(
        self,
        system: str,
        user_message: str,
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 1024,
    ) -> str:
        """Send a chat completion request to Gemini.

        Returns the assistant text, or "" when the quota breaker is active
        or every retry failed.
        """
        if self._is_quota_exhausted():
            logger.debug("Gemini quota circuit breaker active — skipping complete()")
            return ""

        model = self._map_model(model or self.default_model)
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": user_message},
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        text = await self._request(
            payload,
            status_label="HTTP error",
            error_label="error",
            transform=lambda t: t,
        )
        return text if text is not None else ""

    async def complete_json(
        self,
        system: str,
        user_message: str,
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 1024,
    ) -> dict:
        """Send a JSON-mode request to Gemini.

        Returns the parsed JSON object, or {} when the quota breaker is
        active or every retry failed.
        """
        if self._is_quota_exhausted():
            logger.debug("Gemini quota circuit breaker active — skipping complete_json()")
            return {}

        model = self._map_model(model or self.default_model)
        json_instruction = (
            "\n\nRespond ONLY with valid JSON. No markdown, no explanation, no extra text. "
            "Just the JSON object."
        )
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": user_message + json_instruction},
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
            "response_format": {"type": "json_object"},
        }
        result = await self._request(
            payload,
            status_label="JSON error",
            error_label="JSON error",
            transform=_parse_json_response,
        )
        return result if result is not None else {}
777
  # ============================================================
778
  # Factory β€” create the right client based on config
779
  # ============================================================
 
796
  provider = os.environ.get("LLM_PROVIDER", "").lower()
797
 
798
  if not provider:
799
+ # Auto-detect: Claude β†’ Groq β†’ Gemini β†’ Ollama
800
  if os.environ.get("ANTHROPIC_API_KEY"):
801
  provider = PROVIDER_CLAUDE
802
  elif os.environ.get("GROQ_API_KEY"):
803
  provider = PROVIDER_GROQ
804
+ elif os.environ.get("GEMINI_API_KEY"):
805
+ provider = PROVIDER_GEMINI
806
  else:
807
  provider = PROVIDER_OLLAMA
808
 
 
812
  elif provider == PROVIDER_GROQ:
813
  default_model = model or os.environ.get("GROQ_MODEL", MODEL_GROQ_LLAMA_8B)
814
  return GroqClient(default_model=default_model)
815
+ elif provider == PROVIDER_GEMINI:
816
+ default_model = model or os.environ.get("GEMINI_MODEL", MODEL_GEMINI_FLASH)
817
+ return GeminiClient(default_model=default_model)
818
  elif provider == PROVIDER_OLLAMA:
819
  default_model = model or os.environ.get("OLLAMA_MODEL", MODEL_LLAMA)
820
  return OllamaClient(base_url=ollama_url, default_model=default_model)
821
  else:
822
+ raise ValueError(f"Unknown LLM provider: {provider}. Use 'claude', 'groq', 'gemini', or 'ollama'.")
823
 
824
 
825
  # --- Prompt Templates ---
web/index.html CHANGED
@@ -748,8 +748,19 @@ function animate() {
748
  // Moving agents travel slower so the walk is visible; others snap faster
749
  const isMoving = agent && (agent.state === 'moving');
750
  const lerpRate = isMoving ? 0.022 : 0.07;
 
751
  p.x += (dest.x - p.x) * lerpRate;
752
  p.y += (dest.y - p.y) * lerpRate;
 
 
 
 
 
 
 
 
 
 
753
  }
754
  draw();
755
  requestAnimationFrame(animate);
@@ -1948,17 +1959,7 @@ function drawPerson(id, agent, globalIdx, W, H) {
1948
  const armSwing = walkAnim ? Math.sin(walkPhase) * 10 : 0;
1949
  const tY = -10 + bounce; // torso top Y β€” hoisted so profile view can use it
1950
 
1951
- // Facing direction β€” track dominant movement axis (H or V)
1952
- const destPt = (agentWaypoints[id] && agentWaypoints[id].length) ? agentWaypoints[id][0] : agentTargets[id];
1953
- if (destPt && walkAnim) {
1954
- const ddx = destPt.x - ax, ddy = destPt.y - ay;
1955
- if (Math.abs(ddx) > Math.abs(ddy) + 5) {
1956
- agentFacingRight[id] = ddx > 0;
1957
- agentMovingUp[id] = false;
1958
- } else if (Math.abs(ddy) > Math.abs(ddx) + 5) {
1959
- agentMovingUp[id] = ddy < 0;
1960
- }
1961
- }
1962
  const facingRight = agentFacingRight[id] !== false; // default right
1963
  const movingUp = agentMovingUp[id] === true;
1964
 
 
748
  // Moving agents travel slower so the walk is visible; others snap faster
749
  const isMoving = agent && (agent.state === 'moving');
750
  const lerpRate = isMoving ? 0.022 : 0.07;
751
+ const prevX = p.x, prevY = p.y;
752
  p.x += (dest.x - p.x) * lerpRate;
753
  p.y += (dest.y - p.y) * lerpRate;
754
+ // Track facing direction from actual pixel delta β€” reliable for all path types
755
+ const mdx = p.x - prevX, mdy = p.y - prevY;
756
+ if (Math.abs(mdx) > 0.1 || Math.abs(mdy) > 0.1) {
757
+ if (Math.abs(mdy) > Math.abs(mdx)) {
758
+ agentMovingUp[id] = mdy < 0; // moving up = back view
759
+ } else {
760
+ agentFacingRight[id] = mdx > 0; // moving horizontally = profile
761
+ agentMovingUp[id] = false;
762
+ }
763
+ }
764
  }
765
  draw();
766
  requestAnimationFrame(animate);
 
1959
  const armSwing = walkAnim ? Math.sin(walkPhase) * 10 : 0;
1960
  const tY = -10 + bounce; // torso top Y β€” hoisted so profile view can use it
1961
 
1962
+ // Facing direction β€” maintained by animate() from position delta
 
 
 
 
 
 
 
 
 
 
1963
  const facingRight = agentFacingRight[id] !== false; // default right
1964
  const movingUp = agentMovingUp[id] === true;
1965