dek924 committed on
Commit
08298d9
Β·
1 Parent(s): e646c57

feat: api call limit init

Browse files
Files changed (2) hide show
  1. app.py +25 -2
  2. rate_limiter.py +285 -0
app.py CHANGED
@@ -5,9 +5,14 @@ import gradio as gr
5
  from dotenv import load_dotenv, find_dotenv
6
  from patientsim import PatientAgent, DoctorAgent
7
  from patientsim.utils.common_utils import detect_ed_termination
 
8
 
9
  load_dotenv(find_dotenv(usecwd=True), override=True)
10
 
 
 
 
 
11
 
12
  # ---------------------------------------------------------------------------
13
  # Constants
@@ -510,10 +515,16 @@ def start_simulation(
510
  personality: str,
511
  recall: str,
512
  confusion: str,
 
513
  ):
514
  if not hadm_id:
515
  return _setup_error("Please select a patient first.")
516
 
 
 
 
 
 
517
  is_openai = "gpt" in model.lower()
518
 
519
  if is_openai:
@@ -611,12 +622,17 @@ def start_manual(profile_mode: str, agent, sim_config: dict):
611
  )
612
 
613
 
614
- def chat(message: str, history: list, agent):
615
  if agent is None:
616
  raise gr.Error("No simulation running. Please start a simulation first.")
617
  if not message.strip():
618
  return history, ""
619
 
 
 
 
 
 
620
  response = agent(user_prompt=message, using_multi_turn=True, verbose=False)
621
  history = history + [
622
  {"role": "user", "content": message},
@@ -663,13 +679,20 @@ def _auto_fallback_outputs():
663
  )
664
 
665
 
666
- def start_auto(agent, sim_config: dict):
667
  """Generator β€” yields chatbot updates turn-by-turn so the UI streams live."""
668
  if agent is None or sim_config is None:
669
  gr.Warning("Session expired. Please restart.")
670
  yield _auto_fallback_outputs()
671
  return
672
 
 
 
 
 
 
 
 
673
  agent.reset_history(verbose=False)
674
 
675
  # Show auto_section immediately; set per-patient avatar on first yield
 
5
  from dotenv import load_dotenv, find_dotenv
6
  from patientsim import PatientAgent, DoctorAgent
7
  from patientsim.utils.common_utils import detect_ed_termination
8
+ from rate_limiter import RateLimiter, get_client_key
9
 
10
  load_dotenv(find_dotenv(usecwd=True), override=True)
11
 
12
+ # ---------------------------------------------------------------------------
13
+ # Rate limiter (singleton β€” shared across all Gradio worker threads)
14
+ # ---------------------------------------------------------------------------
15
+ _rate_limiter = RateLimiter()
16
 
17
  # ---------------------------------------------------------------------------
18
  # Constants
 
515
  personality: str,
516
  recall: str,
517
  confusion: str,
518
+ request: gr.Request = None,
519
  ):
520
  if not hadm_id:
521
  return _setup_error("Please select a patient first.")
522
 
523
+ client_key = get_client_key(request)
524
+ allowed, limit_msg = _rate_limiter.check_simulation_start(client_key)
525
+ if not allowed:
526
+ return _setup_error(limit_msg)
527
+
528
  is_openai = "gpt" in model.lower()
529
 
530
  if is_openai:
 
622
  )
623
 
624
 
625
+ def chat(message: str, history: list, agent, request: gr.Request = None):
626
  if agent is None:
627
  raise gr.Error("No simulation running. Please start a simulation first.")
628
  if not message.strip():
629
  return history, ""
630
 
631
+ client_key = get_client_key(request)
632
+ allowed, limit_msg = _rate_limiter.check_chat_message(client_key)
633
+ if not allowed:
634
+ raise gr.Error(limit_msg)
635
+
636
  response = agent(user_prompt=message, using_multi_turn=True, verbose=False)
637
  history = history + [
638
  {"role": "user", "content": message},
 
679
  )
680
 
681
 
682
+ def start_auto(agent, sim_config: dict, request: gr.Request = None):
683
  """Generator β€” yields chatbot updates turn-by-turn so the UI streams live."""
684
  if agent is None or sim_config is None:
685
  gr.Warning("Session expired. Please restart.")
686
  yield _auto_fallback_outputs()
687
  return
688
 
689
+ client_key = get_client_key(request)
690
+ allowed, limit_msg = _rate_limiter.check_auto_run(client_key)
691
+ if not allowed:
692
+ gr.Warning(limit_msg)
693
+ yield _auto_fallback_outputs()
694
+ return
695
+
696
  agent.reset_history(verbose=False)
697
 
698
  # Show auto_section immediately; set per-patient avatar on first yield
rate_limiter.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ IP-based hard-cap rate limiter for PatientSim Gradio demo.
3
+
4
+ Each counter is a simple cumulative total β€” no time window, no reset.
5
+ Once a limit is reached the client is permanently blocked for that action
6
+ until the process is restarted.
7
+
8
+ Limits are configurable via environment variables:
9
+
10
+ RATE_LIMIT_SIM_STARTS β€” max simulation setups total per IP (default: 5)
11
+ RATE_LIMIT_CHAT_MSGS β€” max chat messages total per IP (default: 50)
12
+ RATE_LIMIT_AUTO_RUNS β€” max auto simulation runs total per IP (default: 5)
13
+ RATE_LIMIT_TOTAL_API_CALLS β€” max total LLM calls across all modes (default: 200)
14
+
15
+ Client identification priority (for HuggingFace Spaces):
16
+ 1. HF OAuth username (if the Space has OAuth enabled)
17
+ 2. X-Forwarded-For header (first IP in the proxy chain)
18
+ 3. X-Real-IP header
19
+ 4. Direct client host
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import os
25
+ import threading
26
+ from collections import defaultdict
27
+ from typing import Dict, Tuple
28
+
29
+ import gradio as gr
30
+
31
+
32
# ---------------------------------------------------------------------------
# Configuration — overridable via environment variables
# ---------------------------------------------------------------------------

def _env_int(name: str, default: str) -> int:
    """Read an integer limit from the environment, falling back to *default*."""
    return int(os.environ.get(name, default))


SIM_STARTS_LIMIT: int = _env_int("RATE_LIMIT_SIM_STARTS", "5")
CHAT_MSGS_LIMIT: int = _env_int("RATE_LIMIT_CHAT_MSGS", "50")
AUTO_RUNS_LIMIT: int = _env_int("RATE_LIMIT_AUTO_RUNS", "5")
TOTAL_API_CALLS_LIMIT: int = _env_int("RATE_LIMIT_TOTAL_API_CALLS", "200")

# Each auto simulation consumes at most (2 agents x MAX_AUTO_INFERENCES) API calls,
# so this many slots are reserved upfront in the total_calls counter per auto run.
_AUTO_RUN_CALL_RESERVATION: int = 20
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Client identifier extraction
47
+ # ---------------------------------------------------------------------------
48
+
49
def get_client_key(request: gr.Request | None) -> str:
    """
    Derive a stable identifier string for the calling client.

    Resolution order (tailored to HuggingFace Spaces):

    1. HF OAuth username          -> ``"user:<name>"``
    2. ``X-Forwarded-For`` header -> ``"ip:<first hop>"``
    3. ``X-Real-IP`` header       -> ``"ip:<addr>"``
    4. Direct connection host     -> ``"ip:<host>"``

    Parameters
    ----------
    request:
        The :class:`gradio.Request` object injected by Gradio into event
        handler functions.

    Returns
    -------
    str
        A non-empty identifier; ``"unknown"`` when nothing can be extracted.
    """
    if request is None:
        return "unknown"

    # Authenticated HF users (Space with OAuth enabled) get a username key.
    username = getattr(request, "username", None)
    if username:
        return f"user:{username}"

    # Best-effort header extraction; keys lowercased for uniform lookup.
    headers: dict = {}
    request_headers = getattr(request, "headers", None)
    if request_headers:
        try:
            headers = {name.lower(): value for name, value in dict(request_headers).items()}
        except Exception:
            pass

    # Proxy / CDN chain — the leftmost X-Forwarded-For entry is the client.
    forwarded = headers.get("x-forwarded-for", "")
    if forwarded:
        first_hop = forwarded.split(",")[0].strip()
        if first_hop:
            return f"ip:{first_hop}"

    # Some reverse proxies (nginx, etc.) set X-Real-IP instead.
    real_ip = headers.get("x-real-ip", "")
    if real_ip:
        return f"ip:{real_ip.strip()}"

    # Fall back to the direct peer address (only reliable without a proxy).
    peer = getattr(request, "client", None)
    if peer and getattr(peer, "host", None):
        return f"ip:{peer.host}"

    return "unknown"
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Rate limiter
106
+ # ---------------------------------------------------------------------------
107
+
108
class RateLimiter:
    """
    Thread-safe hard-cap rate limiter keyed by client identifier.

    Counters are cumulative totals with no time window — once a limit is
    reached the client is permanently blocked for that action until the
    process restarts.

    Tracks four independent counters per key:

    * **sim_starts** — calls to ``start_simulation()``
    * **chat_msgs** — individual chat messages (1 LLM call each)
    * **auto_runs** — auto simulation runs (each reserved as
      ``_AUTO_RUN_CALL_RESERVATION`` LLM calls in ``total_calls``)
    * **total_calls** — aggregate LLM API calls across all modes

    Example
    -------
    >>> limiter = RateLimiter()
    >>> allowed, msg = limiter.check_simulation_start("ip:1.2.3.4")
    >>> if not allowed:
    ...     raise gr.Error(msg)
    """

    def __init__(self) -> None:
        # A single lock guards all four stores so cross-counter updates
        # (e.g. chat_msgs + total_calls) never interleave between threads.
        self._lock = threading.Lock()
        self._sim_starts: Dict[str, int] = defaultdict(int)
        self._chat_msgs: Dict[str, int] = defaultdict(int)
        self._auto_runs: Dict[str, int] = defaultdict(int)
        self._total_calls: Dict[str, int] = defaultdict(int)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _increment(
        self,
        store: Dict[str, int],
        key: str,
        limit: int,
        *,
        n: int = 1,
    ) -> Tuple[bool, int]:
        """
        Increment counter by *n* and check whether the new total exceeds *limit*.

        Returns
        -------
        (allowed, new_count)
        """
        with self._lock:
            store[key] += n
            count = store[key]
        return count <= limit, count

    def _decrement(self, store: Dict[str, int], key: str, n: int = 1) -> None:
        """Roll back a previous increment (used when a subsequent check fails)."""
        with self._lock:
            # Clamp at zero so a stray double-rollback can never go negative.
            store[key] = max(0, store[key] - n)

    # ------------------------------------------------------------------
    # Public check methods
    # ------------------------------------------------------------------

    def check_simulation_start(self, key: str) -> Tuple[bool, str]:
        """
        Check whether a new simulation setup is allowed.

        Called once when the user clicks **Start Simulation**.

        Parameters
        ----------
        key:
            Client identifier returned by :func:`get_client_key`.

        Returns
        -------
        (True, "") — allowed
        (False, human-readable message) — denied
        """
        allowed, count = self._increment(self._sim_starts, key, SIM_STARTS_LIMIT)
        if not allowed:
            return False, (
                f"Simulation setup limit reached "
                f"(maximum {SIM_STARTS_LIMIT} simulations per session)."
            )
        return True, ""

    def check_chat_message(self, key: str) -> Tuple[bool, str]:
        """
        Check whether sending a chat message is allowed (= 1 LLM API call).

        Increments both ``chat_msgs`` and ``total_calls``; the ``chat_msgs``
        increment is rolled back when the total-call budget is exhausted so
        the two counters stay consistent.

        Parameters
        ----------
        key:
            Client identifier returned by :func:`get_client_key`.

        Returns
        -------
        (True, "") — allowed
        (False, human-readable message) — denied
        """
        allowed_msg, _ = self._increment(self._chat_msgs, key, CHAT_MSGS_LIMIT)
        if not allowed_msg:
            return False, (
                f"Chat message limit reached "
                f"(maximum {CHAT_MSGS_LIMIT} messages per session)."
            )

        allowed_total, _ = self._increment(self._total_calls, key, TOTAL_API_CALLS_LIMIT)
        if not allowed_total:
            self._decrement(self._chat_msgs, key)
            return False, (
                f"Total API call limit reached "
                f"(maximum {TOTAL_API_CALLS_LIMIT} API calls per session)."
            )
        return True, ""

    def check_auto_run(self, key: str) -> Tuple[bool, str]:
        """
        Check whether starting an auto simulation is allowed.

        Reserves ``_AUTO_RUN_CALL_RESERVATION`` slots in the ``total_calls``
        counter upfront because each auto run may issue up to that many LLM
        calls before it finishes. The ``auto_runs`` increment is rolled back
        when the reservation does not fit in the total-call budget.

        Parameters
        ----------
        key:
            Client identifier returned by :func:`get_client_key`.

        Returns
        -------
        (True, "") — allowed
        (False, human-readable message) — denied
        """
        allowed_run, _ = self._increment(self._auto_runs, key, AUTO_RUNS_LIMIT)
        if not allowed_run:
            return False, (
                f"Auto simulation limit reached "
                f"(maximum {AUTO_RUNS_LIMIT} auto runs per session)."
            )

        allowed_total, _ = self._increment(
            self._total_calls, key, TOTAL_API_CALLS_LIMIT,
            n=_AUTO_RUN_CALL_RESERVATION,
        )
        if not allowed_total:
            self._decrement(self._auto_runs, key)
            return False, (
                f"Total API call limit reached "
                f"(maximum {TOTAL_API_CALLS_LIMIT} API calls per session)."
            )
        return True, ""

    # ------------------------------------------------------------------
    # Diagnostic
    # ------------------------------------------------------------------

    def status(self, key: str) -> dict:
        """
        Return current counter snapshots for *key*.

        Useful for debugging or exposing quota information in the UI.

        Returns
        -------
        dict with keys ``sim_starts``, ``chat_messages``, ``auto_runs``,
        ``total_api_calls``; each value is a dict with ``used`` and ``limit``.
        """
        with self._lock:
            # Use .get() rather than indexing: __getitem__ on a defaultdict
            # INSERTS a zero entry for unseen keys, so a read-only status
            # query would otherwise mutate state and grow the stores
            # without bound.
            return {
                "sim_starts": {"used": self._sim_starts.get(key, 0), "limit": SIM_STARTS_LIMIT},
                "chat_messages": {"used": self._chat_msgs.get(key, 0), "limit": CHAT_MSGS_LIMIT},
                "auto_runs": {"used": self._auto_runs.get(key, 0), "limit": AUTO_RUNS_LIMIT},
                "total_api_calls": {"used": self._total_calls.get(key, 0), "limit": TOTAL_API_CALLS_LIMIT},
            }