ml-intern

Sleeping

Aksel Joonas Reedi committed on Apr 28

Commit

2715896

unverified ·

1 Parent(s): c21a9b1

Track Pro conversions + credits top-up; emit user_id + new KPIs (#174)

Adds two telemetry events the dashboard needs to answer "how many users
converted to Pro?" and "how many came back from a billing block?":

- pro_conversion: per-user Pro state lives in a new Mongo `pro_users`
collection. mark_pro_seen upserts on every authenticated request and
uses an atomic find_one_and_update to fire at-most-once when a user
first appears as Pro after having been seen as non-Pro. Wired through
session_manager.create_session via the existing `is_pro` signal from
/auth.

- credits_topped_up: fires from jobs_tool when an hf_job submit succeeds
in a session that previously hit a billing-required tool_state_change.
Guarded against re-firing within a session.

build_kpis aggregates both as plain count columns. CLI/local users with
no Mongo silently skip via the NoopSessionStore stub.

Files changed (8) hide show

agent/core/session_persistence.py +61 -0
agent/core/telemetry.py +38 -0
agent/tools/jobs_tool.py +17 -0
backend/routes/agent.py +8 -2
backend/session_manager.py +28 -0
scripts/build_kpis.py +13 -1
tests/unit/test_build_kpis.py +26 -0
tests/unit/test_session_persistence.py +99 -1

agent/core/session_persistence.py CHANGED Viewed

@@ -98,6 +98,9 @@ class NoopSessionStore:
     async def refund_quota(self, *_: Any, **__: Any) -> None:
         return None
 class MongoSessionStore(NoopSessionStore):
     """MongoDB-backed session store."""
@@ -152,6 +155,7 @@ class MongoSessionStore(NoopSessionStore):
             [("session_id", 1), ("seq", 1)], unique=True
         )
         await self.db.session_trace_messages.create_index([("created_at", -1)])
     def _ready(self) -> bool:
         return bool(self.enabled and self.db is not None)
@@ -410,6 +414,63 @@ class MongoSessionStore(NoopSessionStore):
             {"$inc": {"count": -1}, "$set": {"updated_at": _now()}},
         )
 _store: NoopSessionStore | MongoSessionStore | None = None

     async def refund_quota(self, *_: Any, **__: Any) -> None:
         return None
+    async def mark_pro_seen(self, *_: Any, **__: Any) -> dict[str, Any] | None:  # no-op stub: accepts and ignores any arguments
+        return None  # never reports a conversion
 class MongoSessionStore(NoopSessionStore):
     """MongoDB-backed session store."""
             [("session_id", 1), ("seq", 1)], unique=True
         )
         await self.db.session_trace_messages.create_index([("created_at", -1)])
+        await self.db.pro_users.create_index([("first_seen_pro_at", -1)])
     def _ready(self) -> bool:
         return bool(self.enabled and self.db is not None)
             {"$inc": {"count": -1}, "$set": {"updated_at": _now()}},
         )
+    async def mark_pro_seen(
+        self, user_id: str, *, is_pro: bool
+    ) -> dict[str, Any] | None:
+        """Track per-user Pro state and detect free→Pro conversions.
+        Every call upserts the ``pro_users`` document whose ``_id`` is
+        ``user_id``: ``first_seen_at`` is written on insert only, while
+        ``last_seen_at`` and ``is_pro`` are refreshed each time. A non-Pro
+        sighting additionally sets ``ever_non_pro``.
+        Returns ``{"converted": True, "first_seen_at": ...}`` at most once
+        per user — the first time we see them as Pro after having recorded
+        them as non-Pro at least once. ``first_seen_at`` in the result is
+        the ``isoformat()`` string of the stored ``first_seen_at`` value,
+        falling back to this call's timestamp when the field is missing.
+        Otherwise returns ``None``: store not ready, empty ``user_id``,
+        ``is_pro`` false, guard filter not matched, or a ``PyMongoError``
+        from either Mongo call (logged at debug level, not raised).
+        Storing ``ever_non_pro`` lets us distinguish "user joined as Pro"
+        (no conversion) from "user upgraded" (conversion). The atomic
+        ``find_one_and_update`` on a guarded filter makes the conversion
+        emit at-most-once even under concurrent requests.
+        """
+        if not self._ready() or not user_id:
+            return None
+        now = _now()
+        set_fields: dict[str, Any] = {"last_seen_at": now, "is_pro": bool(is_pro)}
+        if not is_pro:  # this method only ever sets the flag to True, never clears it
+            set_fields["ever_non_pro"] = True
+        try:
+            await self.db.pro_users.update_one(
+                {"_id": user_id},
+                {
+                    "$setOnInsert": {"_id": user_id, "first_seen_at": now},
+                    "$set": set_fields,
+                },
+                upsert=True,
+            )
+        except PyMongoError as e:
+            logger.debug("mark_pro_seen upsert failed for %s: %s", user_id, e)
+            return None
+        if not is_pro:  # a non-Pro sighting can never be a conversion
+            return None
+        try:
+            doc = await self.db.pro_users.find_one_and_update(
+                {
+                    "_id": user_id,
+                    "ever_non_pro": True,  # guard: seen as non-Pro at least once
+                    "first_seen_pro_at": {"$exists": False},  # guard: conversion not recorded yet
+                },
+                {"$set": {"first_seen_pro_at": now}},  # recording it makes the guard fail on later calls
+                return_document=ReturnDocument.AFTER,
+            )
+        except PyMongoError as e:
+            logger.debug("mark_pro_seen conversion check failed for %s: %s", user_id, e)
+            return None
+        if not doc:  # guard filter matched nothing: no conversion to report
+            return None
+        return {
+            "converted": True,
+            "first_seen_at": (doc.get("first_seen_at") or now).isoformat(),
+        }
 _store: NoopSessionStore | MongoSessionStore | None = None

agent/core/telemetry.py CHANGED Viewed

@@ -277,6 +277,44 @@ async def record_pro_cta_click(
         logger.debug("record_pro_cta_click failed (non-fatal): %s", e)
 # ── heartbeat ──────────────────────────────────────────────────────────────
 # Module-level reference set for fire-and-forget heartbeat tasks. asyncio only

         logger.debug("record_pro_cta_click failed (non-fatal): %s", e)
+async def record_pro_conversion(
+    session: Any,
+    *,
+    first_seen_at: str | None = None,
+) -> None:
+    """Emit a ``pro_conversion`` event for a user we've previously observed
+    as non-Pro and now see as Pro for the first time. Detected upstream in
+    ``MongoSessionStore.mark_pro_seen``; fired into the user's first Pro
+    session so the rollup picks it up alongside other event-driven KPIs.
+    Args:
+        session: Object whose awaitable ``send_event`` receives the event.
+        first_seen_at: Forwarded unchanged as ``data["first_seen_at"]``.
+    Any exception raised while sending is logged at debug level and
+    swallowed, so a telemetry failure never propagates to the caller."""
+    from agent.core.session import Event  # function-scope import — presumably avoids an import cycle; TODO confirm
+    try:
+        await session.send_event(Event(
+            event_type="pro_conversion",
+            data={"first_seen_at": first_seen_at},
+        ))
+    except Exception as e:
+        logger.debug("record_pro_conversion failed (non-fatal): %s", e)
+async def record_credits_topped_up(
+    session: Any,
+    *,
+    namespace: str | None = None,
+) -> None:
+    """Emit a ``credits_topped_up`` event when an hf_job submits successfully
+    in a session that previously hit a billing block — i.e. the user
+    came back from the HF billing top-up flow and unblocked themselves.
+    NOTE(review): this text used to name ``jobs_access_blocked`` as the
+    prior signal, but the jobs_tool caller looks for a ``tool_state_change``
+    event whose state is ``billing_required`` — confirm which is intended.
+    Args:
+        session: Object whose awaitable ``send_event`` receives the event.
+        namespace: Forwarded unchanged as ``data["namespace"]``.
+    Any exception raised while sending is logged at debug level and
+    swallowed. This function performs no de-duplication itself:
+    Caller is responsible for firing this at most once per session."""
+    from agent.core.session import Event  # function-scope import — presumably avoids an import cycle; TODO confirm
+    try:
+        await session.send_event(Event(
+            event_type="credits_topped_up",
+            data={"namespace": namespace},
+        ))
+    except Exception as e:
+        logger.debug("record_credits_topped_up failed (non-fatal): %s", e)
 # ── heartbeat ──────────────────────────────────────────────────────────────
 # Module-level reference set for fire-and-forget heartbeat tasks. asyncio only

agent/tools/jobs_tool.py CHANGED Viewed

@@ -641,6 +641,23 @@ class HfJobsTool:
                     {**args, "hardware_flavor": flavor, "timeout": timeout_str, "namespace": self.namespace},
                     image=image, job_type=job_type,
                 )
             # Wait for completion and stream logs
             logger.info(f"{job_type} job started: {job.url}")

                     {**args, "hardware_flavor": flavor, "timeout": timeout_str, "namespace": self.namespace},
                     image=image, job_type=job_type,
                 )
+                # Top-up signal: this submit succeeded after a prior billing
+                # block in the same session, and we haven't fired the event
+                # yet — the user came back from the HF billing flow.
+                events = self.session.logged_events
+                already_fired = any(
+                    e.get("event_type") == "credits_topped_up" for e in events
+                )
+                if not already_fired:
+                    blocked = any(
+                        e.get("event_type") == "tool_state_change"
+                        and (e.get("data") or {}).get("state") == "billing_required"
+                        for e in events
+                    )
+                    if blocked:
+                        await telemetry.record_credits_topped_up(
+                            self.session, namespace=self.namespace,
+                        )
             # Wait for completion and stream logs
             logger.info(f"{job_type} job started: {job.url}")

backend/routes/agent.py CHANGED Viewed

@@ -334,7 +334,10 @@ async def create_session(
     try:
         session_id = await session_manager.create_session(
-            user_id=user["user_id"], hf_token=hf_token, model=model
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))
@@ -370,7 +373,10 @@ async def restore_session_summary(
     try:
         session_id = await session_manager.create_session(
-            user_id=user["user_id"], hf_token=hf_token, model=model
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))

     try:
         session_id = await session_manager.create_session(
+            user_id=user["user_id"],
+            hf_token=hf_token,
+            model=model,
+            is_pro=user.get("plan") == "pro",
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))
     try:
         session_id = await session_manager.create_session(
+            user_id=user["user_id"],
+            hf_token=hf_token,
+            model=model,
+            is_pro=user.get("plan") == "pro",
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))

backend/session_manager.py CHANGED Viewed

@@ -465,6 +465,7 @@ class SessionManager:
         user_id: str = "dev",
         hf_token: str | None = None,
         model: str | None = None,
     ) -> str:
         """Create a new agent session and return its ID.
@@ -534,9 +535,36 @@ class SessionManager:
         )
         await self.persist_session_snapshot(agent_session, runtime_state="idle")
         logger.info(f"Created session {session_id} for user {user_id}")
         return session_id
     async def seed_from_summary(self, session_id: str, messages: list[dict]) -> int:
         """Rehydrate a session from cached prior messages via summarization.

         user_id: str = "dev",
         hf_token: str | None = None,
         model: str | None = None,
+        is_pro: bool | None = None,
     ) -> str:
         """Create a new agent session and return its ID.
         )
         await self.persist_session_snapshot(agent_session, runtime_state="idle")
+        if is_pro is not None and user_id and user_id != "dev":
+            await self._track_pro_status(agent_session, is_pro=is_pro)
         logger.info(f"Created session {session_id} for user {user_id}")
         return session_id
+    async def _track_pro_status(self, agent_session: AgentSession, *, is_pro: bool) -> None:
+        """Update Mongo per-user Pro state and emit a one-shot conversion
+        event if the store reports a free→Pro transition. Best-effort: any
+        exception from the store call or from the event emission is logged
+        at debug level and swallowed so we never fail session creation on
+        telemetry. Does nothing when the store has no truthy ``enabled``
+        attribute, or when the store result is empty or lacks a truthy
+        ``converted`` entry."""
+        store = self._store()
+        if not getattr(store, "enabled", False):  # store without persistence: nothing to track
+            return
+        try:
+            result = await store.mark_pro_seen(agent_session.user_id, is_pro=is_pro)
+        except Exception as e:
+            logger.debug("mark_pro_seen failed: %s", e)
+            return
+        if not result or not result.get("converted"):  # no conversion reported for this call
+            return
+        try:
+            from agent.core import telemetry
+            await telemetry.record_pro_conversion(
+                agent_session.session,
+                first_seen_at=result.get("first_seen_at"),
+            )
+        except Exception as e:
+            logger.debug("record_pro_conversion failed: %s", e)
     async def seed_from_summary(self, session_id: str, messages: list[dict]) -> int:
         """Rehydrate a session from cached prior messages via summarization.

scripts/build_kpis.py CHANGED Viewed

@@ -224,7 +224,7 @@ def _session_metrics(session: dict) -> dict:
         "failures": 0, "regenerate_sessions": 0,
         "thumbs_up": 0, "thumbs_down": 0,
         "hf_jobs_submitted": 0, "hf_jobs_succeeded": 0, "hf_jobs_blocked": 0,
-        "pro_cta_clicks": 0,
         "sandboxes_created": 0, "sandboxes_cpu": 0, "sandboxes_gpu": 0,
         "first_tool_s": -1,
     }
@@ -251,6 +251,8 @@ def _session_metrics(session: dict) -> dict:
     sandboxes_gpu = 0
     jobs_blocked = 0
     pro_cta_clicks = 0
     pro_cta_by_source: dict[str, int] = defaultdict(int)
     # Per-tool counters from tool_call events. Counted off tool_call (which
     # carries data["tool"]) rather than tool_output (which only carries
@@ -321,6 +323,12 @@ def _session_metrics(session: dict) -> dict:
             source = str(data.get("source") or "unknown")
             pro_cta_by_source[source] += 1
         elif et == "sandbox_create":
             sandboxes_created += 1
             hardware = (data.get("hardware") or "").lower()
@@ -347,6 +355,8 @@ def _session_metrics(session: dict) -> dict:
     out["sandboxes_gpu"] = sandboxes_gpu
     out["hf_jobs_blocked"] = jobs_blocked
     out["pro_cta_clicks"] = pro_cta_clicks
     out["first_tool_s"] = first_tool_ts if first_tool_ts is not None else -1
     out["_gpu_hours_by_flavor"] = dict(gpu_hours_by_flavor)
     out["_pro_cta_by_source"] = dict(pro_cta_by_source)
@@ -462,6 +472,8 @@ def _aggregate(per_session: list[dict]) -> dict:
         "sandboxes_gpu": int(sum(s.get("sandboxes_gpu", 0) for s in per_session)),
         "hf_jobs_blocked": int(sum(s.get("hf_jobs_blocked", 0) for s in per_session)),
         "pro_cta_clicks": int(sum(s.get("pro_cta_clicks", 0) for s in per_session)),
         "gpu_hours_by_flavor_json": json.dumps(dict(gpu_hours), sort_keys=True),
         # Research KPIs — answer "is the agent reaching for research?".
         "research_calls": research_calls_total,

         "failures": 0, "regenerate_sessions": 0,
         "thumbs_up": 0, "thumbs_down": 0,
         "hf_jobs_submitted": 0, "hf_jobs_succeeded": 0, "hf_jobs_blocked": 0,
+        "pro_cta_clicks": 0, "pro_conversions": 0, "credits_topped_up": 0,
         "sandboxes_created": 0, "sandboxes_cpu": 0, "sandboxes_gpu": 0,
         "first_tool_s": -1,
     }
     sandboxes_gpu = 0
     jobs_blocked = 0
     pro_cta_clicks = 0
+    pro_conversions = 0
+    credits_topped_up = 0
     pro_cta_by_source: dict[str, int] = defaultdict(int)
     # Per-tool counters from tool_call events. Counted off tool_call (which
     # carries data["tool"]) rather than tool_output (which only carries
             source = str(data.get("source") or "unknown")
             pro_cta_by_source[source] += 1
+        elif et == "pro_conversion":
+            pro_conversions += 1
+        elif et == "credits_topped_up":
+            credits_topped_up += 1
         elif et == "sandbox_create":
             sandboxes_created += 1
             hardware = (data.get("hardware") or "").lower()
     out["sandboxes_gpu"] = sandboxes_gpu
     out["hf_jobs_blocked"] = jobs_blocked
     out["pro_cta_clicks"] = pro_cta_clicks
+    out["pro_conversions"] = pro_conversions
+    out["credits_topped_up"] = credits_topped_up
     out["first_tool_s"] = first_tool_ts if first_tool_ts is not None else -1
     out["_gpu_hours_by_flavor"] = dict(gpu_hours_by_flavor)
     out["_pro_cta_by_source"] = dict(pro_cta_by_source)
         "sandboxes_gpu": int(sum(s.get("sandboxes_gpu", 0) for s in per_session)),
         "hf_jobs_blocked": int(sum(s.get("hf_jobs_blocked", 0) for s in per_session)),
         "pro_cta_clicks": int(sum(s.get("pro_cta_clicks", 0) for s in per_session)),
+        "pro_conversions": int(sum(s.get("pro_conversions", 0) for s in per_session)),
+        "credits_topped_up": int(sum(s.get("credits_topped_up", 0) for s in per_session)),
         "gpu_hours_by_flavor_json": json.dumps(dict(gpu_hours), sort_keys=True),
         # Research KPIs — answer "is the agent reaching for research?".
         "research_calls": research_calls_total,

tests/unit/test_build_kpis.py CHANGED Viewed

@@ -104,6 +104,32 @@ def test_hf_job_blocked_and_pro_clicks_are_counted():
     }
 def test_feedback_counts():
     mod = _load()
     events = [

     }
+def test_pro_conversions_and_credits_topped_up_per_session():
+    mod = _load()
+    events = [
+        _ev("pro_conversion", {"first_seen_at": "2026-04-20T10:00:00"}),
+        _ev("credits_topped_up", {"namespace": "smolagents"}),
+        _ev("credits_topped_up", {"namespace": "smolagents"}),  # second identical event: the metric counts events, it does not dedupe
+    ]
+    m = mod._session_metrics(_session(events))
+    assert m["pro_conversions"] == 1  # one pro_conversion event in the session
+    assert m["credits_topped_up"] == 2  # both credits_topped_up events are counted
+def test_aggregate_sums_pro_conversions_and_credits_topped_up():
+    mod = _load()
+    s1 = mod._session_metrics(_session([
+        _ev("pro_conversion", {}),
+    ], user_id="u1"))
+    s2 = mod._session_metrics(_session([
+        _ev("credits_topped_up", {"namespace": "ns"}),
+    ], user_id="u2"))
+    s3 = mod._session_metrics(_session([], user_id="u3"))  # session with no events contributes zero to both sums
+    row = mod._aggregate([s1, s2, s3])
+    assert row["pro_conversions"] == 1  # only s1 carries a pro_conversion event
+    assert row["credits_topped_up"] == 1  # only s2 carries a credits_topped_up event
 def test_feedback_counts():
     mod = _load()
     events = [

tests/unit/test_session_persistence.py CHANGED Viewed

@@ -2,7 +2,11 @@
 import pytest
-from agent.core.session_persistence import NoopSessionStore, _safe_message_doc
 @pytest.mark.asyncio
@@ -29,3 +33,97 @@ def test_unsafe_message_payload_is_replaced_with_marker():
     assert marker["role"] == "tool"
     assert marker["ml_intern_persistence_error"] == "message_too_large_or_invalid"

 import pytest
+from agent.core.session_persistence import (
+    MongoSessionStore,
+    NoopSessionStore,
+    _safe_message_doc,
+)
 @pytest.mark.asyncio
     assert marker["role"] == "tool"
     assert marker["ml_intern_persistence_error"] == "message_too_large_or_invalid"
+# ── mark_pro_seen ─────────────────────────────────────────────────────────
+class _FakeProUsers:
+    """In-memory stand-in for the ``pro_users`` collection.
+    Supports just enough of the Motor API to exercise ``mark_pro_seen``:
+    ``update_one`` with ``$setOnInsert`` + ``$set`` + ``upsert=True``, and
+    ``find_one_and_update`` with the guarded filter the conversion check uses.
+    Documents live in ``self.docs`` keyed by ``_id``. ``update_one`` returns
+    ``None`` rather than a result object, and ``find_one_and_update``
+    ignores ``return_document`` and always returns a copy of the document
+    after the update has been applied.
+    """
+    def __init__(self) -> None:
+        self.docs: dict[str, dict] = {}
+    async def update_one(self, filt, update, upsert=False):
+        _id = filt["_id"]  # only the ``_id`` key of the filter is consulted
+        doc = self.docs.get(_id)
+        if doc is None and upsert:  # insert path: seed the new document from $setOnInsert
+            doc = dict(update.get("$setOnInsert") or {})
+            self.docs[_id] = doc
+        if doc is None:  # no match and no upsert: nothing to change
+            return
+        for k, v in (update.get("$set") or {}).items():
+            doc[k] = v
+    async def find_one_and_update(self, filt, update, return_document=None):
+        _id = filt["_id"]
+        doc = self.docs.get(_id)
+        if doc is None:
+            return None
+        # Guard checks the conversion test uses: ever_non_pro=True AND
+        # first_seen_pro_at missing.
+        for k, v in filt.items():
+            if k == "_id":  # already applied by the lookup above
+                continue
+            if isinstance(v, dict) and "$exists" in v:
+                if v["$exists"] and k not in doc:
+                    return None
+                if not v["$exists"] and k in doc:
+                    return None
+            elif doc.get(k) != v:  # plain equality condition
+                return None
+        for k, v in (update.get("$set") or {}).items():
+            doc[k] = v
+        return dict(doc)  # shallow copy so callers cannot mutate the stored document's top level
+class _FakeDB:  # minimal database stand-in exposing only the ``pro_users`` collection
+    def __init__(self) -> None:
+        self.pro_users = _FakeProUsers()
+def _store_with_fake_db() -> MongoSessionStore:  # build a store backed by the in-memory fake
+    s = MongoSessionStore.__new__(MongoSessionStore)  # allocate without running __init__
+    s.enabled = True  # with a non-None db below, this makes _ready() return True
+    s.db = _FakeDB()
+    return s
+@pytest.mark.asyncio
+async def test_mark_pro_seen_returns_none_when_unknown_user_starts_pro():
+    """Joining as Pro shouldn't count as a conversion."""
+    store = _store_with_fake_db()
+    assert await store.mark_pro_seen("u-new-pro", is_pro=True) is None  # no prior non-Pro sighting, so ever_non_pro is unset
+@pytest.mark.asyncio
+async def test_mark_pro_seen_emits_conversion_after_seeing_user_as_free():
+    store = _store_with_fake_db()
+    assert await store.mark_pro_seen("u1", is_pro=False) is None  # non-Pro sighting records ever_non_pro and returns None
+    result = await store.mark_pro_seen("u1", is_pro=True)  # first Pro sighting after a non-Pro one
+    assert result is not None
+    assert result["converted"] is True
+    assert isinstance(result["first_seen_at"], str)  # timestamp is returned as a string, not a datetime
+@pytest.mark.asyncio
+async def test_mark_pro_seen_only_fires_conversion_once():
+    """Re-checking a converted user must not re-emit the event."""
+    store = _store_with_fake_db()
+    await store.mark_pro_seen("u1", is_pro=False)
+    first = await store.mark_pro_seen("u1", is_pro=True)
+    assert first is not None and first["converted"] is True
+    second = await store.mark_pro_seen("u1", is_pro=True)  # first_seen_pro_at now exists, so the guard filter no longer matches
+    assert second is None
+@pytest.mark.asyncio
+async def test_noop_store_mark_pro_seen_returns_none():
+    store = NoopSessionStore()
+    assert await store.mark_pro_seen("u1", is_pro=True) is None  # the stub returns None for either is_pro value
+    assert await store.mark_pro_seen("u1", is_pro=False) is None