Rafael Poyiadzi Claude Opus 4.6 committed on
Commit
2f173ce
·
1 Parent(s): bd620e6

Improve PostHog tracking coverage and reliability

Browse files

- Track validation errors (no file, empty query, empty CSV, bad schema) as
"validation_error" events; an empty API key is not tracked because the
distinct ID is derived from the key
- Add duration_s to agent_map_completed and agent_map_failed events
- Enrich agent_map_started with field type breakdown (field_types,
has_category)
- Register atexit handler to flush remaining PostHog events on shutdown
- Move css/head params from gr.Blocks() to demo.launch() for Gradio 6
compatibility
- Note frontend/backend identity mismatch as a known limitation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +53 -5
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import ast
 
2
  import hashlib
3
  import json
4
  import os
5
  import tempfile
 
6
  import uuid
7
  from typing import Literal
8
 
@@ -18,7 +20,12 @@ _POSTHOG_ENABLED = bool(_POSTHOG_KEY and _POSTHOG_HOST)
18
  posthog.api_key = _POSTHOG_KEY
19
  posthog.project_api_key = _POSTHOG_KEY
20
  posthog.host = _POSTHOG_HOST
 
 
21
 
 
 
 
22
  POSTHOG_HEAD = f"""
23
  <script>
24
  !function(t,e){{var o,n,p,r;e.__SV||(window.posthog=e,e._i=[],e.init=function(i,s,a){{function g(t,e){{var o=e.split(".");2==o.length&&(t=t[o[0]],e=o[1]),t[e]=function(){{t.push([e].concat(Array.prototype.slice.call(arguments,0)))}}}}(p=t.createElement("script")).type="text/javascript",p.async=!0,p.src=s.api_host+"/static/array.js",(r=t.getElementsByTagName("script")[0]).parentNode.insertBefore(p,r);var u=e;for(void 0!==a?u=e[a]=[]:a="posthog",u.people=u.people||[],u.toString=function(t){{var e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e}},u.people.toString=function(){{return u.toString(1)+".people (stub)"}},o="init capture register register_once unregister opt_in_capturing opt_out_capturing has_opted_in_capturing has_opted_out_capturing identify alias people.set people.set_once set_config reset opt_in_capturing".split(" "),n=0;n<o.length;n++)g(u,o[n]);e._i.push([i,s,a])}},e.__SV=1)}}(document,window.posthog||[]);
@@ -130,18 +137,50 @@ def _posthog_distinct_id(api_key: str) -> str:
130
  return hashlib.sha256(api_key.encode()).hexdigest()[:16]
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  async def run_agent_map(api_key, file, query, effort_label, fields_list, session_id):
134
  if not api_key:
135
  raise gr.Error("Please enter your everyrow API key.")
136
  if file is None:
 
137
  raise gr.Error("Please upload a CSV file.")
138
  if not query.strip():
 
139
  raise gr.Error("Please enter a research query.")
140
 
141
  os.environ["EVERYROW_API_KEY"] = api_key
142
 
143
  df = pd.read_csv(file)
144
  if df.empty:
 
145
  raise gr.Error("The uploaded CSV is empty.")
146
 
147
  effort_level = EFFORT_LEVELS[effort_label]
@@ -150,6 +189,7 @@ async def run_agent_map(api_key, file, query, effort_label, fields_list, session
150
  try:
151
  response_model = build_response_model(fields_list)
152
  except ValueError as e:
 
153
  raise gr.Error(str(e))
154
 
155
  kwargs = dict(task=query, input=df, effort_level=effort_level)
@@ -165,19 +205,26 @@ async def run_agent_map(api_key, file, query, effort_label, fields_list, session
165
  "effort_level": effort_label,
166
  "row_count": len(df),
167
  "column_count": len(df.columns),
168
- "field_count": len(fields_list) if fields_list else 0,
169
  "app": "everyrow-research-space",
170
  },
171
  )
172
 
 
173
  result = await agent_map(**kwargs)
 
174
 
175
  if result.error:
176
  if _POSTHOG_ENABLED:
177
  posthog.capture(
178
  distinct_id=distinct_id,
179
  event="agent_map_failed",
180
- properties={"$session_id": session_id, "error": str(result.error), "app": "everyrow-research-space"},
 
 
 
 
 
181
  )
182
  raise gr.Error(f"agent_map failed: {result.error}")
183
 
@@ -189,6 +236,7 @@ async def run_agent_map(api_key, file, query, effort_label, fields_list, session
189
  "$session_id": session_id,
190
  "output_rows": len(result.data),
191
  "output_columns": len(result.data.columns),
 
192
  "app": "everyrow-research-space",
193
  },
194
  )
@@ -246,10 +294,10 @@ def expand_research(df: pd.DataFrame) -> pd.DataFrame:
246
  return result
247
 
248
 
 
 
249
  with gr.Blocks(
250
  title="everyrow annotate – AI Data Annotation & Web Research",
251
- head=POSTHOG_HEAD,
252
- css=".error-box { background: #fee; border: 1px solid #c00; border-radius: 8px; padding: 12px; color: #900; }",
253
  ) as demo:
254
  gr.Markdown(
255
  """
@@ -530,4 +578,4 @@ with gr.Blocks(
530
  )
531
 
532
  if __name__ == "__main__":
533
- demo.launch()
 
1
  import ast
2
+ import atexit
3
  import hashlib
4
  import json
5
  import os
6
  import tempfile
7
+ import time
8
  import uuid
9
  from typing import Literal
10
 
 
20
  posthog.api_key = _POSTHOG_KEY
21
  posthog.project_api_key = _POSTHOG_KEY
22
  posthog.host = _POSTHOG_HOST
23
+ if _POSTHOG_ENABLED:
24
+ atexit.register(posthog.shutdown)
25
 
26
+ # NOTE: Frontend JS uses an anonymous distinct_id while server-side uses a hashed
27
+ # API key. PostHog won't auto-link these identities. To link them, the frontend
28
+ # would need to call posthog.identify() with the hashed key after the user submits.
29
  POSTHOG_HEAD = f"""
30
  <script>
31
  !function(t,e){{var o,n,p,r;e.__SV||(window.posthog=e,e._i=[],e.init=function(i,s,a){{function g(t,e){{var o=e.split(".");2==o.length&&(t=t[o[0]],e=o[1]),t[e]=function(){{t.push([e].concat(Array.prototype.slice.call(arguments,0)))}}}}(p=t.createElement("script")).type="text/javascript",p.async=!0,p.src=s.api_host+"/static/array.js",(r=t.getElementsByTagName("script")[0]).parentNode.insertBefore(p,r);var u=e;for(void 0!==a?u=e[a]=[]:a="posthog",u.people=u.people||[],u.toString=function(t){{var e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e}},u.people.toString=function(){{return u.toString(1)+".people (stub)"}},o="init capture register register_once unregister opt_in_capturing opt_out_capturing has_opted_in_capturing has_opted_out_capturing identify alias people.set people.set_once set_config reset opt_in_capturing".split(" "),n=0;n<o.length;n++)g(u,o[n]);e._i.push([i,s,a])}},e.__SV=1)}}(document,window.posthog||[]);
 
137
  return hashlib.sha256(api_key.encode()).hexdigest()[:16]
138
 
139
 
140
def _track_validation_error(api_key, session_id, error_msg):
    """Record a ``validation_error`` event in PostHog on a best-effort basis.

    Nothing is sent when PostHog is disabled or ``api_key`` is empty, because
    the event's distinct id is derived from the API key.

    Args:
        api_key: The user's API key; passed to ``_posthog_distinct_id``.
        session_id: Value stored under the ``$session_id`` event property.
        error_msg: Description of the validation failure.
    """
    if not (_POSTHOG_ENABLED and api_key):
        return

    try:
        posthog.capture(
            distinct_id=_posthog_distinct_id(api_key),
            event="validation_error",
            properties={
                "$session_id": session_id,
                "error": error_msg,
                "app": "everyrow-research-space",
            },
        )
    except Exception:
        # Callers raise the user-facing validation error right after this
        # call; a telemetry failure must not replace that message.
        import logging

        logging.getLogger(__name__).exception(
            "Failed to send validation_error event to PostHog"
        )
152
+
153
+
154
+ def _schema_properties(fields_list):
155
+ """Summarize the output schema for PostHog events."""
156
+ if not fields_list:
157
+ return {"field_count": 0}
158
+ type_counts = {}
159
+ for f in fields_list:
160
+ t = f.get("type", "str")
161
+ type_counts[t] = type_counts.get(t, 0) + 1
162
+ return {
163
+ "field_count": len(fields_list),
164
+ "field_types": type_counts,
165
+ "has_category": "category" in type_counts,
166
+ }
167
+
168
+
169
  async def run_agent_map(api_key, file, query, effort_label, fields_list, session_id):
170
  if not api_key:
171
  raise gr.Error("Please enter your everyrow API key.")
172
  if file is None:
173
+ _track_validation_error(api_key, session_id, "No file uploaded")
174
  raise gr.Error("Please upload a CSV file.")
175
  if not query.strip():
176
+ _track_validation_error(api_key, session_id, "Empty query")
177
  raise gr.Error("Please enter a research query.")
178
 
179
  os.environ["EVERYROW_API_KEY"] = api_key
180
 
181
  df = pd.read_csv(file)
182
  if df.empty:
183
+ _track_validation_error(api_key, session_id, "Empty CSV")
184
  raise gr.Error("The uploaded CSV is empty.")
185
 
186
  effort_level = EFFORT_LEVELS[effort_label]
 
189
  try:
190
  response_model = build_response_model(fields_list)
191
  except ValueError as e:
192
+ _track_validation_error(api_key, session_id, str(e))
193
  raise gr.Error(str(e))
194
 
195
  kwargs = dict(task=query, input=df, effort_level=effort_level)
 
205
  "effort_level": effort_label,
206
  "row_count": len(df),
207
  "column_count": len(df.columns),
208
+ **_schema_properties(fields_list),
209
  "app": "everyrow-research-space",
210
  },
211
  )
212
 
213
+ t0 = time.time()
214
  result = await agent_map(**kwargs)
215
+ duration_s = round(time.time() - t0, 2)
216
 
217
  if result.error:
218
  if _POSTHOG_ENABLED:
219
  posthog.capture(
220
  distinct_id=distinct_id,
221
  event="agent_map_failed",
222
+ properties={
223
+ "$session_id": session_id,
224
+ "error": str(result.error),
225
+ "duration_s": duration_s,
226
+ "app": "everyrow-research-space",
227
+ },
228
  )
229
  raise gr.Error(f"agent_map failed: {result.error}")
230
 
 
236
  "$session_id": session_id,
237
  "output_rows": len(result.data),
238
  "output_columns": len(result.data.columns),
239
+ "duration_s": duration_s,
240
  "app": "everyrow-research-space",
241
  },
242
  )
 
294
  return result
295
 
296
 
297
+ _CSS = ".error-box { background: #fee; border: 1px solid #c00; border-radius: 8px; padding: 12px; color: #900; }"
298
+
299
  with gr.Blocks(
300
  title="everyrow annotate – AI Data Annotation & Web Research",
 
 
301
  ) as demo:
302
  gr.Markdown(
303
  """
 
578
  )
579
 
580
  if __name__ == "__main__":
581
+ demo.launch(css=_CSS, head=POSTHOG_HEAD)