Gintarė Zokaitytė committed on
Commit
974c830
·
1 Parent(s): b0d64b7

update app

Browse files
Files changed (1) hide show
  1. app.py +78 -106
app.py CHANGED
@@ -1,5 +1,3 @@
1
- """Annotation Progress Dashboard - Simple & Elegant"""
2
-
3
  import re
4
  import os
5
  import pickle
@@ -10,10 +8,6 @@ import pandas as pd
10
  import plotly.graph_objects as go
11
  import requests
12
 
13
- # =============================================================================
14
- # Configuration
15
- # =============================================================================
16
-
17
  GOAL_WORDS = 2_200_000
18
  CATEGORY_GOAL = 1_100_000
19
 
@@ -33,17 +27,9 @@ TEAM_COLORS = {
33
  # Cache file location (persists between runs)
34
  CACHE_FILE = Path(".cache.pkl")
35
 
36
- # =============================================================================
37
- # Setup
38
- # =============================================================================
39
-
40
  st.set_page_config(page_title="Annotation Progress", page_icon="📊", layout="wide")
41
 
42
 
43
- # =============================================================================
44
- # Data Loading
45
- # =============================================================================
46
-
47
  def fetch_project_data(proj, url, headers):
48
  """Fetch data from one project (for parallel execution)."""
49
  pid, name, task_count = proj["id"], proj.get("title", f"Project {proj['id']}"), proj.get("task_number", 0)
@@ -52,12 +38,7 @@ def fetch_project_data(proj, url, headers):
52
  rows = []
53
  page = 1
54
  while True:
55
- resp = requests.get(
56
- f"{url}/api/projects/{pid}/tasks",
57
- headers=headers,
58
- params={"page": page, "page_size": 100},
59
- timeout=30
60
- )
61
  resp.raise_for_status()
62
  data = resp.json()
63
  tasks = data if isinstance(data, list) else data.get("tasks", [])
@@ -72,11 +53,17 @@ def fetch_project_data(proj, url, headers):
72
 
73
  annots = [a for a in task.get("annotations", []) if not a.get("was_cancelled")]
74
  if not annots:
75
- rows.append({
76
- "project_id": pid, "project": name, "project_group": group,
77
- "date": None, "state": "Not Annotated",
78
- "words": int(words), "category": category
79
- })
 
 
 
 
 
 
80
  continue
81
 
82
  ann = annots[0]
@@ -98,11 +85,9 @@ def fetch_project_data(proj, url, headers):
98
  else:
99
  state = "Acceptable"
100
 
101
- rows.append({
102
- "project_id": pid, "project": name, "project_group": group,
103
- "date": date, "state": state,
104
- "words": int(words), "category": category
105
- })
106
 
107
  if isinstance(data, list) and len(data) < 100:
108
  break
@@ -188,10 +173,6 @@ def load_data():
188
  return df
189
 
190
 
191
- # =============================================================================
192
- # Helper Functions
193
- # =============================================================================
194
-
195
  def anonymize(name):
196
  """Convert '26 [Name Lastname]' to 'N.L. (26)'"""
197
  if name == "Others":
@@ -205,10 +186,6 @@ def anonymize(name):
205
  return name
206
 
207
 
208
- # =============================================================================
209
- # Main App
210
- # =============================================================================
211
-
212
  st.title("📊 Annotation Progress Dashboard")
213
  st.markdown("---")
214
 
@@ -223,7 +200,7 @@ progress = total / GOAL_WORDS * 100
223
 
224
  col1, col2 = st.columns(2)
225
  col1.metric("Progress toward 2.2M", f"{total:,}", f"{progress:.1f}%")
226
- col2.metric("Remaining", f"{remaining:,}", f"{100-progress:.1f}%")
227
 
228
  st.markdown("---")
229
 
@@ -239,15 +216,10 @@ with tab1:
239
  # Filter data - use GOAL_STATES to match progress metrics
240
  df_week = df[df["is_goal_state"] & df["date"].notna()].copy()
241
  df_week["week_start"] = df_week["date"] - pd.to_timedelta(df_week["date"].dt.dayofweek, unit="d")
242
- df_week["member"] = df_week.apply(
243
- lambda r: anonymize(r["project"]) if r["project_group"] == "Our Team" else "Others",
244
- axis=1
245
- )
246
 
247
  # Weekly pivot (all data)
248
- weekly_all = df_week.pivot_table(
249
- index="week_start", columns="member", values="words", aggfunc="sum", fill_value=0
250
- ).astype(int)
251
 
252
  # Split into before and after cutoff
253
  weekly_before = weekly_all[weekly_all.index < cutoff_date]
@@ -275,10 +247,7 @@ with tab1:
275
 
276
  # Format weekly data for display
277
  display = weekly_after.reset_index()
278
- display["Week"] = (
279
- display["week_start"].dt.strftime("%Y-%m-%d") + " - " +
280
- (display["week_start"] + pd.Timedelta(days=6)).dt.strftime("%Y-%m-%d")
281
- )
282
  display = display.drop("week_start", axis=1)
283
  display = display[["Week"] + list(totals.index) + ["Total"]]
284
 
@@ -331,18 +300,20 @@ with tab2:
331
  total_fixing = mok_fixing + zin_fixing
332
  total_all = total_ready + total_fixing
333
 
334
- cat_df = pd.DataFrame({
335
- "Category": ["mokslinis", "ziniasklaida", "TOTAL"],
336
- "Ready": [f"{mok_ready:,}", f"{zin_ready:,}", f"{total_ready:,}"],
337
- "Needs Fixing": [f"{mok_fixing:,}", f"{zin_fixing:,}", f"{total_fixing:,}"],
338
- "Total": [f"{mok_total:,}", f"{zin_total:,}", f"{total_all:,}"],
339
- "Goal": [f"{CATEGORY_GOAL:,}", f"{CATEGORY_GOAL:,}", f"{GOAL_WORDS:,}"],
340
- "Progress": [
341
- f"{mok_total/CATEGORY_GOAL*100:.1f}%",
342
- f"{zin_total/CATEGORY_GOAL*100:.1f}%",
343
- f"{total_all/GOAL_WORDS*100:.1f}%"
344
- ]
345
- })
 
 
346
  st.dataframe(cat_df, hide_index=True, use_container_width=True)
347
 
348
  st.markdown("---")
@@ -350,10 +321,7 @@ with tab2:
350
 
351
  # Cumulative data
352
  df_cum = df[df["is_goal_state"] & df["date"].notna()].copy()
353
- df_cum["member"] = df_cum.apply(
354
- lambda r: anonymize(r["project"]) if r["project_group"] == "Our Team" else "Others",
355
- axis=1
356
- )
357
 
358
  daily = df_cum.groupby(["date", "member"])["words"].sum().reset_index()
359
  pivot = daily.pivot_table(index="date", columns="member", values="words", fill_value=0)
@@ -380,61 +348,68 @@ with tab2:
380
  fig = go.Figure()
381
 
382
  # Goal lines
383
- fig.add_hline(y=1_100_000, line_dash="dot", line_color="orange",
384
- annotation_text="Midpoint: 1.1M", annotation_position="top left")
385
- fig.add_hline(y=GOAL_WORDS, line_dash="dot", line_color="red",
386
- annotation_text="Goal: 2.2M", annotation_position="top left")
387
 
388
  # Members
389
  members = [c for c in cumulative.columns if c not in ["Total", "Others"]]
390
  members = sorted(members, key=lambda x: cumulative[x].iloc[-1], reverse=True)
391
 
392
  if "Others" in cumulative.columns:
393
- fig.add_trace(go.Scatter(
394
- x=cumulative.index, y=cumulative["Others"],
395
- name=f"Others: {cumulative['Others'].iloc[-1]:,.0f}",
396
- mode="lines", line=dict(width=2, color="#7f8c8d")
397
- ))
 
 
 
 
398
 
399
  for m in members:
400
  color = TEAM_COLORS.get(m, "#34495e")
401
- fig.add_trace(go.Scatter(
402
- x=cumulative.index, y=cumulative[m],
403
- name=f"{m}: {cumulative[m].iloc[-1]:,.0f}",
404
- mode="lines", line=dict(width=2, color=color)
405
- ))
406
 
407
  # Total
408
- fig.add_trace(go.Scatter(
409
- x=cumulative.index, y=cumulative["Total"],
410
- name=f"Total: {cumulative['Total'].iloc[-1]:,.0f}",
411
- mode="lines", line=dict(width=3, color="#d4af37"),
412
- fill="tozeroy", fillcolor="rgba(212, 175, 55, 0.1)"
413
- ))
 
 
 
 
 
414
 
415
  # Projection
416
  if completion:
417
  proj_dates = pd.date_range(last_date, completion, freq="D")
418
  proj_vals = current + rate * (proj_dates - last_date).days
419
- fig.add_trace(go.Scatter(
420
- x=proj_dates, y=proj_vals,
421
- name=f"Projection ({int(weekly_rate):,}/wk)",
422
- mode="lines", line=dict(width=3, color="#d4af37", dash="dot")
423
- ))
424
- fig.add_trace(go.Scatter(
425
- x=[completion], y=[GOAL_WORDS],
426
- mode="markers+text", marker=dict(size=14, color="#d4af37", symbol="diamond"),
427
- text=[completion.strftime("%b %d")], textposition="top center",
428
- showlegend=False
429
- ))
 
 
 
 
 
430
  title = f"Cumulative Progress → Est. {completion.strftime('%B %d, %Y')}"
431
  else:
432
  title = "Cumulative Progress"
433
 
434
- fig.update_layout(
435
- title=title, xaxis_title="Date", yaxis_title="Cumulative Words",
436
- height=600, hovermode="x unified", template="plotly_white"
437
- )
438
  fig.update_yaxes(tickformat=".2s")
439
 
440
  st.plotly_chart(fig, use_container_width=True)
@@ -444,12 +419,9 @@ with tab2:
444
  st.markdown("### Pacing Estimates")
445
  c1, c2, c3 = st.columns(3)
446
  c1.metric("Per Week Rate", f"{int(weekly_rate):,} words")
447
- c2.metric("Weeks Remaining", f"{days_left/7:.1f} weeks")
448
  c3.metric("Est. Completion", completion.strftime("%Y-%m-%d"))
449
 
450
  # Footer
451
  st.markdown("---")
452
- st.caption(
453
- f"Updated: {pd.Timestamp.now(tz='Europe/Vilnius').strftime('%Y-%m-%d %H:%M:%S')} | "
454
- "Auto-refresh: 5 min | Press 'R' to refresh"
455
- )
 
 
 
1
  import re
2
  import os
3
  import pickle
 
8
  import plotly.graph_objects as go
9
  import requests
10
 
 
 
 
 
11
  GOAL_WORDS = 2_200_000
12
  CATEGORY_GOAL = 1_100_000
13
 
 
27
  # Cache file location (persists between runs)
28
  CACHE_FILE = Path(".cache.pkl")
29
 
 
 
 
 
30
  st.set_page_config(page_title="Annotation Progress", page_icon="📊", layout="wide")
31
 
32
 
 
 
 
 
33
  def fetch_project_data(proj, url, headers):
34
  """Fetch data from one project (for parallel execution)."""
35
  pid, name, task_count = proj["id"], proj.get("title", f"Project {proj['id']}"), proj.get("task_number", 0)
 
38
  rows = []
39
  page = 1
40
  while True:
41
+ resp = requests.get(f"{url}/api/projects/{pid}/tasks", headers=headers, params={"page": page, "page_size": 100}, timeout=30)
 
 
 
 
 
42
  resp.raise_for_status()
43
  data = resp.json()
44
  tasks = data if isinstance(data, list) else data.get("tasks", [])
 
53
 
54
  annots = [a for a in task.get("annotations", []) if not a.get("was_cancelled")]
55
  if not annots:
56
+ rows.append(
57
+ {
58
+ "project_id": pid,
59
+ "project": name,
60
+ "project_group": group,
61
+ "date": None,
62
+ "state": "Not Annotated",
63
+ "words": int(words),
64
+ "category": category,
65
+ }
66
+ )
67
  continue
68
 
69
  ann = annots[0]
 
85
  else:
86
  state = "Acceptable"
87
 
88
+ rows.append(
89
+ {"project_id": pid, "project": name, "project_group": group, "date": date, "state": state, "words": int(words), "category": category}
90
+ )
 
 
91
 
92
  if isinstance(data, list) and len(data) < 100:
93
  break
 
173
  return df
174
 
175
 
 
 
 
 
176
  def anonymize(name):
177
  """Convert '26 [Name Lastname]' to 'N.L. (26)'"""
178
  if name == "Others":
 
186
  return name
187
 
188
 
 
 
 
 
189
  st.title("📊 Annotation Progress Dashboard")
190
  st.markdown("---")
191
 
 
200
 
201
  col1, col2 = st.columns(2)
202
  col1.metric("Progress toward 2.2M", f"{total:,}", f"{progress:.1f}%")
203
+ col2.metric("Remaining", f"{remaining:,}", f"{100 - progress:.1f}%")
204
 
205
  st.markdown("---")
206
 
 
216
  # Filter data - use GOAL_STATES to match progress metrics
217
  df_week = df[df["is_goal_state"] & df["date"].notna()].copy()
218
  df_week["week_start"] = df_week["date"] - pd.to_timedelta(df_week["date"].dt.dayofweek, unit="d")
219
+ df_week["member"] = df_week.apply(lambda r: anonymize(r["project"]) if r["project_group"] == "Our Team" else "Others", axis=1)
 
 
 
220
 
221
  # Weekly pivot (all data)
222
+ weekly_all = df_week.pivot_table(index="week_start", columns="member", values="words", aggfunc="sum", fill_value=0).astype(int)
 
 
223
 
224
  # Split into before and after cutoff
225
  weekly_before = weekly_all[weekly_all.index < cutoff_date]
 
247
 
248
  # Format weekly data for display
249
  display = weekly_after.reset_index()
250
+ display["Week"] = display["week_start"].dt.strftime("%Y-%m-%d") + " - " + (display["week_start"] + pd.Timedelta(days=6)).dt.strftime("%Y-%m-%d")
 
 
 
251
  display = display.drop("week_start", axis=1)
252
  display = display[["Week"] + list(totals.index) + ["Total"]]
253
 
 
300
  total_fixing = mok_fixing + zin_fixing
301
  total_all = total_ready + total_fixing
302
 
303
+ cat_df = pd.DataFrame(
304
+ {
305
+ "Category": ["mokslinis", "ziniasklaida", "TOTAL"],
306
+ "Ready": [f"{mok_ready:,}", f"{zin_ready:,}", f"{total_ready:,}"],
307
+ "Needs Fixing": [f"{mok_fixing:,}", f"{zin_fixing:,}", f"{total_fixing:,}"],
308
+ "Total": [f"{mok_total:,}", f"{zin_total:,}", f"{total_all:,}"],
309
+ "Goal": [f"{CATEGORY_GOAL:,}", f"{CATEGORY_GOAL:,}", f"{GOAL_WORDS:,}"],
310
+ "Progress": [
311
+ f"{mok_total / CATEGORY_GOAL * 100:.1f}%",
312
+ f"{zin_total / CATEGORY_GOAL * 100:.1f}%",
313
+ f"{total_all / GOAL_WORDS * 100:.1f}%",
314
+ ],
315
+ }
316
+ )
317
  st.dataframe(cat_df, hide_index=True, use_container_width=True)
318
 
319
  st.markdown("---")
 
321
 
322
  # Cumulative data
323
  df_cum = df[df["is_goal_state"] & df["date"].notna()].copy()
324
+ df_cum["member"] = df_cum.apply(lambda r: anonymize(r["project"]) if r["project_group"] == "Our Team" else "Others", axis=1)
 
 
 
325
 
326
  daily = df_cum.groupby(["date", "member"])["words"].sum().reset_index()
327
  pivot = daily.pivot_table(index="date", columns="member", values="words", fill_value=0)
 
348
  fig = go.Figure()
349
 
350
  # Goal lines
351
+ fig.add_hline(y=1_100_000, line_dash="dot", line_color="orange", annotation_text="Midpoint: 1.1M", annotation_position="top left")
352
+ fig.add_hline(y=GOAL_WORDS, line_dash="dot", line_color="red", annotation_text="Goal: 2.2M", annotation_position="top left")
 
 
353
 
354
  # Members
355
  members = [c for c in cumulative.columns if c not in ["Total", "Others"]]
356
  members = sorted(members, key=lambda x: cumulative[x].iloc[-1], reverse=True)
357
 
358
  if "Others" in cumulative.columns:
359
+ fig.add_trace(
360
+ go.Scatter(
361
+ x=cumulative.index,
362
+ y=cumulative["Others"],
363
+ name=f"Others: {cumulative['Others'].iloc[-1]:,.0f}",
364
+ mode="lines",
365
+ line=dict(width=2, color="#7f8c8d"),
366
+ )
367
+ )
368
 
369
  for m in members:
370
  color = TEAM_COLORS.get(m, "#34495e")
371
+ fig.add_trace(
372
+ go.Scatter(x=cumulative.index, y=cumulative[m], name=f"{m}: {cumulative[m].iloc[-1]:,.0f}", mode="lines", line=dict(width=2, color=color))
373
+ )
 
 
374
 
375
  # Total
376
+ fig.add_trace(
377
+ go.Scatter(
378
+ x=cumulative.index,
379
+ y=cumulative["Total"],
380
+ name=f"Total: {cumulative['Total'].iloc[-1]:,.0f}",
381
+ mode="lines",
382
+ line=dict(width=3, color="#d4af37"),
383
+ fill="tozeroy",
384
+ fillcolor="rgba(212, 175, 55, 0.1)",
385
+ )
386
+ )
387
 
388
  # Projection
389
  if completion:
390
  proj_dates = pd.date_range(last_date, completion, freq="D")
391
  proj_vals = current + rate * (proj_dates - last_date).days
392
+ fig.add_trace(
393
+ go.Scatter(
394
+ x=proj_dates, y=proj_vals, name=f"Projection ({int(weekly_rate):,}/wk)", mode="lines", line=dict(width=3, color="#d4af37", dash="dot")
395
+ )
396
+ )
397
+ fig.add_trace(
398
+ go.Scatter(
399
+ x=[completion],
400
+ y=[GOAL_WORDS],
401
+ mode="markers+text",
402
+ marker=dict(size=14, color="#d4af37", symbol="diamond"),
403
+ text=[completion.strftime("%b %d")],
404
+ textposition="top center",
405
+ showlegend=False,
406
+ )
407
+ )
408
  title = f"Cumulative Progress → Est. {completion.strftime('%B %d, %Y')}"
409
  else:
410
  title = "Cumulative Progress"
411
 
412
+ fig.update_layout(title=title, xaxis_title="Date", yaxis_title="Cumulative Words", height=600, hovermode="x unified", template="plotly_white")
 
 
 
413
  fig.update_yaxes(tickformat=".2s")
414
 
415
  st.plotly_chart(fig, use_container_width=True)
 
419
  st.markdown("### Pacing Estimates")
420
  c1, c2, c3 = st.columns(3)
421
  c1.metric("Per Week Rate", f"{int(weekly_rate):,} words")
422
+ c2.metric("Weeks Remaining", f"{days_left / 7:.1f} weeks")
423
  c3.metric("Est. Completion", completion.strftime("%Y-%m-%d"))
424
 
425
  # Footer
426
  st.markdown("---")
427
+ st.caption(f"Updated: {pd.Timestamp.now(tz='Europe/Vilnius').strftime('%Y-%m-%d %H:%M:%S')} | Auto-refresh: 5 min | Press 'R' to refresh")