Gintarė Zokaitytė committed on
Commit
c4ef01c
·
1 Parent(s): 1a492df

Cache validation logic fix

Browse files
Files changed (1) hide show
  1. app.py +38 -3
app.py CHANGED
@@ -103,8 +103,12 @@ def fetch_project_data(proj, url, headers):
103
 
104
 
105
  @st.cache_data(ttl=300)
106
- def load_data():
107
- """Load annotation data from Label Studio with disk cache."""
 
 
 
 
108
  try:
109
  url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
110
  key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
@@ -191,6 +195,36 @@ def load_data():
191
  return df
192
 
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  def anonymize(name):
195
  """Convert '26 [Name Lastname]' to 'N.L. (26)'"""
196
  if name == "Others":
@@ -209,7 +243,8 @@ st.markdown("---")
209
 
210
  # Load data
211
  with st.spinner("Loading..."):
212
- df = load_data()
 
213
 
214
  # Overview metrics
215
  total = df[df["is_goal_state"]]["words"].sum()
 
103
 
104
 
105
  @st.cache_data(ttl=300)
106
+ def load_data(projects_hash):
107
+ """Load annotation data from Label Studio with disk cache.
108
+
109
+ Args:
110
+ projects_hash: Hash of project states to invalidate Streamlit cache when projects change
111
+ """
112
  try:
113
  url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
114
  key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
 
195
  return df
196
 
197
 
198
def get_projects_hash():
    """Fetch projects and return a hash of their states for cache invalidation.

    The fingerprint is built from each project's ``id``, ``task_number`` and
    ``num_tasks_with_annotations`` as returned by ``GET {url}/api/projects``.
    Projects are sorted by id first, so the result does not depend on the
    order in which the API lists them.

    Returns:
        The hex MD5 digest of the combined project states, or the literal
        string ``"no-credentials"`` when the URL or API key is not configured.

    Raises:
        requests.HTTPError: If the API responds with an error status
            (raised by ``raise_for_status``). Connection errors and timeouts
            from ``requests.get`` also propagate to the caller.
    """
    import hashlib

    try:
        url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
        key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
    except (KeyError, FileNotFoundError, AttributeError):
        # Secrets are unavailable; fall back to environment variables only.
        url = os.getenv("LABEL_STUDIO_URL", "").rstrip("/")
        key = os.getenv("LABEL_STUDIO_API_KEY", "")

    if not url or not key:
        return "no-credentials"

    headers = {"Authorization": f"Token {key}"}
    resp = requests.get(f"{url}/api/projects", headers=headers, timeout=30)
    resp.raise_for_status()

    # Tolerate a bare JSON list as well as the {"results": [...]} envelope,
    # instead of failing with AttributeError on ``.get``.
    payload = resp.json()
    projects = payload.get("results", []) if isinstance(payload, dict) else payload
    # NOTE(review): only the projects contained in this single response are
    # hashed; if the endpoint paginates, later pages are ignored -- confirm.

    # Create hash from project states (id, task_number, num_tasks_with_annotations).
    # Sorting by id keeps the hash stable when only the listing order changes.
    state_string = "".join(
        f"{proj['id']}:{proj.get('task_number', 0)}:{proj.get('num_tasks_with_annotations', 0)};"
        for proj in sorted(projects, key=lambda proj: proj["id"])
    )

    # MD5 serves purely as a cheap change fingerprint here, not for security.
    return hashlib.md5(state_string.encode()).hexdigest()
226
+
227
+
228
  def anonymize(name):
229
  """Convert '26 [Name Lastname]' to 'N.L. (26)'"""
230
  if name == "Others":
 
243
 
244
  # Load data
245
  with st.spinner("Loading..."):
246
+ projects_hash = get_projects_hash()
247
+ df = load_data(projects_hash)
248
 
249
  # Overview metrics
250
  total = df[df["is_goal_state"]]["words"].sum()