Gintarė Zokaitytė committed on
Commit ·
c4ef01c
1
Parent(s): 1a492df
Cache validation logic fix
Browse files
app.py
CHANGED
|
@@ -103,8 +103,12 @@ def fetch_project_data(proj, url, headers):
|
|
| 103 |
|
| 104 |
|
| 105 |
@st.cache_data(ttl=300)
|
| 106 |
-
def load_data():
|
| 107 |
-
"""Load annotation data from Label Studio with disk cache.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
try:
|
| 109 |
url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
|
| 110 |
key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
|
|
@@ -191,6 +195,36 @@ def load_data():
|
|
| 191 |
return df
|
| 192 |
|
| 193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
def anonymize(name):
|
| 195 |
"""Convert '26 [Name Lastname]' to 'N.L. (26)'"""
|
| 196 |
if name == "Others":
|
|
@@ -209,7 +243,8 @@ st.markdown("---")
|
|
| 209 |
|
| 210 |
# Load data
|
| 211 |
with st.spinner("Loading..."):
|
| 212 |
-
|
|
|
|
| 213 |
|
| 214 |
# Overview metrics
|
| 215 |
total = df[df["is_goal_state"]]["words"].sum()
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
@st.cache_data(ttl=300)
|
| 106 |
+
def load_data(projects_hash):
|
| 107 |
+
"""Load annotation data from Label Studio with disk cache.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
projects_hash: Hash of project states to invalidate Streamlit cache when projects change
|
| 111 |
+
"""
|
| 112 |
try:
|
| 113 |
url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
|
| 114 |
key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
|
|
|
|
| 195 |
return df
|
| 196 |
|
| 197 |
|
| 198 |
+
def get_projects_hash():
    """Return a fingerprint of the Label Studio project states.

    The fingerprint is an MD5 hex digest over ``id:task_number:
    num_tasks_with_annotations;`` for every project returned by
    ``GET {url}/api/projects``. It is meant to be passed to ``load_data`` so
    that the Streamlit cache key changes whenever one of those counters
    changes.

    Returns:
        The hex digest string, or the literal ``"no-credentials"`` when the
        Label Studio URL or API key is not configured.

    Raises:
        requests.HTTPError: If the projects endpoint answers with an error
            status (propagated from ``raise_for_status``). Other ``requests``
            errors (timeout, connection failure) propagate as well.
    """
    import hashlib

    # Prefer Streamlit secrets; fall back to plain environment variables when
    # the secrets lookup itself fails.
    try:
        url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
        key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
    except (KeyError, FileNotFoundError, AttributeError):
        url = os.getenv("LABEL_STUDIO_URL", "").rstrip("/")
        key = os.getenv("LABEL_STUDIO_API_KEY", "")

    if not url or not key:
        return "no-credentials"

    headers = {"Authorization": f"Token {key}"}
    resp = requests.get(f"{url}/api/projects", headers=headers, timeout=30)
    resp.raise_for_status()

    # The endpoint normally wraps projects in {"results": [...]}; accept a bare
    # list too instead of failing with AttributeError on ``.get``.
    # NOTE(review): only the first response page is read here - confirm the
    # server's page size covers all projects, or add pagination.
    payload = resp.json()
    projects = payload.get("results", []) if isinstance(payload, dict) else payload

    # Sort by id so the fingerprint depends only on the project states, not on
    # the order in which the API happens to list them.
    state_string = "".join(
        f"{proj['id']}:{proj.get('task_number', 0)}:{proj.get('num_tasks_with_annotations', 0)};"
        for proj in sorted(projects, key=lambda proj: proj["id"])
    )

    # MD5 is used purely as a cache key here, not for security.
    return hashlib.md5(state_string.encode()).hexdigest()
|
| 226 |
+
|
| 227 |
+
|
| 228 |
def anonymize(name):
|
| 229 |
"""Convert '26 [Name Lastname]' to 'N.L. (26)'"""
|
| 230 |
if name == "Others":
|
|
|
|
| 243 |
|
| 244 |
# Load data
|
| 245 |
with st.spinner("Loading..."):
|
| 246 |
+
projects_hash = get_projects_hash()
|
| 247 |
+
df = load_data(projects_hash)
|
| 248 |
|
| 249 |
# Overview metrics
|
| 250 |
total = df[df["is_goal_state"]]["words"].sum()
|