MichaelDeutges committed on
Commit
80f2682
·
verified ·
1 Parent(s): 9386c60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -48
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import glob
3
  from pathlib import Path
@@ -9,57 +10,54 @@ from PIL import Image
9
  from filelock import FileLock
10
  from huggingface_hub import HfApi
11
 
12
- # ---------------- Config ----------------
13
- IMAGE_DIR = os.getenv("IMAGE_DIR", "images")
 
 
 
14
  SUPPORTED_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp")
15
 
16
- # pick a writable place for labels.csv
17
- DATA_DIR = "/data"
18
- DEFAULT_LABELS = (
19
- f"{DATA_DIR}/labels.csv"
20
- if os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK)
21
- else "labels.csv"
22
- )
23
- LABELS_CSV = os.getenv("LABELS_CSV", DEFAULT_LABELS)
24
-
25
- # Label names
26
  LABEL_NONBLAST = os.getenv("LABEL_NONBLAST", "NON-BLAST")
27
  LABEL_BLAST = os.getenv("LABEL_BLAST", "BLAST")
28
  LABEL_UNCERTAIN = os.getenv("LABEL_UNCERTAIN", "UNCERTAIN")
29
  LABEL_TRASH = os.getenv("LABEL_TRASH", "LOW_QUALITY")
30
 
31
- # (Optional) Hugging Face sync
32
- HF_TOKEN = os.getenv("HF_TOKEN") # set as a Secret in Space settings
33
- SPACE_ID = os.getenv("SPACE_ID", "MichaelDeutges/LabelingTest") # set to your Space id
 
 
34
  api = HfApi()
35
 
36
- st.set_page_config(page_title="Two-Button Image Labeler", layout="centered")
37
 
38
- # --------------- Helpers ----------------
39
- def list_images():
40
- paths = []
 
 
41
  for p in glob.glob(os.path.join(IMAGE_DIR, "**", "*"), recursive=True):
42
  if p.lower().endswith(SUPPORTED_EXTS):
43
  paths.append(p)
44
  return sorted(paths)
45
 
46
- def read_labels():
47
  if os.path.exists(LABELS_CSV):
48
  try:
49
  return pd.read_csv(LABELS_CSV)
50
  except Exception:
51
- return pd.DataFrame(columns=["image", "label", "annotator", "timestamp"])
52
  return pd.DataFrame(columns=["image", "label", "annotator", "timestamp"])
53
 
54
- def rel_to_image_dir(p: str):
55
  try:
56
  return str(Path(p).resolve().relative_to(Path(IMAGE_DIR).resolve()))
57
  except Exception:
58
  return p
59
 
60
- def write_label(image_path: str, label: str, annotator: str):
61
- """Append one row to labels.csv and (optionally) push to the Space repo."""
62
- # only create a directory if LABELS_CSV actually has one, and it's not already there
63
  dirpath = os.path.dirname(LABELS_CSV)
64
  if dirpath and not os.path.isdir(dirpath):
65
  os.makedirs(dirpath, exist_ok=True)
@@ -68,35 +66,65 @@ def write_label(image_path: str, label: str, annotator: str):
68
  "image": rel_to_image_dir(image_path),
69
  "label": label,
70
  "annotator": annotator,
71
- "timestamp": datetime.now(timezone.utc).isoformat()
72
  }
73
  with FileLock(LABELS_CSV + ".lock", timeout=10):
74
  exists = os.path.exists(LABELS_CSV)
75
- pd.DataFrame([record]).to_csv(LABELS_CSV, mode="a", header=not exists, index=False)
 
 
76
 
77
- # best-effort sync to Hugging Face repo (safe to skip if no token/space id)
78
- if HF_TOKEN and SPACE_ID:
79
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  api.upload_file(
81
  path_or_fileobj=LABELS_CSV,
82
  path_in_repo="labels.csv",
83
  repo_id=SPACE_ID,
84
  repo_type="space",
85
- token=HF_TOKEN
86
  )
87
- except Exception as e:
88
- print("Upload failed:", e)
 
 
 
 
 
89
 
90
- # --------------- UI ---------------------
 
 
91
  st.title("🏷️ Blast Cell Labeling App")
92
  st.write(
93
  "Enter your name, click **Start**, then classify each image. "
94
- "Use **UNCERTAIN** if you’re not sure. "
95
- "Use the 🗑️ icon for **low quality** images."
96
  )
97
 
98
  with st.sidebar:
99
- # remember last annotator
100
  default_name = st.session_state.get("annotator", "")
101
  annotator = st.text_input("Your name*", value=default_name, placeholder="e.g., Dr. Smith")
102
 
@@ -112,18 +140,28 @@ with st.sidebar:
112
  help="Continue where you left off, based on your name in labels.csv",
113
  )
114
 
115
- # session state
 
 
 
 
116
  st.session_state.setdefault("order", [])
117
  st.session_state.setdefault("idx", 0)
118
  st.session_state.setdefault("total", 0)
119
  st.session_state.setdefault("started", False)
 
 
 
 
 
 
 
 
120
 
121
- # reset
122
  if reset_btn:
123
  st.session_state.update({"started": False, "order": [], "idx": 0, "total": 0})
124
  st.rerun()
125
 
126
- # start
127
  if start_btn:
128
  if not annotator.strip():
129
  st.sidebar.error("Please enter your name.")
@@ -132,7 +170,7 @@ if start_btn:
132
  imgs = list_images()
133
  labels_df = read_labels()
134
 
135
- if continue_by_name and len(labels_df) > 0:
136
  already = set(
137
  labels_df.query("annotator == @annotator")["image"].astype(str).tolist()
138
  )
@@ -140,23 +178,28 @@ if start_btn:
140
  imgs = [p for p, r in zip(imgs, rel_imgs) if r not in already]
141
 
142
  if not imgs:
143
- st.warning("No images found (or all labeled). Upload to the `images/` folder.")
144
  else:
145
- st.session_state.update({"order": imgs, "idx": 0, "total": len(imgs), "started": True})
 
 
146
 
147
- # main panel
 
 
148
  if not st.session_state.started:
149
  st.info("Fill your name on the left and press **Start**.")
150
  else:
151
  idx = st.session_state.idx
152
  total = st.session_state.total
 
153
  if idx >= total:
154
  st.success("All done 🎉 Thank you!")
155
  else:
156
  current_image = st.session_state.order[idx]
157
  st.caption(f"{idx+1} / {total}")
158
 
159
- # top-right trash
160
  spacer, trash_col = st.columns([9, 1])
161
  with trash_col:
162
  if st.button("🗑️", help=f"Mark as {LABEL_TRASH}", use_container_width=True):
@@ -164,7 +207,7 @@ else:
164
  st.session_state.idx += 1
165
  st.rerun()
166
 
167
- # image
168
  try:
169
  img = Image.open(current_image)
170
  if getattr(img, "n_frames", 1) > 1:
@@ -175,7 +218,7 @@ else:
175
  except Exception as e:
176
  st.warning(f"Could not display image: {current_image}\n{e}")
177
 
178
- # main classification row: NON-BLAST | UNCERTAIN | BLAST
179
  c_left, c_mid, c_right = st.columns([1, 1, 1])
180
 
181
  with c_left:
@@ -196,5 +239,13 @@ else:
196
  st.session_state.idx += 1
197
  st.rerun()
198
 
 
 
 
199
  st.divider()
200
- st.caption(f"Labels are saved to `{LABELS_CSV}` (and synced to this Space if HF_TOKEN is set).")
 
 
 
 
 
 
1
+ # app.py
2
  import os
3
  import glob
4
  from pathlib import Path
 
10
  from filelock import FileLock
11
  from huggingface_hub import HfApi
12
 
13
+ # =========================
14
+ # Config (via env variables)
15
+ # =========================
16
+ IMAGE_DIR = os.getenv("IMAGE_DIR", "images")
17
+ LABELS_CSV = os.getenv("LABELS_CSV", "labels.csv")
18
  SUPPORTED_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp")
19
 
20
+ # Button labels
 
 
 
 
 
 
 
 
 
21
  LABEL_NONBLAST = os.getenv("LABEL_NONBLAST", "NON-BLAST")
22
  LABEL_BLAST = os.getenv("LABEL_BLAST", "BLAST")
23
  LABEL_UNCERTAIN = os.getenv("LABEL_UNCERTAIN", "UNCERTAIN")
24
  LABEL_TRASH = os.getenv("LABEL_TRASH", "LOW_QUALITY")
25
 
26
+ # Optional Hub sync
27
+ HF_TOKEN = os.getenv("HF_TOKEN", "") # set in Space Secrets
28
+ SPACE_ID = os.getenv("SPACE_ID", "") # e.g. "org-or-user/your-space"
29
+ DATASET_REPO = os.getenv("DATASET_REPO", "") # e.g. "org-or-user/blast-labels" (recommended)
30
+
31
  api = HfApi()
32
 
33
+ st.set_page_config(page_title="Blast Cell Labeling", layout="centered")
34
 
35
+ # ==========
36
+ # Utilities
37
+ # ==========
38
def list_images() -> list[str]:
    """Return the sorted paths of all supported image files under IMAGE_DIR.

    The tree below IMAGE_DIR is searched recursively. A path is kept only when
    its lower-cased name ends with one of SUPPORTED_EXTS *and* it is a regular
    file, so a directory that merely looks like an image by name
    (e.g. ``scans.png/``) is never queued for labeling.

    Returns:
        Matching paths in ascending lexical order; an empty list when nothing
        matches or IMAGE_DIR does not exist.
    """
    pattern = os.path.join(IMAGE_DIR, "**", "*")
    return sorted(
        candidate
        for candidate in glob.glob(pattern, recursive=True)
        # str.endswith accepts the whole tuple of extensions in one call.
        if candidate.lower().endswith(SUPPORTED_EXTS) and os.path.isfile(candidate)
    )
44
 
45
def read_labels() -> pd.DataFrame:
    """Load the labels recorded so far from LABELS_CSV.

    Returns:
        The parsed CSV when the file exists and can be read. Otherwise an
        empty frame with the columns ``image``, ``label``, ``annotator`` and
        ``timestamp``. Read failures are deliberately swallowed (best effort).
    """
    columns = ["image", "label", "annotator", "timestamp"]

    if not os.path.exists(LABELS_CSV):
        return pd.DataFrame(columns=columns)

    try:
        loaded = pd.read_csv(LABELS_CSV)
    except Exception:
        # Unreadable or malformed file: behave as if nothing was labeled yet.
        return pd.DataFrame(columns=columns)
    return loaded
52
 
53
def rel_to_image_dir(p: str) -> str:
    """Express ``p`` relative to IMAGE_DIR.

    Both paths are resolved before comparison. If anything goes wrong, for
    example ``p`` does not lie under IMAGE_DIR, ``p`` is returned unchanged.
    """
    try:
        target = Path(p).resolve()
        root = Path(IMAGE_DIR).resolve()
        relative = target.relative_to(root)
    except Exception:
        return p
    else:
        return str(relative)
58
 
59
+ def write_label(image_path: str, label: str, annotator: str) -> None:
60
+ """Append one row to labels.csv (local only)."""
 
61
  dirpath = os.path.dirname(LABELS_CSV)
62
  if dirpath and not os.path.isdir(dirpath):
63
  os.makedirs(dirpath, exist_ok=True)
 
66
  "image": rel_to_image_dir(image_path),
67
  "label": label,
68
  "annotator": annotator,
69
+ "timestamp": datetime.now(timezone.utc).isoformat(),
70
  }
71
  with FileLock(LABELS_CSV + ".lock", timeout=10):
72
  exists = os.path.exists(LABELS_CSV)
73
+ pd.DataFrame([record]).to_csv(
74
+ LABELS_CSV, mode="a", header=not exists, index=False
75
+ )
76
 
77
+ st.session_state["unsynced"] = True # mark as needing sync
78
+
79
def sync_to_hub() -> tuple[bool, str]:
    """Upload the local labels file to the Hugging Face Hub on request.

    The destination is the dataset repo named by DATASET_REPO when that is
    set, otherwise the Space repo named by SPACE_ID. On success the session
    flag ``unsynced`` is cleared.

    Returns:
        ``(ok, message)``: ``ok`` is True only when the upload completed, and
        ``message`` is a human-readable status suitable for display.
    """
    # Cheap preconditions first: nothing to send, or no credentials.
    if not os.path.exists(LABELS_CSV):
        return False, "labels.csv not found — nothing to sync."
    if not HF_TOKEN:
        return False, "HF_TOKEN not set in Space secrets."

    if DATASET_REPO:
        # Recommended target: a dataset push does NOT restart the Space.
        repo_id, repo_type = DATASET_REPO, "dataset"
    elif SPACE_ID:
        # Fallback target: pushing to the Space repo can trigger a restart.
        repo_id, repo_type = SPACE_ID, "space"
    else:
        return False, "Set DATASET_REPO (preferred) or SPACE_ID to enable uploads."

    try:
        api.upload_file(
            path_or_fileobj=LABELS_CSV,
            path_in_repo="labels.csv",
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        st.session_state["unsynced"] = False
    except Exception as err:
        # Report the failure to the caller instead of crashing the app.
        return False, f"Sync failed: {err}"
    return True, "Synced labels.csv to the Hub."
116
 
117
+ # =================
118
+ # Sidebar / Header
119
+ # =================
120
  st.title("🏷️ Blast Cell Labeling App")
121
  st.write(
122
  "Enter your name, click **Start**, then classify each image. "
123
+ f"Use **{LABEL_UNCERTAIN}** if you’re not sure. "
124
+ f"Use the 🗑️ icon for **{LABEL_TRASH}** images."
125
  )
126
 
127
  with st.sidebar:
 
128
  default_name = st.session_state.get("annotator", "")
129
  annotator = st.text_input("Your name*", value=default_name, placeholder="e.g., Dr. Smith")
130
 
 
140
  help="Continue where you left off, based on your name in labels.csv",
141
  )
142
 
143
+ sync_now = st.button("📤 Sync to Hub", help="Upload labels.csv to the Hub")
144
+
145
+ # =====================
146
+ # Session state defaults
147
+ # =====================
148
  st.session_state.setdefault("order", [])
149
  st.session_state.setdefault("idx", 0)
150
  st.session_state.setdefault("total", 0)
151
  st.session_state.setdefault("started", False)
152
+ st.session_state.setdefault("unsynced", False)
153
+
154
+ # =========
155
+ # Actions
156
+ # =========
157
+ if sync_now:
158
+ ok, msg = sync_to_hub()
159
+ (st.sidebar.success if ok else st.sidebar.error)(msg)
160
 
 
161
  if reset_btn:
162
  st.session_state.update({"started": False, "order": [], "idx": 0, "total": 0})
163
  st.rerun()
164
 
 
165
  if start_btn:
166
  if not annotator.strip():
167
  st.sidebar.error("Please enter your name.")
 
170
  imgs = list_images()
171
  labels_df = read_labels()
172
 
173
+ if continue_by_name and not labels_df.empty:
174
  already = set(
175
  labels_df.query("annotator == @annotator")["image"].astype(str).tolist()
176
  )
 
178
  imgs = [p for p, r in zip(imgs, rel_imgs) if r not in already]
179
 
180
  if not imgs:
181
+ st.warning("No images found (or all labeled). Upload images to the `images/` folder.")
182
  else:
183
+ st.session_state.update(
184
+ {"order": imgs, "idx": 0, "total": len(imgs), "started": True}
185
+ )
186
 
187
+ # =========
188
+ # Main area
189
+ # =========
190
  if not st.session_state.started:
191
  st.info("Fill your name on the left and press **Start**.")
192
  else:
193
  idx = st.session_state.idx
194
  total = st.session_state.total
195
+
196
  if idx >= total:
197
  st.success("All done 🎉 Thank you!")
198
  else:
199
  current_image = st.session_state.order[idx]
200
  st.caption(f"{idx+1} / {total}")
201
 
202
+ # top-right trash button
203
  spacer, trash_col = st.columns([9, 1])
204
  with trash_col:
205
  if st.button("🗑️", help=f"Mark as {LABEL_TRASH}", use_container_width=True):
 
207
  st.session_state.idx += 1
208
  st.rerun()
209
 
210
+ # image display
211
  try:
212
  img = Image.open(current_image)
213
  if getattr(img, "n_frames", 1) > 1:
 
218
  except Exception as e:
219
  st.warning(f"Could not display image: {current_image}\n{e}")
220
 
221
+ # three main buttons: NON-BLAST | UNCERTAIN | BLAST
222
  c_left, c_mid, c_right = st.columns([1, 1, 1])
223
 
224
  with c_left:
 
239
  st.session_state.idx += 1
240
  st.rerun()
241
 
242
+ # ======
243
+ # Footer
244
+ # ======
245
  st.divider()
246
+ sync_note = " (unsynced changes)" if st.session_state.get("unsynced") else ""
247
+ target_repo = DATASET_REPO or SPACE_ID or "—"
248
+ st.caption(
249
+ f"Labels are saved locally to `{LABELS_CSV}`{sync_note}. "
250
+ f"Use **📤 Sync to Hub** to upload (target: `{target_repo}`)."
251
+ )