Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import glob
|
| 3 |
from pathlib import Path
|
|
@@ -9,57 +10,54 @@ from PIL import Image
|
|
| 9 |
from filelock import FileLock
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
| 14 |
SUPPORTED_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp")
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
DATA_DIR = "/data"
|
| 18 |
-
DEFAULT_LABELS = (
|
| 19 |
-
f"{DATA_DIR}/labels.csv"
|
| 20 |
-
if os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK)
|
| 21 |
-
else "labels.csv"
|
| 22 |
-
)
|
| 23 |
-
LABELS_CSV = os.getenv("LABELS_CSV", DEFAULT_LABELS)
|
| 24 |
-
|
| 25 |
-
# Label names
|
| 26 |
LABEL_NONBLAST = os.getenv("LABEL_NONBLAST", "NON-BLAST")
|
| 27 |
LABEL_BLAST = os.getenv("LABEL_BLAST", "BLAST")
|
| 28 |
LABEL_UNCERTAIN = os.getenv("LABEL_UNCERTAIN", "UNCERTAIN")
|
| 29 |
LABEL_TRASH = os.getenv("LABEL_TRASH", "LOW_QUALITY")
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
HF_TOKEN
|
| 33 |
-
SPACE_ID
|
|
|
|
|
|
|
| 34 |
api = HfApi()
|
| 35 |
|
| 36 |
-
st.set_page_config(page_title="
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
for p in glob.glob(os.path.join(IMAGE_DIR, "**", "*"), recursive=True):
|
| 42 |
if p.lower().endswith(SUPPORTED_EXTS):
|
| 43 |
paths.append(p)
|
| 44 |
return sorted(paths)
|
| 45 |
|
| 46 |
-
def read_labels():
|
| 47 |
if os.path.exists(LABELS_CSV):
|
| 48 |
try:
|
| 49 |
return pd.read_csv(LABELS_CSV)
|
| 50 |
except Exception:
|
| 51 |
-
|
| 52 |
return pd.DataFrame(columns=["image", "label", "annotator", "timestamp"])
|
| 53 |
|
| 54 |
-
def rel_to_image_dir(p: str):
|
| 55 |
try:
|
| 56 |
return str(Path(p).resolve().relative_to(Path(IMAGE_DIR).resolve()))
|
| 57 |
except Exception:
|
| 58 |
return p
|
| 59 |
|
| 60 |
-
def write_label(image_path: str, label: str, annotator: str):
|
| 61 |
-
"""Append one row to labels.csv
|
| 62 |
-
# only create a directory if LABELS_CSV actually has one, and it's not already there
|
| 63 |
dirpath = os.path.dirname(LABELS_CSV)
|
| 64 |
if dirpath and not os.path.isdir(dirpath):
|
| 65 |
os.makedirs(dirpath, exist_ok=True)
|
|
@@ -68,35 +66,65 @@ def write_label(image_path: str, label: str, annotator: str):
|
|
| 68 |
"image": rel_to_image_dir(image_path),
|
| 69 |
"label": label,
|
| 70 |
"annotator": annotator,
|
| 71 |
-
"timestamp": datetime.now(timezone.utc).isoformat()
|
| 72 |
}
|
| 73 |
with FileLock(LABELS_CSV + ".lock", timeout=10):
|
| 74 |
exists = os.path.exists(LABELS_CSV)
|
| 75 |
-
pd.DataFrame([record]).to_csv(
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
api.upload_file(
|
| 81 |
path_or_fileobj=LABELS_CSV,
|
| 82 |
path_in_repo="labels.csv",
|
| 83 |
repo_id=SPACE_ID,
|
| 84 |
repo_type="space",
|
| 85 |
-
token=HF_TOKEN
|
| 86 |
)
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
-
#
|
|
|
|
|
|
|
| 91 |
st.title("🏷️ Blast Cell Labeling App")
|
| 92 |
st.write(
|
| 93 |
"Enter your name, click **Start**, then classify each image. "
|
| 94 |
-
"Use **
|
| 95 |
-
"Use the 🗑️ icon for **
|
| 96 |
)
|
| 97 |
|
| 98 |
with st.sidebar:
|
| 99 |
-
# remember last annotator
|
| 100 |
default_name = st.session_state.get("annotator", "")
|
| 101 |
annotator = st.text_input("Your name*", value=default_name, placeholder="e.g., Dr. Smith")
|
| 102 |
|
|
@@ -112,18 +140,28 @@ with st.sidebar:
|
|
| 112 |
help="Continue where you left off, based on your name in labels.csv",
|
| 113 |
)
|
| 114 |
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
st.session_state.setdefault("order", [])
|
| 117 |
st.session_state.setdefault("idx", 0)
|
| 118 |
st.session_state.setdefault("total", 0)
|
| 119 |
st.session_state.setdefault("started", False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
# reset
|
| 122 |
if reset_btn:
|
| 123 |
st.session_state.update({"started": False, "order": [], "idx": 0, "total": 0})
|
| 124 |
st.rerun()
|
| 125 |
|
| 126 |
-
# start
|
| 127 |
if start_btn:
|
| 128 |
if not annotator.strip():
|
| 129 |
st.sidebar.error("Please enter your name.")
|
|
@@ -132,7 +170,7 @@ if start_btn:
|
|
| 132 |
imgs = list_images()
|
| 133 |
labels_df = read_labels()
|
| 134 |
|
| 135 |
-
if continue_by_name and
|
| 136 |
already = set(
|
| 137 |
labels_df.query("annotator == @annotator")["image"].astype(str).tolist()
|
| 138 |
)
|
|
@@ -140,23 +178,28 @@ if start_btn:
|
|
| 140 |
imgs = [p for p, r in zip(imgs, rel_imgs) if r not in already]
|
| 141 |
|
| 142 |
if not imgs:
|
| 143 |
-
st.warning("No images found (or all labeled). Upload to the `images/` folder.")
|
| 144 |
else:
|
| 145 |
-
st.session_state.update(
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
#
|
|
|
|
|
|
|
| 148 |
if not st.session_state.started:
|
| 149 |
st.info("Fill your name on the left and press **Start**.")
|
| 150 |
else:
|
| 151 |
idx = st.session_state.idx
|
| 152 |
total = st.session_state.total
|
|
|
|
| 153 |
if idx >= total:
|
| 154 |
st.success("All done 🎉 Thank you!")
|
| 155 |
else:
|
| 156 |
current_image = st.session_state.order[idx]
|
| 157 |
st.caption(f"{idx+1} / {total}")
|
| 158 |
|
| 159 |
-
# top-right trash
|
| 160 |
spacer, trash_col = st.columns([9, 1])
|
| 161 |
with trash_col:
|
| 162 |
if st.button("🗑️", help=f"Mark as {LABEL_TRASH}", use_container_width=True):
|
|
@@ -164,7 +207,7 @@ else:
|
|
| 164 |
st.session_state.idx += 1
|
| 165 |
st.rerun()
|
| 166 |
|
| 167 |
-
# image
|
| 168 |
try:
|
| 169 |
img = Image.open(current_image)
|
| 170 |
if getattr(img, "n_frames", 1) > 1:
|
|
@@ -175,7 +218,7 @@ else:
|
|
| 175 |
except Exception as e:
|
| 176 |
st.warning(f"Could not display image: {current_image}\n{e}")
|
| 177 |
|
| 178 |
-
# main
|
| 179 |
c_left, c_mid, c_right = st.columns([1, 1, 1])
|
| 180 |
|
| 181 |
with c_left:
|
|
@@ -196,5 +239,13 @@ else:
|
|
| 196 |
st.session_state.idx += 1
|
| 197 |
st.rerun()
|
| 198 |
|
|
|
|
|
|
|
|
|
|
| 199 |
st.divider()
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
import os
|
| 3 |
import glob
|
| 4 |
from pathlib import Path
|
|
|
|
| 10 |
from filelock import FileLock
|
| 11 |
from huggingface_hub import HfApi
|
| 12 |
|
| 13 |
+
# =========================
|
| 14 |
+
# Config (via env variables)
|
| 15 |
+
# =========================
|
| 16 |
+
IMAGE_DIR = os.getenv("IMAGE_DIR", "images")
|
| 17 |
+
LABELS_CSV = os.getenv("LABELS_CSV", "labels.csv")
|
| 18 |
SUPPORTED_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp")
|
| 19 |
|
| 20 |
+
# Button labels
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
LABEL_NONBLAST = os.getenv("LABEL_NONBLAST", "NON-BLAST")
|
| 22 |
LABEL_BLAST = os.getenv("LABEL_BLAST", "BLAST")
|
| 23 |
LABEL_UNCERTAIN = os.getenv("LABEL_UNCERTAIN", "UNCERTAIN")
|
| 24 |
LABEL_TRASH = os.getenv("LABEL_TRASH", "LOW_QUALITY")
|
| 25 |
|
| 26 |
+
# Optional Hub sync
|
| 27 |
+
HF_TOKEN = os.getenv("HF_TOKEN", "") # set in Space Secrets
|
| 28 |
+
SPACE_ID = os.getenv("SPACE_ID", "") # e.g. "org-or-user/your-space"
|
| 29 |
+
DATASET_REPO = os.getenv("DATASET_REPO", "") # e.g. "org-or-user/blast-labels" (recommended)
|
| 30 |
+
|
| 31 |
api = HfApi()
|
| 32 |
|
| 33 |
+
st.set_page_config(page_title="Blast Cell Labeling", layout="centered")
|
| 34 |
|
| 35 |
+
# ==========
|
| 36 |
+
# Utilities
|
| 37 |
+
# ==========
|
| 38 |
+
def list_images(image_dir: str = "", exts: tuple[str, ...] = ()) -> list[str]:
    """Return the sorted paths of all image files under the image directory.

    The directory is searched recursively; a path is kept when its
    lower-cased form ends with one of the accepted extensions and it is a
    regular file.

    Args:
        image_dir: Directory to scan. Empty (the default) means the
            module-level ``IMAGE_DIR``.
        exts: Accepted lower-case extensions, including the leading dot.
            Empty (the default) means the module-level ``SUPPORTED_EXTS``.

    Returns:
        Matching paths in ascending string order; an empty list when the
        directory does not exist or contains no images.
    """
    root = image_dir or IMAGE_DIR
    suffixes = tuple(exts) or SUPPORTED_EXTS
    pattern = os.path.join(root, "**", "*")
    return sorted(
        candidate
        for candidate in glob.glob(pattern, recursive=True)
        # "**/*" also yields directories; skip one whose name merely looks
        # like an image so it is never handed to the image viewer.
        if candidate.lower().endswith(suffixes) and os.path.isfile(candidate)
    )
|
| 44 |
|
| 45 |
+
def read_labels(path: str = "") -> pd.DataFrame:
    """Load the saved labels table, or an empty table when none is readable.

    Args:
        path: CSV file to read. Empty (the default) means the module-level
            ``LABELS_CSV``.

    Returns:
        The CSV contents with every column read as text, or an empty frame
        with the columns ``image``, ``label``, ``annotator`` and
        ``timestamp`` when the file is missing or cannot be parsed.
    """
    import logging

    csv_path = path or LABELS_CSV
    if os.path.exists(csv_path):
        try:
            # Read everything as text: with type inference an annotator such
            # as "007" becomes the integer 7 and a name like "NA" becomes NaN,
            # so string comparisons against the typed-in name never match.
            return pd.read_csv(csv_path, dtype=str, keep_default_na=False)
        except Exception:
            # Deliberately best-effort (an empty or corrupt file must not
            # take the app down), but leave a trace instead of hiding it.
            logging.getLogger(__name__).warning(
                "Could not read %s; starting with an empty label table.",
                csv_path,
                exc_info=True,
            )
    return pd.DataFrame(columns=["image", "label", "annotator", "timestamp"])
|
| 52 |
|
| 53 |
+
def rel_to_image_dir(p: str, image_dir: str = "") -> str:
    """Express an image path relative to the image directory.

    Args:
        p: Path of an image file.
        image_dir: Directory the result is made relative to. Empty (the
            default) means the module-level ``IMAGE_DIR``.

    Returns:
        The resolved path of ``p`` relative to the resolved image directory,
        or ``p`` unchanged when it does not lie inside that directory or
        cannot be resolved.
    """
    try:
        base = Path(image_dir or IMAGE_DIR).resolve()
        return str(Path(p).resolve().relative_to(base))
    except (ValueError, OSError, RuntimeError):
        # ValueError: p is outside the image directory (or is malformed);
        # OSError / RuntimeError: the filesystem refused to resolve the path.
        return p
|
| 58 |
|
| 59 |
+
def write_label(image_path: str, label: str, annotator: str) -> None:
|
| 60 |
+
"""Append one row to labels.csv (local only)."""
|
|
|
|
| 61 |
dirpath = os.path.dirname(LABELS_CSV)
|
| 62 |
if dirpath and not os.path.isdir(dirpath):
|
| 63 |
os.makedirs(dirpath, exist_ok=True)
|
|
|
|
| 66 |
"image": rel_to_image_dir(image_path),
|
| 67 |
"label": label,
|
| 68 |
"annotator": annotator,
|
| 69 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 70 |
}
|
| 71 |
with FileLock(LABELS_CSV + ".lock", timeout=10):
|
| 72 |
exists = os.path.exists(LABELS_CSV)
|
| 73 |
+
pd.DataFrame([record]).to_csv(
|
| 74 |
+
LABELS_CSV, mode="a", header=not exists, index=False
|
| 75 |
+
)
|
| 76 |
|
| 77 |
+
st.session_state["unsynced"] = True # mark as needing sync
|
| 78 |
+
|
| 79 |
+
def sync_to_hub() -> tuple[bool, str]:
    """Upload the local labels file to the Hub on request.

    The dataset repo named by ``DATASET_REPO`` is used when set; otherwise
    the Space repo named by ``SPACE_ID`` is used as a fallback (pushing to
    the Space repo can trigger a restart of the Space).

    Returns:
        ``(ok, message)``: ``ok`` is True only when the upload succeeded, and
        ``message`` is a human-readable status to show the user. Failures are
        reported through the return value rather than raised.
    """
    if not os.path.exists(LABELS_CSV):
        return False, "labels.csv not found — nothing to sync."

    if not HF_TOKEN:
        return False, "HF_TOKEN not set in Space secrets."

    if DATASET_REPO:
        # Recommended target: does NOT restart the Space.
        repo_id, repo_type = DATASET_REPO, "dataset"
    elif SPACE_ID:
        # Fallback target: can trigger a restart.
        repo_id, repo_type = SPACE_ID, "space"
    else:
        return False, "Set DATASET_REPO (preferred) or SPACE_ID to enable uploads."

    try:
        # Take the same lock write_label() appends under so a half-written
        # row is never uploaded. Only the quick local read happens while the
        # lock is held; the slow network upload runs on the snapshot so
        # labelers are not blocked past the lock timeout.
        with FileLock(LABELS_CSV + ".lock", timeout=10):
            with open(LABELS_CSV, "rb") as fh:
                payload = fh.read()

        api.upload_file(
            path_or_fileobj=payload,
            path_in_repo="labels.csv",
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
    except Exception as e:
        return False, f"Sync failed: {e}"
    else:
        st.session_state["unsynced"] = False
        return True, "Synced labels.csv to the Hub."
|
| 116 |
|
| 117 |
+
# =================
|
| 118 |
+
# Sidebar / Header
|
| 119 |
+
# =================
|
| 120 |
st.title("🏷️ Blast Cell Labeling App")
|
| 121 |
st.write(
|
| 122 |
"Enter your name, click **Start**, then classify each image. "
|
| 123 |
+
f"Use **{LABEL_UNCERTAIN}** if you’re not sure. "
|
| 124 |
+
f"Use the 🗑️ icon for **{LABEL_TRASH}** images."
|
| 125 |
)
|
| 126 |
|
| 127 |
with st.sidebar:
|
|
|
|
| 128 |
default_name = st.session_state.get("annotator", "")
|
| 129 |
annotator = st.text_input("Your name*", value=default_name, placeholder="e.g., Dr. Smith")
|
| 130 |
|
|
|
|
| 140 |
help="Continue where you left off, based on your name in labels.csv",
|
| 141 |
)
|
| 142 |
|
| 143 |
+
sync_now = st.button("📤 Sync to Hub", help="Upload labels.csv to the Hub")
|
| 144 |
+
|
| 145 |
+
# =====================
|
| 146 |
+
# Session state defaults
|
| 147 |
+
# =====================
|
| 148 |
st.session_state.setdefault("order", [])
|
| 149 |
st.session_state.setdefault("idx", 0)
|
| 150 |
st.session_state.setdefault("total", 0)
|
| 151 |
st.session_state.setdefault("started", False)
|
| 152 |
+
st.session_state.setdefault("unsynced", False)
|
| 153 |
+
|
| 154 |
+
# =========
|
| 155 |
+
# Actions
|
| 156 |
+
# =========
|
| 157 |
+
if sync_now:
|
| 158 |
+
ok, msg = sync_to_hub()
|
| 159 |
+
(st.sidebar.success if ok else st.sidebar.error)(msg)
|
| 160 |
|
|
|
|
| 161 |
if reset_btn:
|
| 162 |
st.session_state.update({"started": False, "order": [], "idx": 0, "total": 0})
|
| 163 |
st.rerun()
|
| 164 |
|
|
|
|
| 165 |
if start_btn:
|
| 166 |
if not annotator.strip():
|
| 167 |
st.sidebar.error("Please enter your name.")
|
|
|
|
| 170 |
imgs = list_images()
|
| 171 |
labels_df = read_labels()
|
| 172 |
|
| 173 |
+
if continue_by_name and not labels_df.empty:
|
| 174 |
already = set(
|
| 175 |
labels_df.query("annotator == @annotator")["image"].astype(str).tolist()
|
| 176 |
)
|
|
|
|
| 178 |
imgs = [p for p, r in zip(imgs, rel_imgs) if r not in already]
|
| 179 |
|
| 180 |
if not imgs:
|
| 181 |
+
st.warning("No images found (or all labeled). Upload images to the `images/` folder.")
|
| 182 |
else:
|
| 183 |
+
st.session_state.update(
|
| 184 |
+
{"order": imgs, "idx": 0, "total": len(imgs), "started": True}
|
| 185 |
+
)
|
| 186 |
|
| 187 |
+
# =========
|
| 188 |
+
# Main area
|
| 189 |
+
# =========
|
| 190 |
if not st.session_state.started:
|
| 191 |
st.info("Fill your name on the left and press **Start**.")
|
| 192 |
else:
|
| 193 |
idx = st.session_state.idx
|
| 194 |
total = st.session_state.total
|
| 195 |
+
|
| 196 |
if idx >= total:
|
| 197 |
st.success("All done 🎉 Thank you!")
|
| 198 |
else:
|
| 199 |
current_image = st.session_state.order[idx]
|
| 200 |
st.caption(f"{idx+1} / {total}")
|
| 201 |
|
| 202 |
+
# top-right trash button
|
| 203 |
spacer, trash_col = st.columns([9, 1])
|
| 204 |
with trash_col:
|
| 205 |
if st.button("🗑️", help=f"Mark as {LABEL_TRASH}", use_container_width=True):
|
|
|
|
| 207 |
st.session_state.idx += 1
|
| 208 |
st.rerun()
|
| 209 |
|
| 210 |
+
# image display
|
| 211 |
try:
|
| 212 |
img = Image.open(current_image)
|
| 213 |
if getattr(img, "n_frames", 1) > 1:
|
|
|
|
| 218 |
except Exception as e:
|
| 219 |
st.warning(f"Could not display image: {current_image}\n{e}")
|
| 220 |
|
| 221 |
+
# three main buttons: NON-BLAST | UNCERTAIN | BLAST
|
| 222 |
c_left, c_mid, c_right = st.columns([1, 1, 1])
|
| 223 |
|
| 224 |
with c_left:
|
|
|
|
| 239 |
st.session_state.idx += 1
|
| 240 |
st.rerun()
|
| 241 |
|
| 242 |
+
# ======
|
| 243 |
+
# Footer
|
| 244 |
+
# ======
|
| 245 |
st.divider()
|
| 246 |
+
sync_note = " (unsynced changes)" if st.session_state.get("unsynced") else ""
|
| 247 |
+
target_repo = DATASET_REPO or SPACE_ID or "—"
|
| 248 |
+
st.caption(
|
| 249 |
+
f"Labels are saved locally to `{LABELS_CSV}`{sync_note}. "
|
| 250 |
+
f"Use **📤 Sync to Hub** to upload (target: `{target_repo}`)."
|
| 251 |
+
)
|