Spaces:
Sleeping
Sleeping
Commit
·
246c74f
1
Parent(s):
b7c9230
Initial commit
Browse files
- Dockerfile +2 -5
- app.py +4 -15
- requirements.txt +3 -3
Dockerfile
CHANGED
|
@@ -5,6 +5,7 @@ WORKDIR /app
|
|
| 5 |
RUN useradd -m -u 1000 user
|
| 6 |
USER user
|
| 7 |
ENV PATH="/home/user/.local/bin:$PATH"
|
|
|
|
| 8 |
|
| 9 |
COPY --chown=user requirements.txt requirements.txt
|
| 10 |
RUN pip install --no-cache-dir --upgrade pip && \
|
|
@@ -12,8 +13,4 @@ RUN pip install --no-cache-dir --upgrade pip && \
|
|
| 12 |
|
| 13 |
COPY --chown=user . .
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
EXPOSE 7860
|
| 18 |
-
|
| 19 |
-
CMD ["python", "app.py"]
|
|
|
|
| 5 |
RUN useradd -m -u 1000 user
|
| 6 |
USER user
|
| 7 |
ENV PATH="/home/user/.local/bin:$PATH"
|
| 8 |
+
ENV PYTHONUNBUFFERED=1
|
| 9 |
|
| 10 |
COPY --chown=user requirements.txt requirements.txt
|
| 11 |
RUN pip install --no-cache-dir --upgrade pip && \
|
|
|
|
| 13 |
|
| 14 |
COPY --chown=user . .
|
| 15 |
|
| 16 |
+
CMD ["python", "app.py"]
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -13,15 +13,12 @@ from huggingface_hub import HfApi, hf_hub_download
|
|
| 13 |
|
| 14 |
DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels")
|
| 15 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 16 |
-
|
| 17 |
CACHE_DIR = "/tmp"
|
| 18 |
URL_FILE = "urls.txt"
|
| 19 |
-
|
| 20 |
LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
|
| 21 |
VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
|
| 22 |
SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
|
| 23 |
LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
|
| 24 |
-
|
| 25 |
MAX_IMAGES = 6
|
| 26 |
THUMB_SIZE = (350, 350)
|
| 27 |
ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
|
|
@@ -32,7 +29,6 @@ def sync_pull():
|
|
| 32 |
return
|
| 33 |
|
| 34 |
print(f"🔄 Syncing from {DATASET_REPO_ID}...")
|
| 35 |
-
|
| 36 |
for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
|
| 37 |
try:
|
| 38 |
hf_hub_download(
|
|
@@ -44,7 +40,7 @@ def sync_pull():
|
|
| 44 |
)
|
| 45 |
print(f"✅ Loaded {filename}")
|
| 46 |
except Exception:
|
| 47 |
-
print(f"ℹ️ {filename}
|
| 48 |
|
| 49 |
def sync_push_background(local_path, remote_filename):
|
| 50 |
if not HF_TOKEN: return
|
|
@@ -68,7 +64,6 @@ def sync_push_background(local_path, remote_filename):
|
|
| 68 |
|
| 69 |
def init_files():
|
| 70 |
sync_pull()
|
| 71 |
-
|
| 72 |
for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
|
| 73 |
if not os.path.exists(f):
|
| 74 |
if f == LABEL_FILE: cols = ["timestamp", "user", "group_id", "url", "score", "label"]
|
|
@@ -77,18 +72,14 @@ def init_files():
|
|
| 77 |
pd.DataFrame(columns=cols).to_csv(f, index=False)
|
| 78 |
|
| 79 |
if not os.path.exists(URL_FILE):
|
| 80 |
-
print("⚠️ urls.txt not found
|
| 81 |
-
else:
|
| 82 |
-
print(f"✅ urls.txt found ({len(open(URL_FILE).readlines())} lines)")
|
| 83 |
|
| 84 |
init_files()
|
| 85 |
|
| 86 |
def get_image_optimized(url):
|
| 87 |
if not url: return None
|
| 88 |
try:
|
| 89 |
-
headers = {
|
| 90 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 91 |
-
}
|
| 92 |
response = requests.get(url, headers=headers, timeout=3)
|
| 93 |
if response.status_code == 200:
|
| 94 |
img = Image.open(BytesIO(response.content))
|
|
@@ -174,7 +165,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 174 |
found = False
|
| 175 |
for i, gid in enumerate(all_groups):
|
| 176 |
if gid in s_done: continue
|
| 177 |
-
|
| 178 |
is_ready = False
|
| 179 |
if mode == "label" and gid not in l_done: is_ready = True
|
| 180 |
elif mode == "verify" and gid in l_done and gid not in v_done: is_ready = True
|
|
@@ -223,7 +213,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 223 |
if i < len(urls):
|
| 224 |
u = urls[i]
|
| 225 |
img_data = processed_images[i]
|
| 226 |
-
|
| 227 |
updates[img_c] = gr.update(value=img_data, visible=True)
|
| 228 |
|
| 229 |
v_sc = saved_vals.get(u, {}).get('score', 5)
|
|
@@ -313,7 +302,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Labeling Tool") as demo:
|
|
| 313 |
with gr.Tab("Workspace", id=0):
|
| 314 |
with gr.Group() as screen_menu:
|
| 315 |
gr.Markdown("# Welcome! 👋")
|
| 316 |
-
gr.Markdown("Connected to HF Dataset for
|
| 317 |
with gr.Row():
|
| 318 |
b_start_l = gr.Button("Start Labeling", variant="primary")
|
| 319 |
b_start_v = gr.Button("Start Verification")
|
|
|
|
| 13 |
|
| 14 |
DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels")
|
| 15 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
|
| 16 |
CACHE_DIR = "/tmp"
|
| 17 |
URL_FILE = "urls.txt"
|
|
|
|
| 18 |
LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
|
| 19 |
VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
|
| 20 |
SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
|
| 21 |
LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
|
|
|
|
| 22 |
MAX_IMAGES = 6
|
| 23 |
THUMB_SIZE = (350, 350)
|
| 24 |
ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
|
|
|
|
| 29 |
return
|
| 30 |
|
| 31 |
print(f"🔄 Syncing from {DATASET_REPO_ID}...")
|
|
|
|
| 32 |
for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
|
| 33 |
try:
|
| 34 |
hf_hub_download(
|
|
|
|
| 40 |
)
|
| 41 |
print(f"✅ Loaded {filename}")
|
| 42 |
except Exception:
|
| 43 |
+
print(f"ℹ️ {filename} missing on Hub. Starting fresh.")
|
| 44 |
|
| 45 |
def sync_push_background(local_path, remote_filename):
|
| 46 |
if not HF_TOKEN: return
|
|
|
|
| 64 |
|
| 65 |
def init_files():
|
| 66 |
sync_pull()
|
|
|
|
| 67 |
for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
|
| 68 |
if not os.path.exists(f):
|
| 69 |
if f == LABEL_FILE: cols = ["timestamp", "user", "group_id", "url", "score", "label"]
|
|
|
|
| 72 |
pd.DataFrame(columns=cols).to_csv(f, index=False)
|
| 73 |
|
| 74 |
if not os.path.exists(URL_FILE):
|
| 75 |
+
print("⚠️ urls.txt not found! Please upload it to the repo.")
|
|
|
|
|
|
|
| 76 |
|
| 77 |
init_files()
|
| 78 |
|
| 79 |
def get_image_optimized(url):
|
| 80 |
if not url: return None
|
| 81 |
try:
|
| 82 |
+
headers = {'User-Agent': 'Mozilla/5.0'}
|
|
|
|
|
|
|
| 83 |
response = requests.get(url, headers=headers, timeout=3)
|
| 84 |
if response.status_code == 200:
|
| 85 |
img = Image.open(BytesIO(response.content))
|
|
|
|
| 165 |
found = False
|
| 166 |
for i, gid in enumerate(all_groups):
|
| 167 |
if gid in s_done: continue
|
|
|
|
| 168 |
is_ready = False
|
| 169 |
if mode == "label" and gid not in l_done: is_ready = True
|
| 170 |
elif mode == "verify" and gid in l_done and gid not in v_done: is_ready = True
|
|
|
|
| 213 |
if i < len(urls):
|
| 214 |
u = urls[i]
|
| 215 |
img_data = processed_images[i]
|
|
|
|
| 216 |
updates[img_c] = gr.update(value=img_data, visible=True)
|
| 217 |
|
| 218 |
v_sc = saved_vals.get(u, {}).get('score', 5)
|
|
|
|
| 302 |
with gr.Tab("Workspace", id=0):
|
| 303 |
with gr.Group() as screen_menu:
|
| 304 |
gr.Markdown("# Welcome! 👋")
|
| 305 |
+
gr.Markdown("Connected to HF Dataset for persistence.")
|
| 306 |
with gr.Row():
|
| 307 |
b_start_l = gr.Button("Start Labeling", variant="primary")
|
| 308 |
b_start_v = gr.Button("Start Verification")
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
|
| 2 |
-
gradio==5.0.0
|
| 3 |
pandas
|
| 4 |
requests
|
| 5 |
Pillow
|
| 6 |
-
filelock
|
|
|
|
|
|
| 1 |
+
gradio==4.44.1
|
|
|
|
| 2 |
pandas
|
| 3 |
requests
|
| 4 |
Pillow
|
| 5 |
+
filelock
|
| 6 |
+
huggingface_hub>=0.25.0
|