Nightfury16 committed on
Commit
246c74f
·
1 Parent(s): b7c9230

Initial commit

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -5
  2. app.py +4 -15
  3. requirements.txt +3 -3
Dockerfile CHANGED
@@ -5,6 +5,7 @@ WORKDIR /app
5
  RUN useradd -m -u 1000 user
6
  USER user
7
  ENV PATH="/home/user/.local/bin:$PATH"
 
8
 
9
  COPY --chown=user requirements.txt requirements.txt
10
  RUN pip install --no-cache-dir --upgrade pip && \
@@ -12,8 +13,4 @@ RUN pip install --no-cache-dir --upgrade pip && \
12
 
13
  COPY --chown=user . .
14
 
15
- ENV DATA_DIR=/data
16
-
17
- EXPOSE 7860
18
-
19
- CMD ["python", "app.py"]
 
5
  RUN useradd -m -u 1000 user
6
  USER user
7
  ENV PATH="/home/user/.local/bin:$PATH"
8
+ ENV PYTHONUNBUFFERED=1
9
 
10
  COPY --chown=user requirements.txt requirements.txt
11
  RUN pip install --no-cache-dir --upgrade pip && \
 
13
 
14
  COPY --chown=user . .
15
 
16
+ CMD ["python", "app.py"]
 
 
 
 
app.py CHANGED
@@ -13,15 +13,12 @@ from huggingface_hub import HfApi, hf_hub_download
13
 
14
  DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels")
15
  HF_TOKEN = os.environ.get("HF_TOKEN")
16
-
17
  CACHE_DIR = "/tmp"
18
  URL_FILE = "urls.txt"
19
-
20
  LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
21
  VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
22
  SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
23
  LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
24
-
25
  MAX_IMAGES = 6
26
  THUMB_SIZE = (350, 350)
27
  ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
@@ -32,7 +29,6 @@ def sync_pull():
32
  return
33
 
34
  print(f"🔄 Syncing from {DATASET_REPO_ID}...")
35
-
36
  for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
37
  try:
38
  hf_hub_download(
@@ -44,7 +40,7 @@ def sync_pull():
44
  )
45
  print(f"✅ Loaded {filename}")
46
  except Exception:
47
- print(f"ℹ️ {filename} not found on Hub. Creating empty in /tmp.")
48
 
49
  def sync_push_background(local_path, remote_filename):
50
  if not HF_TOKEN: return
@@ -68,7 +64,6 @@ def sync_push_background(local_path, remote_filename):
68
 
69
  def init_files():
70
  sync_pull()
71
-
72
  for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
73
  if not os.path.exists(f):
74
  if f == LABEL_FILE: cols = ["timestamp", "user", "group_id", "url", "score", "label"]
@@ -77,18 +72,14 @@ def init_files():
77
  pd.DataFrame(columns=cols).to_csv(f, index=False)
78
 
79
  if not os.path.exists(URL_FILE):
80
- print("⚠️ urls.txt not found in root directory! Please upload it to your Space.")
81
- else:
82
- print(f"✅ urls.txt found ({len(open(URL_FILE).readlines())} lines)")
83
 
84
  init_files()
85
 
86
  def get_image_optimized(url):
87
  if not url: return None
88
  try:
89
- headers = {
90
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
91
- }
92
  response = requests.get(url, headers=headers, timeout=3)
93
  if response.status_code == 200:
94
  img = Image.open(BytesIO(response.content))
@@ -174,7 +165,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
174
  found = False
175
  for i, gid in enumerate(all_groups):
176
  if gid in s_done: continue
177
-
178
  is_ready = False
179
  if mode == "label" and gid not in l_done: is_ready = True
180
  elif mode == "verify" and gid in l_done and gid not in v_done: is_ready = True
@@ -223,7 +213,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
223
  if i < len(urls):
224
  u = urls[i]
225
  img_data = processed_images[i]
226
-
227
  updates[img_c] = gr.update(value=img_data, visible=True)
228
 
229
  v_sc = saved_vals.get(u, {}).get('score', 5)
@@ -313,7 +302,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Labeling Tool") as demo:
313
  with gr.Tab("Workspace", id=0):
314
  with gr.Group() as screen_menu:
315
  gr.Markdown("# Welcome! 👋")
316
- gr.Markdown("Connected to HF Dataset for free persistence.")
317
  with gr.Row():
318
  b_start_l = gr.Button("Start Labeling", variant="primary")
319
  b_start_v = gr.Button("Start Verification")
 
13
 
14
  DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels")
15
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
16
  CACHE_DIR = "/tmp"
17
  URL_FILE = "urls.txt"
 
18
  LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
19
  VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
20
  SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
21
  LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
 
22
  MAX_IMAGES = 6
23
  THUMB_SIZE = (350, 350)
24
  ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
 
29
  return
30
 
31
  print(f"🔄 Syncing from {DATASET_REPO_ID}...")
 
32
  for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
33
  try:
34
  hf_hub_download(
 
40
  )
41
  print(f"✅ Loaded {filename}")
42
  except Exception:
43
+ print(f"ℹ️ {filename} missing on Hub. Starting fresh.")
44
 
45
  def sync_push_background(local_path, remote_filename):
46
  if not HF_TOKEN: return
 
64
 
65
  def init_files():
66
  sync_pull()
 
67
  for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
68
  if not os.path.exists(f):
69
  if f == LABEL_FILE: cols = ["timestamp", "user", "group_id", "url", "score", "label"]
 
72
  pd.DataFrame(columns=cols).to_csv(f, index=False)
73
 
74
  if not os.path.exists(URL_FILE):
75
+ print("⚠️ urls.txt not found! Please upload it to the repo.")
 
 
76
 
77
  init_files()
78
 
79
  def get_image_optimized(url):
80
  if not url: return None
81
  try:
82
+ headers = {'User-Agent': 'Mozilla/5.0'}
 
 
83
  response = requests.get(url, headers=headers, timeout=3)
84
  if response.status_code == 200:
85
  img = Image.open(BytesIO(response.content))
 
165
  found = False
166
  for i, gid in enumerate(all_groups):
167
  if gid in s_done: continue
 
168
  is_ready = False
169
  if mode == "label" and gid not in l_done: is_ready = True
170
  elif mode == "verify" and gid in l_done and gid not in v_done: is_ready = True
 
213
  if i < len(urls):
214
  u = urls[i]
215
  img_data = processed_images[i]
 
216
  updates[img_c] = gr.update(value=img_data, visible=True)
217
 
218
  v_sc = saved_vals.get(u, {}).get('score', 5)
 
302
  with gr.Tab("Workspace", id=0):
303
  with gr.Group() as screen_menu:
304
  gr.Markdown("# Welcome! 👋")
305
+ gr.Markdown("Connected to HF Dataset for persistence.")
306
  with gr.Row():
307
  b_start_l = gr.Button("Start Labeling", variant="primary")
308
  b_start_v = gr.Button("Start Verification")
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- huggingface-hub==0.34.3
2
- gradio==5.0.0
3
  pandas
4
  requests
5
  Pillow
6
- filelock
 
 
1
+ gradio==4.44.1
 
2
  pandas
3
  requests
4
  Pillow
5
+ filelock
6
+ huggingface_hub>=0.25.0