Nightfury16 committed on
Commit
8a439bd
·
1 Parent(s): 3f4e464

added new_urls.json

Browse files
Files changed (2) hide show
  1. app.py +14 -10
  2. new_urls.json +0 -0
app.py CHANGED
@@ -27,7 +27,8 @@ DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels
27
  HF_TOKEN = os.environ.get("HF_TOKEN")
28
 
29
  CACHE_DIR = "/tmp"
30
- URL_FILE = "urls.json"
 
31
  LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
32
  VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
33
  SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
@@ -101,6 +102,7 @@ def get_image_optimized(url):
101
  except: pass
102
  return url
103
 
 
104
  def load_all_urls():
105
  urls = []
106
  if not os.path.exists(URL_FILE):
@@ -108,15 +110,17 @@ def load_all_urls():
108
 
109
  try:
110
  with open(URL_FILE, 'r') as f:
111
- if URL_FILE.endswith('.txt'):
112
- for line in f:
113
- if line.strip(): urls.append(line.strip())
114
- else:
115
- data = json.load(f)
 
116
  for query_key, rows in data.items():
117
- for row in rows:
118
- if "unstaged_images" in row:
119
- urls.append(row["unstaged_images"])
 
120
  except Exception as e:
121
  print(f"Error loading URLs: {e}")
122
 
@@ -379,4 +383,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Labeling Tool") as demo:
379
  b_ref_cat.click(refresh_cat, None, df_cat)
380
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
381
 
382
- demo.queue().launch(server_name="0.0.0.0", server_port=7860)
 
27
  HF_TOKEN = os.environ.get("HF_TOKEN")
28
 
29
  CACHE_DIR = "/tmp"
30
+ # CHANGED: Updated file name
31
+ URL_FILE = "new_urls.json"
32
  LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
33
  VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
34
  SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
 
102
  except: pass
103
  return url
104
 
105
+ # CHANGED: Updated parsing logic for new_urls.json structure
106
  def load_all_urls():
107
  urls = []
108
  if not os.path.exists(URL_FILE):
 
110
 
111
  try:
112
  with open(URL_FILE, 'r') as f:
113
+ data = json.load(f)
114
+ if "groups" in data:
115
+ for group in data["groups"]:
116
+ if "images" in group:
117
+ urls.extend(group["images"])
118
+ elif isinstance(data, dict):
119
  for query_key, rows in data.items():
120
+ if isinstance(rows, list):
121
+ for row in rows:
122
+ if "unstaged_images" in row:
123
+ urls.append(row["unstaged_images"])
124
  except Exception as e:
125
  print(f"Error loading URLs: {e}")
126
 
 
383
  b_ref_cat.click(refresh_cat, None, df_cat)
384
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
385
 
386
+ demo.queue().launch(server_name="0.0.0.0", server_port=7860)
new_urls.json ADDED
The diff for this file is too large to render. See raw diff