wuhp committed on
Commit
e5beb8a
·
verified ·
1 Parent(s): 594bb51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -27
app.py CHANGED
@@ -35,14 +35,19 @@ def fetch_metadata(api_key, workspace, project, version):
35
  return total_images, classes
36
 
37
 
38
- def aggregate_datasets(api_key, urls):
39
  """
40
- Given API key and list of dataset URLs, returns total images and aggregated class counts.
 
 
41
  """
42
  total_images = 0
43
  aggregated = {}
44
- for url in urls:
45
- ws, proj, ver = parse_roboflow_url(url)
 
 
 
46
  imgs, cls_counts = fetch_metadata(api_key, ws, proj, ver)
47
  total_images += imgs
48
  for cls, cnt in cls_counts.items():
@@ -65,34 +70,38 @@ def make_bar_chart(counts):
65
 
66
  def load_datasets(api_key, file_objs):
67
  """
68
- Read multiple .txt uploads, parse URLs, and aggregate metadata.
 
69
  Returns total images, dataframe data, plot, and JSON mapping.
70
  """
71
- urls = []
72
- # Read each uploaded file from various possible types
 
73
  for file_obj in file_objs:
74
- # Attempt to read bytes
 
 
75
  try:
76
- content_bytes = file_obj.read()
77
  except Exception:
78
- # Try dict-like interface
79
- try:
80
- content_bytes = file_obj['data']
81
- except Exception:
82
- # Assume file_obj is file path
83
- with open(file_obj, 'rb') as f:
84
- content_bytes = f.read()
85
- # Decode if bytes
86
- if isinstance(content_bytes, (bytes, bytearray)):
87
- content = content_bytes.decode('utf-8')
88
  else:
89
- content = content_bytes
90
- # Split lines into URLs
91
- lines = [line.strip() for line in content.splitlines() if line.strip()]
92
- urls.extend(lines)
93
- # Remove duplicate URLs while preserving order
94
- unique_urls = list(dict.fromkeys(urls))
95
- total_images, aggregated = aggregate_datasets(api_key, unique_urls)
 
 
 
 
96
  df_data = [[cls, cnt] for cls, cnt in aggregated.items()]
97
  fig = make_bar_chart(aggregated)
98
  return f"{total_images}", df_data, fig, json.dumps(aggregated, indent=2)
@@ -131,4 +140,4 @@ def build_ui():
131
 
132
  if __name__ == "__main__":
133
  demo = build_ui()
134
- demo.launch(share=True)
 
35
  return total_images, classes
36
 
37
 
38
+ def aggregate_datasets(api_key, entries):
39
  """
40
+ Given API key and list of (url, file_name, line_no) tuples,
41
+ returns total images and aggregated class counts.
42
+ Raises ValueError with file and line for invalid URLs.
43
  """
44
  total_images = 0
45
  aggregated = {}
46
+ for url, file_name, line_no in entries:
47
+ try:
48
+ ws, proj, ver = parse_roboflow_url(url)
49
+ except ValueError:
50
+ raise ValueError(f"Invalid URL '{url}' in file '{file_name}', line {line_no}")
51
  imgs, cls_counts = fetch_metadata(api_key, ws, proj, ver)
52
  total_images += imgs
53
  for cls, cnt in cls_counts.items():
 
70
 
71
  def load_datasets(api_key, file_objs):
72
  """
73
+ Read multiple .txt uploads, parse URLs with file/line info,
74
+ and aggregate metadata. Reports precise errors for invalid URLs.
75
  Returns total images, dataframe data, plot, and JSON mapping.
76
  """
77
+ entries = []
78
+ seen = set()
79
+
80
  for file_obj in file_objs:
81
+ # Determine file name
82
+ file_name = getattr(file_obj, 'name', None) or file_obj.get('name', 'unknown')
83
+ # Read raw bytes or data
84
  try:
85
+ raw = file_obj.read()
86
  except Exception:
87
+ raw = file_obj.get('data') if isinstance(file_obj, dict) else None
88
+ if raw is None and isinstance(file_obj, str):
89
+ with open(file_obj, 'rb') as f:
90
+ raw = f.read()
91
+ if isinstance(raw, (bytes, bytearray)):
92
+ content = raw.decode('utf-8')
 
 
 
 
93
  else:
94
+ content = raw
95
+ lines = content.splitlines()
96
+ for i, line in enumerate(lines, start=1):
97
+ url = line.strip()
98
+ if not url:
99
+ continue
100
+ if url not in seen:
101
+ seen.add(url)
102
+ entries.append((url, file_name, i))
103
+
104
+ total_images, aggregated = aggregate_datasets(api_key, entries)
105
  df_data = [[cls, cnt] for cls, cnt in aggregated.items()]
106
  fig = make_bar_chart(aggregated)
107
  return f"{total_images}", df_data, fig, json.dumps(aggregated, indent=2)
 
140
 
141
  if __name__ == "__main__":
142
  demo = build_ui()
143
+ demo.launch()