rethinks committed on
Commit
2df441b
·
verified ·
1 Parent(s): 5986bee

Upload supabase_storage.py

Browse files
Files changed (1) hide show
  1. supabase_storage.py +54 -22
supabase_storage.py CHANGED
@@ -239,31 +239,63 @@ def list_datasets_from_supabase() -> List[Dict[str, Any]]:
239
 
240
  try:
241
  storage = client.storage.from_(BUCKET_NAME)
 
242
 
243
- # List all items in the bucket root (pass empty string for root path)
244
- items = storage.list(path="")
245
- print(f"[Supabase] Bucket contains {len(items)} items")
 
 
 
 
 
 
 
 
 
 
 
246
 
247
- # Debug: print what we got
248
- for item in items:
249
- print(f"[Supabase] Item: {item.get('name')} - metadata: {item.get('metadata')}")
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
- # Filter to get only folders (datasets), excluding files like _registry.json
252
- # In Supabase Storage, folders typically have id=None or no mimetype
253
- dataset_names = []
254
- for item in items:
255
- name = item.get('name', '')
256
- # Skip hidden files and registry
257
- if name.startswith('_') or name.startswith('.'):
258
- continue
259
- # Skip if it's a file (has mimetype in metadata)
260
- # Folders in Supabase typically don't have mimetype
261
- item_metadata = item.get('metadata')
262
- is_file = item_metadata and item_metadata.get('mimetype')
263
-
264
- if not is_file:
265
- dataset_names.append(name)
266
- print(f"[Supabase] Found dataset folder: {name}")
 
 
 
 
 
 
 
 
267
 
268
  datasets = []
269
  for folder_name in dataset_names:
 
239
 
240
  try:
241
  storage = client.storage.from_(BUCKET_NAME)
242
+ dataset_names = []
243
 
244
+ # Method 1: Try storage.list() with empty path
245
+ try:
246
+ items = storage.list(path="")
247
+ print(f"[Supabase] Method 1 (list path=''): {len(items)} items")
248
+ for item in items:
249
+ name = item.get('name', '')
250
+ if name and not name.startswith('_') and not name.startswith('.'):
251
+ item_metadata = item.get('metadata')
252
+ is_file = item_metadata and item_metadata.get('mimetype')
253
+ if not is_file:
254
+ dataset_names.append(name)
255
+ print(f"[Supabase] Found via list(): {name}")
256
+ except Exception as e:
257
+ print(f"[Supabase] Method 1 failed: {e}")
258
 
259
+ # Method 2: If list() returned nothing, try listing without path argument
260
+ if not dataset_names:
261
+ try:
262
+ items = storage.list()
263
+ print(f"[Supabase] Method 2 (list no args): {len(items)} items")
264
+ for item in items:
265
+ name = item.get('name', '')
266
+ if name and not name.startswith('_') and not name.startswith('.'):
267
+ item_metadata = item.get('metadata')
268
+ is_file = item_metadata and item_metadata.get('mimetype')
269
+ if not is_file:
270
+ dataset_names.append(name)
271
+ print(f"[Supabase] Found via list(): {name}")
272
+ except Exception as e:
273
+ print(f"[Supabase] Method 2 failed: {e}")
274
 
275
+ # Method 3: Fallback - read from registry file if exists
276
+ if not dataset_names:
277
+ print("[Supabase] List methods returned empty, trying registry fallback...")
278
+ try:
279
+ response = storage.download("_registry.json")
280
+ registry = json.loads(response.decode('utf-8'))
281
+ dataset_names = registry.get('datasets', [])
282
+ print(f"[Supabase] Found via registry: {dataset_names}")
283
+ except Exception as e:
284
+ print(f"[Supabase] Registry fallback failed: {e}")
285
+
286
+ # Method 4: Ultimate fallback - probe for known dataset names
287
+ if not dataset_names:
288
+ print("[Supabase] Trying direct probe for datasets...")
289
+ potential_names = ['testing', 'only_ariya___siglip', 'onlyariya_clip']
290
+ for name in potential_names:
291
+ try:
292
+ storage.download(f"{name}/metadata.json")
293
+ dataset_names.append(name)
294
+ print(f"[Supabase] Found via probe: {name}")
295
+ except:
296
+ pass
297
+
298
+ print(f"[Supabase] Total datasets found: {len(dataset_names)} - {dataset_names}")
299
 
300
  datasets = []
301
  for folder_name in dataset_names: