rethinks committed on
Commit
8dfb243
·
verified ·
1 Parent(s): 2df441b

Upload supabase_storage.py

Browse files
Files changed (1) hide show
  1. supabase_storage.py +46 -84
supabase_storage.py CHANGED
@@ -11,8 +11,10 @@ import hashlib
11
  import numpy as np
12
  from typing import Optional, List, Dict, Any
13
 
14
- # Supabase credentials
15
- SUPABASE_URL = os.environ.get('SUPABASE_URL', 'https://cqnyibiopjcwuxmyqbgy.supabase.co')
 
 
16
  SUPABASE_KEY = os.environ.get('SUPABASE_KEY', '')
17
  BUCKET_NAME = 'datasets'
18
 
@@ -49,9 +51,9 @@ def is_supabase_available() -> bool:
49
 
50
  def _get_dataset_registry(client) -> List[str]:
51
  """
52
- DEPRECATED: Registry file is no longer used.
53
- Datasets are now discovered by listing folders directly via list_datasets_from_supabase().
54
- Keeping this function for backwards compatibility only.
55
  """
56
  try:
57
  storage = client.storage.from_(BUCKET_NAME)
@@ -71,11 +73,7 @@ def _get_dataset_registry(client) -> List[str]:
71
 
72
 
73
  def _update_dataset_registry(client, dataset_name: str, action: str = 'add'):
74
- """
75
- DEPRECATED: Registry file is no longer used.
76
- Datasets are now discovered by listing folders directly.
77
- Keeping this function for backwards compatibility only.
78
- """
79
  try:
80
  storage = client.storage.from_(BUCKET_NAME)
81
 
@@ -170,7 +168,8 @@ def save_dataset_to_supabase(
170
  )
171
  print(f"[Supabase] Uploaded {metadata_path}")
172
 
173
- # No registry update needed - datasets are discovered by listing folders directly
 
174
 
175
  print(f"[Supabase] Dataset '{dataset_name}' saved successfully")
176
  return True
@@ -226,103 +225,65 @@ def load_dataset_from_supabase(dataset_name: str) -> Optional[Dict[str, Any]]:
226
 
227
  def list_datasets_from_supabase() -> List[Dict[str, Any]]:
228
  """
229
- List all datasets stored in Supabase by listing folders directly.
230
- No registry file needed - just lists all folders in the bucket.
231
 
232
  Returns:
233
  List of dataset metadata dictionaries
234
  """
235
  client = get_supabase_client()
236
  if not client:
237
- print("[Supabase] Client not available")
238
  return []
239
 
240
  try:
241
  storage = client.storage.from_(BUCKET_NAME)
242
- dataset_names = []
243
 
244
- # Method 1: Try storage.list() with empty path
245
- try:
246
- items = storage.list(path="")
247
- print(f"[Supabase] Method 1 (list path=''): {len(items)} items")
248
- for item in items:
249
- name = item.get('name', '')
250
- if name and not name.startswith('_') and not name.startswith('.'):
251
- item_metadata = item.get('metadata')
252
- is_file = item_metadata and item_metadata.get('mimetype')
253
- if not is_file:
254
- dataset_names.append(name)
255
- print(f"[Supabase] Found via list(): {name}")
256
- except Exception as e:
257
- print(f"[Supabase] Method 1 failed: {e}")
258
 
259
- # Method 2: If list() returned nothing, try listing without path argument
260
- if not dataset_names:
261
- try:
262
- items = storage.list()
263
- print(f"[Supabase] Method 2 (list no args): {len(items)} items")
264
- for item in items:
265
- name = item.get('name', '')
266
- if name and not name.startswith('_') and not name.startswith('.'):
267
- item_metadata = item.get('metadata')
268
- is_file = item_metadata and item_metadata.get('mimetype')
269
- if not is_file:
270
- dataset_names.append(name)
271
- print(f"[Supabase] Found via list(): {name}")
272
- except Exception as e:
273
- print(f"[Supabase] Method 2 failed: {e}")
274
 
275
- # Method 3: Fallback - read from registry file if exists
276
- if not dataset_names:
277
- print("[Supabase] List methods returned empty, trying registry fallback...")
278
- try:
279
- response = storage.download("_registry.json")
280
- registry = json.loads(response.decode('utf-8'))
281
- dataset_names = registry.get('datasets', [])
282
- print(f"[Supabase] Found via registry: {dataset_names}")
283
- except Exception as e:
284
- print(f"[Supabase] Registry fallback failed: {e}")
285
-
286
- # Method 4: Ultimate fallback - probe for known dataset names
287
- if not dataset_names:
288
- print("[Supabase] Trying direct probe for datasets...")
289
- potential_names = ['testing', 'only_ariya___siglip', 'onlyariya_clip']
290
- for name in potential_names:
291
- try:
292
- storage.download(f"{name}/metadata.json")
293
- dataset_names.append(name)
294
- print(f"[Supabase] Found via probe: {name}")
295
- except:
296
- pass
297
-
298
- print(f"[Supabase] Total datasets found: {len(dataset_names)} - {dataset_names}")
299
 
300
- datasets = []
301
- for folder_name in dataset_names:
302
  try:
 
303
  metadata_path = f"{folder_name}/metadata.json"
304
  metadata_response = storage.download(metadata_path)
305
- metadata = json.loads(metadata_response.decode('utf-8'))
306
- metadata['folder_name'] = folder_name
307
- metadata['source'] = 'supabase'
308
  datasets.append(metadata)
309
- print(f"[Supabase] Loaded metadata for {folder_name}")
 
310
  except Exception as e:
311
- print(f"[Supabase] Could not load metadata for {folder_name}: {e}")
312
- # Add basic info without full metadata
313
  datasets.append({
314
- 'name': folder_name,
315
- 'folder_name': folder_name,
316
- 'source': 'supabase',
317
- 'total_photos': 0,
318
- 'created_at': None
319
  })
320
 
321
- print(f"[Supabase] Found {len(datasets)} datasets")
322
  return datasets
323
 
324
  except Exception as e:
325
- print(f"[Supabase] Error listing datasets: {e}")
326
  import traceback
327
  traceback.print_exc()
328
  return []
@@ -354,7 +315,8 @@ def delete_dataset_from_supabase(dataset_name: str) -> bool:
354
  client.storage.from_(BUCKET_NAME).remove(file_paths)
355
  print(f"[Supabase] Deleted {len(file_paths)} files from '{dataset_name}'")
356
 
357
- # No registry update needed - datasets are discovered by listing folders directly
 
358
 
359
  print(f"[Supabase] Dataset '{dataset_name}' deleted successfully")
360
  return True
 
11
  import numpy as np
12
  from typing import Optional, List, Dict, Any
13
 
14
# Supabase credentials. The storage API requires the project URL to end in a
# trailing slash, so normalize whatever the environment (or the default) gives us.
SUPABASE_URL = os.environ.get('SUPABASE_URL', 'https://cqnyibiopjcwuxmyqbgy.supabase.co/')
if not SUPABASE_URL.endswith('/'):
    SUPABASE_URL += '/'
# Service key is read from the environment; empty string means "not configured".
SUPABASE_KEY = os.environ.get('SUPABASE_KEY', '')
BUCKET_NAME = 'datasets'
20
 
 
51
 
52
  def _get_dataset_registry(client) -> List[str]:
53
  """
54
+ Get the list of dataset names from the registry file.
55
+ Returns None if there's an error reading (to prevent accidental overwrite).
56
+ Returns [] only if file doesn't exist yet.
57
  """
58
  try:
59
  storage = client.storage.from_(BUCKET_NAME)
 
73
 
74
 
75
  def _update_dataset_registry(client, dataset_name: str, action: str = 'add'):
76
+ """Update the registry file with dataset names."""
 
 
 
 
77
  try:
78
  storage = client.storage.from_(BUCKET_NAME)
79
 
 
168
  )
169
  print(f"[Supabase] Uploaded {metadata_path}")
170
 
171
+ # 4. Update the registry file (list of all dataset names)
172
+ _update_dataset_registry(client, dataset_name, action='add')
173
 
174
  print(f"[Supabase] Dataset '{dataset_name}' saved successfully")
175
  return True
 
225
 
226
def list_datasets_from_supabase() -> List[Dict[str, Any]]:
    """
    List all datasets by scanning folders directly in Supabase Storage.

    Each top-level folder in the bucket is treated as one dataset; its
    ``metadata.json`` (when present) supplies the dataset's metadata. No
    registry file is consulted for listing.

    Returns:
        List of dataset metadata dictionaries. Each dict carries at least
        'folder_name' and 'source' keys. Returns an empty list when the
        client is unavailable or any listing error occurs.
    """
    client = get_supabase_client()
    if not client:
        print("[Supabase] Client not available", flush=True)
        return []

    try:
        storage = client.storage.from_(BUCKET_NAME)

        # List root of bucket - each folder is a dataset
        items = storage.list(path="")
        print(f"[Supabase] storage.list() returned {len(items)} items", flush=True)

        datasets = []
        for item in items:
            name = item.get("name", "")
            if not name:
                continue

            # Skip hidden files and registry file
            if name.startswith("_") or name.startswith("."):
                print(f"[Supabase] Skipping: {name}", flush=True)
                continue

            # Skip plain files sitting at the bucket root: Supabase's list()
            # reports a mimetype inside 'metadata' for files, while folders
            # come back with no metadata. Without this check a stray root-level
            # file would be misreported as an empty dataset.
            item_meta = item.get("metadata")
            if item_meta and item_meta.get("mimetype"):
                print(f"[Supabase] Skipping file: {name}", flush=True)
                continue

            folder_name = name
            print(f"[Supabase] Found folder: {folder_name}", flush=True)

            try:
                # Load metadata.json to verify it's a valid dataset
                metadata_path = f"{folder_name}/metadata.json"
                metadata_response = storage.download(metadata_path)
                metadata = json.loads(metadata_response.decode("utf-8"))
                metadata["folder_name"] = folder_name
                metadata["source"] = "supabase"
                datasets.append(metadata)
                print(f"[Supabase] Loaded dataset: {folder_name}", flush=True)

            except Exception as e:
                # Still add as dataset even without metadata (best-effort:
                # the folder exists, so expose it with placeholder fields).
                print(f"[Supabase] No metadata for {folder_name}: {e}", flush=True)
                datasets.append({
                    "name": folder_name,
                    "folder_name": folder_name,
                    "source": "supabase",
                    "total_photos": 0,
                    "created_at": None
                })

        print(f"[Supabase] Total datasets found: {len(datasets)}", flush=True)
        return datasets

    except Exception as e:
        print(f"[Supabase] Error listing datasets: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return []
 
315
  client.storage.from_(BUCKET_NAME).remove(file_paths)
316
  print(f"[Supabase] Deleted {len(file_paths)} files from '{dataset_name}'")
317
 
318
+ # Remove from registry
319
+ _update_dataset_registry(client, dataset_name, action='remove')
320
 
321
  print(f"[Supabase] Dataset '{dataset_name}' deleted successfully")
322
  return True