rethinks committed on
Commit
c0a1bbe
·
verified ·
1 Parent(s): cd40c5f

Upload supabase_storage.py

Browse files
Files changed (1) hide show
  1. supabase_storage.py +29 -31
supabase_storage.py CHANGED
@@ -49,9 +49,9 @@ def is_supabase_available() -> bool:
49
 
50
  def _get_dataset_registry(client) -> List[str]:
51
  """
52
- Get the list of dataset names from the registry file.
53
- Returns None if there's an error reading (to prevent accidental overwrite).
54
- Returns [] only if file doesn't exist yet.
55
  """
56
  try:
57
  storage = client.storage.from_(BUCKET_NAME)
@@ -71,7 +71,11 @@ def _get_dataset_registry(client) -> List[str]:
71
 
72
 
73
  def _update_dataset_registry(client, dataset_name: str, action: str = 'add'):
74
- """Update the registry file with dataset names."""
 
 
 
 
75
  try:
76
  storage = client.storage.from_(BUCKET_NAME)
77
 
@@ -166,8 +170,7 @@ def save_dataset_to_supabase(
166
  )
167
  print(f"[Supabase] Uploaded {metadata_path}")
168
 
169
- # 4. Update the registry file (list of all dataset names)
170
- _update_dataset_registry(client, dataset_name, action='add')
171
 
172
  print(f"[Supabase] Dataset '{dataset_name}' saved successfully")
173
  return True
@@ -223,7 +226,8 @@ def load_dataset_from_supabase(dataset_name: str) -> Optional[Dict[str, Any]]:
223
 
224
  def list_datasets_from_supabase() -> List[Dict[str, Any]]:
225
  """
226
- List all datasets stored in Supabase.
 
227
 
228
  Returns:
229
  List of dataset metadata dictionaries
@@ -236,28 +240,23 @@ def list_datasets_from_supabase() -> List[Dict[str, Any]]:
236
  try:
237
  storage = client.storage.from_(BUCKET_NAME)
238
 
239
- # Get dataset names from registry
240
- dataset_names = _get_dataset_registry(client)
241
- print(f"[Supabase] Registry contains: {dataset_names}")
242
-
243
- # If registry read failed (None), return empty to be safe
244
- if dataset_names is None:
245
- print("[Supabase] Could not read registry, returning empty list")
246
- return []
247
-
248
- # If registry is empty, try to find existing datasets by checking known names
249
- # This handles the case where datasets were saved before registry was implemented
250
- if not dataset_names:
251
- print("[Supabase] Registry empty, checking for existing datasets...")
252
- # Try some known/common dataset names
253
- potential_names = ['testing']
254
- for name in potential_names:
255
- try:
256
- storage.download(f"{name}/metadata.json")
257
- dataset_names.append(name)
258
- print(f"[Supabase] Found existing dataset: {name}")
259
- except Exception:
260
- pass
261
 
262
  datasets = []
263
  for folder_name in dataset_names:
@@ -316,8 +315,7 @@ def delete_dataset_from_supabase(dataset_name: str) -> bool:
316
  client.storage.from_(BUCKET_NAME).remove(file_paths)
317
  print(f"[Supabase] Deleted {len(file_paths)} files from '{dataset_name}'")
318
 
319
- # Remove from registry
320
- _update_dataset_registry(client, dataset_name, action='remove')
321
 
322
  print(f"[Supabase] Dataset '{dataset_name}' deleted successfully")
323
  return True
 
49
 
50
  def _get_dataset_registry(client) -> List[str]:
51
  """
52
+ DEPRECATED: Registry file is no longer used.
53
+ Datasets are now discovered by listing folders directly via list_datasets_from_supabase().
54
+ Keeping this function for backwards compatibility only.
55
  """
56
  try:
57
  storage = client.storage.from_(BUCKET_NAME)
 
71
 
72
 
73
  def _update_dataset_registry(client, dataset_name: str, action: str = 'add'):
74
+ """
75
+ DEPRECATED: Registry file is no longer used.
76
+ Datasets are now discovered by listing folders directly.
77
+ Keeping this function for backwards compatibility only.
78
+ """
79
  try:
80
  storage = client.storage.from_(BUCKET_NAME)
81
 
 
170
  )
171
  print(f"[Supabase] Uploaded {metadata_path}")
172
 
173
+ # No registry update needed - datasets are discovered by listing folders directly
 
174
 
175
  print(f"[Supabase] Dataset '{dataset_name}' saved successfully")
176
  return True
 
226
 
227
  def list_datasets_from_supabase() -> List[Dict[str, Any]]:
228
  """
229
+ List all datasets stored in Supabase by listing folders directly.
230
+ No registry file needed - just lists all folders in the bucket.
231
 
232
  Returns:
233
  List of dataset metadata dictionaries
 
240
  try:
241
  storage = client.storage.from_(BUCKET_NAME)
242
 
243
+ # List all items in the bucket root
244
+ items = storage.list()
245
+ print(f"[Supabase] Bucket contains {len(items)} items")
246
+
247
+ # Filter to get only folders (datasets), excluding files like _registry.json
248
+ # Folders have metadata.id = None or have no 'metadata' key with 'mimetype'
249
+ dataset_names = []
250
+ for item in items:
251
+ name = item.get('name', '')
252
+ # Skip hidden files and registry
253
+ if name.startswith('_') or name.startswith('.'):
254
+ continue
255
+ # Check if it's a folder (no metadata.mimetype means it's a folder)
256
+ item_metadata = item.get('metadata')
257
+ if item_metadata is None or item_metadata.get('mimetype') is None:
258
+ dataset_names.append(name)
259
+ print(f"[Supabase] Found dataset folder: {name}")
 
 
 
 
 
260
 
261
  datasets = []
262
  for folder_name in dataset_names:
 
315
  client.storage.from_(BUCKET_NAME).remove(file_paths)
316
  print(f"[Supabase] Deleted {len(file_paths)} files from '{dataset_name}'")
317
 
318
+ # No registry update needed - datasets are discovered by listing folders directly
 
319
 
320
  print(f"[Supabase] Dataset '{dataset_name}' deleted successfully")
321
  return True