Spaces:
Sleeping
Sleeping
Ara Yeroyan
committed on
Commit
·
39edab4
1
Parent(s):
54bf55f
add upload debug
Browse files- upload_to_gemini_filestore.py +65 -17
upload_to_gemini_filestore.py
CHANGED
|
@@ -74,29 +74,53 @@ def extract_metadata_from_path(file_path: Path) -> Dict[str, Any]:
|
|
| 74 |
|
| 75 |
def get_or_create_filestore(client: genai.Client, store_name: Optional[str] = None) -> str:
|
| 76 |
"""Get existing file search store or create a new one."""
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
display_name = store_name or "Audit Reports"
|
| 90 |
-
print(f"π Creating new file search store: '{display_name}'...")
|
| 91 |
|
| 92 |
try:
|
| 93 |
file_search_store = client.file_search_stores.create(
|
| 94 |
config={'display_name': display_name}
|
| 95 |
)
|
| 96 |
-
|
|
|
|
|
|
|
| 97 |
return file_search_store.name
|
| 98 |
except Exception as e:
|
| 99 |
-
print(f"β Failed to create store: {e}")
|
| 100 |
raise
|
| 101 |
|
| 102 |
|
|
@@ -207,7 +231,21 @@ def upload_file_to_store(
|
|
| 207 |
# Metadata would need to be added via a separate API call after upload, if supported
|
| 208 |
# For now, we upload without metadata - the filename in display_name contains the info
|
| 209 |
upload_params['config'] = config
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
# Wait for import to complete
|
| 213 |
max_wait = 300 # 5 minutes max per file
|
|
@@ -324,7 +362,17 @@ def main():
|
|
| 324 |
# Get or create file search store
|
| 325 |
print(f"\nπ¦ Setting up file search store...")
|
| 326 |
try:
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
except Exception as e:
|
| 329 |
print(f" β Failed to setup store: {e}")
|
| 330 |
return 1
|
|
|
|
| 74 |
|
| 75 |
def _store_id(resource_name: str) -> str:
    """Return the last path segment of a store resource name (the bare store ID)."""
    return resource_name.rsplit("/", 1)[-1]


def _matches_store(store, wanted: str) -> bool:
    """Return True if *wanted* identifies *store* by resource name, ID, or display name."""
    full_name = store.name or ""
    return (
        full_name == wanted
        # Suffix matches must start on a path-segment boundary, so "reports"
        # matches "fileSearchStores/reports" but not
        # "fileSearchStores/other-reports". This also covers the bare-ID case.
        or full_name.endswith("/" + wanted)
        or store.display_name == wanted
    )


def get_or_create_filestore(client: genai.Client, store_name: Optional[str] = None) -> str:
    """Return the resource name of a file search store, creating one if needed.

    Resolution order:

    1. If ``store_name`` is given, reuse the first listed store whose full
       resource name (e.g. ``"fileSearchStores/xxx"``), trailing path
       segment(s) (e.g. the bare ID ``"xxx"``), or display name equals it.
    2. If ``store_name`` is empty/None and at least one store is listed,
       reuse the last store in the listing.
    3. Otherwise create a new store whose display name is ``store_name`` or
       ``"Audit Reports"``.

    Listing is best-effort: if it fails, the error is printed and the
    function falls through to creating a new store.

    Args:
        client: Client exposing ``file_search_stores.list()`` and
            ``file_search_stores.create(config=...)``.
        store_name: Full resource name, bare store ID, or display name of the
            store to reuse; also used as the display name of a newly created
            store.

    Returns:
        The ``name`` attribute of the selected or newly created store.

    Raises:
        Exception: Whatever ``file_search_stores.create`` raises; it is
            printed and then re-raised unchanged.
    """
    # Only the remote listing call is guarded: a failure here must not stop
    # the upload, it just means we cannot reuse an existing store.
    try:
        stores = list(client.file_search_stores.list())
    except Exception as e:
        print(f" ⚠️ Could not list stores: {e}")
        print(" Will create new store...")
        stores = []
    else:
        print(f" Found {len(stores)} existing store(s)")

    if stores:
        if store_name:
            chosen = next((s for s in stores if _matches_store(s, store_name)), None)
            label = "existing"
        else:
            # NOTE(review): treats the last listed store as the most recent
            # one; the listing order is not established here - confirm
            # against the API before relying on it.
            chosen = stores[-1]
            label = "most recent"

        # A store without a resource name cannot be used for uploads, so it
        # is ignored and a new store is created instead.
        if chosen is not None and chosen.name:
            print(f" ✅ Using {label} store: {chosen.display_name} ({chosen.name})")
            print(f" 💡 Store ID: {_store_id(chosen.name)} (use this in GEMINI_FILESTORE_NAME)")
            return chosen.name

    # No reusable store was found (or listing failed): create a new one.
    display_name = store_name or "Audit Reports"
    print(f" Creating new file search store: '{display_name}'...")

    try:
        file_search_store = client.file_search_stores.create(
            config={'display_name': display_name}
        )
    except Exception as e:
        print(f" ❌ Failed to create store: {e}")
        raise

    print(f" ✅ Created store: {file_search_store.display_name} ({file_search_store.name})")
    print(f" 💡 Store ID: {_store_id(file_search_store.name)} (use this in GEMINI_FILESTORE_NAME)")
    return file_search_store.name
|
| 125 |
|
| 126 |
|
|
|
|
| 231 |
# Metadata would need to be added via a separate API call after upload, if supported
|
| 232 |
# For now, we upload without metadata - the filename in display_name contains the info
|
| 233 |
upload_params['config'] = config
|
| 234 |
+
|
| 235 |
+
try:
|
| 236 |
+
operation = client.file_search_stores.upload_to_file_search_store(**upload_params)
|
| 237 |
+
except Exception as upload_error:
|
| 238 |
+
error_str = str(upload_error).lower()
|
| 239 |
+
error_msg = str(upload_error)
|
| 240 |
+
|
| 241 |
+
# Check if it's a "terminated" or "already exists" error
|
| 242 |
+
if 'terminated' in error_str or 'already' in error_str or '400' in error_str:
|
| 243 |
+
print(f" β οΈ Upload error: File may already exist or upload was interrupted")
|
| 244 |
+
print(f" π‘ Error details: {error_msg}")
|
| 245 |
+
print(f" π‘ Skipping this file")
|
| 246 |
+
return None # Return None to indicate "skipped"
|
| 247 |
+
# Re-raise if it's a different error
|
| 248 |
+
raise
|
| 249 |
|
| 250 |
# Wait for import to complete
|
| 251 |
max_wait = 300 # 5 minutes max per file
|
|
|
|
| 362 |
# Get or create file search store
|
| 363 |
print(f"\nπ¦ Setting up file search store...")
|
| 364 |
try:
|
| 365 |
+
store_name_full = get_or_create_filestore(client, store_name)
|
| 366 |
+
# Store the full name for upload, but also save just the ID for .env reference
|
| 367 |
+
# The full name is like "fileSearchStores/audit-reports-xxx"
|
| 368 |
+
# For API calls, we need just the ID part (after fileSearchStores/)
|
| 369 |
+
if store_name_full.startswith("fileSearchStores/"):
|
| 370 |
+
store_id = store_name_full.split("/", 1)[1]
|
| 371 |
+
print(f" π‘ Store ID (for GEMINI_FILESTORE_NAME env var): {store_id}")
|
| 372 |
+
else:
|
| 373 |
+
store_id = store_name_full
|
| 374 |
+
# Use full name for upload operations
|
| 375 |
+
store_name = store_name_full
|
| 376 |
except Exception as e:
|
| 377 |
print(f" β Failed to setup store: {e}")
|
| 378 |
return 1
|