James Edmunds committed on
Commit ·
a0ddd95
1
Parent(s): fecd36c
revert: restore original upload script
Browse files - scripts/upload_embeddings.py +9 -73
scripts/upload_embeddings.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
"""Upload embeddings to HuggingFace Space"""
|
| 2 |
import sys
|
| 3 |
-
import os
|
| 4 |
-
import time
|
| 5 |
from pathlib import Path
|
| 6 |
-
from huggingface_hub import HfApi
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
|
| 9 |
# Add parent directory to path
|
|
@@ -33,57 +31,6 @@ def verify_space_access():
|
|
| 33 |
return False
|
| 34 |
|
| 35 |
|
| 36 |
-
def wait_for_file_upload(api: HfApi, file_path: Path, repo_path: str):
|
| 37 |
-
"""Wait for a file to appear in the repository"""
|
| 38 |
-
max_attempts = 30 # 5 minutes total
|
| 39 |
-
for attempt in range(max_attempts):
|
| 40 |
-
try:
|
| 41 |
-
# Check if file exists in repo
|
| 42 |
-
files = api.list_repo_files(
|
| 43 |
-
repo_id=Settings.HF_SPACE,
|
| 44 |
-
repo_type="space"
|
| 45 |
-
)
|
| 46 |
-
if repo_path in files:
|
| 47 |
-
return True
|
| 48 |
-
|
| 49 |
-
print(f"Waiting for {file_path.name} to appear in repository... ({attempt + 1}/{max_attempts})")
|
| 50 |
-
time.sleep(10) # Wait 10 seconds between checks
|
| 51 |
-
|
| 52 |
-
except Exception as e:
|
| 53 |
-
print(f"Error checking file status: {str(e)}")
|
| 54 |
-
|
| 55 |
-
return False
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def upload_large_file(api: HfApi, file_path: Path, repo_path: str):
|
| 59 |
-
"""Upload a single large file to the Space"""
|
| 60 |
-
file_size = file_path.stat().st_size / (1024 * 1024) # Size in MB
|
| 61 |
-
print(f"Uploading {file_path.name} ({file_size:.2f} MB)...")
|
| 62 |
-
|
| 63 |
-
# Create the commit operation
|
| 64 |
-
operation = CommitOperationAdd(
|
| 65 |
-
path_in_repo=str(repo_path),
|
| 66 |
-
path_or_fileobj=str(file_path)
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
# Upload the file
|
| 70 |
-
api.create_commit(
|
| 71 |
-
repo_id=Settings.HF_SPACE,
|
| 72 |
-
repo_type="space",
|
| 73 |
-
operations=[operation],
|
| 74 |
-
commit_message=f"Upload {file_path.name}" # Removed async flag
|
| 75 |
-
)
|
| 76 |
-
|
| 77 |
-
# Wait for file to appear in repository
|
| 78 |
-
if file_size > 100: # Only wait for files larger than 100MB
|
| 79 |
-
if wait_for_file_upload(api, file_path, repo_path):
|
| 80 |
-
print(f"Successfully uploaded and verified {file_path.name}")
|
| 81 |
-
else:
|
| 82 |
-
raise RuntimeError(f"Failed to verify upload of {file_path.name}")
|
| 83 |
-
else:
|
| 84 |
-
print(f"Successfully uploaded {file_path.name}")
|
| 85 |
-
|
| 86 |
-
|
| 87 |
def main():
|
| 88 |
"""Upload embeddings directory to HuggingFace Space"""
|
| 89 |
if not Settings.HF_TOKEN:
|
|
@@ -105,25 +52,14 @@ def main():
|
|
| 105 |
|
| 106 |
print(f"Uploading to Space: {Settings.HF_SPACE}...")
|
| 107 |
try:
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
repo_path = f"data/processed/embeddings/{rel_path}"
|
| 117 |
-
|
| 118 |
-
try:
|
| 119 |
-
# Upload the file
|
| 120 |
-
upload_large_file(api, file_path, repo_path)
|
| 121 |
-
except Exception as e:
|
| 122 |
-
print(f"Error uploading {file_path.name}: {str(e)}")
|
| 123 |
-
if "storage" in str(e).lower():
|
| 124 |
-
print(f"\nFile size: {file_path.stat().st_size / (1024*1024):.2f} MB")
|
| 125 |
-
raise
|
| 126 |
-
|
| 127 |
print("Upload complete!")
|
| 128 |
except Exception as e:
|
| 129 |
print(f"Error during upload: {str(e)}")
|
|
|
|
| 1 |
"""Upload embeddings to HuggingFace Space"""
|
| 2 |
import sys
|
|
|
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
+
from huggingface_hub import HfApi
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
|
| 7 |
# Add parent directory to path
|
|
|
|
| 31 |
return False
|
| 32 |
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def main():
|
| 35 |
"""Upload embeddings directory to HuggingFace Space"""
|
| 36 |
if not Settings.HF_TOKEN:
|
|
|
|
| 52 |
|
| 53 |
print(f"Uploading to Space: {Settings.HF_SPACE}...")
|
| 54 |
try:
|
| 55 |
+
api.upload_folder(
|
| 56 |
+
folder_path=str(Settings.EMBEDDINGS_DIR),
|
| 57 |
+
repo_id=Settings.HF_SPACE,
|
| 58 |
+
repo_type="space",
|
| 59 |
+
path_in_repo="data/processed/embeddings",
|
| 60 |
+
ignore_patterns=["*.pyc", "__pycache__", ".DS_Store"],
|
| 61 |
+
commit_message="Upload embeddings to Space storage"
|
| 62 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
print("Upload complete!")
|
| 64 |
except Exception as e:
|
| 65 |
print(f"Error during upload: {str(e)}")
|