James Edmunds committed on
Commit
a0ddd95
·
1 Parent(s): fecd36c

revert: restore original upload script

Browse files
Files changed (1) hide show
  1. scripts/upload_embeddings.py +9 -73
scripts/upload_embeddings.py CHANGED
@@ -1,9 +1,7 @@
1
  """Upload embeddings to HuggingFace Space"""
2
  import sys
3
- import os
4
- import time
5
  from pathlib import Path
6
- from huggingface_hub import HfApi, CommitOperationAdd
7
  from dotenv import load_dotenv
8
 
9
  # Add parent directory to path
@@ -33,57 +31,6 @@ def verify_space_access():
33
  return False
34
 
35
 
36
- def wait_for_file_upload(api: HfApi, file_path: Path, repo_path: str):
37
- """Wait for a file to appear in the repository"""
38
- max_attempts = 30 # 5 minutes total
39
- for attempt in range(max_attempts):
40
- try:
41
- # Check if file exists in repo
42
- files = api.list_repo_files(
43
- repo_id=Settings.HF_SPACE,
44
- repo_type="space"
45
- )
46
- if repo_path in files:
47
- return True
48
-
49
- print(f"Waiting for {file_path.name} to appear in repository... ({attempt + 1}/{max_attempts})")
50
- time.sleep(10) # Wait 10 seconds between checks
51
-
52
- except Exception as e:
53
- print(f"Error checking file status: {str(e)}")
54
-
55
- return False
56
-
57
-
58
- def upload_large_file(api: HfApi, file_path: Path, repo_path: str):
59
- """Upload a single large file to the Space"""
60
- file_size = file_path.stat().st_size / (1024 * 1024) # Size in MB
61
- print(f"Uploading {file_path.name} ({file_size:.2f} MB)...")
62
-
63
- # Create the commit operation
64
- operation = CommitOperationAdd(
65
- path_in_repo=str(repo_path),
66
- path_or_fileobj=str(file_path)
67
- )
68
-
69
- # Upload the file
70
- api.create_commit(
71
- repo_id=Settings.HF_SPACE,
72
- repo_type="space",
73
- operations=[operation],
74
- commit_message=f"Upload {file_path.name}" # Removed async flag
75
- )
76
-
77
- # Wait for file to appear in repository
78
- if file_size > 100: # Only wait for files larger than 100MB
79
- if wait_for_file_upload(api, file_path, repo_path):
80
- print(f"Successfully uploaded and verified {file_path.name}")
81
- else:
82
- raise RuntimeError(f"Failed to verify upload of {file_path.name}")
83
- else:
84
- print(f"Successfully uploaded {file_path.name}")
85
-
86
-
87
  def main():
88
  """Upload embeddings directory to HuggingFace Space"""
89
  if not Settings.HF_TOKEN:
@@ -105,25 +52,14 @@ def main():
105
 
106
  print(f"Uploading to Space: {Settings.HF_SPACE}...")
107
  try:
108
- # Upload each file individually
109
- for file_path in Settings.EMBEDDINGS_DIR.rglob('*'):
110
- if file_path.is_file() and not any(
111
- pattern in str(file_path)
112
- for pattern in ['*.pyc', '__pycache__', '.DS_Store']
113
- ):
114
- # Calculate relative path for the repo
115
- rel_path = file_path.relative_to(Settings.EMBEDDINGS_DIR)
116
- repo_path = f"data/processed/embeddings/{rel_path}"
117
-
118
- try:
119
- # Upload the file
120
- upload_large_file(api, file_path, repo_path)
121
- except Exception as e:
122
- print(f"Error uploading {file_path.name}: {str(e)}")
123
- if "storage" in str(e).lower():
124
- print(f"\nFile size: {file_path.stat().st_size / (1024*1024):.2f} MB")
125
- raise
126
-
127
  print("Upload complete!")
128
  except Exception as e:
129
  print(f"Error during upload: {str(e)}")
 
1
  """Upload embeddings to HuggingFace Space"""
2
  import sys
 
 
3
  from pathlib import Path
4
+ from huggingface_hub import HfApi
5
  from dotenv import load_dotenv
6
 
7
  # Add parent directory to path
 
31
  return False
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def main():
35
  """Upload embeddings directory to HuggingFace Space"""
36
  if not Settings.HF_TOKEN:
 
52
 
53
  print(f"Uploading to Space: {Settings.HF_SPACE}...")
54
  try:
55
+ api.upload_folder(
56
+ folder_path=str(Settings.EMBEDDINGS_DIR),
57
+ repo_id=Settings.HF_SPACE,
58
+ repo_type="space",
59
+ path_in_repo="data/processed/embeddings",
60
+ ignore_patterns=["*.pyc", "__pycache__", ".DS_Store"],
61
+ commit_message="Upload embeddings to Space storage"
62
+ )
 
 
 
 
 
 
 
 
 
 
 
63
  print("Upload complete!")
64
  except Exception as e:
65
  print(f"Error during upload: {str(e)}")