| | |
| | """ |
| | Upload KeyVID model to Hugging Face Hub (Optimized for Speed) |
| | """ |
| |
|
| | from pathlib import Path |
| | from huggingface_hub import HfApi, login, upload_folder |
| | import os |
| | from concurrent.futures import ThreadPoolExecutor, as_completed |
| | from tqdm import tqdm |
| | import time |
| |
|
| | |
# Target repository on the Hugging Face Hub; passed as ``repo_id`` to every
# upload call made by ``main``.
MODEL_ID = "RyanWW/KeyVID"

# Local directory whose contents are scanned and uploaded.
KEYVID_PATH = "/dockerx/groups/KeyVID_hf_model"

# Thread-pool size for the per-file fallback upload path in ``main``.
MAX_WORKERS = 8

# 100 MiB expressed in bytes.
# NOTE(review): not referenced anywhere in the visible part of this file.
CHUNK_SIZE = 100 * 1024 ** 2
| |
|
def should_exclude_file(file_path, exclude_patterns):
    """Return True when *file_path* matches any of *exclude_patterns*.

    Patterns are shell-style globs (``fnmatch`` syntax) and are interpreted
    as follows:

    * ``"name/"`` (trailing slash) -- matches only directories: the file is
      excluded when one of its parent directories matches ``name``.
    * ``"a/b"`` (slash inside) -- matched against the whole path, written
      with forward slashes.
    * anything else (``".git"``, ``"*.pyc"``) -- the file is excluded when
      any single path component (a directory name or the file name) matches.

    Matching whole components instead of raw substrings keeps look-alike
    names from being dropped: ``".git"`` no longer hides ``.gitattributes``
    and ``"*.log"`` no longer hides ``model.logits.bin``.

    Args:
        file_path: Path of the file, as a ``str`` or a ``pathlib`` path.
        exclude_patterns: Iterable of pattern strings.

    Returns:
        bool: True if the file should be skipped, False otherwise.
    """
    # Function-scope imports keep this block self-contained.
    from fnmatch import fnmatchcase
    from pathlib import PurePath

    components = PurePath(file_path).parts
    directories = components[:-1]
    posix_path = "/".join(components)

    for pattern in exclude_patterns:
        if pattern.endswith("/"):
            directory_glob = pattern.rstrip("/")
            if "/" in directory_glob:
                # Multi-level directory pattern: test every leading prefix
                # of the parent directories.
                prefixes = (
                    "/".join(directories[:depth])
                    for depth in range(1, len(directories) + 1)
                )
                if any(fnmatchcase(prefix, directory_glob) for prefix in prefixes):
                    return True
            elif any(fnmatchcase(part, directory_glob) for part in directories):
                return True
        elif "/" in pattern:
            if fnmatchcase(posix_path, pattern):
                return True
        elif any(fnmatchcase(part, pattern) for part in components):
            return True
    return False
| |
|
def get_files_to_upload(keyvid_dir, exclude_patterns):
    """Collect the files under *keyvid_dir* that are not excluded.

    Walks the directory tree recursively, skips anything that is not a
    regular file, and skips files for which ``should_exclude_file`` returns
    True for their path relative to *keyvid_dir*.

    Args:
        keyvid_dir: ``pathlib.Path`` of the directory to scan.
        exclude_patterns: Patterns forwarded to ``should_exclude_file``.

    Returns:
        tuple: ``(files, total_size)`` where ``files`` is a list of
        ``(relative_path, size_in_bytes)`` pairs and ``total_size`` is the
        sum of those sizes.
    """
    print("🔍 Scanning files...")

    selected = []
    for entry in tqdm(keyvid_dir.rglob("*"), desc="Scanning"):
        if not entry.is_file():
            continue
        relative = entry.relative_to(keyvid_dir)
        if should_exclude_file(relative, exclude_patterns):
            continue
        selected.append((relative, entry.stat().st_size))

    combined_size = sum(size for _, size in selected)
    return selected, combined_size
| |
|
def format_size(size_bytes):
    """Render a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the units
    ``B``/``KB``/``MB``/``GB``/``TB`` are exhausted, in which case the
    remainder is reported in ``PB``.

    Args:
        size_bytes: Size in bytes (int or float).

    Returns:
        str: For example ``"1.50 KB"``.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = size_bytes
    step = 0
    while step < len(units) and not (value < 1024.0):
        value /= 1024.0
        step += 1
    suffix = units[step] if step < len(units) else "PB"
    return f"{value:.2f} {suffix}"
| |
|
def upload_file_wrapper(args):
    """Upload one file and report the outcome instead of raising.

    Takes a single tuple so it can be submitted directly to an executor.

    Args:
        args: ``(api, keyvid_dir, file_path, repo_id)`` where ``api``
            provides ``upload_file``, ``keyvid_dir`` is the local root
            directory, ``file_path`` is the path relative to that root
            (also used as the path inside the repo), and ``repo_id`` is the
            target repository.

    Returns:
        tuple: ``(file_path, True, None)`` on success, or
        ``(file_path, False, error_message)`` if the upload raised.
    """
    api, keyvid_dir, file_path, repo_id = args
    try:
        local_path = keyvid_dir / file_path
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=str(file_path),
            repo_id=repo_id,
            repo_type="model",
            commit_message="Upload KeyVID model files",
        )
    except Exception as exc:
        # Failures are returned, not raised, so one bad file does not
        # abort the caller's loop over all results.
        outcome = (file_path, False, str(exc))
    else:
        outcome = (file_path, True, None)
    return outcome
| |
|
def _hub_ignore_patterns(exclude_patterns):
    """Expand scan-time exclude patterns into globs for ``upload_folder``.

    ``upload_folder`` filters repo-relative paths with fnmatch-style globs
    (see the huggingface_hub documentation), where a ``**/name`` pattern
    requires at least one ``/`` and therefore misses root-level entries.
    Each name is emitted both bare and with a ``*/`` prefix so it is caught
    at the root and at any depth, as a file and as a directory.
    """
    globs = []
    for pattern in exclude_patterns:
        name = pattern.rstrip("/")
        if not name:
            continue
        if not pattern.endswith("/"):
            # Without a trailing slash the name may refer to a file.
            globs.extend((name, f"*/{name}"))
        globs.extend((f"{name}/*", f"*/{name}/*"))
    return globs


def _upload_files_concurrently(api, keyvid_dir, files_to_upload):
    """Upload every file on a thread pool; return ``[(path, error), ...]`` for failures."""
    failed = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(
                upload_file_wrapper, (api, keyvid_dir, file_path, MODEL_ID)
            )
            for file_path, _ in files_to_upload
        ]
        with tqdm(total=len(futures), desc="Uploading") as pbar:
            for future in as_completed(futures):
                file_path, success, error = future.result()
                if not success:
                    failed.append((file_path, error))
                pbar.update(1)
    return failed


def main():
    """Scan ``KEYVID_PATH`` and upload its contents to ``MODEL_ID``.

    Steps:
        1. Verify Hugging Face credentials with ``HfApi.whoami()``.
        2. Scan the directory, print file count and total size, and ask
           for confirmation on stdin.
        3. Upload with ``upload_folder``; if that raises, fall back to
           uploading each file individually on a thread pool.

    Returns early (with a message) when authentication fails, the directory
    is missing, nothing is left to upload, or the user declines.
    """
    print("🚀 Starting KeyVID upload to Hugging Face (Optimized)...")
    print(f"Repository: {MODEL_ID}")

    # Constructing HfApi does not validate credentials, so ask the Hub who
    # we are before claiming that authentication works.
    try:
        api = HfApi()
        api.whoami()
    except Exception as e:
        print("⚠️ Need to authenticate with Hugging Face")
        print("Please run: huggingface-cli login")
        print("Or set HF_TOKEN environment variable")
        print(f"Reason: {e}")
        return
    print("✅ Hugging Face authentication found")

    keyvid_dir = Path(KEYVID_PATH)

    # is_dir() also rejects a regular file sitting at that path.
    if not keyvid_dir.is_dir():
        print(f"❌ Error: KeyVID directory not found at {KEYVID_PATH}")
        return

    print(f"\n📁 Directory: {keyvid_dir}")

    # Single source of truth for what is left out of the upload; the
    # upload_folder ignore list below is derived from it.
    exclude_patterns = [
        "__pycache__",
        ".git",
        "*.pyc",
        ".DS_Store",
        "save_results/",
        "*.log",
        "*.tmp",
        "error.txt",
        ".bash_history",
        ".gitignore",
        "upload.py",
    ]

    files_to_upload, total_size = get_files_to_upload(keyvid_dir, exclude_patterns)

    print("\n📊 Statistics:")
    print(f" Files to upload: {len(files_to_upload)}")
    print(f" Total size: {format_size(total_size)}")

    if not files_to_upload:
        print("⚠️ No files to upload!")
        return

    response = input("\n❓ Proceed with upload? (y/n): ").strip().lower()
    if response != 'y':
        print("❌ Upload cancelled")
        return

    # Informational only: the split is reported, not used to route uploads.
    large_file_threshold = 100 * 1024 * 1024
    large_count = sum(1 for _, size in files_to_upload if size > large_file_threshold)
    small_count = len(files_to_upload) - large_count

    print("\n📦 Upload strategy:")
    print(f" Large files (>100MB): {large_count}")
    print(f" Small files: {small_count}")
    print(f" Concurrent workers: {MAX_WORKERS}")

    print("\n⬆️ Uploading using optimized upload_folder...")
    try:
        start_time = time.time()

        # NOTE(review): multi_commits / multi_commits_verbose are kept from
        # the original call; newer huggingface_hub releases may no longer
        # accept them -- confirm against the installed version. If they are
        # rejected, the resulting exception lands in the fallback below.
        upload_folder(
            folder_path=str(keyvid_dir),
            repo_id=MODEL_ID,
            repo_type="model",
            ignore_patterns=_hub_ignore_patterns(exclude_patterns),
            commit_message="Upload KeyVID model files",
            multi_commits=True,
            multi_commits_verbose=True,
        )

        elapsed_time = time.time() - start_time
        print("\n✅ Upload complete!")
        print(f"⏱️ Time taken: {elapsed_time/60:.2f} minutes")
        print(f"🔗 View model at: https://huggingface.co/{MODEL_ID}")

    except Exception as e:
        print(f"❌ Error with upload_folder: {e}")
        print("\n📝 Falling back to concurrent file upload...")

        start_time = time.time()
        failed_files = _upload_files_concurrently(api, keyvid_dir, files_to_upload)
        elapsed_time = time.time() - start_time

        if failed_files:
            shown = failed_files[:10]
            print(f"\n⚠️ {len(failed_files)} files failed to upload:")
            for file_path, error in shown:
                print(f" - {file_path}: {error}")
            if len(failed_files) > len(shown):
                # Make the truncation visible instead of silently dropping
                # the rest of the failures.
                print(f" ... and {len(failed_files) - len(shown)} more")
        else:
            print("\n✅ All files uploaded successfully!")
        print(f"⏱️ Time taken: {elapsed_time/60:.2f} minutes")
        print(f"🔗 View model at: https://huggingface.co/{MODEL_ID}")
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|
| |
|