kmcode committed on
Commit
669cedf
·
verified ·
1 Parent(s): f48e0e0

Delete trash2.py

Browse files
Files changed (1) hide show
  1. trash2.py +0 -113
trash2.py DELETED
@@ -1,113 +0,0 @@
1
- import os
2
- import subprocess
3
- from concurrent.futures import ThreadPoolExecutor
4
-
5
# --- Configuration ---
# Tree that is scanned to decide which .safetensors files get copied.
MANIFEST_ROOT = "/root/hf_repo/weights"

# Source tree; the copy step looks for {ckpt_folder}/{filename} under it.
FSX_ROOT = "/fsx_scanline/from_eyeline/koichi/motion-aware-vid-stylization/train_ckpt/Netflix65K"

# Destination tree; files land at {alias}/{ckpt_folder}/{filename} under it.
DEST_ROOT = "/hf_repo/weights"
9
-
10
def find_copy_tasks(manifest_root=None):
    """
    Collect one copy task per ``.safetensors`` file found in the manifest tree.

    The tree is walked exactly two levels deep:
    ``{manifest_root}/{model_alias}/{ckpt_folder}/{filename}``.

    Args:
        manifest_root: Directory to scan. When omitted, the module-level
            ``MANIFEST_ROOT`` is used, so a zero-argument call behaves as
            it always did.

    Returns:
        list of (model_alias, ckpt_folder, filename) tuples, ordered by
        alias, then checkpoint folder, then filename. The list is empty
        when the root does not exist or cannot be read.
    """
    if manifest_root is None:
        manifest_root = MANIFEST_ROOT

    def _subdirs(parent):
        # Sorted so the resulting task list is reproducible between runs;
        # os.listdir makes no ordering promise.
        return sorted(
            name
            for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name))
        )

    if not os.path.exists(manifest_root):
        print(f"Error: Manifest root {manifest_root} does not exist.")
        return []

    print(f"Scanning {manifest_root} for specific safetensor files...")

    try:
        model_aliases = _subdirs(manifest_root)
    except OSError as e:
        print(f"Error reading manifest root: {e}")
        return []

    tasks = []
    for alias in model_aliases:
        alias_path = os.path.join(manifest_root, alias)

        try:
            # Checkpoint folders (e.g. AllShrink_...)
            checkpoints = _subdirs(alias_path)
        except OSError as e:
            # Still best-effort, but say which folder was skipped and why.
            print(f"Warning: skipping {alias_path}: {e}")
            continue

        for ckpt_folder in checkpoints:
            ckpt_full_path = os.path.join(alias_path, ckpt_folder)

            try:
                files_in_ckpt = sorted(os.listdir(ckpt_full_path))
            except OSError as e:
                print(f"Warning: skipping {ckpt_full_path}: {e}")
                continue

            # One task per SPECIFIC safetensors file present in the folder;
            # a folder without any simply contributes nothing.
            tasks.extend(
                (alias, ckpt_folder, filename)
                for filename in files_in_ckpt
                if filename.endswith('.safetensors')
            )

    print(f"Found {len(tasks)} specific files to copy.")
    return tasks
62
-
63
def process_copy(task, fsx_root=None, dest_root=None):
    """
    Copy a SINGLE file from the source tree to the destination with
    ``rclone copyto``.

    Args:
        task: (alias, ckpt_folder, filename) tuple.
        fsx_root: Source root. Defaults to the module-level ``FSX_ROOT``.
        dest_root: Destination root. Defaults to the module-level
            ``DEST_ROOT``.

    Returns:
        None. The outcome is reported on stdout as one of
        ``MISSING SOURCE``, ``SUCCESS``, ``ERROR`` or ``CRASHED``.
    """
    alias, ckpt_folder, filename = task

    if fsx_root is None:
        fsx_root = FSX_ROOT
    if dest_root is None:
        dest_root = DEST_ROOT

    # 1. Source file (flat structure): {fsx_root}/{ckpt_folder}/{filename}
    src_file_path = os.path.join(fsx_root, ckpt_folder, filename)

    # 2. Dest file (nested structure):
    #    {dest_root}/{alias}/{ckpt_folder}/{filename}
    dest_file_path = os.path.join(dest_root, alias, ckpt_folder, filename)

    # Nothing to do when this specific file is absent on the source side.
    if not os.path.exists(src_file_path):
        print(f"[{alias}] MISSING SOURCE: {filename} (Expected at {src_file_path})")
        return

    # 3. 'copyto' targets an exact destination path for a single file.
    cmd = [
        "rclone", "copyto",
        src_file_path,
        dest_file_path,
        "--transfers", "4",
        "--progress",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        # Worker boundary: one failed launch (e.g. rclone not installed)
        # must not take the remaining copies down with it.
        print(f"[{alias}] CRASHED on {filename}: {e}")
        return

    if result.returncode == 0:
        print(f"[{alias}] SUCCESS: {filename}")
        return

    # NOTE(review): with --progress rclone appears to route its log lines
    # through the progress display on stdout, which can leave stderr empty.
    # Fall back to stdout so the failure reason is not lost.
    details = result.stderr.strip() or result.stdout.strip()
    print(f"[{alias}] ERROR copying {filename}: {details}")
102
-
103
if __name__ == "__main__":
    # Script entry point: discover the files, then copy them in parallel.
    all_tasks = find_copy_tasks()

    # Parallel execution
    # Since we are copying individual large files, 4-8 workers is usually optimal
    if all_tasks:
        # Size the pool with the same number that is reported, so the
        # message and the executor cannot disagree.
        worker_count = min(len(all_tasks), 8)
        print(f"Starting parallel copy with {worker_count} workers...")
        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            # Leaving the 'with' block waits for every submitted copy.
            executor.map(process_copy, all_tasks)

    print("All tasks completed.")