samfred2 committed on
Commit
a9d12bc
·
verified ·
1 Parent(s): 983b612

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +154 -0
main.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import zipfile
5
+ import shutil
6
+ import cv2
7
+ from pathlib import Path
8
+ from huggingface_hub import HfApi, list_repo_files, hf_hub_download
9
+
10
# ==== CONFIGURATION ====
# Hugging Face access token, read from the HF_TOKEN environment variable.
# Falls back to an empty string when the variable is unset.
# NOTE(review): an empty string (rather than None) is then handed to HfApi and
# hf_hub_download — confirm the hub client treats "" as "no token".
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Repository that is listed and downloaded from (accessed with repo_type="dataset").
SOURCE_REPO_ID = "factorstudios/movs"
# Repository that the frame archives are uploaded to (repo_type="dataset").
TARGET_REPO_ID = "factorstudios/movzip"

# Path Configuration
# Local working directories, relative to the current working directory.
DOWNLOAD_DIR = "downloads"  # downloaded source videos
FRAMES_DIR = "frames"       # one sub-directory of extracted JPEG frames per video
ZIPS_DIR = "zips"           # frame archives waiting to be uploaded

# Processing Parameters
# Default sampling rate (frames per second) used by extract_frames.
TARGET_FPS = 10
# JSON file recording which source files have already been processed.
STATE_FILE = "processing_state.json"

# Ensure directories exist
# Runs at import time; exist_ok=True makes repeated runs harmless.
for d in [DOWNLOAD_DIR, FRAMES_DIR, ZIPS_DIR]:
    os.makedirs(d, exist_ok=True)

# Shared hub client used for uploads.
api = HfApi(token=HF_TOKEN)
29
+
30
def log(message):
    """Print ``message`` to stdout, prefixed with the local time.

    The prefix has the form ``[YYYY-MM-DD HH:MM:SS]`` and is followed by a
    single space and the message.
    """
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print("[{}] {}".format(stamp, message))
32
+
33
def load_state():
    """Load the resume state from ``STATE_FILE``.

    Returns:
        A dict that always contains a ``"processed_files"`` list.  A fresh
        ``{"processed_files": []}`` is returned when the state file does not
        exist, cannot be parsed as JSON, or does not hold a JSON object.

    Raises:
        OSError: for I/O problems other than a missing file (for example a
            permission error), so they are not silently ignored.
    """
    try:
        # EAFP: open directly instead of checking existence first, which
        # avoids a race between the check and the open.
        with open(STATE_FILE, 'r', encoding='utf-8') as f:
            state = json.load(f)
    except FileNotFoundError:
        return {"processed_files": []}
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # A truncated/corrupt file (e.g. from an interrupted write) must not
        # prevent the script from starting; fall back to an empty state.
        log(f"Warning: state file {STATE_FILE} is unreadable ({exc}); starting fresh.")
        return {"processed_files": []}

    if not isinstance(state, dict):
        log(f"Warning: state file {STATE_FILE} has an unexpected format; starting fresh.")
        return {"processed_files": []}

    # Callers index state["processed_files"] directly, so guarantee the key
    # exists and is a list while keeping any other keys untouched.
    if not isinstance(state.get("processed_files"), list):
        state["processed_files"] = []
    return state
38
+
39
def save_state(state):
    """Persist ``state`` to ``STATE_FILE`` as indented JSON, atomically.

    The data is first written to a temporary sibling file and then moved over
    ``STATE_FILE`` with ``os.replace``, so an interruption mid-write leaves
    the previous state file intact instead of a truncated one.

    Args:
        state: JSON-serialisable object (the dict returned by ``load_state``).

    Raises:
        TypeError / ValueError: if ``state`` cannot be serialised to JSON.
        OSError: if the file cannot be written or replaced.
    """
    tmp_path = f"{STATE_FILE}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(state, f, indent=2)
            # Push the bytes to disk before the rename makes them visible.
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, STATE_FILE)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
42
+
43
def extract_frames(video_path, output_dir, fps=TARGET_FPS):
    """Extract frames from a video at (approximately) the requested FPS.

    Every ``round(video_fps / fps)``-th decoded frame (at least every frame)
    is written to ``output_dir`` as a quality-90 JPEG named ``000001.jpg``,
    ``000002.jpg``, ... in decoding order.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG files; created if missing.
        fps: Desired number of saved frames per second of video; must be > 0.

    Returns:
        The number of frames successfully written.  ``0`` when the video
        cannot be opened.

    Raises:
        ValueError: if ``fps`` is not a positive number.
    """
    if not fps or fps <= 0:
        raise ValueError(f"fps must be a positive number, got {fps!r}")

    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(str(video_path))
    saved_count = 0
    try:
        if not cap.isOpened():
            log(f"Error: Could not open video {video_path}")
            return 0

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        # Containers may report 0, NaN or inf; fall back to a common default.
        # The chained comparison is False for NaN as well as out-of-range values.
        if not (0 < video_fps < float("inf")):
            video_fps = 30

        frame_interval = max(1, int(round(video_fps / fps)))
        jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 90]

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_interval == 0:
                frame_name = f"{saved_count + 1:06d}.jpg"
                frame_path = os.path.join(output_dir, frame_name)
                # imwrite reports failure via its return value; only count
                # frames that actually reached the disk.
                if cv2.imwrite(frame_path, frame, jpeg_params):
                    saved_count += 1
                else:
                    log(f"Warning: could not write frame {frame_path}")

            frame_idx += 1
    finally:
        # Always free the decoder, even if reading or writing raised.
        cap.release()

    return saved_count
74
+
75
def zip_folder(folder_path, zip_path):
    """Zip all files below ``folder_path`` into a new archive at ``zip_path``.

    Files are stored under their path relative to ``folder_path``, so files
    with the same name in different sub-directories do not collide.  For a
    flat folder the entry names are simply the file names.  Entries are added
    in sorted order so the archive layout is deterministic.

    Args:
        folder_path: Directory whose contents are archived (recursively).
        zip_path: Destination ``.zip`` file; overwritten if it exists.
    """
    archive_abs = os.path.abspath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            dirs.sort()  # in-place sort makes os.walk descend deterministically
            for file in sorted(files):
                full_path = os.path.join(root, file)
                # If the archive lives inside the folder, never add it to itself.
                if os.path.abspath(full_path) == archive_abs:
                    continue
                zipf.write(full_path, arcname=os.path.relpath(full_path, folder_path))
81
+
82
def _cleanup_paths(*paths):
    """Best-effort removal of files/directories; falsy entries are ignored."""
    for path in paths:
        if not path:
            continue
        try:
            if os.path.isdir(path):
                shutil.rmtree(path)
            elif os.path.exists(path):
                os.remove(path)
        except OSError as exc:
            # Cleanup problems must not mask the real outcome of the run.
            log(f"Warning: could not remove {path}: {exc}")


def _process_video(video_file, token):
    """Download one video, upload its zipped frames, and clean up local files.

    Returns True when an archive was uploaded, False when no frames could be
    extracted.  Local artefacts are removed whether or not a step fails.
    """
    video_name = Path(video_file).stem
    video_frames_dir = os.path.join(FRAMES_DIR, video_name)
    local_video_path = None
    zip_path = None
    try:
        # 1. Download
        local_video_path = hf_hub_download(
            repo_id=SOURCE_REPO_ID,
            filename=video_file,
            repo_type="dataset",
            local_dir=DOWNLOAD_DIR,
            token=token,
        )

        # 2. Extract Frames
        # Drop frames left behind by an interrupted earlier run so they do not
        # end up in this video's archive.
        if os.path.isdir(video_frames_dir):
            shutil.rmtree(video_frames_dir)
        log(f"Extracting frames for {video_name}...")
        frame_count = extract_frames(local_video_path, video_frames_dir)
        log(f"Extracted {frame_count} frames.")
        if frame_count <= 0:
            return False

        # 3. Zip
        zip_filename = f"{video_name}_frames.zip"
        zip_path = os.path.join(ZIPS_DIR, zip_filename)
        log(f"Zipping frames to {zip_filename}...")
        zip_folder(video_frames_dir, zip_path)

        # 4. Upload
        log(f"Uploading {zip_filename} to {TARGET_REPO_ID}...")
        api.upload_file(
            path_or_fileobj=zip_path,
            path_in_repo=zip_filename,
            repo_id=TARGET_REPO_ID,
            repo_type="dataset",
        )
        log(f"Successfully uploaded {zip_filename}")
        return True
    finally:
        # 5. Cleanup — runs on success and on failure alike.
        log(f"Cleaning up local files for {video_name}...")
        _cleanup_paths(video_frames_dir, local_video_path, zip_path)


def process_movies():
    """Convert every unprocessed video of the source repo into a frames archive.

    For each video file in ``SOURCE_REPO_ID`` that is not yet recorded in the
    state file: download it, extract frames, zip them, upload the archive to
    ``TARGET_REPO_ID`` and record the file as processed.  Videos that yield no
    frames are not recorded and are therefore retried on the next run.

    A failure while handling one video is logged and the remaining videos are
    still processed.  A failure to list the source repository is logged as
    fatal and ends the run.  The function does not raise for ordinary
    ``Exception`` errors in either case.
    """
    state = load_state()
    # Tolerate state files that lack the key, and use a set for O(1) lookups.
    processed = state.setdefault("processed_files", [])
    already_done = set(processed)
    # An empty token string means "not configured"; pass None so the hub
    # client can fall back to its own default credentials.
    token = HF_TOKEN or None
    log(f"Starting movie processing. Source: {SOURCE_REPO_ID}, Target: {TARGET_REPO_ID}")

    try:
        # Get list of files in source repo
        files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=token)
    except Exception as e:
        log(f"Fatal error during processing: {str(e)}")
        return

    # Filter for video files
    video_extensions = ('.mp4', '.mkv', '.avi', '.mov', '.webm')
    videos = [f for f in files if f.lower().endswith(video_extensions)]
    log(f"Found {len(videos)} videos in source repository.")

    failures = 0
    for video_file in videos:
        if video_file in already_done:
            log(f"Skipping already processed file: {video_file}")
            continue

        log(f"Processing: {video_file}")
        try:
            if _process_video(video_file, token):
                # Update state only after a successful upload.
                processed.append(video_file)
                already_done.add(video_file)
                save_state(state)
        except Exception as e:
            # Keep going: one broken video must not block the rest of the batch.
            failures += 1
            log(f"Error processing {video_file}: {str(e)}")

    if failures:
        log(f"Finished with {failures} failed video(s); they will be retried on the next run.")
    else:
        log("All available videos processed.")
152
+
153
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    process_movies()