factorstudios commited on
Commit
16bb6c6
Β·
verified Β·
1 Parent(s): 964ff65

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +141 -97
server.py CHANGED
@@ -35,10 +35,13 @@ if not HF_TOKEN:
35
  # Configuration
36
  HF_DATASET_REPO = "factorstudios/movs"
37
  READY_VIDEOS_FOLDER = "ready_videos"
 
38
  SIZE_THRESHOLD_MB = 100 # Compress videos above 100MB
39
  CACHE_DIR = "/tmp/video_compress_cache"
40
  TEMP_DIR = Path("/tmp/video_compression_server")
41
  TEMP_DIR.mkdir(exist_ok=True)
 
 
42
 
43
  # Global state
44
  compression_state = {
@@ -91,7 +94,7 @@ def get_video_bitrate(video_path: str) -> float:
91
 
92
  def compress_video(input_path: str, output_path: str) -> Dict:
93
  """
94
- Compress video using H.265 with maximum quality preservation.
95
  Returns dict with compression stats.
96
  """
97
  print(f"\n{'='*80}")
@@ -103,50 +106,117 @@ def compress_video(input_path: str, output_path: str) -> Dict:
103
  original_size = os.path.getsize(input_path) / (1024**2)
104
 
105
  print(f"Original: {original_size:.1f} MB | Duration: {duration:.1f}s")
 
106
 
107
- # H.265 encoding with maximum quality
 
 
 
 
 
 
 
108
  ffmpeg_cmd = [
109
  "ffmpeg", "-y",
110
  "-i", input_path,
111
  "-c:v", "libx265",
112
- "-preset", "veryslow",
113
- "-crf", "20",
114
- "-x265-params", "aq-mode=3",
115
- "-pix_fmt", "yuv420p10le",
 
 
 
116
  "-c:a", "aac",
117
- "-b:a", "192k",
 
118
  output_path
119
  ]
120
 
121
- result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
122
-
123
- if result.returncode != 0:
124
- print(f"βœ— Encoding failed: {result.stderr[:500]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  return None
126
-
127
- if not os.path.exists(output_path):
128
- print(f"βœ— Output file not created")
129
  return None
130
-
131
- compressed_size = os.path.getsize(output_path) / (1024**2)
132
- compression_ratio = original_size / compressed_size
133
- saved_mb = original_size - compressed_size
134
-
135
- print(f"βœ“ Compressed: {original_size:.1f}MB β†’ {compressed_size:.1f}MB")
136
- print(f" Saved: {saved_mb:.1f}MB ({(saved_mb/original_size*100):.1f}%)")
137
- print(f" Ratio: {compression_ratio:.2f}x")
138
-
139
- return {
140
- "original_size_mb": round(original_size, 2),
141
- "compressed_size_mb": round(compressed_size, 2),
142
- "saved_mb": round(saved_mb, 2),
143
- "compression_ratio": round(compression_ratio, 2),
144
- "duration_seconds": duration
145
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
  async def scan_and_compress_videos():
149
- """Scan ready_videos folder and compress large videos."""
150
  if compression_state["is_polling"]:
151
  print("Already polling, skipping...")
152
  return
@@ -154,6 +224,15 @@ async def scan_and_compress_videos():
154
  compression_state["is_polling"] = True
155
 
156
  try:
 
 
 
 
 
 
 
 
 
157
  print("\n" + "="*80)
158
  print("SCANNING FOR LARGE VIDEOS")
159
  print("="*80)
@@ -165,11 +244,18 @@ async def scan_and_compress_videos():
165
  token=HF_TOKEN
166
  )
167
 
168
- # Find video files > 100MB in ready_videos folder
169
  video_files = []
170
  for f in files:
171
  if f.startswith(f"{READY_VIDEOS_FOLDER}/") and f.endswith(".mp4"):
172
- # Get file info from HF Hub
 
 
 
 
 
 
 
173
  try:
174
  # Parse file path: ready_videos/moviename/segment-XX.mp4
175
  parts = f.split("/")
@@ -185,33 +271,16 @@ async def scan_and_compress_videos():
185
  print(f"Error parsing file {f}: {e}")
186
  continue
187
 
188
- print(f"Found {len(video_files)} video files")
189
-
190
- # Check sizes and filter large ones
191
- large_videos = []
192
- for video_info in video_files:
193
- # Skip if already processed
194
- if any(pv["path"] == video_info["path"] for pv in compression_state["processed_videos"]):
195
- print(f" ⊘ {video_info['file_name']} (already processed)")
196
- continue
197
-
198
- # Skip if already failed
199
- if any(fv["path"] == video_info["path"] for fv in compression_state["failed_videos"]):
200
- print(f" βœ— {video_info['file_name']} (previously failed)")
201
- continue
202
-
203
- large_videos.append(video_info)
204
-
205
- compression_state["total_found"] = len(large_videos)
206
- print(f"Unprocessed videos: {len(large_videos)}")
207
 
208
- if not large_videos:
209
  print("βœ“ All videos already processed!")
210
  compression_state["is_polling"] = False
211
  return
212
 
213
  # Process each video
214
- for video_info in large_videos:
215
  compression_state["current_video"] = video_info["file_name"]
216
 
217
  try:
@@ -231,18 +300,21 @@ async def scan_and_compress_videos():
231
  file_size_mb = os.path.getsize(video_path) / (1024**2)
232
  print(f"βœ“ Downloaded: {file_size_mb:.1f} MB")
233
 
234
- # Skip if smaller than threshold
235
  if file_size_mb < SIZE_THRESHOLD_MB:
236
- print(f"⊘ File size {file_size_mb:.1f}MB < {SIZE_THRESHOLD_MB}MB threshold, skipping")
237
- compression_state["processed_videos"].append({
238
  "path": video_info["path"],
239
  "file_name": video_info["file_name"],
240
  "status": "skipped",
241
- "reason": f"File size {file_size_mb:.1f}MB below threshold",
242
  "timestamp": datetime.now().isoformat()
243
  })
 
244
  continue
245
 
 
 
246
  # Compress
247
  output_path = TEMP_DIR / f"{Path(video_info['file_name']).stem}_compressed.mp4"
248
  compression_stats = compress_video(video_path, str(output_path))
@@ -252,9 +324,9 @@ async def scan_and_compress_videos():
252
 
253
  compression_state["total_compressed"] += 1
254
 
255
- # Upload compressed video
256
  upload_filename = f"{Path(video_info['file_name']).stem}_compressed.mp4"
257
- upload_path = f"{READY_VIDEOS_FOLDER}/{video_info['movie_name']}/{upload_filename}"
258
 
259
  print(f"Uploading: {upload_path}")
260
  upload_file(
@@ -269,8 +341,8 @@ async def scan_and_compress_videos():
269
  compression_state["total_uploaded"] += 1
270
  compression_state["total_space_saved_mb"] += compression_stats["saved_mb"]
271
 
272
- # Track processed video
273
- compression_state["processed_videos"].append({
274
  "path": video_info["path"],
275
  "file_name": video_info["file_name"],
276
  "upload_path": upload_path,
@@ -279,7 +351,8 @@ async def scan_and_compress_videos():
279
  "timestamp": datetime.now().isoformat()
280
  })
281
 
282
- compression_state["compression_stats"][video_info["file_name"]] = compression_stats
 
283
 
284
  # Cleanup
285
  try:
@@ -290,50 +363,21 @@ async def scan_and_compress_videos():
290
  except Exception as e:
291
  print(f"βœ— Error: {e}")
292
  compression_state["last_error"] = str(e)
293
- compression_state["failed_videos"].append({
294
  "path": video_info["path"],
295
  "file_name": video_info["file_name"],
296
  "error": str(e),
297
  "timestamp": datetime.now().isoformat()
298
  })
 
299
 
300
- # Save JSON tracking file
301
- print(f"\n{'='*80}")
302
- print("SAVING PROCESSING REPORT")
303
- print(f"{'='*80}")
304
-
305
- report = {
306
- "timestamp": datetime.now().isoformat(),
307
- "total_videos_found": compression_state["total_found"],
308
- "total_compressed": compression_state["total_compressed"],
309
- "total_uploaded": compression_state["total_uploaded"],
310
- "total_space_saved_mb": compression_state["total_space_saved_mb"],
311
- "processed_videos": compression_state["processed_videos"],
312
- "failed_videos": compression_state["failed_videos"],
313
- "compression_stats": compression_state["compression_stats"]
314
- }
315
-
316
- report_path = TEMP_DIR / "compression_report.json"
317
- with open(report_path, "w") as f:
318
- json.dump(report, f, indent=2)
319
-
320
- # Upload JSON report
321
- print(f"Uploading report...")
322
- upload_file(
323
- path_or_fileobj=str(report_path),
324
- path_in_repo=f"{READY_VIDEOS_FOLDER}/compression_report.json",
325
- repo_id=HF_DATASET_REPO,
326
- repo_type="dataset",
327
- token=HF_TOKEN,
328
- commit_message="Update compression processing report"
329
- )
330
-
331
- print(f"βœ“ Report uploaded")
332
  print(f"\n{'='*80}")
333
  print("COMPRESSION COMPLETE")
334
  print(f" Compressed: {compression_state['total_compressed']}")
335
  print(f" Uploaded: {compression_state['total_uploaded']}")
336
  print(f" Space saved: {compression_state['total_space_saved_mb']:.1f} MB")
 
 
337
  print(f"{'='*80}\n")
338
 
339
  except Exception as e:
@@ -432,6 +476,6 @@ async def reset_state():
432
 
433
 
434
  if __name__ == "__main__":
435
- print("Starting Video Compression Server on port 7861...")
436
  print("Scanning for videos on startup...")
437
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
35
  # Configuration
36
  HF_DATASET_REPO = "factorstudios/movs"
37
  READY_VIDEOS_FOLDER = "ready_videos"
38
+ COMPRESSED_FOLDER = "compressed"
39
  SIZE_THRESHOLD_MB = 100 # Compress videos above 100MB
40
  CACHE_DIR = "/tmp/video_compress_cache"
41
  TEMP_DIR = Path("/tmp/video_compression_server")
42
  TEMP_DIR.mkdir(exist_ok=True)
43
+ PROGRESS_FILE = TEMP_DIR / "compression_progress.json"
44
+ PROGRESS_FILE_REPO = f"{COMPRESSED_FOLDER}/compression_progress.json"
45
 
46
  # Global state
47
  compression_state = {
 
94
 
95
  def compress_video(input_path: str, output_path: str) -> Dict:
96
  """
97
+ Compress video using H.265 targeting ~50MB with quality preservation.
98
  Returns dict with compression stats.
99
  """
100
  print(f"\n{'='*80}")
 
106
  original_size = os.path.getsize(input_path) / (1024**2)
107
 
108
  print(f"Original: {original_size:.1f} MB | Duration: {duration:.1f}s")
109
+ print(f"Target: ~50MB with high quality preservation")
110
 
111
+ # Calculate target bitrate for ~50MB file
112
+ target_mb = 50
113
+ target_bitrate_kbps = int((target_mb * 8 * 1024) / max(duration, 1))
114
+ video_bitrate = max(400, min(target_bitrate_kbps, 1200))
115
+
116
+ print(f"Calculated bitrate: {video_bitrate}kbps")
117
+
118
+ # H.265 encoding with aggressive compression targeting ~50MB
119
  ffmpeg_cmd = [
120
  "ffmpeg", "-y",
121
  "-i", input_path,
122
  "-c:v", "libx265",
123
+ "-preset", "slow",
124
+ "-crf", "28",
125
+ "-b:v", f"{video_bitrate}k",
126
+ "-maxrate", f"{int(video_bitrate * 1.2)}k",
127
+ "-bufsize", f"{int(video_bitrate * 2)}k",
128
+ "-x265-params", "aq-mode=3:log-level=error",
129
+ "-pix_fmt", "yuv420p",
130
  "-c:a", "aac",
131
+ "-b:a", "128k",
132
+ "-progress", "pipe:1",
133
  output_path
134
  ]
135
 
136
+ try:
137
+ result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True, timeout=3600)
138
+
139
+ if result.returncode != 0:
140
+ print(f"βœ— Encoding failed with code {result.returncode}")
141
+ if result.stderr:
142
+ print(f"Error output: {result.stderr[:500]}")
143
+ return None
144
+
145
+ if not os.path.exists(output_path):
146
+ print(f"βœ— Output file not created")
147
+ return None
148
+
149
+ compressed_size = os.path.getsize(output_path) / (1024**2)
150
+ compression_ratio = original_size / compressed_size
151
+ saved_mb = original_size - compressed_size
152
+
153
+ print(f"βœ“ Compressed: {original_size:.1f}MB β†’ {compressed_size:.1f}MB")
154
+ print(f" Saved: {saved_mb:.1f}MB ({(saved_mb/original_size*100):.1f}%)")
155
+ print(f" Ratio: {compression_ratio:.2f}x")
156
+
157
+ return {
158
+ "original_size_mb": round(original_size, 2),
159
+ "compressed_size_mb": round(compressed_size, 2),
160
+ "saved_mb": round(saved_mb, 2),
161
+ "compression_ratio": round(compression_ratio, 2),
162
+ "duration_seconds": duration
163
+ }
164
+ except subprocess.TimeoutExpired:
165
+ print(f"βœ— Compression timed out (>1 hour)")
166
  return None
167
+ except Exception as e:
168
+ print(f"βœ— Compression error: {e}")
 
169
  return None
170
+
171
+
172
+ async def load_progress_file() -> Dict:
173
+ """Load compression progress from JSON file in HF dataset."""
174
+ try:
175
+ # Try to download existing progress file
176
+ progress_path = hf_hub_download(
177
+ repo_id=HF_DATASET_REPO,
178
+ filename=PROGRESS_FILE_REPO,
179
+ repo_type="dataset",
180
+ token=HF_TOKEN,
181
+ cache_dir=CACHE_DIR
182
+ )
183
+
184
+ if os.path.islink(progress_path):
185
+ progress_path = os.path.realpath(progress_path)
186
+
187
+ with open(progress_path, 'r') as f:
188
+ progress = json.load(f)
189
+ print(f"βœ“ Loaded progress file: {len(progress.get('compressed', []))} files already processed")
190
+ return progress
191
+ except Exception as e:
192
+ print(f"No existing progress file found, starting fresh: {e}")
193
+ return {"compressed": [], "failed": [], "last_updated": datetime.now().isoformat()}
194
+
195
+
196
+ async def save_progress_file(progress: Dict):
197
+ """Save and upload compression progress to HF dataset."""
198
+ try:
199
+ progress["last_updated"] = datetime.now().isoformat()
200
+
201
+ with open(PROGRESS_FILE, 'w') as f:
202
+ json.dump(progress, f, indent=2)
203
+
204
+ print(f"Uploading progress file...")
205
+ upload_file(
206
+ path_or_fileobj=str(PROGRESS_FILE),
207
+ path_in_repo=PROGRESS_FILE_REPO,
208
+ repo_id=HF_DATASET_REPO,
209
+ repo_type="dataset",
210
+ token=HF_TOKEN,
211
+ commit_message="Update compression progress"
212
+ )
213
+ print(f"βœ“ Progress file uploaded")
214
+ except Exception as e:
215
+ print(f"Warning: Could not save progress file: {e}")
216
 
217
 
218
  async def scan_and_compress_videos():
219
+ """Scan ready_videos folder and compress large videos using persistent progress tracking."""
220
  if compression_state["is_polling"]:
221
  print("Already polling, skipping...")
222
  return
 
224
  compression_state["is_polling"] = True
225
 
226
  try:
227
+ print("\n" + "="*80)
228
+ print("LOADING COMPRESSION PROGRESS")
229
+ print("="*80)
230
+
231
+ # Load progress file
232
+ progress = await load_progress_file()
233
+ compressed_files = {item["path"] for item in progress.get("compressed", [])}
234
+ failed_files = {item["path"] for item in progress.get("failed", [])}
235
+
236
  print("\n" + "="*80)
237
  print("SCANNING FOR LARGE VIDEOS")
238
  print("="*80)
 
244
  token=HF_TOKEN
245
  )
246
 
247
+ # Find unprocessed video files
248
  video_files = []
249
  for f in files:
250
  if f.startswith(f"{READY_VIDEOS_FOLDER}/") and f.endswith(".mp4"):
251
+ # Skip if already handled
252
+ if f in compressed_files:
253
+ print(f" ⊘ {f.split('/')[-1]} (already compressed)")
254
+ continue
255
+ if f in failed_files:
256
+ print(f" βœ— {f.split('/')[-1]} (previously failed)")
257
+ continue
258
+
259
  try:
260
  # Parse file path: ready_videos/moviename/segment-XX.mp4
261
  parts = f.split("/")
 
271
  print(f"Error parsing file {f}: {e}")
272
  continue
273
 
274
+ compression_state["total_found"] = len(video_files)
275
+ print(f"Found {len(video_files)} unprocessed video files")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
+ if not video_files:
278
  print("βœ“ All videos already processed!")
279
  compression_state["is_polling"] = False
280
  return
281
 
282
  # Process each video
283
+ for video_info in video_files:
284
  compression_state["current_video"] = video_info["file_name"]
285
 
286
  try:
 
300
  file_size_mb = os.path.getsize(video_path) / (1024**2)
301
  print(f"βœ“ Downloaded: {file_size_mb:.1f} MB")
302
 
303
+ # Check if meets compression threshold
304
  if file_size_mb < SIZE_THRESHOLD_MB:
305
+ print(f"⊘ Below threshold ({file_size_mb:.1f}MB < {SIZE_THRESHOLD_MB}MB), skipping")
306
+ progress["compressed"].append({
307
  "path": video_info["path"],
308
  "file_name": video_info["file_name"],
309
  "status": "skipped",
310
+ "reason": f"Below {SIZE_THRESHOLD_MB}MB threshold",
311
  "timestamp": datetime.now().isoformat()
312
  })
313
+ await save_progress_file(progress)
314
  continue
315
 
316
+ print(f"βœ“ Starting H.265 compression...")
317
+
318
  # Compress
319
  output_path = TEMP_DIR / f"{Path(video_info['file_name']).stem}_compressed.mp4"
320
  compression_stats = compress_video(video_path, str(output_path))
 
324
 
325
  compression_state["total_compressed"] += 1
326
 
327
+ # Upload compressed video to COMPRESSED folder with same structure
328
  upload_filename = f"{Path(video_info['file_name']).stem}_compressed.mp4"
329
+ upload_path = f"{COMPRESSED_FOLDER}/{video_info['movie_name']}/{upload_filename}"
330
 
331
  print(f"Uploading: {upload_path}")
332
  upload_file(
 
341
  compression_state["total_uploaded"] += 1
342
  compression_state["total_space_saved_mb"] += compression_stats["saved_mb"]
343
 
344
+ # Track in progress file
345
+ progress["compressed"].append({
346
  "path": video_info["path"],
347
  "file_name": video_info["file_name"],
348
  "upload_path": upload_path,
 
351
  "timestamp": datetime.now().isoformat()
352
  })
353
 
354
+ # Save progress
355
+ await save_progress_file(progress)
356
 
357
  # Cleanup
358
  try:
 
363
  except Exception as e:
364
  print(f"βœ— Error: {e}")
365
  compression_state["last_error"] = str(e)
366
+ progress["failed"].append({
367
  "path": video_info["path"],
368
  "file_name": video_info["file_name"],
369
  "error": str(e),
370
  "timestamp": datetime.now().isoformat()
371
  })
372
+ await save_progress_file(progress)
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  print(f"\n{'='*80}")
375
  print("COMPRESSION COMPLETE")
376
  print(f" Compressed: {compression_state['total_compressed']}")
377
  print(f" Uploaded: {compression_state['total_uploaded']}")
378
  print(f" Space saved: {compression_state['total_space_saved_mb']:.1f} MB")
379
+ print(f" Total compressed: {len(progress['compressed'])}")
380
+ print(f" Total failed: {len(progress['failed'])}")
381
  print(f"{'='*80}\n")
382
 
383
  except Exception as e:
 
476
 
477
 
478
  if __name__ == "__main__":
479
+ print("Starting Video Compression Server on port 7860...")
480
  print("Scanning for videos on startup...")
481
  uvicorn.run(app, host="0.0.0.0", port=7860)