factorstudios commited on
Commit
b4fd570
·
verified ·
1 Parent(s): b8e6e62

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +437 -0
server.py CHANGED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ FastAPI Video Compression Server
4
+ Continuously polls Hugging Face dataset for large videos and compresses them.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import asyncio
11
+ import subprocess
12
+ import tempfile
13
+ from pathlib import Path
14
+ from datetime import datetime
15
+ from dotenv import load_dotenv
16
+ from typing import List, Dict, Optional
17
+
18
+ from fastapi import FastAPI, HTTPException
19
+ from fastapi.responses import JSONResponse
20
+ import uvicorn
21
+
22
+ try:
23
+ from huggingface_hub import list_repo_files, hf_hub_download, upload_file
24
+ except ImportError:
25
+ print("Missing dependency: huggingface_hub")
26
+ sys.exit(1)
27
+
28
+ # Load environment variables
29
+ load_dotenv()
30
+ HF_TOKEN = os.getenv("HF_TOKEN")
31
+ if not HF_TOKEN:
32
+ print("Error: Missing HF_TOKEN in .env")
33
+ sys.exit(1)
34
+
35
+ # Configuration
36
+ HF_DATASET_REPO = "factorstudios/movs"
37
+ READY_VIDEOS_FOLDER = "ready_videos"
38
+ SIZE_THRESHOLD_MB = 100 # Compress videos above 100MB
39
+ CACHE_DIR = "/tmp/video_compress_cache"
40
+ TEMP_DIR = Path("/tmp/video_compression_server")
41
+ TEMP_DIR.mkdir(exist_ok=True)
42
+
43
+ # Global state
44
+ compression_state = {
45
+ "is_running": False,
46
+ "is_polling": False,
47
+ "total_found": 0,
48
+ "total_compressed": 0,
49
+ "total_uploaded": 0,
50
+ "current_video": None,
51
+ "last_error": None,
52
+ "processed_videos": [],
53
+ "failed_videos": [],
54
+ "total_space_saved_mb": 0,
55
+ "compression_stats": {}
56
+ }
57
+
58
+ app = FastAPI(title="Video Compression Server")
59
+
60
+
61
+ def get_video_duration(video_path: str) -> float:
62
+ """Get video duration in seconds using ffprobe."""
63
+ cmd = [
64
+ "ffprobe", "-v", "error",
65
+ "-show_entries", "format=duration",
66
+ "-of", "default=noprint_wrappers=1:nokey=1:nokey=1",
67
+ video_path
68
+ ]
69
+ result = subprocess.run(cmd, capture_output=True, text=True)
70
+ try:
71
+ return float(result.stdout.strip())
72
+ except ValueError:
73
+ return 0
74
+
75
+
76
+ def get_video_bitrate(video_path: str) -> float:
77
+ """Get video bitrate in kbps."""
78
+ cmd = [
79
+ "ffprobe", "-v", "error",
80
+ "-select_streams", "v:0",
81
+ "-show_entries", "stream=bit_rate",
82
+ "-of", "default=noprint_wrappers=1:nokey=1",
83
+ video_path
84
+ ]
85
+ result = subprocess.run(cmd, capture_output=True, text=True)
86
+ try:
87
+ return float(result.stdout.strip()) / 1000
88
+ except ValueError:
89
+ return 0
90
+
91
+
92
+ def compress_video(input_path: str, output_path: str) -> Dict:
93
+ """
94
+ Compress video using H.265 with maximum quality preservation.
95
+ Returns dict with compression stats.
96
+ """
97
+ print(f"\n{'='*80}")
98
+ print(f"COMPRESSING: {Path(input_path).name}")
99
+ print(f"{'='*80}")
100
+
101
+ # Get video info
102
+ duration = get_video_duration(input_path)
103
+ original_size = os.path.getsize(input_path) / (1024**2)
104
+
105
+ print(f"Original: {original_size:.1f} MB | Duration: {duration:.1f}s")
106
+
107
+ # H.265 encoding with maximum quality
108
+ ffmpeg_cmd = [
109
+ "ffmpeg", "-y",
110
+ "-i", input_path,
111
+ "-c:v", "libx265",
112
+ "-preset", "veryslow",
113
+ "-crf", "20",
114
+ "-x265-params", "aq-mode=3",
115
+ "-pix_fmt", "yuv420p10le",
116
+ "-c:a", "aac",
117
+ "-b:a", "192k",
118
+ output_path
119
+ ]
120
+
121
+ result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
122
+
123
+ if result.returncode != 0:
124
+ print(f"✗ Encoding failed: {result.stderr[:500]}")
125
+ return None
126
+
127
+ if not os.path.exists(output_path):
128
+ print(f"✗ Output file not created")
129
+ return None
130
+
131
+ compressed_size = os.path.getsize(output_path) / (1024**2)
132
+ compression_ratio = original_size / compressed_size
133
+ saved_mb = original_size - compressed_size
134
+
135
+ print(f"✓ Compressed: {original_size:.1f}MB → {compressed_size:.1f}MB")
136
+ print(f" Saved: {saved_mb:.1f}MB ({(saved_mb/original_size*100):.1f}%)")
137
+ print(f" Ratio: {compression_ratio:.2f}x")
138
+
139
+ return {
140
+ "original_size_mb": round(original_size, 2),
141
+ "compressed_size_mb": round(compressed_size, 2),
142
+ "saved_mb": round(saved_mb, 2),
143
+ "compression_ratio": round(compression_ratio, 2),
144
+ "duration_seconds": duration
145
+ }
146
+
147
+
148
+ async def scan_and_compress_videos():
149
+ """Scan ready_videos folder and compress large videos."""
150
+ if compression_state["is_polling"]:
151
+ print("Already polling, skipping...")
152
+ return
153
+
154
+ compression_state["is_polling"] = True
155
+
156
+ try:
157
+ print("\n" + "="*80)
158
+ print("SCANNING FOR LARGE VIDEOS")
159
+ print("="*80)
160
+
161
+ # List all files in ready_videos
162
+ files = list_repo_files(
163
+ repo_id=HF_DATASET_REPO,
164
+ repo_type="dataset",
165
+ token=HF_TOKEN
166
+ )
167
+
168
+ # Find video files > 100MB in ready_videos folder
169
+ video_files = []
170
+ for f in files:
171
+ if f.startswith(f"{READY_VIDEOS_FOLDER}/") and f.endswith(".mp4"):
172
+ # Get file info from HF Hub
173
+ try:
174
+ # Parse file path: ready_videos/moviename/segment-XX.mp4
175
+ parts = f.split("/")
176
+ if len(parts) >= 3:
177
+ movie_name = parts[1]
178
+ file_name = parts[2]
179
+ video_files.append({
180
+ "path": f,
181
+ "movie_name": movie_name,
182
+ "file_name": file_name
183
+ })
184
+ except Exception as e:
185
+ print(f"Error parsing file {f}: {e}")
186
+ continue
187
+
188
+ print(f"Found {len(video_files)} video files")
189
+
190
+ # Check sizes and filter large ones
191
+ large_videos = []
192
+ for video_info in video_files:
193
+ # Skip if already processed
194
+ if any(pv["path"] == video_info["path"] for pv in compression_state["processed_videos"]):
195
+ print(f" ⊘ {video_info['file_name']} (already processed)")
196
+ continue
197
+
198
+ # Skip if already failed
199
+ if any(fv["path"] == video_info["path"] for fv in compression_state["failed_videos"]):
200
+ print(f" ✗ {video_info['file_name']} (previously failed)")
201
+ continue
202
+
203
+ large_videos.append(video_info)
204
+
205
+ compression_state["total_found"] = len(large_videos)
206
+ print(f"Unprocessed videos: {len(large_videos)}")
207
+
208
+ if not large_videos:
209
+ print("✓ All videos already processed!")
210
+ compression_state["is_polling"] = False
211
+ return
212
+
213
+ # Process each video
214
+ for video_info in large_videos:
215
+ compression_state["current_video"] = video_info["file_name"]
216
+
217
+ try:
218
+ # Download
219
+ print(f"\nDownloading: {video_info['path']}")
220
+ video_path = hf_hub_download(
221
+ repo_id=HF_DATASET_REPO,
222
+ filename=video_info["path"],
223
+ repo_type="dataset",
224
+ token=HF_TOKEN,
225
+ cache_dir=CACHE_DIR
226
+ )
227
+
228
+ if os.path.islink(video_path):
229
+ video_path = os.path.realpath(video_path)
230
+
231
+ file_size_mb = os.path.getsize(video_path) / (1024**2)
232
+ print(f"✓ Downloaded: {file_size_mb:.1f} MB")
233
+
234
+ # Skip if smaller than threshold
235
+ if file_size_mb < SIZE_THRESHOLD_MB:
236
+ print(f"⊘ File size {file_size_mb:.1f}MB < {SIZE_THRESHOLD_MB}MB threshold, skipping")
237
+ compression_state["processed_videos"].append({
238
+ "path": video_info["path"],
239
+ "file_name": video_info["file_name"],
240
+ "status": "skipped",
241
+ "reason": f"File size {file_size_mb:.1f}MB below threshold",
242
+ "timestamp": datetime.now().isoformat()
243
+ })
244
+ continue
245
+
246
+ # Compress
247
+ output_path = TEMP_DIR / f"{Path(video_info['file_name']).stem}_compressed.mp4"
248
+ compression_stats = compress_video(video_path, str(output_path))
249
+
250
+ if not compression_stats:
251
+ raise Exception("Compression failed")
252
+
253
+ compression_state["total_compressed"] += 1
254
+
255
+ # Upload compressed video
256
+ upload_filename = f"{Path(video_info['file_name']).stem}_compressed.mp4"
257
+ upload_path = f"{READY_VIDEOS_FOLDER}/{video_info['movie_name']}/{upload_filename}"
258
+
259
+ print(f"Uploading: {upload_path}")
260
+ upload_file(
261
+ path_or_fileobj=str(output_path),
262
+ path_in_repo=upload_path,
263
+ repo_id=HF_DATASET_REPO,
264
+ repo_type="dataset",
265
+ token=HF_TOKEN,
266
+ commit_message=f"Add H.265 compressed video: {upload_filename}"
267
+ )
268
+
269
+ compression_state["total_uploaded"] += 1
270
+ compression_state["total_space_saved_mb"] += compression_stats["saved_mb"]
271
+
272
+ # Track processed video
273
+ compression_state["processed_videos"].append({
274
+ "path": video_info["path"],
275
+ "file_name": video_info["file_name"],
276
+ "upload_path": upload_path,
277
+ "status": "compressed_uploaded",
278
+ "stats": compression_stats,
279
+ "timestamp": datetime.now().isoformat()
280
+ })
281
+
282
+ compression_state["compression_stats"][video_info["file_name"]] = compression_stats
283
+
284
+ # Cleanup
285
+ try:
286
+ os.remove(output_path)
287
+ except:
288
+ pass
289
+
290
+ except Exception as e:
291
+ print(f"✗ Error: {e}")
292
+ compression_state["last_error"] = str(e)
293
+ compression_state["failed_videos"].append({
294
+ "path": video_info["path"],
295
+ "file_name": video_info["file_name"],
296
+ "error": str(e),
297
+ "timestamp": datetime.now().isoformat()
298
+ })
299
+
300
+ # Save JSON tracking file
301
+ print(f"\n{'='*80}")
302
+ print("SAVING PROCESSING REPORT")
303
+ print(f"{'='*80}")
304
+
305
+ report = {
306
+ "timestamp": datetime.now().isoformat(),
307
+ "total_videos_found": compression_state["total_found"],
308
+ "total_compressed": compression_state["total_compressed"],
309
+ "total_uploaded": compression_state["total_uploaded"],
310
+ "total_space_saved_mb": compression_state["total_space_saved_mb"],
311
+ "processed_videos": compression_state["processed_videos"],
312
+ "failed_videos": compression_state["failed_videos"],
313
+ "compression_stats": compression_state["compression_stats"]
314
+ }
315
+
316
+ report_path = TEMP_DIR / "compression_report.json"
317
+ with open(report_path, "w") as f:
318
+ json.dump(report, f, indent=2)
319
+
320
+ # Upload JSON report
321
+ print(f"Uploading report...")
322
+ upload_file(
323
+ path_or_fileobj=str(report_path),
324
+ path_in_repo=f"{READY_VIDEOS_FOLDER}/compression_report.json",
325
+ repo_id=HF_DATASET_REPO,
326
+ repo_type="dataset",
327
+ token=HF_TOKEN,
328
+ commit_message="Update compression processing report"
329
+ )
330
+
331
+ print(f"✓ Report uploaded")
332
+ print(f"\n{'='*80}")
333
+ print("COMPRESSION COMPLETE")
334
+ print(f" Compressed: {compression_state['total_compressed']}")
335
+ print(f" Uploaded: {compression_state['total_uploaded']}")
336
+ print(f" Space saved: {compression_state['total_space_saved_mb']:.1f} MB")
337
+ print(f"{'='*80}\n")
338
+
339
+ except Exception as e:
340
+ print(f"Critical error: {e}")
341
+ compression_state["last_error"] = str(e)
342
+ finally:
343
+ compression_state["is_polling"] = False
344
+
345
+
346
+ @app.on_event("startup")
347
+ async def startup_event():
348
+ """Start initial compression scan after 15 seconds."""
349
+ async def delayed_scan():
350
+ await asyncio.sleep(15)
351
+ print("\n" + "="*80)
352
+ print("STARTING COMPRESSION SCAN (15 seconds after startup)")
353
+ print("="*80)
354
+ await scan_and_compress_videos()
355
+
356
+ asyncio.create_task(delayed_scan())
357
+
358
+
359
+ @app.get("/")
360
+ async def health():
361
+ """Health check endpoint."""
362
+ return JSONResponse({
363
+ "status": "running",
364
+ "service": "Video Compression Server",
365
+ "is_polling": compression_state["is_polling"],
366
+ "total_found": compression_state["total_found"],
367
+ "total_compressed": compression_state["total_compressed"],
368
+ "total_uploaded": compression_state["total_uploaded"],
369
+ "current_video": compression_state["current_video"],
370
+ "space_saved_mb": compression_state["total_space_saved_mb"],
371
+ "processed_count": len(compression_state["processed_videos"]),
372
+ "failed_count": len(compression_state["failed_videos"])
373
+ })
374
+
375
+
376
+ @app.get("/status")
377
+ async def get_status():
378
+ """Get detailed status."""
379
+ return JSONResponse({
380
+ "is_polling": compression_state["is_polling"],
381
+ "total_found": compression_state["total_found"],
382
+ "total_compressed": compression_state["total_compressed"],
383
+ "total_uploaded": compression_state["total_uploaded"],
384
+ "total_space_saved_mb": round(compression_state["total_space_saved_mb"], 2),
385
+ "current_video": compression_state["current_video"],
386
+ "processed_videos": compression_state["processed_videos"],
387
+ "failed_videos": compression_state["failed_videos"],
388
+ "last_error": compression_state["last_error"]
389
+ })
390
+
391
+
392
+ @app.post("/scan")
393
+ async def trigger_scan():
394
+ """Manually trigger a scan and compression run."""
395
+ if compression_state["is_polling"]:
396
+ return JSONResponse({
397
+ "status": "already_running",
398
+ "message": "Compression scan already in progress"
399
+ }, status_code=409)
400
+
401
+ asyncio.create_task(scan_and_compress_videos())
402
+ return JSONResponse({
403
+ "status": "started",
404
+ "message": "Compression scan started"
405
+ })
406
+
407
+
408
+ @app.get("/stats")
409
+ async def get_stats():
410
+ """Get compression statistics."""
411
+ return JSONResponse({
412
+ "compression_stats": compression_state["compression_stats"],
413
+ "total_space_saved_mb": round(compression_state["total_space_saved_mb"], 2),
414
+ "average_compression_ratio": round(
415
+ sum(s["compression_ratio"] for s in compression_state["compression_stats"].values()) /
416
+ max(len(compression_state["compression_stats"]), 1), 2
417
+ ) if compression_state["compression_stats"] else 0
418
+ })
419
+
420
+
421
+ @app.post("/reset")
422
+ async def reset_state():
423
+ """Reset all tracking (for testing)."""
424
+ compression_state["total_found"] = 0
425
+ compression_state["total_compressed"] = 0
426
+ compression_state["total_uploaded"] = 0
427
+ compression_state["processed_videos"] = []
428
+ compression_state["failed_videos"] = []
429
+ compression_state["compression_stats"] = {}
430
+ compression_state["total_space_saved_mb"] = 0
431
+ return JSONResponse({"status": "reset"})
432
+
433
+
434
+ if __name__ == "__main__":
435
+ print("Starting Video Compression Server on port 7861...")
436
+ print("Scanning for videos on startup...")
437
+ uvicorn.run(app, host="0.0.0.0", port=7860)