File size: 5,984 Bytes
a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d a9d12bc 7150d5d 3ab2deb | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | import os
import json
import asyncio
import zipfile
import shutil
import cv2
import time
from pathlib import Path
from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from huggingface_hub import HfApi, list_repo_files, hf_hub_download
app = FastAPI()
templates = Jinja2Templates(directory="templates")
# Configuration from environment variables
HF_TOKEN = os.getenv("HF_TOKEN", "")
SOURCE_REPO_ID = os.getenv("SOURCE_REPO_ID", "factorstudios/movs")
TARGET_REPO_ID = os.getenv("TARGET_REPO_ID", "factorstudios/movzip")
DOWNLOAD_DIR = "downloads"
FRAMES_DIR = "frames"
ZIPS_DIR = "zips"
STATE_FILE = "processing_state.json"
for d in [DOWNLOAD_DIR, FRAMES_DIR, ZIPS_DIR]:
os.makedirs(d, exist_ok=True)
api = HfApi(token=HF_TOKEN)
# Global status for tracking
processing_status = {
"is_running": False,
"last_processed": None,
"total_videos_source": 0,
"processed_count": 0,
"current_action": "Idle",
"logs": []
}
def add_log(msg):
timestamp = time.strftime('%H:%M:%S')
log_msg = f"[{timestamp}] {msg}"
processing_status["logs"].append(log_msg)
if len(processing_status["logs"]) > 50:
processing_status["logs"].pop(0)
print(log_msg)
def load_state():
if os.path.exists(STATE_FILE):
with open(STATE_FILE, 'r') as f:
try:
return json.load(f)
except:
pass
return {"processed_files": []}
def save_state(state):
with open(STATE_FILE, 'w') as f:
json.dump(state, f, indent=2)
def extract_frames(video_path, output_dir, fps=10):
os.makedirs(output_dir, exist_ok=True)
cap = cv2.VideoCapture(str(video_path))
if not cap.isOpened():
return 0
video_fps = cap.get(cv2.CAP_PROP_FPS) or 30
frame_interval = max(1, int(round(video_fps / fps)))
frame_idx = 0
saved_count = 0
while True:
ret, frame = cap.read()
if not ret: break
if frame_idx % frame_interval == 0:
saved_count += 1
cv2.imwrite(os.path.join(output_dir, f"{saved_count:06d}.jpg"), frame, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
frame_idx += 1
cap.release()
return saved_count
def zip_folder(folder_path, zip_path):
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for root, _, files in os.walk(folder_path):
for file in files:
zipf.write(os.path.join(root, file), arcname=file)
async def run_processor():
if processing_status["is_running"]:
return
processing_status["is_running"] = True
state = load_state()
try:
add_log("Checking source repository...")
files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
video_extensions = ('.mp4', '.mkv', '.avi', '.mov', '.webm')
videos = [f for f in files if f.lower().endswith(video_extensions)]
processing_status["total_videos_source"] = len(videos)
processing_status["processed_count"] = len(state["processed_files"])
for video_file in videos:
if video_file in state["processed_files"]:
continue
processing_status["current_action"] = f"Processing {video_file}"
add_log(f"Downloading {video_file}...")
local_video_path = hf_hub_download(
repo_id=SOURCE_REPO_ID,
filename=video_file,
repo_type="dataset",
local_dir=DOWNLOAD_DIR,
token=HF_TOKEN
)
video_name = Path(video_file).stem
video_frames_dir = os.path.join(FRAMES_DIR, video_name)
add_log(f"Extracting frames for {video_name}...")
frame_count = extract_frames(local_video_path, video_frames_dir)
if frame_count > 0:
zip_filename = f"{video_name}_frames.zip"
zip_path = os.path.join(ZIPS_DIR, zip_filename)
add_log(f"Zipping {frame_count} frames...")
zip_folder(video_frames_dir, zip_path)
add_log(f"Uploading to {TARGET_REPO_ID}...")
api.upload_file(
path_or_fileobj=zip_path,
path_in_repo=zip_filename,
repo_id=TARGET_REPO_ID,
repo_type="dataset"
)
state["processed_files"].append(video_file)
save_state(state)
processing_status["processed_count"] = len(state["processed_files"])
processing_status["last_processed"] = video_file
add_log(f"✅ Finished {video_file}")
# Cleanup
if os.path.exists(video_frames_dir): shutil.rmtree(video_frames_dir)
if os.path.exists(local_video_path): os.remove(local_video_path)
if os.path.exists(zip_path): os.remove(zip_path)
processing_status["current_action"] = "Completed"
add_log("🎉 All available videos processed!")
except Exception as e:
add_log(f"❌ Error: {str(e)}")
processing_status["current_action"] = "Error"
finally:
processing_status["is_running"] = False
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
return templates.TemplateResponse(request, "index.html")
@app.get("/stats")
async def get_stats():
return processing_status
@app.post("/start")
async def start_processor(background_tasks: BackgroundTasks):
if not processing_status["is_running"]:
background_tasks.add_task(run_processor)
return {"message": "Processor started"}
return {"message": "Processor already running"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860) |