EXTFrame / modified_video_processor.py
Fred808's picture
Upload modified_video_processor.py
c64d671 verified
import os
import json
import requests
import subprocess
import shutil
import asyncio
import threading
import time
import hashlib
import zipfile
import uvicorn
from typing import Dict, List, Set, Optional
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import HfApi, list_repo_files
from huggingface_hub.utils import HfHubHTTPError
# ==== CONFIGURATION ====
HF_TOKEN = os.getenv("HF_TOKEN", "")
SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1") # Source for RARs
DEST_REPO_ID_RAR = os.getenv("DEST_REPO_RAR", "") # Destination for extracted RAR contents (set to empty string if not needed)
# DEST_REPO_ID_VIDEO = os.getenv("DEST_REPO_VIDEO", "Fred808/BG3") # Destination for zipped video frames - DISABLED FOR DOWNLOAD MODE
DOWNLOAD_FOLDER = "downloads"
EXTRACT_FOLDER = "extracted_tmp"
VIDEO_FRAMES_EXTRACT_FOLDER = "video_frames_tmp"
ZIPPED_FRAMES_FOLDER = "zipped_frames" # This will now store files for download instead of upload
DOWNLOAD_STATE_FILE = "download_progress.json"
PROCESS_STATE_FILE = "process_progress.json"
UPLOADED_FOLDERS_FILE = "uploaded_folders.json" # Track uploaded folder hashes for BG2
PROCESSED_VIDEO_COURSES_FILE = "processed_video_courses.json" # Track processed video course folders for BG3
FAILED_FILES_LOG = "failed_files.txt"
CHUNK_SIZE = 3 # Smaller chunks for Space environment
PROCESSING_DELAY = 2 # Delay between processing files (seconds)
VIDEO_FRAME_FPS = 3 # Frames per second to extract from videos
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(EXTRACT_FOLDER, exist_ok=True)
os.makedirs(VIDEO_FRAMES_EXTRACT_FOLDER, exist_ok=True)
os.makedirs(ZIPPED_FRAMES_FOLDER, exist_ok=True)
api = HfApi(token=HF_TOKEN)
# Global state
processing_status = {
"is_running": False,
"current_file": None,
"total_files": 0,
"processed_files": 0,
"failed_files": 0,
"uploaded_rar_folders": 0,
"extracted_video_courses": 0, # Changed from uploaded_video_courses to extracted_video_courses
"last_update": None,
"logs": []
}
app = FastAPI(title="RAR & Video Processing Service", description="Automated RAR extraction and video frame extraction service with download capability")
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def log_message(message: str):
"""Add message to logs and print it"""
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
log_entry = f"[{timestamp}] {message}"
print(log_entry)
processing_status["logs"].append(log_entry)
processing_status["last_update"] = timestamp
# Keep only last 100 log entries
if len(processing_status["logs"]) > 100:
processing_status["logs"] = processing_status["logs"][-100:]
def log_failed_file(filename: str, error_msg: str):
"""Log failed files to a separate file for later review"""
with open(FAILED_FILES_LOG, "a") as f:
f.write(f"{filename}: {error_msg}\n")
log_message(f"❌ Failed: {filename} - {error_msg}")
def get_folder_hash(folder_name: str) -> str:
"""Generate a hash for the folder name to use as a unique identifier"""
return hashlib.md5(folder_name.encode()).hexdigest()
def load_uploaded_folders() -> Set[str]:
"""Load set of uploaded folder hashes for BG2"""
if os.path.exists(UPLOADED_FOLDERS_FILE):
try:
with open(UPLOADED_FOLDERS_FILE, "r") as f:
data = json.load(f)
return set(data.get("uploaded_folder_hashes", []))
except json.JSONDecodeError:
log_message(f"⚠️ Warning: Could not decode {UPLOADED_FOLDERS_FILE}. Starting with empty set.")
return set()
return set()
def save_uploaded_folders(uploaded_set: Set[str]):
"""Save set of uploaded folder hashes for BG2"""
with open(UPLOADED_FOLDERS_FILE, "w") as f:
json.dump({"uploaded_folder_hashes": list(uploaded_set)}, f)
def load_processed_video_courses() -> Set[str]:
"""Loads the set of processed video course folder names."""
if os.path.exists(PROCESSED_VIDEO_COURSES_FILE):
try:
with open(PROCESSED_VIDEO_COURSES_FILE, "r") as f:
return set(json.load(f))
except json.JSONDecodeError:
log_message(f"⚠️ Warning: Could not decode {PROCESSED_VIDEO_COURSES_FILE}. Starting with empty set.")
return set()
return set()
def save_processed_video_courses(processed_set: set):
"""Saves the set of processed video course folder names to a file."""
with open(PROCESSED_VIDEO_COURSES_FILE, "w") as f:
json.dump(list(processed_set), f)
def load_download_state() -> int:
"""Load download progress state"""
if os.path.exists(DOWNLOAD_STATE_FILE):
try:
with open(DOWNLOAD_STATE_FILE, "r") as f:
return json.load(f).get("next_download_index", 0)
except json.JSONDecodeError:
log_message(f"⚠️ Warning: Could not decode {DOWNLOAD_STATE_FILE}. Starting download from index 0.")
return 0
return 0
def save_download_state(next_index: int):
"""Save download progress state"""
with open(DOWNLOAD_STATE_FILE, "w") as f:
json.dump({"next_download_index": next_index}, f)
def load_processed_files_state() -> set:
"""Load processed files from the state file"""
if os.path.exists(PROCESS_STATE_FILE):
try:
with open(PROCESS_STATE_FILE, "r") as f:
data = json.load(f)
return set(data.get("processed_rars", []))
except json.JSONDecodeError:
log_message(f"⚠️ Warning: Could not decode {PROCESS_STATE_FILE}. Starting with empty processed list.")
return set()
return set()
def save_processed_files_state(processed_set: set):
"""Save processed files to the state file"""
with open(PROCESS_STATE_FILE, "w") as f:
json.dump({"processed_rars": list(processed_set)}, f)
def download_rar_files(start_index: int, chunk_size: int) -> tuple:
"""Downloads a batch of RAR files from the source dataset"""
try:
all_files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
# Filter for .rar files and exclude the specific one
rar_files_in_repo = sorted([f for f in all_files if f.endswith(".rar") and "ZBrush/3DConceptArtist_TheUltimateZbrushGuide_DownloadPirate.com.rar" not in f])
end_index = start_index + chunk_size
files_to_download_metadata = rar_files_in_repo[start_index:end_index]
if not files_to_download_metadata:
log_message("βœ… No more RAR files to download.")
return [], start_index
log_message(f"πŸ“₯ Downloading RAR files {start_index + 1} to {end_index} from {SOURCE_REPO_ID}")
downloaded_paths = []
for file_path_in_repo in files_to_download_metadata:
filename = os.path.basename(file_path_in_repo)
dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
file_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{file_path_in_repo}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
if os.path.exists(dest_path):
log_message(f"⏩ Already exists, skipping: {filename}")
downloaded_paths.append(dest_path)
continue
log_message(f"πŸ”½ Downloading: {file_path_in_repo}")
try:
with requests.get(file_url, headers=headers, stream=True) as r:
r.raise_for_status()
with open(dest_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
log_message(f"βœ… Downloaded: {filename}")
downloaded_paths.append(dest_path)
except Exception as e:
log_message(f"❌ Failed to download {file_path_in_repo}: {e}")
log_failed_file(file_path_in_repo, f"Download failed: {e}")
return downloaded_paths, end_index
except Exception as e:
log_message(f"❌ Error in download_rar_files: {e}")
return [], start_index
def extract_frames(video_path: str, output_folder: str, fps: int) -> bool:
"""Extracts frames from a video using ffmpeg."""
os.makedirs(output_folder, exist_ok=True)
# Ensure ffmpeg is available
if shutil.which("ffmpeg") is None:
log_message("❌ ffmpeg not found. Please install ffmpeg.")
return False
# ffmpeg command to extract frames at specified FPS
command = [
"ffmpeg",
"-i", video_path,
"-vf", f"fps={fps}",
os.path.join(output_folder, "frame_%04d.png")
]
log_message(f"πŸ–ΌοΈ Extracting frames from {os.path.basename(video_path)} to {output_folder} at {fps} FPS...")
try:
subprocess.run(command, check=True, capture_output=True, text=True)
log_message(f"βœ… Successfully extracted frames from {os.path.basename(video_path)}")
return True
except subprocess.CalledProcessError as e:
log_message(f"❌ Error extracting frames from {os.path.basename(video_path)}: {e.stderr}")
return False
def zip_folder(folder_path: str, output_zip_path: str) -> bool:
"""Zips the contents of a folder."""
log_message(f"πŸ“¦ Compressing {folder_path} to {output_zip_path}...")
try:
with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for root, _, files in os.walk(folder_path):
for file in files:
file_path = os.path.join(root, file)
arcname = os.path.relpath(file_path, folder_path)
zipf.write(file_path, arcname)
log_message(f"βœ… Successfully zipped {folder_path}")
return True
except Exception as e:
log_message(f"❌ Error zipping {folder_path}: {e}")
return False
def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retries: int = 5, initial_delay: int = 5) -> bool:
"""Uploads a single file to Hugging Face Hub with retry logic and exponential backoff."""
log_message(f"⬆️ Uploading {os.path.basename(local_path)} to {repo_id}/{path_in_repo}")
for attempt in range(max_retries):
try:
api.upload_file(
path_or_fileobj=local_path,
path_in_repo=path_in_repo,
repo_id=repo_id,
repo_type="dataset"
)
log_message(f"βœ… Uploaded: {os.path.basename(local_path)}")
return True
except HfHubHTTPError as e:
if e.response.status_code == 429 and attempt < max_retries - 1: # Too Many Requests
delay = initial_delay * (2 ** attempt)
log_message(f"⚠️ Rate limit hit. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
time.sleep(delay)
else:
log_message(f"❌ Hugging Face Hub error uploading {os.path.basename(local_path)}: {e}")
return False
except Exception as e:
log_message(f"❌ Error uploading {os.path.basename(local_path)}: {e}")
return False
log_message(f"❌ Failed to upload {os.path.basename(local_path)} after {max_retries} attempts.")
return False
def process_video_frames_for_download(extracted_rar_folder: str, processed_video_courses_set: Set[str]) -> bool:
"""Scans an extracted RAR folder for MP4s, extracts frames, zips, and saves for download."""
video_processed_successfully = False
# Use the top-level folder name of the extracted RAR as the course folder name
course_folder_name = os.path.basename(extracted_rar_folder)
if course_folder_name in processed_video_courses_set:
log_message(f"⏩ Video frames for course '{course_folder_name}' already processed. Skipping.")
return True
log_message(f"🎬 Processing videos in extracted RAR folder: {course_folder_name}")
video_files_found = []
for root, _, files in os.walk(extracted_rar_folder):
for file in files:
# Check for common video file extensions
if file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')):
video_files_found.append(os.path.join(root, file))
if not video_files_found:
log_message(f"⚠️ No video files found in {course_folder_name}. Skipping video frame extraction.")
return False # Indicate no video processing was done
course_video_extract_dir = os.path.join(VIDEO_FRAMES_EXTRACT_FOLDER, course_folder_name)
os.makedirs(course_video_extract_dir, exist_ok=True)
frames_extracted_count = 0
for video_path in video_files_found:
video_basename = os.path.splitext(os.path.basename(video_path))[0]
# Create a unique output folder for frames from this video within the course's frame directory
video_output_folder = os.path.join(course_video_extract_dir, video_basename)
if extract_frames(video_path, video_output_folder, VIDEO_FRAME_FPS):
frames_extracted_count += 1
else:
log_message(f"❌ Failed to extract frames from {os.path.basename(video_path)}. Continuing with other videos.")
# Clean up partially extracted frames for this video
if os.path.exists(video_output_folder):
shutil.rmtree(video_output_folder)
# Check if any frames were extracted for the entire course folder
if frames_extracted_count == 0:
log_message(f"⚠️ No frames extracted for any video in {course_folder_name}. Skipping zipping.")
if os.path.exists(course_video_extract_dir):
shutil.rmtree(course_video_extract_dir)
return False
course_zip_path = os.path.join(ZIPPED_FRAMES_FOLDER, f"{course_folder_name}_frames.zip")
if zip_folder(course_video_extract_dir, course_zip_path):
log_message(f"βœ… Successfully processed video frames and saved {course_folder_name}_frames.zip for download")
processed_video_courses_set.add(course_folder_name) # Mark as processed
save_processed_video_courses(processed_video_courses_set) # Save state
video_processed_successfully = True
# Clean up the temporary extraction folder but keep the zip file for download
log_message(f"🧹 Cleaning up temporary video frame files for {course_folder_name}")
if os.path.exists(course_video_extract_dir):
shutil.rmtree(course_video_extract_dir)
else:
log_message(f"❌ Failed to zip video frames for {course_folder_name}")
return video_processed_successfully
def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_folders_set: Set[str], processed_video_courses_set: Set[str]) -> bool:
"""Extracts a single RAR file, uploads its contents to BG2 (if DEST_REPO_ID_RAR is set), and then processes videos for download"""
filename = os.path.basename(rar_path)
processing_status["current_file"] = filename
folder_name = filename.replace(".rar", "")
folder_hash = get_folder_hash(folder_name)
current_extract_folder = os.path.join(EXTRACT_FOLDER, f"{folder_name}_extracted")
# Check if RAR is already processed (uploaded to BG2 or video frames processed)
# This logic needs to be careful. If BG2 is not set, we only care about video processing.
# If video processing is not needed, we only care about BG2 upload.
is_bg2_processed = (not DEST_REPO_ID_RAR) or (folder_hash in uploaded_folders_set)
is_bg3_processed = (folder_name in processed_video_courses_set)
if filename in processed_rars_set and is_bg2_processed and is_bg3_processed:
log_message(f"⏩ {filename} already fully processed, skipping.")
return True
# If BG2 upload is enabled and folder already uploaded to BG2, skip RAR extraction/upload to BG2
# but still proceed to video processing if not already done.
if DEST_REPO_ID_RAR and folder_hash in uploaded_folders_set and not is_bg3_processed:
log_message(f"πŸ”’ Folder '{folder_name}' already uploaded to BG2 (hash: {folder_hash[:8]}...), skipping RAR upload.")
# If the extracted folder doesn't exist, we can't process videos from it.
# This scenario might happen if the previous run was interrupted after BG2 upload but before video processing cleanup.
if not os.path.exists(current_extract_folder):
log_message(f"⚠️ Extracted folder {current_extract_folder} not found for video processing. Attempting re-extraction for video processing.")
# Fall through to re-extract and process videos
else:
# Proceed to video processing if not already done
log_message(f"Continuing with video processing for {filename}.")
video_processed = process_video_frames_for_download(current_extract_folder, processed_video_courses_set)
if video_processed:
processed_rars_set.add(filename)
save_processed_files_state(processed_rars_set)
return video_processed
log_message(f"πŸ“¦ Attempting to extract: {filename}")
os.makedirs(current_extract_folder, exist_ok=True)
try:
if shutil.which("unrar") is None:
raise RuntimeError("unrar command not found. Please install unrar.")
# Use -idq to suppress query messages and -o+ to overwrite without prompting
unrar_command = ["unrar", "x", "-o+", rar_path, current_extract_folder]
log_message(f"Running command: {' '.join(unrar_command)}")
result = subprocess.run(
unrar_command,
check=True,
capture_output=True,
text=True,
encoding='utf-8'
)
extracted_contents = os.listdir(current_extract_folder)
if not extracted_contents:
raise Exception("Extraction completed but no files were produced in the target directory.")
log_message(f"Successfully extracted {len(extracted_contents)} items")
# Upload extracted files to BG2 if DEST_REPO_ID_RAR is set
if DEST_REPO_ID_RAR:
upload_count = 0
for root, _, files in os.walk(current_extract_folder):
for file in files:
local_path = os.path.join(root, file)
# Construct path in repo relative to the extracted content's root
path_in_repo = os.path.join(folder_name, os.path.relpath(local_path, current_extract_folder))
log_message(f"⬆️ Uploading to BG2: {path_in_repo}")
try:
if upload_file_to_hf(
local_path=local_path,
path_in_repo=path_in_repo,
repo_id=DEST_REPO_ID_RAR
):
upload_count += 1
else:
log_message(f"❌ Failed to upload {path_in_repo} to BG2. Skipping remaining uploads for this RAR.")
# Consider if you want to fail the whole RAR processing here or continue.
# For now, we'll continue but log the failure.
except Exception as upload_error:
log_message(f"❌ Failed to upload {path_in_repo} to BG2: {upload_error}")
# Don't re-raise, allow other files to be attempted
if upload_count > 0: # Only mark as uploaded if at least one file was successfully uploaded
log_message(f"βœ… Successfully uploaded {upload_count} files from {filename} to BG2")
# Mark folder as uploaded to BG2 using hash
uploaded_folders_set.add(folder_hash)
save_uploaded_folders(uploaded_folders_set)
processing_status["uploaded_rar_folders"] = len(uploaded_folders_set)
log_message(f"πŸ”’ Folder '{folder_name}' locked in BG2 repo (hash: {folder_hash[:8]}...)")
else:
log_message(f"⚠️ No files were successfully uploaded from {filename} to BG2.")
else:
log_message("Skipping upload to BG2 as DEST_REPO_ID_RAR is not set.")
# Now process video frames from the extracted content for download
video_processed = process_video_frames_for_download(current_extract_folder, processed_video_courses_set)
# Mark RAR as processed only if both BG2 (if enabled) and video processing are successful
# Or if BG2 is not enabled, only video processing needs to be successful
if (not DEST_REPO_ID_RAR or (folder_hash in uploaded_folders_set)) and video_processed:
processed_rars_set.add(filename)
save_processed_files_state(processed_rars_set)
processing_status["processed_files"] = len(processed_rars_set)
processing_status["extracted_video_courses"] = len(processed_video_courses_set)
return True
elif DEST_REPO_ID_RAR and not (folder_hash in uploaded_folders_set):
log_message(f"❌ RAR processing failed for {filename}: BG2 upload was not successful.")
return False
elif not video_processed:
log_message(f"❌ RAR processing failed for {filename}: Video frame processing was not successful.")
return False
else:
# This case should ideally not be reached if the above logic is exhaustive
log_message(f"❌ RAR processing failed for {filename}: Unknown reason.")
return False
except subprocess.CalledProcessError as e:
error_msg = f"RAR extraction failed (exit {e.returncode}): {e.stderr.strip()}"
log_failed_file(filename, error_msg)
processing_status["failed_files"] += 1
return False
except Exception as e:
error_msg = f"Unexpected error during processing {filename}: {str(e)}"
log_failed_file(filename, error_msg)
processing_status["failed_files"] += 1
return False
finally:
# Always cleanup the extraction folder after processing (success or failure)
if os.path.exists(current_extract_folder):
log_message(f"🧹 Cleaning up extracted RAR files in {current_extract_folder}")
try:
shutil.rmtree(current_extract_folder)
log_message(f"βœ… Cleaned up RAR extraction folder")
except Exception as e:
log_message(f"⚠️ Could not clean up RAR extraction folder {current_extract_folder}: {e}")
def continuous_processing(start_download_index: Optional[int] = None):
"""Main processing loop that runs continuously, with an optional starting download index"""
processing_status["is_running"] = True
log_message("πŸš€ Starting continuous RAR and Video processing...")
try:
# Load uploaded folders tracking for BG2
uploaded_folders = load_uploaded_folders()
processing_status["uploaded_rar_folders"] = len(uploaded_folders)
# Load processed video courses tracking
processed_video_courses = load_processed_video_courses()
processing_status["extracted_video_courses"] = len(processed_video_courses)
if start_download_index is not None:
log_message(f"Starting download from index: {start_download_index}")
save_download_state(start_download_index) # Set download state to start from this index
else:
log_message("Starting download from saved state or beginning.")
while processing_status["is_running"]:
# 1. Download a batch of RAR files
download_start_index = load_download_state()
downloaded_rar_paths, next_download_index = download_rar_files(download_start_index, CHUNK_SIZE)
save_download_state(next_download_index)
# 2. Process all available RAR files (downloaded + existing)
all_local_rars = sorted([os.path.join(DOWNLOAD_FOLDER, f) for f in os.listdir(DOWNLOAD_FOLDER) if f.endswith(".rar")])
processed_rars = load_processed_files_state()
processing_status["total_files"] = len(all_local_rars)
# Recalculate processed_files based on actual processed_rars_set to be accurate
processing_status["processed_files"] = len(processed_rars)
# Filter out RARs that are already fully processed based on current state
rars_to_process = []
for rar_file_path in all_local_rars:
filename = os.path.basename(rar_file_path)
folder_name = filename.replace(".rar", "")
is_bg2_processed = (not DEST_REPO_ID_RAR) or (get_folder_hash(folder_name) in uploaded_folders)
is_bg3_processed = (folder_name in processed_video_courses)
if not (filename in processed_rars and is_bg2_processed and is_bg3_processed):
rars_to_process.append(rar_file_path)
if not downloaded_rar_paths and not rars_to_process:
log_message("βœ… No more RAR files to download or process. Stopping...")
break
for rar_file_path in rars_to_process:
if not processing_status["is_running"]:
break
filename = os.path.basename(rar_file_path)
success = extract_and_upload_rar(rar_file_path, processed_rars, uploaded_folders, processed_video_courses)
if success:
# Delete the RAR file after successful processing
log_message(f"πŸ—‘οΈ Deleting processed RAR: {filename}")
try:
os.remove(rar_file_path)
log_message(f"βœ… Deleted RAR file: {filename}")
except Exception as e:
log_message(f"⚠️ Could not delete {rar_file_path}: {e}")
# Add delay between processing files
time.sleep(PROCESSING_DELAY)
# If no new files were downloaded and all local files are processed, we're done
if not downloaded_rar_paths and not rars_to_process:
break
except Exception as e:
log_message(f"❌ Error in continuous processing: {e}")
finally:
processing_status["is_running"] = False
processing_status["current_file"] = None
log_message("🏁 Processing stopped")
@app.get("/", response_class=HTMLResponse)
async def root():
"""Serve the main HTML interface"""
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>RAR & Video Processing Service</title>
<meta charset=\"utf-8\">
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">
<style>
body { font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }
.container { max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
.status-card { background: #e3f2fd; padding: 15px; border-radius: 5px; margin: 10px 0; }
.logs { background: #f5f5f5; padding: 15px; border-radius: 5px; height: 400px; overflow-y: auto; font-family: monospace; font-size: 12px; }
.button { background: #2196F3; color: white; padding: 10px 20px; border: none; border-radius: 5px; cursor: pointer; margin: 5px; }
.button:hover { background: #1976D2; }
.button:disabled { background: #ccc; cursor: not-allowed; }
.stop-button { background: #f44336; }
.stop-button:hover { background: #d32f2f; }
.download-button { background: #4CAF50; }
.download-button:hover { background: #45a049; }
.stats { display: flex; gap: 20px; margin: 20px 0; }
.stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
.start-form { margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
.start-form input[type=\"number\"] { width: calc(100% - 120px); padding: 8px; margin-right: 10px; border: 1px solid #ccc; border-radius: 4px; }
.start-form button { padding: 8px 15px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; }
.start-form button:hover { background: #45a049; }
.downloads-section { margin-top: 30px; padding: 20px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
.download-list { max-height: 300px; overflow-y: auto; }
.download-item { display: flex; justify-content: space-between; align-items: center; padding: 10px; border-bottom: 1px solid #eee; }
.download-item:last-child { border-bottom: none; }
</style>
</head>
<body>
<div class=\"container\">
<h1>πŸ”„ RAR & Video Processing Service</h1>
<p>Automated extraction and upload of RAR files from BG1 to BG2 dataset, and video frame extraction for download</p>
<div class=\"status-card\">
<h3>Status: <span id=\"status\">Stopped</span></h3>
<p>Current File: <span id=\"current-file\">None</span></p>
<p>Last Update: <span id=\"last-update\">Never</span></p>
</div>
<div class=\"stats\">
<div class=\"stat-item\">
<h4>Total Files (RARs)</h4>
<span id=\"total-files\">0</span>
</div>
<div class=\"stat-item\">
<h4>Processed (RARs)</h4>
<span id=\"processed-files\">0</span>
</div>
<div class=\"stat-item\">
<h4>Uploaded Folders (BG2)</h4>
<span id=\"uploaded-rar-folders\">0</span>
</div>
<div class=\"stat-item\">
<h4>Extracted Video Courses</h4>
<span id=\"extracted-video-courses\">0</span>
</div>
<div class=\"stat-item\">
<h4>Failed</h4>
<span id=\"failed-files\">0</span>
</div>
</div>
<div class=\"start-form\">
<h3>Start Processing from Specific Download Index</h3>
<input type=\"number\" id=\"start-index-input\" placeholder=\"Enter start index (e.g., 0)\" value=\"0\">
<button onclick=\"startProcessingWithIndex()\">Start from Index</button>
</div>
<div>
<button class=\"button\" onclick=\"startProcessing()\" id=\"start-btn\">Start Processing (from last saved index)</button>
<button class=\"button stop-button\" onclick=\"stopProcessing()\" id=\"stop-btn\" disabled>Stop Processing</button>
<button class=\"button\" onclick=\"refreshStatus()\">Refresh Status</button>
<button class=\"button download-button\" onclick=\"refreshDownloads()\">Refresh Downloads</button>
</div>
<div class=\"downloads-section\">
<h3>Available Downloads</h3>
<div class=\"download-list\" id=\"download-list\">
<p>Loading...</p>
</div>
</div>
<h3>Logs</h3>
<div class=\"logs\" id=\"logs\">Loading...</div>
</div>
<script>
async function startProcessing() {
try {
const response = await fetch(\"/start\", { method: \"POST\" });
const result = await response.json();
alert(result.message);
refreshStatus();
} catch (error) {
alert(\"Error starting processing: \" + error.message);
}
}
async function startProcessingWithIndex() {
const index = document.getElementById(\"start-index-input\").value;
if (index === \"\" || isNaN(index)) {
alert(\"Please enter a valid number for the start index.\");
return;
}
try {
const response = await fetch(\"/start_from_index\", {
method: \"POST\",
headers: { \"Content-Type\": \"application/x-www-form-urlencoded\" },
body: `start_index=${parseInt(index)}`
});
const result = await response.json();
alert(result.message);
refreshStatus();
} catch (error) {
alert(\"Error starting processing from index: \" + error.message);
}
}
async function stopProcessing() {
try {
const response = await fetch(\"/stop\", { method: \"POST\" });
const result = await response.json();
alert(result.message);
refreshStatus();
} catch (error) {
alert(\"Error stopping processing: \" + error.message);
}
}
async function refreshStatus() {
try {
const response = await fetch(\"/status\");
const status = await response.json();
document.getElementById(\"status\").textContent = status.is_running ? \"Running\" : \"Stopped\";
document.getElementById(\"current-file\").textContent = status.current_file || \"None\";
document.getElementById(\"last-update\").textContent = status.last_update || \"Never\";
document.getElementById(\"total-files\").textContent = status.total_files;
document.getElementById(\"processed-files\").textContent = status.processed_files;
document.getElementById(\"uploaded-rar-folders\").textContent = status.uploaded_rar_folders;
document.getElementById(\"extracted-video-courses\").textContent = status.extracted_video_courses;
document.getElementById(\"failed-files\").textContent = status.failed_files;
document.getElementById(\"start-btn\").disabled = status.is_running;
document.getElementById(\"stop-btn\").disabled = !status.is_running;
const logsDiv = document.getElementById(\"logs\");
logsDiv.innerHTML = status.logs.join(\"<br>\");
logsDiv.scrollTop = logsDiv.scrollHeight;
} catch (error) {
console.error(\"Error refreshing status:\", error);
}
}
async function refreshDownloads() {
try {
const response = await fetch(\"/downloads\");
const downloads = await response.json();
const downloadList = document.getElementById(\"download-list\");
if (downloads.files.length === 0) {
downloadList.innerHTML = \"<p>No downloads available yet.</p>\";
} else {
downloadList.innerHTML = downloads.files.map(file =>
`<div class=\"download-item\">
<span>${file.name} (${file.size})</span>
<a href=\"/download/${file.name}\" class=\"button download-button\" download>Download</a>
</div>`
).join(\"\");
}
} catch (error) {
console.error(\"Error refreshing downloads:\", error);
document.getElementById(\"download-list\").innerHTML = \"<p>Error loading downloads.</p>\";
}
}
// Auto-refresh every 5 seconds
setInterval(refreshStatus, 5000);
setInterval(refreshDownloads, 10000);
// Initial load
refreshStatus();
refreshDownloads();
</script>
</body>
</html>
"""
return HTMLResponse(content=html_content)
@app.get("/status")
async def get_status():
"""Get current processing status"""
return JSONResponse(content=processing_status)
@app.post("/start")
async def start_processing(background_tasks: BackgroundTasks):
"""Start the processing in background"""
if processing_status["is_running"]:
return {"message": "Processing is already running"}
background_tasks.add_task(continuous_processing)
return {"message": "Processing started"}
@app.post("/start_from_index")
async def start_processing_from_index(background_tasks: BackgroundTasks, start_index: int = Form(...)):
"""Start the processing from a specific download index in background"""
if processing_status["is_running"]:
return {"message": "Processing is already running"}
if start_index < 0:
return {"message": "Start index cannot be negative."}
background_tasks.add_task(continuous_processing, start_download_index=start_index)
return {"message": f"Processing started from download index: {start_index}"}
@app.post("/stop")
async def stop_processing():
"""Stop the processing"""
if not processing_status["is_running"]:
return {"message": "Processing is not running"}
processing_status["is_running"] = False
return {"message": "Processing stop requested"}
@app.get("/logs")
async def get_logs():
"""Get processing logs"""
return {"logs": processing_status["logs"]}
@app.get("/uploaded-folders")
async def get_uploaded_folders():
"""Get list of uploaded folder hashes for BG2"""
uploaded_folders = load_uploaded_folders()
return {"uploaded_folder_count": len(uploaded_folders), "folder_hashes": list(uploaded_folders)}
@app.get("/processed-video-courses")
async def get_processed_video_courses():
"""Get list of processed video course folder names"""
processed_video_courses = load_processed_video_courses()
return {"processed_video_course_count": len(processed_video_courses), "course_names": list(processed_video_courses)}
@app.get("/downloads")
async def list_downloads():
"""List available frame downloads"""
try:
if not os.path.exists(ZIPPED_FRAMES_FOLDER):
return {"files": []}
files = []
for filename in os.listdir(ZIPPED_FRAMES_FOLDER):
if filename.endswith('.zip'):
file_path = os.path.join(ZIPPED_FRAMES_FOLDER, filename)
file_size = os.path.getsize(file_path)
# Convert size to human readable format
if file_size < 1024:
size_str = f"{file_size} B"
elif file_size < 1024 * 1024:
size_str = f"{file_size / 1024:.1f} KB"
elif file_size < 1024 * 1024 * 1024:
size_str = f"{file_size / (1024 * 1024):.1f} MB"
else:
size_str = f"{file_size / (1024 * 1024 * 1024):.1f} GB"
files.append({
"name": filename,
"size": size_str,
"path": file_path
})
# Sort by filename
files.sort(key=lambda x: x["name"])
return {"files": files}
except Exception as e:
log_message(f"❌ Error listing downloads: {e}")
return {"files": [], "error": str(e)}
@app.get("/download/{filename}")
async def download_file(filename: str):
"""Download a specific frame zip file"""
try:
# Sanitize filename to prevent directory traversal
safe_filename = os.path.basename(filename)
file_path = os.path.join(ZIPPED_FRAMES_FOLDER, safe_filename)
if not os.path.exists(file_path):
raise HTTPException(status_code=404, detail="File not found")
if not file_path.endswith('.zip'):
raise HTTPException(status_code=400, detail="Only zip files can be downloaded")
log_message(f"πŸ“₯ Serving download: {safe_filename}")
return FileResponse(
path=file_path,
filename=safe_filename,
media_type='application/zip'
)
except HTTPException:
raise
except Exception as e:
log_message(f"❌ Error serving download {filename}: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=7860)