Spaces:
Running
Running
| import os | |
| import cv2 | |
| import shutil | |
| import subprocess | |
| import numpy as np | |
| import onnxruntime as ort | |
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
# --- MODEL SETUP ---
# Using the 2x model that requires 64x64 fixed input
def load_model():
    """Fetch the Real-ESRGAN x2 ONNX weights and build a CPU-only inference session."""
    weights = hf_hub_download(
        repo_id="tidus2102/Real-ESRGAN",
        filename="Real-ESRGAN_x2plus.onnx",
    )
    opts = ort.SessionOptions()
    # Keep thread use modest — presumably running on small shared CPU hardware
    opts.intra_op_num_threads = 2
    return ort.InferenceSession(weights, opts, providers=['CPUExecutionProvider'])
| session = load_model() | |
def upscale_frame_tiled(frame):
    """Upscale a BGR uint8 frame 2x with Real-ESRGAN using batched 64x64 tiles.

    The ONNX model expects a fixed 64x64 spatial input, so the frame is cut
    into tiles; edge tiles are reflection-padded up to 64x64 and the padded
    region is cropped away when stitching the 2x output back together.

    Args:
        frame: HxWx3 uint8 BGR image (OpenCV convention).

    Returns:
        (2H)x(2W)x3 uint8 BGR image.
    """
    tile_size = 64
    h, w, c = frame.shape
    upscaled_img = np.zeros((h * 2, w * 2, c), dtype=np.uint8)
    tiles = []
    coords = []
    # 1. Collect all tiles first (converted to RGB float32 in [0, 1])
    for y in range(0, h, tile_size):
        for x in range(0, w, tile_size):
            y_end, x_end = min(y + tile_size, h), min(x + tile_size, w)
            tile = frame[y:y_end, x:x_end]
            # Pad edge tiles up to the fixed model input size
            if tile.shape[0] < tile_size or tile.shape[1] < tile_size:
                tile = cv2.copyMakeBorder(tile, 0, tile_size - tile.shape[0], 0, tile_size - tile.shape[1], cv2.BORDER_REFLECT)
            tiles.append(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0)
            coords.append((y, x, y_end - y, x_end - x))
    # 2. Process in batches of 32 (trades RAM for fewer session.run calls).
    # The model input name is loop-invariant — look it up once, not per batch.
    input_name = session.get_inputs()[0].name
    batch_size = 32
    all_outputs = []
    for i in range(0, len(tiles), batch_size):
        batch = np.array(tiles[i : i + batch_size])  # (B, 64, 64, 3) HWC
        batch = np.transpose(batch, (0, 3, 1, 2))    # (B, 3, 64, 64) CHW for the model
        output = session.run(None, {input_name: batch})[0]
        all_outputs.extend(output)
    # 3. Stitch back, cropping any reflection padding (everything scales by 2)
    for i, output in enumerate(all_outputs):
        y, x, actual_h, actual_w = coords[i]
        tile_out = np.clip(np.squeeze(output), 0, 1).transpose(1, 2, 0)
        tile_out = cv2.cvtColor((tile_out * 255.0).astype(np.uint8), cv2.COLOR_RGB2BGR)
        upscaled_img[y*2 : y*2 + (actual_h*2), x*2 : x*2 + (actual_w*2)] = tile_out[:actual_h*2, :actual_w*2]
    return upscaled_img
def process_video(input_path, progress=gr.Progress()):
    """Upscale a video 2x frame-by-frame with Real-ESRGAN and reassemble it.

    Args:
        input_path: path to the uploaded video (falsy -> no-op).
        progress: Gradio progress tracker for UI feedback.

    Returns:
        Path to the encoded 2x output video, or None when no input was given.
    """
    if not input_path:
        return None
    # 1. Copy to a fixed local name (sidesteps odd uploaded filenames) & detect FPS
    local_input = "input_video_sanitized.mp4"
    shutil.copy(input_path, local_input)
    cap = cv2.VideoCapture(local_input)
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps < 1:
        fps = 30  # Default if metadata is missing
    cap.release()
    # 2. Setup working paths
    frames_dir, audio_path, output_video = "temp_frames", "temp_audio.mp3", "upscaled_2x.mp4"
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.makedirs(frames_dir)
    # 3. Extract audio & frames. Argv lists (shell=False) avoid shell-quoting /
    # injection pitfalls. Audio extraction fails harmlessly on silent videos;
    # the missing file is checked for before muxing below.
    subprocess.run(["ffmpeg", "-i", local_input, "-vn", "-acodec", "libmp3lame", audio_path, "-y"])
    subprocess.run(["ffmpeg", "-i", local_input, f"{frames_dir}/raw_%05d.png", "-y"])
    raw_files = sorted(f for f in os.listdir(frames_dir) if f.startswith("raw_"))
    total = len(raw_files)
    # 4. Upscale loop (tiled AI inference; Lanczos fallback on model failure).
    # out_idx counts only written frames so the out_%05d sequence stays gapless
    # even if an unreadable frame has to be skipped.
    out_idx = 0
    for i, f_name in enumerate(raw_files):
        f_path = os.path.join(frames_dir, f_name)
        img = cv2.imread(f_path)
        if img is None:
            # Unreadable/corrupt frame: drop it rather than crash mid-run
            os.remove(f_path)
            continue
        try:
            # THIS IS WHERE THE AI RUNS (Tiled to prevent dimension error)
            res = upscale_frame_tiled(img)
        except Exception as e:
            print(f"Critical Error on Frame {i}: {e}")
            h, w = img.shape[:2]
            res = cv2.resize(img, (w*2, h*2), interpolation=cv2.INTER_LANCZOS4)
        cv2.imwrite(os.path.join(frames_dir, f"out_{out_idx:05d}.png"), res)
        out_idx += 1
        os.remove(f_path)  # Conserve disk space
        if i % 2 == 0:
            progress(i / total, desc=f"AI Upscaling {i}/{total} @ {fps} FPS")
    # 5. Reassemble video; mux the audio back in only if it was extracted
    cmd = ["ffmpeg", "-framerate", str(fps), "-i", f"{frames_dir}/out_%05d.png"]
    if os.path.exists(audio_path):
        cmd += ["-i", audio_path]
    cmd += ["-c:v", "libx264", "-pix_fmt", "yuv420p", "-c:a", "aac", "-shortest", output_video, "-y"]
    subprocess.run(cmd)
    # Final cleanup of scratch files
    shutil.rmtree(frames_dir)
    for f in (audio_path, local_input):
        if os.path.exists(f):
            os.remove(f)
    return output_video
# --- UI ---
# Single-function Gradio app: video in -> 2x upscaled video out.
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Input Video"),
    outputs=gr.Video(label="Upscaled 2x Result"),
    title="Real-ESRGAN 2x (CPU Tiled)",
    description="Uses 64x64 tiling to bypass dimension errors. Note: This is detailed but slower on CPU.Takes About 70 Sec Per Frame For 720P"
)
# Launch the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()