# Upscale / app.py
# Real-ESRGAN 4x video upscaler — Gradio Space (author: William278989, commit f75befb)
import os
import cv2
import numpy as np
import onnxruntime as ort
import gradio as gr
import subprocess
import shutil
from huggingface_hub import hf_hub_download
# --- AUTH & MODEL SETUP ---
# Optional Hugging Face access token (set as a Space secret); None when
# unset, in which case only public repos are reachable.
HF_TOKEN = os.getenv("HF_TOKEN")
def get_onnx_model():
    """Download the Real-ESRGAN x4 ONNX weights from the Hugging Face Hub.

    Tries the primary repo first (authenticating with HF_TOKEN when set);
    on any failure falls back to the public Xenova mirror of the model.

    Returns:
        Local filesystem path to the downloaded .onnx file.
    """
    try:
        return hf_hub_download(
            repo_id="KingPro100/real-esrgan-onxx",
            filename="Real-ESRGAN-x4plus.onnx",
            token=HF_TOKEN,
        )
    except Exception as e:
        # Log why the primary download failed instead of silently
        # swallowing it, then fall back to the public mirror.
        print(f"Primary model download failed ({e}); falling back to Xenova mirror.")
        return hf_hub_download(repo_id="Xenova/realesrgan-x4plus", filename="onnx/model.onnx")
# Resolve the model weights once at startup.
MODEL_FILE = get_onnx_model()
# CPU Stability Settings
# Cap intra-op threads to keep memory/CPU use predictable on small
# shared CPU instances.
sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = 2
session = ort.InferenceSession(MODEL_FILE, sess_options, providers=['CPUExecutionProvider'])
def upscale_frame(frame):
    """Run one BGR frame through the ONNX super-resolution session.

    Converts the frame to RGB float32 NCHW in [0, 1], runs inference,
    then converts the result back to a uint8 BGR image (4x resolution
    with the bundled x4plus model).
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # HWC uint8 -> NCHW float32 in [0, 1], batch of one.
    tensor = np.transpose(rgb.astype(np.float32) / 255.0, (2, 0, 1))[np.newaxis, ...]
    feed = {session.get_inputs()[0].name: tensor}
    result = session.run(None, feed)[0]
    # Drop the batch dim, clamp to the valid range, back to HWC.
    result = np.transpose(np.clip(np.squeeze(result), 0, 1), (1, 2, 0))
    return cv2.cvtColor((result * 255.0).astype(np.uint8), cv2.COLOR_RGB2BGR)
def process_video(input_path, do_sharpen, progress=gr.Progress()):
    """Upscale a video 4x frame-by-frame and reassemble it with its audio.

    Args:
        input_path: Path to the uploaded video file; falsy -> no-op.
        do_sharpen: When True, apply ffmpeg's `unsharp` filter to the
            reassembled video for extra clarity.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Path to the upscaled mp4, or None when no input was given.
    """
    if not input_path:
        return None

    cap = cv2.VideoCapture(input_path)
    # Guard against containers reporting 0 frames so the progress
    # division below never raises ZeroDivisionError.
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 1
    # Preserve the source frame rate instead of hard-coding 24 fps —
    # otherwise the output plays at the wrong speed. Fall back to 24
    # when the container does not report a rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or 24.0

    # 1. Extract audio. Argument-list form (shell=False) avoids shell
    # injection / word-splitting via a crafted upload filename.
    audio_path = "temp_audio.mp3"
    subprocess.run(
        ["ffmpeg", "-i", input_path, "-vn", "-acodec", "libmp3lame", audio_path, "-y"]
    )
    # The source may have no audio track, in which case ffmpeg produces
    # no file; skip the audio inputs during reassembly then.
    has_audio = os.path.exists(audio_path)

    # 2. Upscale every frame into a fresh working directory.
    frames_dir = "temp_frames"
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.makedirs(frames_dir)

    count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        try:
            upscaled = upscale_frame(frame)
        except Exception:
            # Per-frame fallback: plain Lanczos 4x resize so one bad
            # frame doesn't abort the whole video.
            h, w = frame.shape[:2]
            upscaled = cv2.resize(frame, (w * 4, h * 4), interpolation=cv2.INTER_LANCZOS4)
        cv2.imwrite(f"{frames_dir}/frame_{count:05d}.png", upscaled)
        count += 1
        if count % 5 == 0:
            progress(count / total_frames, desc=f"4x Scaling: {count}/{total_frames}")
    cap.release()

    # 3. Reassemble with optional sharpening. ffmpeg 'unsharp' params:
    # luma_matrix_width:luma_matrix_height:luma_amount.
    output_video = "upscaled_output.mp4"
    cmd = ["ffmpeg", "-framerate", str(fps), "-i", f"{frames_dir}/frame_%05d.png"]
    if has_audio:
        cmd += ["-i", audio_path]
    if do_sharpen:
        cmd += ["-vf", "unsharp=5:5:1.0"]
    cmd += ["-c:v", "libx264", "-preset", "superfast", "-pix_fmt", "yuv420p"]
    if has_audio:
        cmd += ["-c:a", "aac", "-shortest"]
    cmd += [output_video, "-y"]
    subprocess.run(cmd)

    # Cleanup temp artifacts.
    shutil.rmtree(frames_dir)
    if os.path.exists(audio_path):
        os.remove(audio_path)
    return output_video
# --- Updated UI with Toggle ---
# Gradio front-end: one video input plus a sharpening checkbox, wired
# straight to process_video.
demo = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Checkbox(
            label="Enable Post-Upscale Sharpening",
            value=False,
            info="Check this if the AI output looks too soft or blurry.",
        ),
    ],
    outputs=gr.Video(label="Upscaled Result"),
    title="Real-ESRGAN 4x CPU (with Clarity Toggle)",
    description="Processes video to 4x resolution. Use the toggle to add extra sharpness if needed.",
)

if __name__ == "__main__":
    demo.launch()