Spaces:

ibrahim313
/

Lipsing

Configuration error

App Files Files Community

Lipsing / run.py

ibrahim313

Upload 12 files

24bde82 verified about 1 year ago

raw

history blame contribute delete

17.8 kB

	import os
	import sys
	import re
	import argparse
	from easy_functions import (format_time,
	get_input_length,
	get_video_details,
	show_video,
	g_colab)
	import contextlib
	import shutil
	import subprocess
	import time
	from IPython.display import Audio, Image, clear_output, display
	from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
	import configparser

	# Command-line interface: every option is an optional string that defaults
	# to False, so a plain truthiness test tells whether the user supplied it.
	parser = argparse.ArgumentParser(description='SyncKing-Kong main run file')

	# NOTE(review): -output_file is accepted here but args.output_file is not
	# read anywhere in the visible part of this script - confirm intent.
	for _flag, _help_text in (
	    ('-video_file', 'Input video file path'),
	    ('-vocal_file', 'Input audio file path'),
	    ('-output_file', 'Output video file path'),
	):
	    parser.add_argument(_flag, type=str, help=_help_text,
	                        required=False, default=False)

	args = parser.parse_args()

	# retrieve variables from config.ini
	config = configparser.ConfigParser()

	# ConfigParser.read() skips files it cannot open and returns the list of
	# files it actually parsed. An empty list means config.ini was not loaded,
	# which would otherwise only surface below as an opaque KeyError on
	# config['OPTIONS'].
	if not config.read('config.ini'):
	    sys.exit(f"Could not read config.ini in {os.getcwd()}")

	# paths given on the command line take priority over the ones in config.ini
	video_file = args.video_file or config['OPTIONS']['video_file']
	vocal_file = args.vocal_file or config['OPTIONS']['vocal_file']

	# [OPTIONS]
	quality = config['OPTIONS']['quality']
	output_height = config['OPTIONS']['output_height']
	wav2lip_version = config['OPTIONS']['wav2lip_version']
	# kept as the raw config string (not a bool); it is compared as text later on
	use_previous_tracking_data = config['OPTIONS']['use_previous_tracking_data']
	nosmooth = config.getboolean('OPTIONS', 'nosmooth')

	# [PADDING] - integer padding for each side (up, down, left, right)
	U = config.getint('PADDING', 'U')
	D = config.getint('PADDING', 'D')
	L = config.getint('PADDING', 'L')
	R = config.getint('PADDING', 'R')

	# [MASK]
	size = config.getfloat('MASK', 'size')
	feathering = config.getint('MASK', 'feathering')
	mouth_tracking = config.getboolean('MASK', 'mouth_tracking')
	debug_mask = config.getboolean('MASK', 'debug_mask')

	# [OTHER]
	batch_process = config.getboolean('OTHER', 'batch_process')
	output_suffix = config['OTHER']['output_suffix']
	include_settings_in_suffix = config.getboolean('OTHER', 'include_settings_in_suffix')

	# the input preview is only honoured when g_colab() is truthy
	# (presumably "running inside Google Colab" - verify in easy_functions)
	if g_colab():
	    preview_input = config.getboolean("OTHER", "preview_input")
	else:
	    preview_input = False
	preview_settings = config.getboolean("OTHER", "preview_settings")
	frame_to_preview = config.getint("OTHER", "frame_to_preview")

	# start the execution timer and remember where the script was launched from
	start_time = time.time()
	working_directory = os.getcwd()

	# paths pasted with surrounding double quotes are accepted: drop the quotes
	video_file, vocal_file = video_file.strip('"'), vocal_file.strip('"')

	# check video_file exists: it must be non-blank, not a directory, and present
	if not video_file:
	    sys.exit("video_file cannot be blank")
	elif os.path.isdir(video_file):
	    sys.exit(f"{video_file} is a directory, you need to point to a file")
	elif not os.path.exists(video_file):
	    sys.exit(f"Could not find file: {video_file}")

	# pick the checkpoint file that matches the requested model variant
	_checkpoint_name = (
	    "Wav2Lip_GAN.pth" if wav2lip_version == "Wav2Lip_GAN" else "Wav2Lip.pth"
	)
	checkpoint_path = os.path.join(working_directory, "checkpoints", _checkpoint_name)

	# remap the configured feathering level: 3 -> 5 and 2 -> 3, anything else
	# is passed through unchanged
	feathering = {3: 5, 2: 3}.get(feathering, feathering)

	# output_height is either one of two keywords or a custom pixel height;
	# a custom height uses resolution_scale 3
	res_custom = False
	if output_height == "half resolution":
	    resolution_scale = 2
	elif output_height == "full resolution":
	    resolution_scale = 1
	else:
	    res_custom = True
	    resolution_scale = 3

	in_width, in_height, in_fps, in_length = get_video_details(video_file)

	if res_custom:
	    # anything that is not one of the two keywords must be a whole number;
	    # report a bad value clearly instead of crashing with a bare ValueError
	    try:
	        out_height = int(output_height)
	    except ValueError:
	        sys.exit(
	            "output_height must be 'half resolution', 'full resolution' "
	            f"or a whole number of pixels, got: {output_height!r}"
	        )
	else:
	    out_height = round(in_height / resolution_scale)
	fps_for_static_image = 30


	# without any suffix the output name could collide with the input video
	if output_suffix == "" and not include_settings_in_suffix:
	    sys.exit(
	        "Current suffix settings will overwrite your input video! Please add a suffix or tick include_settings_in_suffix"
	    )

	# shift the configured frame number down by one, never below zero
	frame_to_preview = max(frame_to_preview - 1, 0)

	# Assemble the suffix piece by piece and join once at the end.
	# NOTE(review): the source's indentation was lost; the nesting below
	# (mask flags under quality != "fast", preview tag outside the settings
	# branch) is reconstructed from the logic - confirm against the upstream file.
	_suffix_parts = [output_suffix]

	if include_settings_in_suffix:
	    if wav2lip_version == "Wav2Lip_GAN":
	        _suffix_parts.append("_GAN")
	    _suffix_parts.append(f"_{quality}")
	    if output_height != "full resolution":
	        _suffix_parts.append(f"_{out_height}")
	    _suffix_parts.append("_nosmooth1" if nosmooth else "_nosmooth0")

	    # only the non-zero paddings are listed, in U, D, L, R order
	    _active_pads = [
	        f"{_side}{_amount}"
	        for _side, _amount in (("U", U), ("D", D), ("L", L), ("R", R))
	        if _amount != 0
	    ]
	    if _active_pads:
	        _suffix_parts.append("_pads-")
	        _suffix_parts.extend(_active_pads)

	    if quality != "fast":
	        _suffix_parts.append(f"_mask-S{size}F{feathering}")
	        if mouth_tracking:
	            _suffix_parts.append("_mt")
	        if debug_mask:
	            _suffix_parts.append("_debug")

	if preview_settings:
	    _suffix_parts.append("_preview")

	output_suffix = "".join(_suffix_parts)


	# floor division: this is "1" when resolution_scale is 1 and "0" otherwise;
	# rescaleFactor is not referenced again in the visible part of this script
	rescaleFactor = str(round(1 // resolution_scale))

	# scale each padding by the resolution factor and keep it as a string
	pad_up, pad_down, pad_left, pad_right = (
	    str(round(_pad * resolution_scale)) for _pad in (U, D, L, R)
	)
	################################################################################


	######################### reconstruct input paths ##############################
	# A run of digits at the very end of a file name is treated as its file
	# number; the batch loop later re-assembles paths from these pieces.
	_trailing_number = re.compile(r"\d+$")

	# split the video path into folder, bare name and extension
	folder, filename_with_extension = os.path.split(video_file)
	filename, file_type = os.path.splitext(filename_with_extension)

	# file number is blank when the name does not end in digits
	filenumber_match = _trailing_number.search(filename)
	filenumber = filenumber_match.group() if filenumber_match else ""
	filenamenonumber = _trailing_number.sub("", filename)

	# a blank vocal_file means "use the video as audio"; otherwise the audio
	# path must exist and must not be a directory
	if vocal_file == "":
	    vocal_file = video_file
	elif not os.path.exists(vocal_file):
	    sys.exit(f"Could not find file: {vocal_file}")
	elif os.path.isdir(vocal_file):
	    sys.exit(f"{vocal_file} is a directory, you need to point to a file")

	# split the audio path the same way as the video path
	audio_folder, audio_filename_with_extension = os.path.split(vocal_file)
	audio_filename, audio_file_type = os.path.splitext(audio_filename_with_extension)

	audio_filenumber_match = _trailing_number.search(audio_filename)
	audio_filenumber = audio_filenumber_match.group() if audio_filenumber_match else ""
	audio_filenamenonumber = _trailing_number.sub("", audio_filename)
	################################################################################

	# process_failed starts False and is flipped to True when any processing fails
	process_failed = False

	# scratch folder under the launch directory, and the output file inside it
	temp_folder = os.path.join(working_directory, "temp")
	temp_output = os.path.join(temp_folder, "output.mp4")

	# inputs used by the most recent loop iteration (none yet)
	last_input_video = last_input_audio = None

	# --------------------------Batch processing loop-------------------------------!
	# Each pass processes one video/audio pair: stage the inputs in the temp
	# folder, optionally cut a short preview, run inference.py, move the result
	# next to the input video, then (in batch mode) step to the next numbered files.
	# NOTE(review): the source's indentation was lost; the nesting in this block
	# is reconstructed from the logic - confirm against the upstream file.


	def _shift_filenumber(number, step):
	    """Return the digit run in *number* moved by *step*, zero-padded to its original width."""
	    found = re.search(r"\d+", number)
	    digits = found.group()
	    return f"{number[:found.start()]}{int(digits) + step:0{len(digits)}d}"


	def _print_failure_help():
	    """Print the standard 'processing failed' notice and where to look for help."""
	    print("Processing failed! :( see line above 👆")
	    print("Consider searching the issues tab on the github:")
	    print("https://github.com/anothermartz/Easy-Wav2Lip/issues")


	def _print_execution_time():
	    """Print the time elapsed since start_time, formatted by format_time."""
	    elapsed_time = time.time() - start_time
	    print(f"Execution time: {format_time(elapsed_time)}")


	# use_previous_tracking_data arrives as raw config text; normalise it so that
	# "false", "FALSE", "no", "off" and "0" all disable re-use of tracking data,
	# not only the exact string "False"
	_reuse_tracking_data = str(use_previous_tracking_data).strip().lower() not in (
	    "false",
	    "0",
	    "no",
	    "off",
	)

	while True:

	    # construct input_video
	    input_video = os.path.join(folder, filenamenonumber + str(filenumber) + file_type)
	    input_videofile = os.path.basename(input_video)

	    # construct input_audio
	    input_audio = os.path.join(
	        audio_folder, audio_filenamenonumber + str(audio_filenumber) + audio_file_type
	    )
	    input_audiofile = os.path.basename(input_audio)

	    # name the output after the video, adding the audio name when it differs
	    video_stem = filenamenonumber + str(filenumber)
	    audio_stem = audio_filenamenonumber + str(audio_filenumber)
	    if video_stem != audio_stem:
	        output_filename = video_stem + "_" + audio_stem
	    else:
	        output_filename = video_stem

	    # construct output_video
	    output_video = os.path.normpath(
	        os.path.join(folder, output_filename + output_suffix + ".mp4")
	    )
	    output_videofile = os.path.basename(output_video)

	    # remove last outputs
	    if os.path.exists(temp_folder):
	        shutil.rmtree(temp_folder)
	    os.makedirs(temp_folder, exist_ok=True)

	    # preview inputs (if enabled)
	    if preview_input:
	        print("input video:")
	        show_video(input_video)
	        # vocal_file is never blank by this point (a blank value is replaced
	        # by the video path before the loop), so compare the resolved paths to
	        # tell whether a separate audio file is actually in use
	        if input_audio != input_video:
	            print("input audio:")
	            display(Audio(input_audio))
	        else:
	            print("using", input_videofile, "for audio")
	        print("You may want to check now that they're the correct files!")

	    last_input_video = input_video
	    last_input_audio = input_audio
	    shutil.copy(input_video, temp_folder)

	    # rename temp file to include padding or else changing padding does nothing
	    temp_input_video = os.path.join(temp_folder, input_videofile)
	    renamed_temp_input_video = os.path.join(
	        temp_folder, f"{U}{D}{L}{R}{input_videofile}"
	    )
	    shutil.copy(temp_input_video, renamed_temp_input_video)
	    temp_input_video = renamed_temp_input_video
	    temp_input_videofile = os.path.basename(renamed_temp_input_video)

	    # Stage the audio only after the padded video copy exists: an audio file
	    # sharing the video's base name (from another folder) would otherwise
	    # overwrite the staged video before that copy is made.
	    if input_audio != input_video:
	        shutil.copy(input_audio, temp_folder)
	    temp_input_audio = os.path.join(temp_folder, input_audiofile)

	    # trim video if it's longer than the audio
	    video_length = get_input_length(temp_input_video)
	    audio_length = get_input_length(temp_input_audio)

	    if preview_settings:
	        # a settings preview always processes a single clip
	        batch_process = False

	        preview_length_seconds = 1
	        converted_preview_frame = frame_to_preview / in_fps
	        # keep the window inside the clip, and never start before 0 when the
	        # clip is shorter than the preview length
	        preview_start_time = max(
	            0, min(converted_preview_frame, video_length - preview_length_seconds)
	        )
	        preview_end_time = preview_start_time + preview_length_seconds

	        preview_video_path = os.path.join(
	            temp_folder,
	            f"preview_{preview_start_time}_{U}{D}{L}{R}{input_videofile}",
	        )
	        preview_audio_path = os.path.join(temp_folder, "preview_" + input_audiofile)

	        # cut the same time window out of both the video and the audio
	        for clip_source, clip_target in (
	            (temp_input_video, preview_video_path),
	            (temp_input_audio, preview_audio_path),
	        ):
	            subprocess.call(
	                [
	                    "ffmpeg",
	                    "-loglevel",
	                    "error",
	                    "-i",
	                    clip_source,
	                    "-ss",
	                    str(preview_start_time),
	                    "-to",
	                    str(preview_end_time),
	                    "-c",
	                    "copy",
	                    clip_target,
	                ]
	            )
	        temp_input_video = preview_video_path
	        temp_input_audio = preview_audio_path

	    if video_length > audio_length:
	        trimmed_video_path = os.path.join(
	            temp_folder, "trimmed_" + temp_input_videofile
	        )
	        # silence anything the trim helper writes to stdout/stderr
	        with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(
	            devnull
	        ), contextlib.redirect_stderr(devnull):
	            ffmpeg_extract_subclip(
	                temp_input_video, 0, audio_length, targetname=trimmed_video_path
	            )
	        temp_input_video = trimmed_video_path

	    # check if face detection has already happened on this clip
	    last_detected_face = os.path.join(working_directory, "last_detected_face.pkl")
	    if os.path.isfile("last_file.txt"):
	        with open("last_file.txt", "r") as file:
	            last_file = file.readline()
	        # drop the saved face data when the clip changed or re-use is disabled
	        if last_file != temp_input_video or not _reuse_tracking_data:
	            if os.path.isfile(last_detected_face):
	                os.remove(last_detected_face)

	    # ----------------------------Process the inputs!-----------------------------!
	    print(
	        f"Processing{' preview of' if preview_settings else ''} "
	        f"{input_videofile} using {input_audiofile} for audio"
	    )

	    # execute Wav2Lip & upscaler
	    cmd = [
	        sys.executable,
	        "inference.py",
	        "--face",
	        temp_input_video,
	        "--audio",
	        temp_input_audio,
	        "--outfile",
	        temp_output,
	        "--pads",
	        str(pad_up),
	        str(pad_down),
	        str(pad_left),
	        str(pad_right),
	        "--checkpoint_path",
	        checkpoint_path,
	        "--out_height",
	        str(out_height),
	        "--fullres",
	        str(resolution_scale),
	        "--quality",
	        quality,
	        "--mask_dilation",
	        str(size),
	        "--mask_feathering",
	        str(feathering),
	        "--nosmooth",
	        str(nosmooth),
	        "--debug_mask",
	        str(debug_mask),
	        "--preview_settings",
	        str(preview_settings),
	        "--mouth_tracking",
	        str(mouth_tracking),
	    ]

	    # Run the command; success is judged below by which files it produced
	    subprocess.run(cmd)

	    if preview_settings:
	        if os.path.isfile(os.path.join(temp_folder, "preview.jpg")):
	            print("preview successful! Check out temp/preview.jpg")
	            with open("last_file.txt", "w") as f:
	                f.write(temp_input_video)
	            _print_execution_time()
	            break

	        _print_failure_help()
	        # sys.exit with a non-zero status: the bare exit() helper comes from
	        # the site module and would report success (status 0) to the caller
	        sys.exit(1)

	    # rename temp file and move to correct directory
	    if os.path.isfile(temp_output):
	        if os.path.isfile(output_video):
	            os.remove(output_video)
	        shutil.copy(temp_output, output_video)
	        # remember which clip the saved face data belongs to
	        with open("last_file.txt", "w") as f:
	            f.write(temp_input_video)
	        print(f"{output_filename} successfully lip synced! It will be found here:")
	        print(output_video)
	        _print_execution_time()
	    else:
	        _print_failure_help()
	        process_failed = True

	    if not batch_process:
	        if process_failed:
	            sys.exit(1)
	        break

	    if filenumber == "" and audio_filenumber == "":
	        print("Files not set for batch processing")
	        break

	    # -----------------------------Batch Processing!------------------------------!
	    # add 1 to whichever inputs carry a file number
	    if filenumber != "":
	        filenumber = _shift_filenumber(filenumber, 1)
	    if audio_filenumber != "":
	        audio_filenumber = _shift_filenumber(audio_filenumber, 1)

	    # construct the candidate next input_video / input_audio
	    input_video = os.path.join(folder, filenamenonumber + str(filenumber) + file_type)
	    input_videofile = os.path.basename(input_video)
	    input_audio = os.path.join(
	        audio_folder, audio_filenamenonumber + str(audio_filenumber) + audio_file_type
	    )
	    input_audiofile = os.path.basename(input_audio)

	    # now check which input files exist and what to do for each scenario

	    # both +1 files exist - continue processing
	    if os.path.exists(input_video) and os.path.exists(input_audio):
	        continue

	    # video +1 only - continue with last audio file
	    if os.path.exists(input_video) and input_video != last_input_video:
	        if audio_filenumber != "":
	            # take 1 from audio filenumber
	            audio_filenumber = _shift_filenumber(audio_filenumber, -1)
	        continue

	    # audio +1 only - continue with last video file
	    if os.path.exists(input_audio) and input_audio != last_input_audio:
	        if filenumber != "":
	            # take 1 from video filenumber
	            filenumber = _shift_filenumber(filenumber, -1)
	        continue

	    # neither +1 files exist or current files already processed - finish processing
	    print("Finished all sequentially numbered files")
	    if process_failed:
	        sys.exit("Processing failed on at least one video")
	    break