darshankr committed on
Commit
994d199
·
verified ·
1 Parent(s): 6a6e465

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +58 -26
inference.py CHANGED
@@ -7,6 +7,7 @@ from glob import glob
7
  import torch, face_detection
8
  from models import Wav2Lip
9
  import platform
 
10
 
11
  parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')
12
 
@@ -178,41 +179,72 @@ def load_model(path):
178
  model = model.to(device)
179
  return model.eval()
180
 
181
- def main():
182
- if not os.path.isfile(args.face):
183
- raise ValueError('--face argument must be a valid path to video/image file')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- elif args.face.split('.')[1] in ['jpg', 'png', 'jpeg']:
186
- full_frames = [cv2.imread(args.face)]
187
- fps = args.fps
 
 
188
 
189
- else:
190
- video_stream = cv2.VideoCapture(args.face)
191
- fps = video_stream.get(cv2.CAP_PROP_FPS)
 
192
 
193
- print('Reading video frames...')
 
194
 
195
- full_frames = []
196
- while 1:
197
- still_reading, frame = video_stream.read()
198
- if not still_reading:
199
- video_stream.release()
200
- break
201
- if args.resize_factor > 1:
202
- frame = cv2.resize(frame, (frame.shape[1]//args.resize_factor, frame.shape[0]//args.resize_factor))
203
 
204
- if args.rotate:
205
- frame = cv2.rotate(frame, cv2.cv2.ROTATE_90_CLOCKWISE)
206
 
207
- y1, y2, x1, x2 = args.crop
208
- if x2 == -1: x2 = frame.shape[1]
209
- if y2 == -1: y2 = frame.shape[0]
210
 
211
- frame = frame[y1:y2, x1:x2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
- full_frames.append(frame)
214
 
215
- print ("Number of frames available for inference: "+str(len(full_frames)))
216
 
217
  if not args.audio.endswith('.wav'):
218
  print('Extracting raw audio...')
 
7
  import torch, face_detection
8
  from models import Wav2Lip
9
  import platform
10
+ import ffmpeg
11
 
12
  parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')
13
 
 
179
  model = model.to(device)
180
  return model.eval()
181
 
182
def convert_video_to_h264(input_path, output_path="converted_video.mp4"):
    """Convert AV1 or unsupported videos to H.264 using ffmpeg."""
    # Re-encode through the ffmpeg CLI (argument list, shell=False) so that
    # OpenCV can decode the result with its bundled H.264 support.
    cmd = ["ffmpeg", "-y", "-i", input_path, "-c:v", "libx264", "-c:a", "aac", output_path]
    print(f"Converting {input_path} to {output_path}...")
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        # Surface the failure to the caller; a partial/absent output file
        # would only fail more confusingly downstream.
        print(f"Error during video conversion: {e}")
        raise
    print("Conversion successful.")
    return output_path
195
def load_video_frames(video_path):
    """Load and preprocess all frames of a video.

    Applies the module-global CLI options on every frame: downscaling by
    ``args.resize_factor``, optional 90-degree rotation when ``args.rotate``
    is set, and cropping to ``args.crop`` (``-1`` in x2/y2 means "to the
    frame edge").

    Args:
        video_path: Path to a video file readable by OpenCV.

    Returns:
        Tuple ``(full_frames, fps)`` — the list of preprocessed BGR frames
        and the FPS reported by the stream.

    Raises:
        ValueError: If OpenCV cannot open ``video_path``.
    """
    video_stream = cv2.VideoCapture(video_path)
    if not video_stream.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    fps = video_stream.get(cv2.CAP_PROP_FPS)
    print(f"Reading video frames at {fps} FPS...")

    full_frames = []
    try:
        while True:
            still_reading, frame = video_stream.read()
            if not still_reading:
                break

            if args.resize_factor > 1:
                frame = cv2.resize(
                    frame, (frame.shape[1] // args.resize_factor, frame.shape[0] // args.resize_factor)
                )

            if args.rotate:
                frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

            # Crop after resize/rotate, so crop coordinates refer to the
            # transformed frame; -1 expands to the full width/height.
            y1, y2, x1, x2 = args.crop
            if x2 == -1: x2 = frame.shape[1]
            if y2 == -1: y2 = frame.shape[0]

            frame = frame[y1:y2, x1:x2]
            full_frames.append(frame)
    finally:
        # Fix: the original released the capture only on the clean
        # end-of-stream path; an exception while decoding or cropping
        # would leak the VideoCapture handle.
        video_stream.release()

    return full_frames, fps
 
227
 
228
+ def main():
229
+ if not os.path.isfile(args.face):
230
+ raise ValueError("--face argument must be a valid path to video/image file")
231
+
232
+ if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
233
+ full_frames = [cv2.imread(args.face)]
234
+ fps = args.fps
235
+
236
+ else:
237
+ # Try loading the video with OpenCV first
238
+ video_path = args.face
239
+ video_stream = cv2.VideoCapture(video_path)
240
+
241
+ if not video_stream.isOpened():
242
+ print("OpenCV failed to open video. Attempting ffmpeg conversion...")
243
+ video_path = convert_video_to_h264(args.face)
244
 
245
+ full_frames, fps = load_video_frames(video_path)
246
 
247
+ print(f"Loaded {len(full_frames)} frames.")
248
 
249
  if not args.audio.endswith('.wav'):
250
  print('Extracting raw audio...')