# NOTE: the wildcard import is kept FIRST (as in the original) so the explicit
# imports below take precedence over anything config happens to re-export.
# It is expected to provide imgfile, audiofile, driverfile, animatedfile
# (and previously, implicitly, `time`).
from config import *

import datetime as dt
import os
import platform
import shutil
import subprocess
import time
from argparse import ArgumentParser

import humanize

from animate_face import animate_face
from image import generate_image
| |
|
# Prompt fragment describing the avatar's appearance; interpolated into the
# image-generation prompt inside main() when no --image is supplied.
avatar_description = "Young asian man, with short brunette hair, slightly smiling"
| |
|
def main():
    """Generate a talking-avatar video end to end.

    Pipeline: create (or copy) an avatar image, extract speech audio from the
    driver mp4, animate the avatar face with the driver, pitch-shift the
    voice, and mux animation + audio into results/<path_id>_animated.mp4.

    CLI flags:
        --image    path to an existing avatar image (default: generate one)
        --path_id  working-directory id under temp/ (default: unix timestamp)
        --pitch    voice pitch factor, 1.0 = unchanged

    Relies on globals from `config` (imgfile, audiofile, driverfile,
    animatedfile) and on an `ffmpeg` binary on PATH.
    """
    parser = ArgumentParser()
    parser.add_argument("--image", default=imgfile, help="path to avatar file")
    parser.add_argument("--path_id", default=str(int(time.time())), help="set the path id to use")
    # type=float: without it a CLI-supplied value arrives as a string even
    # though the default is a float.
    parser.add_argument("--pitch", default=1.0, type=float, help="change pitch of voice, 1.0 is original, higher number is higher pitch")
    args = parser.parse_args()
    tstart = time.time()

    path_id = args.path_id
    path = os.path.join("temp", path_id)
    os.makedirs(path, exist_ok=True)
    # The final mux writes into results/ — create it up front so a fresh
    # checkout doesn't fail at the very last step.
    os.makedirs("results", exist_ok=True)

    timage = "None"
    if args.image == imgfile:
        print("-----------------------------------------")
        print("generating avatar image")
        t1 = time.time()
        generate_image(path_id, imgfile, f"hyperrealistic digital avatar, centered, \
{avatar_description}, rim lighting, studio lighting, looking at the camera")
        timage = _elapsed(t1)
        print("\ngenerating avatar:", timage)
    else:
        # User supplied their own avatar: stage it in the working directory.
        shutil.copyfile(args.image, os.path.join(path, imgfile))

    print("-----------------------------------------")
    print("extracting speech from mp4")
    t2 = time.time()
    wavoutfile = os.path.join(path, audiofile)
    _ffmpeg('ffmpeg -i {} -acodec pcm_s16le -ar 44100 -ac 1 {}'.format(driverfile, wavoutfile))
    tspeech = _elapsed(t2)
    print("\nextracting speech:", tspeech)

    print("-----------------------------------------")
    print("animating face with driver")
    t3 = time.time()
    animate_face(path_id, audiofile, driverfile, imgfile, animatedfile)
    tanimate = _elapsed(t3)
    print("\nanimating face:", tanimate)

    print("-----------------------------------------")
    print("changing pitch of voice")
    t4 = time.time()
    wavpitchedfile = os.path.join(path, "pitched.wav")
    # asetrate scales pitch+speed; atempo=1/pitch restores the original tempo.
    _ffmpeg('ffmpeg -i {} -af "asetrate=44100*{},aresample=44100,atempo=1/{}" {}'.format(wavoutfile, args.pitch, args.pitch, wavpitchedfile))
    tpitch = _elapsed(t4)
    # Bug fix: original wrote "\changing" — an invalid escape that printed a
    # literal backslash instead of a newline.
    print("\nchanging pitch:", tpitch)

    print("-----------------------------------------")
    print("combining animation with speech")
    t5 = time.time()
    animatedoutfile = os.path.join(path, animatedfile)
    finaloutfile = os.path.join("results", path_id + "_animated.mp4")
    _ffmpeg('ffmpeg -i {} -i {} -c:v copy -map 0:v:0 -map 1:a:0 -shortest {}'.format(animatedoutfile, wavpitchedfile, finaloutfile))
    tcombi = _elapsed(t5)
    # Bug fix: same invalid escape ("\combining") as above.
    print("\ncombining animation with speech:", tcombi)

    print("done")
    print("Overall timing")
    print("--------------")
    print("generating avatar image:", timage)
    print("extracting speech from mp4:", tspeech)
    print("animating face:", tanimate)
    print("changing pitch of voice:", tpitch)
    print("combining animation with speech:", tcombi)
    print("total time:", _elapsed(tstart))


def _elapsed(start):
    """Return a humanized wall-clock duration since *start* (a time.time() stamp).

    Bug fix: several call sites originally passed whole seconds as
    dt.timedelta(microseconds=...), under-reporting durations by a factor
    of 1e6; everything now consistently uses seconds.
    """
    return humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - start)))


def _ffmpeg(command):
    """Run one ffmpeg command line (via the shell on POSIX, directly on Windows).

    NOTE(review): the command is assembled by string formatting and executed
    through the shell on non-Windows hosts — paths containing spaces or shell
    metacharacters will break or allow injection; prefer
    subprocess.run([...], shell=False) with an argument list.
    """
    subprocess.call(command, shell=platform.system() != 'Windows')
| |
|
# Script entry point: run the full avatar-generation pipeline.
if __name__ == '__main__':
    main()