Spaces:
Runtime error
Runtime error
| import subprocess | |
| import os | |
| import sys | |
| import shutil | |
| from pathlib import Path | |
| import argparse | |
| import gradio as gr | |
# Startup cleanup: only the temporary/scratch folders of a previous run are removed.
folders_to_delete = ["./output", "./__pycache__", "./.cache", "./temp"]


def remove_temp_folders(folders):
    """Delete every path in *folders* that is a real directory.

    Paths that do not exist are skipped.  Regular files and symlinks are also
    skipped: ``shutil.rmtree`` raises on both, which would otherwise abort the
    application before it starts.

    Args:
        folders: Iterable of directory paths to remove recursively.
    """
    for folder in folders:
        if os.path.isdir(folder) and not os.path.islink(folder):
            print(f"๐๏ธ ุญุฐู {folder}")
            shutil.rmtree(folder)


remove_temp_folders(folders_to_delete)
# Print the current system memory usage (used / total, in units of 1e9 bytes).
import psutil

mem = psutil.virtual_memory()
ram_report = f"๐ RAM ุงูู ุณุชุฎุฏู ุฉ: {mem.used / 1e9:.2f} GB / {mem.total / 1e9:.2f} GB"
print(ram_report)
# Download the models when the FantasyTalking checkpoint is not on disk yet.
if not os.path.exists("./models/fantasytalking_model.ckpt"):
    print("๐ ๏ธ ุฌุงุฑู ุชุญู ูู ุงููู ุงุฐุฌ ุนุจุฑ download_models.py ...")
    # sys.executable runs the downloader with the same interpreter (and hence
    # the same installed packages) as this app; a bare "python" depends on
    # PATH and may be missing or point at a different environment.
    # check=True raises CalledProcessError when the download script fails,
    # instead of silently carrying on to load models that are not there.
    subprocess.run([sys.executable, "download_models.py"], check=True)
# Path setup: append the absolute current working directory to the import
# search path (presumably so the project-local packages imported below resolve
# when the app is started from the project root — confirm).
sys.path.append(os.path.abspath(os.curdir))
| # Import the pipeline components | |
| from STT.sst import speech_to_text | |
| from LLM.llm import generate_reply | |
| from TTS_X.tts import generate_voice | |
| from FantasyTalking.infer import load_models, main | |
# Model constants: the default argument set handed to the FantasyTalking
# loader and inference entry point.
_default_settings = {
    # Where the model files live on disk.
    "fantasytalking_model_path": "./models/fantasytalking_model.ckpt",
    "wav2vec_model_dir": "./models/wav2vec2-base-960h",
    "wan_model_dir": "./models/Wan2.1-I2V-14B-720P",
    # Per-request values: blank here, overridden by generate_video().
    "image_path": "",
    "audio_path": "",
    "prompt": "",
    "output_dir": "./output",
    # Generation settings shared by every request.
    "image_size": 512,
    "audio_scale": 1.0,
    "prompt_cfg_scale": 5.0,
    "audio_cfg_scale": 5.0,
    "max_num_frames": 81,
    "inference_steps": 20,
    "fps": 23,
    "num_persistent_param_in_dit": None,
    "seed": 1111,
}
args_template = argparse.Namespace(**_default_settings)
# Load the models once at import time; generate_video() passes these same
# objects to every inference call.
print("๐ ุฌุงุฑู ุชุญู ูู FantasyTalking ู Wav2Vec...")
_loaded_models = load_models(args_template)
pipe, fantasytalking, wav2vec_processor, wav2vec = _loaded_models
print("โ ุชู ุงูุชุญู ูู!")
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    """Run FantasyTalking inference for one request.

    A fresh ``argparse.Namespace`` is built from the attributes of the shared
    ``args_template`` so the template itself is never modified; only the four
    per-request fields are overridden.

    Args:
        image_path: Value stored as ``args.image_path``.
        audio_path: Value stored as ``args.audio_path``.
        prompt: Value stored as ``args.prompt``.
        output_dir: Value stored as ``args.output_dir``.

    Returns:
        Whatever ``main`` returns (``full_pipeline`` treats it as the path of
        the generated video).
    """
    # Start from a copy of the template's attributes ...
    request_args = argparse.Namespace(**vars(args_template))
    # ... and replace only what this request needs.
    request_args.image_path = image_path
    request_args.audio_path = audio_path
    request_args.prompt = prompt
    request_args.output_dir = output_dir
    return main(request_args, pipe, fantasytalking, wav2vec_processor, wav2vec)
def full_pipeline(user_audio, user_image):
    """Run the full chain: speech -> text -> reply -> speech -> talking video.

    Args:
        user_audio: File path of the user's audio clip (the UI components are
            declared with ``type="filepath"``); ``None`` when nothing was
            uploaded.
        user_image: File path of the speaker image; ``None`` when nothing was
            uploaded.

    Returns:
        Tuple ``(user_text, reply, reply_audio_path, video_path)``, in the
        order of the UI outputs wired to this handler.

    Raises:
        gr.Error: If the audio or the image is missing, so the user sees a
            readable message instead of a failure deep inside the
            speech-to-text or video stage.
    """
    # Gradio passes None for an empty component; reject that before any of
    # the expensive stages run.
    if not user_audio:
        raise gr.Error("Please upload or record an audio clip first.")
    if not user_image:
        raise gr.Error("Please upload an image of the speaker first.")

    print("๐ค ุชุญููู ุงูุตูุช ุฅูู ูุต...")
    user_text = speech_to_text(user_audio)

    print("๐ฌ ุชูููุฏ ุงูุฑุฏ...")
    reply = generate_reply(user_text)

    print("๐ ุชุญููู ุงูุฑุฏ ุฅูู ุตูุช...")
    reply_audio_path = generate_voice(reply)

    print("๐ฝ๏ธ ุชูููุฏ ุงูููุฏูู...")
    # ./output is deleted by the startup cleanup, so recreate it before use.
    Path("./output").mkdir(parents=True, exist_ok=True)
    video_path = generate_video(
        image_path=user_image,
        audio_path=reply_audio_path,
        prompt=reply,
    )
    return user_text, reply, reply_audio_path, video_path
# Gradio interface: audio + image inputs on one side, the four pipeline
# results on the other.
# NOTE(review): the source lost its indentation; the Row/Column nesting below
# (button in the input column, click wiring inside the Blocks context, launch
# outside it) is reconstructed — confirm against the original layout.
with gr.Blocks(title="๐ง ุตูุชู ูุญุฑู ุตูุฑุฉ!") as demo:
    gr.Markdown("## ๐คโก๏ธ๐ฌโก๏ธ๐โก๏ธ๐ฝ๏ธ ู ู ุตูุชู ุฅูู ููุฏูู ู ุชููู !")

    with gr.Row():
        # Input column: user audio, speaker image and the run button.
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="๐๏ธ ุงุฑูุน ุตูุชู")
            image_input = gr.Image(type="filepath", label="๐ผ๏ธ ุตูุฑุฉ ุงูู ุชุญุฏุซ")
            btn = gr.Button("๐ฌ ุดุบู")

        # Output column: one component per value returned by full_pipeline.
        with gr.Column():
            user_text = gr.Textbox(label="๐ ุงููุต ุงูู ุณู ูุน")
            reply_text = gr.Textbox(label="๐ค ุฑุฏ ุงูู ุณุงุนุฏ")
            reply_audio = gr.Audio(label="๐ ุงูุฑุฏ ุงูู ูุทูู")
            video_output = gr.Video(label="๐ฝ๏ธ ุงูููุฏูู ุงููุงุชุฌ")

    pipeline_inputs = [audio_input, image_input]
    pipeline_outputs = [user_text, reply_text, reply_audio, video_output]
    btn.click(full_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)

# NOTE(review): inbrowser/share presumably target local runs — confirm they
# are still wanted where this app is hosted.
demo.launch(share=True, inbrowser=True)