Spaces:
Runtime error
Runtime error
| import spaces | |
| import gradio as gr | |
| import os | |
| import numpy as np | |
| from pydub import AudioSegment | |
| import hashlib | |
| from sonic import Sonic | |
| from PIL import Image | |
| import torch # 필요 시 사용 | |
# ------------------------------------------------------------------
# Model initialization
# ------------------------------------------------------------------
# Shell commands that install the Hugging Face CLI and download the
# checkpoints into ./checkpoints (presumably where Sonic() looks for them;
# confirm against the sonic module).
_SETUP_COMMANDS = (
    'python3 -m pip install "huggingface_hub[cli]"',
    'huggingface-cli download LeonJoe13/Sonic --local-dir checkpoints',
    'huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir checkpoints/stable-video-diffusion-img2vid-xt',
    'huggingface-cli download openai/whisper-tiny --local-dir checkpoints/whisper-tiny',
)

# Kept for backward compatibility: the same single-line shell script the
# module has always exposed under this name.
cmd = "; ".join(_SETUP_COMMANDS) + ";"

# Run the steps one by one so that each exit status can be inspected.
# Chaining them with ';' in a single os.system() call only surfaces the status
# of the last command, and that status was previously discarded as well.
# Failures are reported but not fatal: the checkpoints may already be present
# from an earlier run.
for _setup_command in _SETUP_COMMANDS:
    _setup_status = os.system(_setup_command)
    if _setup_status != 0:
        print(
            f"Warning: setup command exited with status {_setup_status}: "
            f"{_setup_command}"
        )

# Shared pipeline instance used by get_video_res().
pipe = Sonic()
# ------------------------------------------------------------------
# Utilities
# ------------------------------------------------------------------
def get_md5(content):
    """Return the hexadecimal MD5 digest of a bytes-like object or array.

    ``content`` is passed straight to ``hashlib.md5``, so it must be
    something that constructor accepts.
    """
    return hashlib.md5(content).hexdigest()
# ------------------------------------------------------------------
# Video generation
# ------------------------------------------------------------------
# Keep the GPU session alive for up to 5 minutes.
@spaces.GPU(duration=300)
def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
    """Run the Sonic pipeline on one image/audio pair and write the video.

    Args:
        img_path: Path of the input image.
        audio_path: Path of the input audio file (read with pydub).
        res_video_path: Path the pipeline is asked to write the video to.
        dynamic_scale: Forwarded unchanged to ``pipe.process``.

    Returns:
        ``res_video_path``, unchanged.
    """
    expand_ratio = 0.0  # keep the face from being cropped
    min_resolution = 512

    # Audio length -> step count (25 per second, capped at 60 s = 1500).
    # NOTE(review): the value is computed like a frame count but is passed as
    # `inference_steps`; confirm that Sonic.process interprets it that way.
    audio = AudioSegment.from_file(audio_path)
    duration = len(audio) / 1000.0  # seconds
    fps = 25
    max_steps = fps * 60  # 1500
    inference_steps = max(1, min(int(duration * fps), max_steps))
    print(f"Audio duration: {duration:.2f}s → inference_steps: {inference_steps}")

    # Face information is printed for reference only; a missing face does not
    # abort the run.
    face_info = pipe.preprocess(img_path, expand_ratio=expand_ratio)
    print(f"Face detection info: {face_info}")
    if face_info["face_num"] == 0:
        print("Warning: face not detected – proceeding with full image.")

    # Make sure the output folder exists. A bare file name has an empty
    # dirname, and os.makedirs("") raises, so only create when there is one.
    out_dir = os.path.dirname(res_video_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Generate the video.
    pipe.process(
        img_path,
        audio_path,
        res_video_path,
        min_resolution=min_resolution,
        inference_steps=inference_steps,
        dynamic_scale=dynamic_scale,
    )
    return res_video_path
# ------------------------------------------------------------------
# Cache / path settings
# ------------------------------------------------------------------
tmp_path = "./tmp_path/"  # saved input images and audio files
res_path = "./res_path/"  # generated videos

# Create both folders up front; already-existing folders are fine.
for _cache_dir in (tmp_path, res_path):
    os.makedirs(_cache_dir, exist_ok=True)
# ------------------------------------------------------------------
# Gradio callback
# ------------------------------------------------------------------
def process_sonic(image, audio, dynamic_scale):
    """Gradio callback: turn an uploaded image and audio clip into a video.

    Inputs are saved under ``tmp_path`` and results under ``res_path``, both
    named after content hashes, so repeated requests reuse existing files.

    Args:
        image: Uploaded image; must support ``np.array(image)`` and
            ``image.save(path)``.
        audio: Sequence whose first two items are ``(sampling_rate, samples)``.
        dynamic_scale: Animation intensity; part of the result file name and
            forwarded to ``get_video_res``.

    Returns:
        Absolute path of the generated (or cached) video file.

    Raises:
        gr.Error: If the image or audio is missing, or the audio is empty.
    """
    # Input validation.
    if image is None:
        raise gr.Error("Please upload an image")
    if audio is None:
        raise gr.Error("Please upload an audio file")

    sampling_rate, arr = audio[:2]
    arr = np.ascontiguousarray(arr)
    if arr.size == 0:
        # Nothing to animate; fail early with a clear message.
        raise gr.Error("The uploaded audio is empty")
    if arr.ndim == 1:
        arr = arr[:, None]  # mono -> (samples, 1)
    pcm_bytes = arr.tobytes()

    # Cache keys. The raw bytes alone are ambiguous: the same bytes with a
    # different shape, dtype or sampling rate would otherwise reuse a stale
    # cached file. Prefix the hashed payload with that metadata.
    img_arr = np.array(image)
    img_md5 = get_md5(
        f"{img_arr.shape}|{img_arr.dtype.str}|".encode("utf-8") + img_arr.tobytes()
    )
    audio_md5 = get_md5(
        f"{sampling_rate}|{arr.shape[1]}|{arr.dtype.str}|".encode("utf-8") + pcm_bytes
    )
    print(f"Processing (img={img_md5}, audio={audio_md5})")

    # numpy audio -> AudioSegment.
    audio_segment = AudioSegment(
        pcm_bytes,
        frame_rate=sampling_rate,
        sample_width=arr.dtype.itemsize,
        channels=arr.shape[1],
    )

    # Paths.
    image_path = os.path.abspath(os.path.join(tmp_path, f"{img_md5}.png"))
    audio_path = os.path.abspath(os.path.join(tmp_path, f"{audio_md5}.wav"))
    res_video_path = os.path.abspath(
        os.path.join(res_path, f"{img_md5}_{audio_md5}_{dynamic_scale}.mp4")
    )

    # Save inputs unless they are already cached.
    if not os.path.exists(image_path):
        image.save(image_path)
    if not os.path.exists(audio_path):
        audio_segment.export(audio_path, format="wav")

    if os.path.exists(res_video_path):
        print(f"Using cached result: {res_video_path}")
        return res_video_path

    print(f"Generating new video (dynamic_scale={dynamic_scale})")
    return get_video_res(image_path, audio_path, res_video_path, dynamic_scale)
# ------------------------------------------------------------------
# Gradio UI
# ------------------------------------------------------------------
def get_example():
    """Return the example rows for the UI (none yet; add as needed)."""
    examples = []
    return examples
# Custom stylesheet handed to gr.Blocks; one literal per CSS rule.
css = (
    "\n"
    ".gradio-container { font-family: 'Arial', sans-serif; }\n"
    ".main-header { text-align: center; color: #2a2a2a; margin-bottom: 2em; }\n"
    ".parameter-section { background-color: #f5f5f5; padding: 1em; border-radius: 8px; margin: 1em 0; }\n"
    ".example-section { margin-top: 2em; }\n"
)
# Page header shown above the two-column layout.
header_html = """
<div class="main-header">
<h1>🎭 Longer Sonic: Advanced Portrait Animation</h1>
<p>Transform still images into dynamic videos synchronized with audio(Demo max 60sec)</p>
</div>
"""

# NOTE(review): "apriel" does not look like one of Gradio's built-in theme
# names; confirm it resolves as intended.
# NOTE(review): the source had lost its indentation, so the nesting below
# (inputs and button in the left column, video in the right) is reconstructed.
with gr.Blocks(theme="apriel", css=css) as demo:
    gr.HTML(header_html)

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(
                label="Portrait Image",
                type="pil",
                elem_id="image_input",
            )
            audio_input = gr.Audio(
                type="numpy",
                label="Voice/Audio Input",
                elem_id="audio_input",
            )
            dynamic_scale = gr.Slider(
                label="Animation Intensity",
                info="Adjust to control movement intensity (0.5: subtle, 2.0: dramatic)",
                minimum=0.5,
                maximum=2.0,
                step=0.1,
                value=1.0,
            )
            process_btn = gr.Button(
                "Generate Animation",
                elem_id="process_btn",
                variant="primary",
            )

        # Right column: the resulting video.
        with gr.Column():
            video_output = gr.Video(
                elem_id="video_output",
                label="Generated Animation",
            )

    # Button click runs the callback; also exposed as the "animate" API route.
    process_btn.click(
        process_sonic,
        inputs=[image_input, audio_input, dynamic_scale],
        outputs=video_output,
        api_name="animate",
    )

    # Example rows (currently none); results are computed on demand.
    example_rows = get_example()
    gr.Examples(
        examples=example_rows,
        inputs=[image_input, audio_input, dynamic_scale],
        outputs=video_output,
        fn=process_sonic,
        cache_examples=False,
    )
# ------------------------------------------------------------------
# Launch
# ------------------------------------------------------------------
# Start the server only when this file is executed as a script, so that
# importing the module (for tooling or reuse of `demo`) does not block on
# a running server.
if __name__ == "__main__":
    demo.launch(share=True)