Spaces:
Sleeping
Sleeping
"""Test a single cut end-to-end: Image -> TTS (Pocket TTS) -> Video -> Mux.

Generates a frontal-pose manhwa character with a cloned Genshin voice.
"""
| import asyncio | |
| import sys | |
| import time | |
| from pathlib import Path | |
# Put this script's own directory at the front of the import path.
# Presumably the ``app`` package imported inside main() lives next to this
# script, so this lets it resolve regardless of the launch directory.
sys.path.insert(0, str(Path(__file__).resolve().parents[0]))

# Directory that receives every artifact of a run. The path is relative, so it
# is created under the current working directory, not next to the script.
OUTPUT_DIR = Path("test_single_cut_output")
# Horizontal rule used to frame every stage header in the console output.
_BANNER_RULE = "=" * 60


def _print_banner(title: str, *, leading_blank: bool = True) -> None:
    """Print ``title`` framed by two 60-character ``=`` rules.

    Args:
        title: Text printed between the two rules.
        leading_blank: When true, the top rule is preceded by an empty line so
            the banner is visually separated from the previous output.
    """
    print(f"\n{_BANNER_RULE}" if leading_blank else _BANNER_RULE)
    print(title)
    print(_BANNER_RULE)


async def main() -> None:
    """Run one cut through the whole pipeline: image -> voice -> video -> mux.

    Stages, in order:
      1. Generate a still image from a fixed prompt (``frame.png``).
      2. Synthesize a fixed line of dialogue with Pocket TTS (``voice.wav``).
      3. Upload the image and generate an image-to-video clip
         (``video_silent.mp4``).
      4. Mux the audio onto the clip (``final_cut.mp4``).

    All files are written under ``OUTPUT_DIR``, which is created if missing.
    Progress and per-stage elapsed times are printed to stdout. Any exception
    raised by a stage propagates to the caller; later stages are not run.
    """
    import math

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Project-local services; presumably resolved through the sys.path entry
    # added at module import time.
    from app.services.pollinations import generate_image, generate_video, upload_media
    from app.services.ffmpeg import mux_audio

    # -- 1. Generate image (frontal pose, manhwa style) --
    _print_banner("[1/4] Generating image...", leading_blank=False)
    image_prompt = (
        "manhwa webtoon style, clean cel-shading, sharp lineart, vivid colors. "
        "medium close-up shot, a young man with sharp jaw, messy black hair, "
        "piercing golden eyes, wearing a dark high-collared coat with silver trim, "
        "standing in a grand marble hall, dramatic side lighting casting sharp shadows, "
        "confident smirk, arms crossed, facing the viewer directly, "
        "cinematic composition, 4k detail"
    )
    image_path = str(OUTPUT_DIR / "frame.png")
    # perf_counter() is monotonic, so the reported elapsed time cannot be
    # skewed by system clock adjustments the way time.time() can.
    started = time.perf_counter()
    await generate_image(
        prompt=image_prompt,
        output_path=image_path,
        model="klein-large",
        width=1024,
        height=768,
        seed=42,
    )
    print(f" Image saved: {image_path} ({time.perf_counter() - started:.1f}s)")

    # -- 2. Generate TTS (Pocket TTS with Childe's voice) --
    _print_banner("[2/4] Generating voice (Pocket TTS - Childe voice)...")
    # Imported only once the image stage has finished, so a problem importing
    # the TTS service does not prevent the image from being produced.
    from app.services.pocket_tts_service import PocketTTSService

    dialogue_text = "You think you can challenge me? How amusing. Let me show you what true power looks like."
    # Relative path: resolved against the current working directory.
    voice_state_path = "data/genshin_voices/childe/voice_state.safetensors"
    audio_path = str(OUTPUT_DIR / "voice.wav")
    started = time.perf_counter()
    tts_result = await PocketTTSService.generate(
        text=dialogue_text,
        voice_ref=voice_state_path,
        output_path=audio_path,
    )
    print(f" Audio saved: {audio_path}")
    # Length of the synthesized speech as reported by the TTS service.
    audio_duration = tts_result["duration_sec"]
    print(f" Duration: {audio_duration:.1f}s ({time.perf_counter() - started:.1f}s gen)")

    # -- 3. Generate video (img2vid from the image) --
    _print_banner("[3/4] Generating video (grok-video img2vid)...")
    # The image is uploaded first; the video call receives its URL, not the
    # local file path.
    print(" Uploading image to media.pollinations.ai...")
    started = time.perf_counter()
    image_url = await upload_media(image_path)
    print(f" Image URL: {image_url} ({time.perf_counter() - started:.1f}s)")

    video_prompt = "character smirks confidently, slight head tilt, coat fabric shifts, dramatic lighting flickers"
    silent_video_path = str(OUTPUT_DIR / "video_silent.mp4")
    # Round the speech length up to whole seconds and clamp it to 3-5 s
    # (presumably the range the video model accepts -- TODO confirm).
    # NOTE(review): the clip can therefore be shorter than the audio; the mux
    # step below still receives the unclamped audio duration.
    video_duration = min(max(math.ceil(audio_duration), 3), 5)
    started = time.perf_counter()
    print(f" Generating {video_duration}s video...")
    await generate_video(
        prompt=video_prompt,
        output_path=silent_video_path,
        duration=video_duration,
        image_url=image_url,
    )
    print(f" Silent video saved: {silent_video_path} ({time.perf_counter() - started:.1f}s)")

    # -- 4. Mux audio + video --
    _print_banner("[4/4] Muxing audio + video...")
    final_path = str(OUTPUT_DIR / "final_cut.mp4")
    started = time.perf_counter()
    await mux_audio(
        video_path=silent_video_path,
        audio_path=audio_path,
        output_path=final_path,
        duration_sec=audio_duration,
    )
    print(f" Final cut: {final_path} ({time.perf_counter() - started:.1f}s)")

    # Summary of everything that was produced.
    _print_banner("DONE!")
    print(f" Image: {image_path}")
    print(f" Audio: {audio_path} ({audio_duration:.1f}s)")
    print(f" Video: {silent_video_path}")
    print(f" Final: {final_path}")
    print(f"\nOpen {final_path} to watch!")
# Script entry point: only run the pipeline when executed directly, not when
# the module is imported.
if __name__ == "__main__":
    # asyncio.run() creates an event loop, drives main() to completion and
    # closes the loop afterwards.
    asyncio.run(main())