talkingAvater_bgk

Runtime error

App Files Files Community

oKen38461 committed on Jul 17, 2025

Commit

0f839d2

1 Parent(s): 07b71bb

`.gitignore`に`tests/`を追加し、`README.md`のAPIドキュメントセクションを更新しました。また、`test_api_client.py`、`test_api.py`、`test_performance_optimized.py`、`test_performance.py`のテストスクリプトを削除しました。

Browse files

Files changed (6) hide show

.gitignore +1 -0
README.md +7 -3
test_api.py +0 -102
test_api_client.py +0 -220
test_performance.py +0 -175
test_performance_optimized.py +0 -375

.gitignore CHANGED Viewed

@@ -38,6 +38,7 @@ log/*
 example/
 ToDo/
 docs/
+tests/
 !example/audio.wav

README.md CHANGED Viewed

@@ -74,8 +74,12 @@ python test_api_client.py
 - **処理速度**: 16秒の音声を約15秒で処理（Phase 3最適化により50-65%高速化）
 ## ドキュメント
-- [APIドキュメント](docs/api_documentation.md) - 詳細なAPI仕様とサンプルコード
-- [Phase2実装仕様](ToDo/0717-2_Phase2_API_SOW.md) - API実装の詳細
-- [Phase3最適化ガイド](docs/phase3_optimization_guide.md) - パフォーマンス最適化の詳細
+- 📁 **[APIドキュメント](docs/api/)** - リアルタイムを超える動画生成APIの全ドキュメント
+  - 🚀 [統合ガイド](docs/api/integration_guide.md) - 完全なAPIインテグレーションガイド
+  - ⚡ [クイックリファレンス](docs/api/quick_reference.md) - 5分で実装できるクイックスタート
+  - 📝 [API仕様書](docs/api/documentation.md) - 詳細なAPI仕様とサンプルコード
+  - 💻 [統合サンプル集](docs/api/integration_examples.py) - 実装例とベストプラクティス
+- 📋 [Phase2実装仕様](ToDo/0717-2_Phase2_API_SOW.md) - API実装の詳細
+- 🔧 [Phase3最適化ガイド](docs/phase3_optimization_guide.md) - パフォーマンス最適化の詳細
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

test_api.py DELETED Viewed

@@ -1,102 +0,0 @@
-#!/usr/bin/env python3
-"""
-DittoTalkingHead API テストスクリプト
-簡単なAPIテストを実行します
-"""
-import logging
-import sys
-from test_api_client import TalkingHeadAPIClient
-# ロギング設定
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S'
-)
-def test_basic_functionality():
-    """基本機能のテスト"""
-    logging.info("=== 基本機能テスト開始 ===")
-    # クライアント初期化
-    client = TalkingHeadAPIClient()
-    # サンプルファイルを使用
-    audio_path = "example/audio.wav"
-    image_path = "example/image.png"
-    try:
-        # 動画生成
-        logging.info(f"接続開始: O-ken5481/talkingAvater_bgk")
-        logging.info(f"ファイルアップロード: {audio_path}, {image_path}")
-        logging.info("処理開始...")
-        result = client.generate_video(audio_path, image_path)
-        video_path, status = result
-        if video_path:
-            logging.info("動画生成完了")
-            # タイムスタンプ付きで保存
-            if isinstance(video_path, dict) and 'video' in video_path:
-                saved_path = client.save_with_timestamp(video_path['video'])
-                if saved_path:
-                    logging.info(f"保存完了: {saved_path}")
-                    print(f"\n✅ テスト成功!")
-                    print(f"ステータス: {status}")
-                    print(f"保存先: {saved_path}")
-                    return True
-        print(f"\n❌ テスト失敗")
-        print(f"ステータス: {status}")
-        return False
-    except Exception as e:
-        logging.error(f"エラー発生: {e}")
-        return False
-def test_error_handling():
-    """エラーハンドリングのテスト"""
-    logging.info("\n=== エラーハンドリングテスト開始 ===")
-    client = TalkingHeadAPIClient()
-    # 存在しないファイルでテスト
-    result = client.generate_video("nonexistent.wav", "nonexistent.png")
-    video_path, status = result
-    if video_path is None and "見つかりません" in status:
-        logging.info("✅ ファイル不在エラーを正しく検出")
-        return True
-    else:
-        logging.error("❌ エラーハンドリングが正しく動作していません")
-        return False
-def main():
-    """メイン関数"""
-    print("DittoTalkingHead API テスト")
-    print("=" * 50)
-    # 基本機能テスト
-    basic_test_passed = test_basic_functionality()
-    # エラーハンドリングテスト
-    error_test_passed = test_error_handling()
-    # 結果サマリー
-    print("\n" + "=" * 50)
-    print("テスト結果:")
-    print(f"- 基本機能テスト: {'✅ 成功' if basic_test_passed else '❌ 失敗'}")
-    print(f"- エラーハンドリングテスト: {'✅ 成功' if error_test_passed else '❌ 失敗'}")
-    # 終了コード
-    if basic_test_passed and error_test_passed:
-        print("\n全てのテストが成功しました! 🎉")
-        sys.exit(0)
-    else:
-        print("\n一部のテストが失敗しました。")
-        sys.exit(1)
-if __name__ == "__main__":
-    main()

test_api_client.py DELETED Viewed

@@ -1,220 +0,0 @@
-from gradio_client import Client, handle_file
-from datetime import datetime
-import os
-import shutil
-import logging
-import time
-from typing import Tuple, Optional
-class TalkingHeadAPIClient:
-    """DittoTalkingHead API クライアント"""
-    def __init__(self, space_name: str = "O-ken5481/talkingAvater_bgk", max_retries: int = 3, retry_delay: int = 5):
-        """
-        Args:
-            space_name: Hugging Face SpaceのID（デフォルト: O-ken5481/talkingAvater_bgk）
-            max_retries: 最大リトライ回数
-            retry_delay: リトライ間隔（秒）
-        """
-        self.space_name = space_name
-        self.max_retries = max_retries
-        self.retry_delay = retry_delay
-        self.logger = self._setup_logger()
-        self.client = None
-        self._connect()
-    def _setup_logger(self) -> logging.Logger:
-        """ロガーの設定"""
-        logger = logging.getLogger('TalkingHeadAPIClient')
-        logger.setLevel(logging.INFO)
-        if not logger.handlers:
-            handler = logging.StreamHandler()
-            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s',
-                                        datefmt='%Y-%m-%d %H:%M:%S')
-            handler.setFormatter(formatter)
-            logger.addHandler(handler)
-        return logger
-    def _connect(self) -> None:
-        """APIへの接続"""
-        for attempt in range(self.max_retries):
-            try:
-                self.logger.info(f"接続開始: {self.space_name} (試行 {attempt + 1}/{self.max_retries})")
-                self.client = Client(self.space_name)
-                self.logger.info("接続成功")
-                return
-            except Exception as e:
-                self.logger.error(f"接続失敗: {e}")
-                if attempt < self.max_retries - 1:
-                    self.logger.info(f"{self.retry_delay}秒後にリトライします...")
-                    time.sleep(self.retry_delay)
-                else:
-                    raise ConnectionError(f"APIへの接続に失敗しました: {e}")
-    def generate_video(self, audio_path: str, image_path: str) -> Tuple[Optional[dict], str]:
-        """
-        API経由で動画生成
-        Args:
-            audio_path: 音声ファイルのパス
-            image_path: 画像ファイルのパス
-        Returns:
-            tuple: (video_data, status_message)
-        """
-        # ファイルの存在確認
-        if not os.path.exists(audio_path):
-            error_msg = f"音声ファイルが見つかりません: {audio_path}"
-            self.logger.error(error_msg)
-            return None, error_msg
-        if not os.path.exists(image_path):
-            error_msg = f"画像ファイルが見つかりません: {image_path}"
-            self.logger.error(error_msg)
-            return None, error_msg
-        # API呼び出し
-        for attempt in range(self.max_retries):
-            try:
-                self.logger.info(f"ファイルアップロード: {audio_path}, {image_path}")
-                self.logger.info("処理開始...")
-                result = self.client.predict(
-                    audio_file=handle_file(audio_path),
-                    source_image=handle_file(image_path),
-                    api_name="/process_talking_head"
-                )
-                self.logger.info("動画生成完了")
-                return result
-            except Exception as e:
-                self.logger.error(f"処理エラー (試行 {attempt + 1}/{self.max_retries}): {e}")
-                if attempt < self.max_retries - 1:
-                    self.logger.info(f"{self.retry_delay}秒後にリトライします...")
-                    time.sleep(self.retry_delay)
-                else:
-                    error_msg = f"動画生成に失敗しました: {e}"
-                    return None, error_msg
-    def save_with_timestamp(self, video_path: str, output_dir: str = "example") -> Optional[str]:
-        """
-        動画をタイムスタンプ付きで保存
-        Args:
-            video_path: 生成された動画のパス
-            output_dir: 保存先ディレクトリ
-        Returns:
-            str: 保存されたファイルパス（エラー時はNone）
-        """
-        try:
-            # 動画パスの確認
-            if not video_path or not os.path.exists(video_path):
-                self.logger.error(f"動画ファイルが見つかりません: {video_path}")
-                return None
-            # 出力ディレクトリの作成
-            os.makedirs(output_dir, exist_ok=True)
-            # YYYY-MM-DD_HH-MM-SS.mp4 形式で保存
-            timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-            output_path = os.path.join(output_dir, f"{timestamp}.mp4")
-            # ファイルをコピー
-            shutil.copy2(video_path, output_path)
-            # ファイルサイズの確認
-            file_size = os.path.getsize(output_path)
-            self.logger.info(f"保存完了: {output_path} (サイズ: {file_size:,} bytes)")
-            return output_path
-        except Exception as e:
-            self.logger.error(f"保存エラー: {e}")
-            return None
-    def process_with_save(self, audio_path: str, image_path: str, output_dir: str = "example") -> Tuple[Optional[str], str]:
-        """
-        動画生成と保存を一括実行
-        Args:
-            audio_path: 音声ファイルのパス
-            image_path: 画像ファイルのパス
-            output_dir: 保存先ディレクトリ
-        Returns:
-            tuple: (saved_path, status_message)
-        """
-        # 動画生成
-        result = self.generate_video(audio_path, image_path)
-        if result[0] is None:
-            return None, result[1]
-        video_data, status = result
-        # 動画の保存
-        if isinstance(video_data, dict) and 'video' in video_data:
-            saved_path = self.save_with_timestamp(video_data['video'], output_dir)
-            if saved_path:
-                return saved_path, f"{status}\n保存先: {saved_path}"
-            else:
-                return None, f"{status}\n保存に失敗しました"
-        else:
-            return None, f"予期しないレスポンス形式: {video_data}"
-def main():
-    """テストスクリプトのメイン関数"""
-    # ロギング設定
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s - %(message)s',
-        datefmt='%Y-%m-%d %H:%M:%S'
-    )
-    # クライアント初期化
-    try:
-        client = TalkingHeadAPIClient()
-    except Exception as e:
-        logging.error(f"クライアント初期化失敗: {e}")
-        return
-    # サンプルファイルを使用
-    audio_path = "example/audio.wav"
-    image_path = "example/image.png"
-    # ファイルの存在確認
-    if not os.path.exists(audio_path):
-        logging.error(f"音声ファイルが見つかりません: {audio_path}")
-        return
-    if not os.path.exists(image_path):
-        logging.error(f"画像ファイルが見つかりません: {image_path}")
-        return
-    try:
-        # 動画生成と保存
-        saved_path, status = client.process_with_save(audio_path, image_path)
-        if saved_path:
-            print(f"\n✅ 成功!")
-            print(f"ステータス: {status}")
-            print(f"動画を確認してください: {saved_path}")
-        else:
-            print(f"\n❌ 失敗")
-            print(f"ステータス: {status}")
-    except KeyboardInterrupt:
-        logging.info("処理を中断しました")
-    except Exception as e:
-        logging.error(f"予期しないエラー: {e}")
-        import traceback
-        traceback.print_exc()
-if __name__ == "__main__":
-    main()

test_performance.py DELETED Viewed

@@ -1,175 +0,0 @@
-#!/usr/bin/env python3
-"""
-パフォーマンステストスクリプト
-動画生成の各ステップの実行時間を計測
-"""
-import time
-import logging
-from test_api_client import TalkingHeadAPIClient
-import os
-# ロギング設定
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S'
-)
-class TimingStats:
-    def __init__(self):
-        self.stats = {}
-        self.start_times = {}
-    def start(self, name):
-        self.start_times[name] = time.time()
-    def end(self, name):
-        if name in self.start_times:
-            duration = time.time() - self.start_times[name]
-            self.stats[name] = duration
-            return duration
-        return None
-    def report(self):
-        print("\n=== パフォーマンス計測結果 ===")
-        total_time = sum(self.stats.values())
-        for name, duration in self.stats.items():
-            percentage = (duration / total_time) * 100 if total_time > 0 else 0
-            print(f"{name}: {duration:.2f}秒 ({percentage:.1f}%)")
-        print(f"\n合計時間: {total_time:.2f}秒")
-        # 音声ファイルの長さを取得
-        try:
-            import librosa
-            audio_path = "example/audio.wav"
-            y, sr = librosa.load(audio_path, sr=None)
-            audio_duration = len(y) / sr
-            print(f"音声ファイルの長さ: {audio_duration:.2f}秒")
-            print(f"処理時間比率: {total_time/audio_duration:.2f}x")
-        except Exception as e:
-            print(f"音声長さの取得失敗: {e}")
-def test_performance():
-    """パフォーマンステストを実行"""
-    timer = TimingStats()
-    # 全体の開始時間
-    timer.start("全体処理")
-    # クライアント初期化
-    timer.start("API接続")
-    try:
-        client = TalkingHeadAPIClient()
-        timer.end("API接続")
-    except Exception as e:
-        logging.error(f"クライアント初期化失敗: {e}")
-        return
-    # サンプルファイル
-    audio_path = "example/audio.wav"
-    image_path = "example/image.png"
-    # ファイル情報を表示
-    audio_size = os.path.getsize(audio_path) / 1024 / 1024  # MB
-    image_size = os.path.getsize(image_path) / 1024 / 1024  # MB
-    print(f"\n入力ファイル情報:")
-    print(f"- 音声: {audio_path} ({audio_size:.2f} MB)")
-    print(f"- 画像: {image_path} ({image_size:.2f} MB)")
-    # 動画生成
-    timer.start("動画生成（API呼び出し）")
-    try:
-        result = client.generate_video(audio_path, image_path)
-        video_data, status = result
-        timer.end("動画生成（API呼び出し）")
-        if video_data:
-            # 保存処理
-            timer.start("動画保存")
-            if isinstance(video_data, dict) and 'video' in video_data:
-                saved_path = client.save_with_timestamp(video_data['video'])
-                timer.end("動画保存")
-                # 出力ファイル情報
-                output_size = os.path.getsize(saved_path) / 1024 / 1024  # MB
-                print(f"\n出力ファイル情報:")
-                print(f"- 動画: {saved_path} ({output_size:.2f} MB)")
-            timer.end("全体処理")
-            timer.report()
-            print(f"\n✅ テスト成功!")
-            print(f"ステータス: {status}")
-        else:
-            print(f"\n❌ テスト失敗")
-            print(f"ステータス: {status}")
-    except Exception as e:
-        logging.error(f"エラー発生: {e}")
-        import traceback
-        traceback.print_exc()
-def test_multiple_runs(runs=3):
-    """複数回実行して平均時間を計測"""
-    print(f"\n=== {runs}回連続実行テスト ===")
-    times = []
-    for i in range(runs):
-        print(f"\n--- 実行 {i+1}/{runs} ---")
-        start = time.time()
-        try:
-            client = TalkingHeadAPIClient()
-            result = client.generate_video("example/audio.wav", "example/image.png")
-            if result[0]:
-                duration = time.time() - start
-                times.append(duration)
-                print(f"実行時間: {duration:.2f}秒")
-        except Exception as e:
-            print(f"エラー: {e}")
-    if times:
-        avg_time = sum(times) / len(times)
-        min_time = min(times)
-        max_time = max(times)
-        print(f"\n=== 統計 ===")
-        print(f"平均時間: {avg_time:.2f}秒")
-        print(f"最小時間: {min_time:.2f}秒")
-        print(f"最大時間: {max_time:.2f}秒")
-def analyze_bottlenecks():
-    """ボトルネック分析のための詳細テスト"""
-    print("\n=== ボトルネック分析 ===")
-    # ローカルファイルの読み込み時間
-    start = time.time()
-    with open("example/audio.wav", "rb") as f:
-        audio_data = f.read()
-    with open("example/image.png", "rb") as f:
-        image_data = f.read()
-    local_read_time = time.time() - start
-    print(f"ローカルファイル読み込み: {local_read_time:.3f}秒")
-    # ネットワーク遅延の推定（Hugging Face Spaceへのping相当）
-    import requests
-    start = time.time()
-    try:
-        response = requests.get("https://o-ken5481-talkingavater-bgk.hf.space", timeout=10)
-        network_time = time.time() - start
-        print(f"ネットワーク遅延（推定）: {network_time:.3f}秒")
-    except:
-        print("ネットワーク遅延の測定失敗")
-if __name__ == "__main__":
-    print("DittoTalkingHead パフォーマンステスト")
-    print("=" * 50)
-    # 1. 詳細な時間計測
-    test_performance()
-    # 2. 複数回実行テスト
-    # test_multiple_runs(3)
-    # 3. ボトルネック分析
-    analyze_bottlenecks()

test_performance_optimized.py DELETED Viewed

@@ -1,375 +0,0 @@
-"""
-Performance test script for Phase 3 optimizations
-Tests various optimization strategies and measures performance improvements
-"""
-import time
-import os
-import sys
-import numpy as np
-from pathlib import Path
-import torch
-from typing import Dict, List, Tuple
-import json
-from datetime import datetime
-# Add project root to path
-sys.path.append(str(Path(__file__).parent))
-from model_manager import ModelManager
-from core.optimization import (
-    FixedResolutionProcessor,
-    GPUOptimizer,
-    AvatarCache,
-    AvatarTokenManager,
-    ColdStartOptimizer
-)
-class PerformanceTester:
-    """Performance testing framework for DittoTalkingHead optimizations"""
-    def __init__(self):
-        self.results = []
-        self.resolution_optimizer = FixedResolutionProcessor()
-        self.gpu_optimizer = GPUOptimizer()
-        self.cold_start_optimizer = ColdStartOptimizer()
-        self.avatar_cache = AvatarCache()
-        # Test configurations
-        self.test_configs = {
-            "audio_durations": [4, 8, 16, 32],  # seconds
-            "resolutions": [256, 320, 512],  # will test 320 fixed vs others
-            "optimization_levels": ["none", "gpu_only", "resolution_only", "full"]
-        }
-    def setup_test_environment(self):
-        """Set up test environment"""
-        print("=== Setting up test environment ===")
-        # Initialize models
-        USE_PYTORCH = True
-        model_manager = ModelManager(cache_dir="/tmp/ditto_models", use_pytorch=USE_PYTORCH)
-        if not model_manager.setup_models():
-            raise RuntimeError("Failed to setup models")
-        # Initialize SDK
-        if USE_PYTORCH:
-            data_root = "./checkpoints/ditto_pytorch"
-            cfg_pkl = "./checkpoints/ditto_cfg/v0.4_hubert_cfg_pytorch.pkl"
-        else:
-            data_root = "./checkpoints/ditto_trt_Ampere_Plus"
-            cfg_pkl = "./checkpoints/ditto_cfg/v0.4_hubert_cfg_trt.pkl"
-        from stream_pipeline_offline import StreamSDK
-        self.sdk = StreamSDK(cfg_pkl, data_root)
-        print("✅ Test environment ready")
-    def generate_test_data(self, duration: int) -> Tuple[str, str]:
-        """
-        Generate test audio and image files
-        Args:
-            duration: Audio duration in seconds
-        Returns:
-            Tuple of (audio_path, image_path)
-        """
-        import tempfile
-        from scipy.io import wavfile
-        from PIL import Image
-        # Generate test audio (sine wave)
-        sample_rate = 16000
-        t = np.linspace(0, duration, duration * sample_rate)
-        audio_data = np.sin(2 * np.pi * 440 * t).astype(np.float32) * 0.5
-        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
-            wavfile.write(tmp.name, sample_rate, audio_data)
-            audio_path = tmp.name
-        # Generate test image
-        img = Image.new('RGB', (512, 512), color='white')
-        # Add some features
-        from PIL import ImageDraw
-        draw = ImageDraw.Draw(img)
-        draw.ellipse([156, 156, 356, 356], fill='lightblue')  # Face
-        draw.ellipse([200, 200, 220, 220], fill='black')  # Left eye
-        draw.ellipse([292, 200, 312, 220], fill='black')  # Right eye
-        draw.arc([220, 250, 292, 300], 0, 180, fill='red', width=3)  # Mouth
-        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
-            img.save(tmp.name)
-            image_path = tmp.name
-        return audio_path, image_path
-    def test_baseline(self, audio_duration: int) -> Dict[str, float]:
-        """
-        Test baseline performance without optimizations
-        Args:
-            audio_duration: Test audio duration in seconds
-        Returns:
-            Performance metrics
-        """
-        print(f"\n--- Testing baseline (no optimizations, {audio_duration}s audio) ---")
-        audio_path, image_path = self.generate_test_data(audio_duration)
-        try:
-            # Disable optimizations
-            torch.backends.cudnn.benchmark = False
-            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
-                output_path = tmp.name
-            # Run without optimizations
-            from inference import run, seed_everything
-            seed_everything(1024)
-            start_time = time.time()
-            run(self.sdk, audio_path, image_path, output_path)
-            process_time = time.time() - start_time
-            # Clean up
-            for path in [audio_path, image_path, output_path]:
-                if os.path.exists(path):
-                    os.unlink(path)
-            return {
-                "audio_duration": audio_duration,
-                "process_time": process_time,
-                "realtime_factor": process_time / audio_duration,
-                "optimization": "none"
-            }
-        except Exception as e:
-            print(f"Error in baseline test: {e}")
-            return None
-    def test_gpu_optimization(self, audio_duration: int) -> Dict[str, float]:
-        """Test with GPU optimizations only"""
-        print(f"\n--- Testing GPU optimization ({audio_duration}s audio) ---")
-        audio_path, image_path = self.generate_test_data(audio_duration)
-        try:
-            # Apply GPU optimizations
-            self.gpu_optimizer._setup_cuda_optimizations()
-            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
-                output_path = tmp.name
-            from inference import run, seed_everything
-            seed_everything(1024)
-            start_time = time.time()
-            run(self.sdk, audio_path, image_path, output_path)
-            process_time = time.time() - start_time
-            # Clean up
-            for path in [audio_path, image_path, output_path]:
-                if os.path.exists(path):
-                    os.unlink(path)
-            return {
-                "audio_duration": audio_duration,
-                "process_time": process_time,
-                "realtime_factor": process_time / audio_duration,
-                "optimization": "gpu_only"
-            }
-        except Exception as e:
-            print(f"Error in GPU optimization test: {e}")
-            return None
-    def test_resolution_optimization(self, audio_duration: int) -> Dict[str, float]:
-        """Test with resolution optimization (320x320)"""
-        print(f"\n--- Testing resolution optimization ({audio_duration}s audio) ---")
-        audio_path, image_path = self.generate_test_data(audio_duration)
-        try:
-            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
-                output_path = tmp.name
-            # Apply resolution optimization
-            setup_kwargs = {
-                "max_size": self.resolution_optimizer.get_max_dim(),  # 320
-                "sampling_timesteps": self.resolution_optimizer.get_diffusion_steps()  # 25
-            }
-            from inference import run, seed_everything
-            seed_everything(1024)
-            start_time = time.time()
-            run(self.sdk, audio_path, image_path, output_path,
-                more_kwargs={"setup_kwargs": setup_kwargs})
-            process_time = time.time() - start_time
-            # Clean up
-            for path in [audio_path, image_path, output_path]:
-                if os.path.exists(path):
-                    os.unlink(path)
-            return {
-                "audio_duration": audio_duration,
-                "process_time": process_time,
-                "realtime_factor": process_time / audio_duration,
-                "optimization": "resolution_only",
-                "resolution": f"{self.resolution_optimizer.get_max_dim()}x{self.resolution_optimizer.get_max_dim()}"
-            }
-        except Exception as e:
-            print(f"Error in resolution optimization test: {e}")
-            return None
-    def test_full_optimization(self, audio_duration: int) -> Dict[str, float]:
-        """Test with all optimizations enabled"""
-        print(f"\n--- Testing full optimization ({audio_duration}s audio) ---")
-        audio_path, image_path = self.generate_test_data(audio_duration)
-        try:
-            # Apply all optimizations
-            self.gpu_optimizer._setup_cuda_optimizations()
-            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
-                output_path = tmp.name
-            setup_kwargs = {
-                "max_size": self.resolution_optimizer.get_max_dim(),
-                "sampling_timesteps": self.resolution_optimizer.get_diffusion_steps()
-            }
-            from inference import run, seed_everything
-            seed_everything(1024)
-            start_time = time.time()
-            run(self.sdk, audio_path, image_path, output_path,
-                more_kwargs={"setup_kwargs": setup_kwargs})
-            process_time = time.time() - start_time
-            # Clean up
-            for path in [audio_path, image_path, output_path]:
-                if os.path.exists(path):
-                    os.unlink(path)
-            return {
-                "audio_duration": audio_duration,
-                "process_time": process_time,
-                "realtime_factor": process_time / audio_duration,
-                "optimization": "full",
-                "resolution": f"{self.resolution_optimizer.get_max_dim()}x{self.resolution_optimizer.get_max_dim()}",
-                "gpu_optimized": True
-            }
-        except Exception as e:
-            print(f"Error in full optimization test: {e}")
-            return None
-    def run_comprehensive_test(self):
-        """Run comprehensive performance tests"""
-        print("\n" + "="*60)
-        print("Starting comprehensive performance test")
-        print("="*60)
-        self.setup_test_environment()
-        # Test different audio durations and optimization levels
-        for duration in self.test_configs["audio_durations"]:
-            print(f"\n{'='*60}")
-            print(f"Testing with {duration}s audio")
-            print(f"{'='*60}")
-            # Run tests with different optimization levels
-            tests = [
-                ("Baseline", self.test_baseline),
-                ("GPU Only", self.test_gpu_optimization),
-                ("Resolution Only", self.test_resolution_optimization),
-                ("Full Optimization", self.test_full_optimization)
-            ]
-            duration_results = []
-            for test_name, test_func in tests:
-                result = test_func(duration)
-                if result:
-                    duration_results.append(result)
-                    print(f"{test_name}: {result['process_time']:.2f}s (RT factor: {result['realtime_factor']:.2f}x)")
-                # Clear GPU cache between tests
-                self.gpu_optimizer.clear_cache()
-                time.sleep(1)  # Brief pause
-            self.results.extend(duration_results)
-        # Generate report
-        self.generate_report()
-    def generate_report(self):
-        """Generate performance test report"""
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        report_file = f"performance_report_{timestamp}.json"
-        # Calculate improvements
-        summary = {
-            "test_date": timestamp,
-            "gpu_info": self.gpu_optimizer.get_memory_stats(),
-            "optimization_config": self.resolution_optimizer.get_performance_config(),
-            "results": self.results
-        }
-        # Calculate average improvements by optimization type
-        avg_improvements = {}
-        for opt_type in ["gpu_only", "resolution_only", "full"]:
-            opt_results = [r for r in self.results if r.get("optimization") == opt_type]
-            baseline_results = [r for r in self.results if r.get("optimization") == "none"
-                              and r["audio_duration"] == opt_results[0]["audio_duration"]]
-            if opt_results and baseline_results:
-                avg_improvement = 0
-                for opt_r in opt_results:
-                    baseline_r = next((b for b in baseline_results
-                                     if b["audio_duration"] == opt_r["audio_duration"]), None)
-                    if baseline_r:
-                        improvement = (baseline_r["process_time"] - opt_r["process_time"]) / baseline_r["process_time"] * 100
-                        avg_improvement += improvement
-                avg_improvements[opt_type] = avg_improvement / len(opt_results)
-        summary["average_improvements"] = avg_improvements
-        # Save report
-        with open(report_file, 'w') as f:
-            json.dump(summary, f, indent=2)
-        # Print summary
-        print("\n" + "="*60)
-        print("PERFORMANCE TEST SUMMARY")
-        print("="*60)
-        print("\nAverage Performance Improvements:")
-        for opt_type, improvement in avg_improvements.items():
-            print(f"- {opt_type}: {improvement:.1f}% faster")
-        print(f"\nDetailed results saved to: {report_file}")
-        # Check if we meet the target (16s audio in <10s)
-        target_results = [r for r in self.results
-                         if r.get("optimization") == "full" and r["audio_duration"] == 16]
-        if target_results:
-            meets_target = target_results[0]["process_time"] <= 10.0
-            print(f"\n✅ Target Achievement (16s audio < 10s): {'YES' if meets_target else 'NO'}")
-            print(f"   Actual time: {target_results[0]['process_time']:.2f}s")
-if __name__ == "__main__":
-    import tempfile
-    tester = PerformanceTester()
-    tester.run_comprehensive_test()