#!/usr/bin/env python3
"""Run the Stage 1 -> Stage 2 end-to-end pipeline.

Usage:
    python scripts/run_pipeline.py                            # use the default sample
    python scripts/run_pipeline.py data/samples/my_call.wav   # use a specific file
    python scripts/run_pipeline.py --stage2-only              # Stage 2 only (needs stage1_output.json)
"""

from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Any

# Add the project root to sys.path so the ``src`` package is importable when
# this file is executed directly as a script.
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("run_pipeline")


def run_stage1(audio_path: str) -> Any:
    """Run Stage 1 (speaker diarization + ASR) on an audio file.

    Args:
        audio_path: Path of the audio file handed to ``src.stage1.process.process``.

    Returns:
        The object returned by the Stage 1 ``process`` function. It is read
        here through its ``segments`` and ``processing_info`` attributes, so it
        is not a plain ``dict``.
    """
    # Imported lazily so that ``--stage2-only`` runs do not import Stage 1.
    from src.stage1.process import process as stage1_process

    logger.info("=" * 60)
    logger.info("Stage 1 시작: %s", audio_path)
    logger.info("=" * 60)

    result = stage1_process(audio_path)

    logger.info(
        "Stage 1 완료: %d segments, %.1fs 처리시간",
        len(result.segments),
        result.processing_info.processing_time_sec,
    )
    return result


def run_stage2(stage1_output: Any) -> Any:
    """Run Stage 2 (emotion analysis) on a Stage 1 result.

    Args:
        stage1_output: Stage 1 result object; its ``call_id`` and ``segments``
            attributes are read for logging before it is passed to
            ``src.stage2.process.process``.

    Returns:
        The object returned by the Stage 2 ``process`` function. It is read
        here through its ``emotions`` and ``speaker_summaries`` attributes.
    """
    from src.stage2.process import process as stage2_process

    logger.info("=" * 60)
    logger.info(
        "Stage 2 시작: %s (%d segments)",
        stage1_output.call_id,
        len(stage1_output.segments),
    )
    logger.info("=" * 60)

    result = stage2_process(stage1_output)

    logger.info(
        "Stage 2 완료: %d emotions, speakers=%s",
        len(result.emotions),
        list(result.speaker_summaries.keys()),
    )

    # Per-speaker summary: dominant emotion, its share as a percentage, and
    # the average confidence.
    for speaker_id, summary in result.speaker_summaries.items():
        dominant_share = summary.emotion_distribution.get(summary.dominant_emotion, 0)
        logger.info(
            " %s: dominant=%s (%.1f%%), avg_confidence=%.2f",
            speaker_id,
            summary.dominant_emotion,
            dominant_share * 100,
            summary.avg_confidence,
        )

    return result


def _resolve_audio_path(raw_path: str) -> str:
    """Return *raw_path* unchanged if absolute, else joined onto PROJECT_ROOT."""
    if Path(raw_path).is_absolute():
        return raw_path
    return str(PROJECT_ROOT / raw_path)


def main() -> None:
    """Parse command-line arguments and run the requested pipeline stages.

    Exits with status 1 when the required input is missing: the audio file in
    the default mode, or ``data/stage1_output.json`` with ``--stage2-only``.
    """
    parser = argparse.ArgumentParser(description="Stage 1 → Stage 2 파이프라인 실행")
    parser.add_argument(
        "audio_path",
        nargs="?",
        default="data/samples/sample_data.wav",
        help="입력 오디오 파일 경로 (기본: data/samples/sample_data.wav)",
    )
    parser.add_argument(
        "--stage2-only",
        action="store_true",
        help="Stage 2만 실행 (data/stage1_output.json 필요)",
    )
    args = parser.parse_args()

    if args.stage2_only:
        # Stage 2 only: reuse a previously saved Stage 1 result.
        from src.common.schemas import Stage1Output

        stage1_path = PROJECT_ROOT / "data" / "stage1_output.json"
        if not stage1_path.exists():
            logger.error("data/stage1_output.json 없음. Stage 1을 먼저 실행하세요.")
            sys.exit(1)

        # Decode explicitly as UTF-8: the locale default (e.g. cp949 on
        # Windows) would corrupt or reject non-ASCII transcript text.
        stage1_output = Stage1Output.model_validate_json(
            stage1_path.read_text(encoding="utf-8")
        )
        run_stage2(stage1_output)
    else:
        # Full run: Stage 1 followed by Stage 2. Relative audio paths are
        # interpreted relative to the project root, not the working directory.
        audio_path = _resolve_audio_path(args.audio_path)
        if not Path(audio_path).exists():
            logger.error("오디오 파일 없음: %s", audio_path)
            sys.exit(1)

        stage1_output = run_stage1(audio_path)
        run_stage2(stage1_output)

    logger.info("=" * 60)
    logger.info("파이프라인 완료!")
    logger.info(" Stage 1 출력: data/stage1_output.json")
    logger.info(" Stage 2 출력: data/stage2_output.json")
    logger.info("=" * 60)


if __name__ == "__main__":
    main()