Spaces:

BBBAKERY
/

ustwo-api

Sleeping

App Files Files Community

ustwo-api / scripts /run_pipeline.py

asdfasdfqrqwer

Deploy from GitHub 2026-04-23T03:56:31Z

c857b85 2 months ago

Raw

History Blame Contribute Delete

3.73 kB

	#!/usr/bin/env python3
	"""End-to-end runner script for the Stage 1 → Stage 2 pipeline.

	Usage:
	    python scripts/run_pipeline.py                           # use the default sample
	    python scripts/run_pipeline.py data/samples/my_call.wav  # run on a specific file
	    python scripts/run_pipeline.py --stage2-only             # run Stage 2 only (requires stage1_output.json)
	"""

	from __future__ import annotations

	import argparse
	import json
	import logging
	import sys
	from pathlib import Path
	from typing import Any

	# Put the project root (two directory levels above this file) at the front of
	# sys.path so the function-local `src.*` imports below can be resolved.
	PROJECT_ROOT = Path(__file__).parent.parent
	sys.path.insert(0, str(PROJECT_ROOT))

	# Configure root logging once for the whole script: INFO level, with
	# timestamp, logger name and level in front of every message.
	logging.basicConfig(
	level=logging.INFO,
	format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	)
	# Logger used by every function in this script.
	logger = logging.getLogger("run_pipeline")


	def run_stage1(audio_path: str) -> Any:
	    """Run Stage 1 (speaker diarization + ASR) on one audio file.

	    Args:
	        audio_path: Path of the audio file, passed unchanged to
	            ``src.stage1.process.process``.

	    Returns:
	        The object returned by ``src.stage1.process.process``, unchanged.
	        This function reads ``segments`` (must support ``len``) and
	        ``processing_info.processing_time_sec`` from it, so it is an
	        attribute-bearing object rather than a ``dict``.
	        NOTE(review): presumably a ``Stage1Output`` -- confirm against
	        ``src.stage1.process`` and tighten the annotation.
	    """
	    # Imported inside the function, so the Stage 1 package is loaded only
	    # when this stage actually runs.
	    from src.stage1.process import process as stage1_process

	    banner = "=" * 60
	    logger.info(banner)
	    logger.info("Stage 1 시작: %s", audio_path)
	    logger.info(banner)

	    result = stage1_process(audio_path)

	    logger.info(
	        "Stage 1 완료: %d segments, %.1fs 처리시간",
	        len(result.segments),
	        result.processing_info.processing_time_sec,
	    )
	    return result


	def run_stage2(stage1_output: Any) -> Any:
	    """Run Stage 2 (emotion analysis) on a Stage 1 result and log a summary.

	    Args:
	        stage1_output: Stage 1 result, passed unchanged to
	            ``src.stage2.process.process``. This function reads its
	            ``call_id`` and ``segments`` (must support ``len``) for logging.

	    Returns:
	        The object returned by ``src.stage2.process.process``, unchanged.
	        This function reads ``emotions`` (must support ``len``) and
	        ``speaker_summaries`` (a mapping of speaker id to a summary with
	        ``dominant_emotion``, ``emotion_distribution`` and
	        ``avg_confidence``) from it, so it is an attribute-bearing object
	        rather than a ``dict``.
	    """
	    # Imported inside the function, so the Stage 2 package is loaded only
	    # when this stage actually runs.
	    from src.stage2.process import process as stage2_process

	    banner = "=" * 60
	    logger.info(banner)
	    logger.info(
	        "Stage 2 시작: %s (%d segments)",
	        stage1_output.call_id,
	        len(stage1_output.segments),
	    )
	    logger.info(banner)

	    result = stage2_process(stage1_output)

	    logger.info(
	        "Stage 2 완료: %d emotions, speakers=%s",
	        len(result.emotions),
	        list(result.speaker_summaries),
	    )

	    # Per-speaker summary. The share of the dominant emotion is looked up in
	    # the distribution (0 when absent) and scaled by 100 for display as a
	    # percentage.
	    for speaker_id, summary in result.speaker_summaries.items():
	        dominant_share = summary.emotion_distribution.get(summary.dominant_emotion, 0)
	        logger.info(
	            " %s: dominant=%s (%.1f%%), avg_confidence=%.2f",
	            speaker_id,
	            summary.dominant_emotion,
	            dominant_share * 100,
	            summary.avg_confidence,
	        )

	    return result


	def main() -> None:
	    """Parse the command line and run the full pipeline or Stage 2 alone.

	    Without ``--stage2-only`` the positional ``audio_path`` (default
	    ``data/samples/sample_data.wav``) is resolved against ``PROJECT_ROOT``
	    when relative, run through Stage 1, and the result is fed to Stage 2.

	    With ``--stage2-only`` the Stage 1 result is loaded from
	    ``PROJECT_ROOT/data/stage1_output.json`` and only Stage 2 runs.
	    NOTE(review): in this mode a positional ``audio_path`` is silently
	    ignored -- confirm that is intended.

	    Exits with status 1 (after logging an error) when the required input
	    file does not exist.
	    """
	    parser = argparse.ArgumentParser(description="Stage 1 → Stage 2 파이프라인 실행")
	    parser.add_argument(
	        "audio_path",
	        nargs="?",
	        default="data/samples/sample_data.wav",
	        help="입력 오디오 파일 경로 (기본: data/samples/sample_data.wav)",
	    )
	    parser.add_argument(
	        "--stage2-only",
	        action="store_true",
	        help="Stage 2만 실행 (data/stage1_output.json 필요)",
	    )
	    args = parser.parse_args()

	    if args.stage2_only:
	        # Stage 2 only: reuse a previously saved Stage 1 result.
	        from src.common.schemas import Stage1Output

	        stage1_path = PROJECT_ROOT / "data" / "stage1_output.json"
	        if not stage1_path.exists():
	            logger.error("data/stage1_output.json 없음. Stage 1을 먼저 실행하세요.")
	            sys.exit(1)

	        # Decode explicitly as UTF-8 (the JSON interchange encoding) instead
	        # of the locale default, which corrupts or rejects non-ASCII text on
	        # non-UTF-8 locales.
	        # NOTE(review): confirm the Stage 1 writer also emits UTF-8.
	        stage1_json = stage1_path.read_text(encoding="utf-8")
	        stage1_output = Stage1Output.model_validate_json(stage1_json)
	        run_stage2(stage1_output)
	    else:
	        # Full run: Stage 1 → Stage 2.
	        # Joining onto PROJECT_ROOT anchors relative paths at the project
	        # root; pathlib discards the left operand when the right one is
	        # absolute, so absolute paths pass through untouched.
	        audio_path = PROJECT_ROOT / args.audio_path

	        if not audio_path.exists():
	            logger.error("오디오 파일 없음: %s", audio_path)
	            sys.exit(1)

	        stage1_output = run_stage1(str(audio_path))
	        run_stage2(stage1_output)

	    # NOTE(review): the output paths below are only reported; nothing in
	    # this function writes them. Presumably the stage process functions do
	    # -- confirm, especially for the --stage2-only branch.
	    banner = "=" * 60
	    logger.info(banner)
	    logger.info("파이프라인 완료!")
	    logger.info(" Stage 1 출력: data/stage1_output.json")
	    logger.info(" Stage 2 출력: data/stage2_output.json")
	    logger.info(banner)


	if __name__ == "__main__":
	    main()