Upload folder using huggingface_hub

77c8117 verified about 2 months ago

10.2 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	==============================================================================
	실시간 호출어 감지 스크립트
	==============================================================================
	마이크 입력을 받아 학습된 커스텀 모델로 실시간 호출어 감지를 수행합니다.

	사용법:
	python run_live.py --model ./my_model.onnx --threshold 0.5
	"""

	import os
	import sys
	import time
	import argparse
	from pathlib import Path
	from datetime import datetime
	from typing import Optional

	import numpy as np

	# openWakeWord 라이브러리 경로 추가
	sys.path.insert(0, str(Path(__file__).parent.parent / "openWakeWord"))

	# Third-party dependencies are imported defensively: a missing package
	# prints an install hint and exits with status 1 instead of a traceback.
	try:
	    from openwakeword.model import Model
	except ImportError as import_error:
	    print(f"❌ openwakeword 임포트 실패: {import_error}")
	    print(" -> 'pip install openwakeword' 실행 필요")
	    sys.exit(1)
	else:
	    print("✅ openwakeword.Model 임포트 성공")

	try:
	    import sounddevice as sd
	except ImportError as import_error:
	    print(f"❌ sounddevice 임포트 실패: {import_error}")
	    print(" -> 'pip install sounddevice' 실행 필요")
	    sys.exit(1)
	else:
	    print("✅ sounddevice 임포트 성공")


	# ============================================
	# Configuration
	# ============================================
	DEFAULT_MODEL_PATH = "./my_model.onnx"
	SAMPLE_RATE = 16000 # openWakeWord requirement: 16 kHz
	CHUNK_SIZE = 1280 # 80 ms frame (16000 * 0.08)
	DEFAULT_THRESHOLD = 0.5 # detection threshold


	def print_banner():
	    """Print the start-up banner to stdout."""
	    rule = "=" * 60
	    banner_lines = (
	        "\n" + rule,
	        "🎤 한국어 호출어 실시간 감지 시스템",
	        rule,
	        "모델을 로드하고 마이크 입력을 시작합니다...",
	        "종료하려면 Ctrl+C를 누르세요.\n",
	    )
	    # One print per entry so the output matches line for line.
	    for line in banner_lines:
	        print(line)


	def check_microphone_permission() -> bool:
	    """
	    Check that a microphone exists and can be recorded from.

	    Queries the audio devices, prints details of the default input device,
	    then makes a 100 ms test recording at SAMPLE_RATE. Any exception raised
	    along the way is printed together with troubleshooting hints.

	    Returns:
	        True if the test recording completed; False if no device reports an
	        input channel or if any step raised an exception.
	    """
	    print("🔍 마이크 권한 확인 중...")

	    try:
	        # A device counts as a microphone if it reports any input channel.
	        has_input = any(
	            device['max_input_channels'] > 0 for device in sd.query_devices()
	        )
	        if not has_input:
	            print("❌ 사용 가능한 입력 디바이스(마이크)가 없습니다!")
	            print(" 시스템의 오디오 설정을 확인해주세요.")
	            return False

	        # Report the default input device.
	        mic = sd.query_devices(kind='input')
	        print(f"✅ 기본 마이크: {mic['name']}")
	        print(f" 최대 입력 채널: {mic['max_input_channels']}")
	        print(f" 기본 샘플 레이트: {mic['default_samplerate']}")

	        # Short test recording (100 ms); only completion matters, the
	        # captured samples themselves are never inspected.
	        print(" 마이크 테스트 녹음 중... ", end="", flush=True)
	        probe_frames = int(0.1 * SAMPLE_RATE)
	        _probe = sd.rec(
	            probe_frames,
	            samplerate=SAMPLE_RATE,
	            channels=1,
	            dtype='int16',
	        )
	        sd.wait()
	        print("✅ 성공!")
	        return True

	    except Exception as error:
	        print(f"❌ 마이크 권한 확인 실패: {error}")
	        hints = (
	            "\n💡 해결 방법:",
	            " 1. 마이크가 연결되어 있는지 확인",
	            " 2. 시스템 설정에서 마이크 권한 허용",
	            " 3. Linux: 'sudo usermod -a -G audio $USER' 실행 후 재로그인",
	        )
	        for hint in hints:
	            print(hint)
	        return False


	def play_beep():
	    """
	    Play a short beep when a detection fires.

	    Builds a 150 ms, 880 Hz sine wave scaled to 0.3 amplitude, applies a
	    10 ms linear fade at each end, and passes it to sounddevice as float32
	    at SAMPLE_RATE. No wait is issued after starting playback. Every
	    exception is swallowed on purpose so that a failed beep never
	    interrupts detection.
	    """
	    try:
	        tone_seconds = 0.15  # 150 ms
	        tone_hz = 880  # Hz (note A5)

	        sample_count = int(SAMPLE_RATE * tone_seconds)
	        timeline = np.linspace(0, tone_seconds, sample_count, endpoint=False)
	        tone = np.sin(tone_hz * 2 * np.pi * timeline) * 0.3

	        # Linear fade-in and fade-out over the first and last 10 ms.
	        ramp_len = int(0.01 * SAMPLE_RATE)
	        fade_in = np.linspace(0, 1, ramp_len)
	        fade_out = np.linspace(1, 0, ramp_len)
	        tone[:ramp_len] *= fade_in
	        tone[-ramp_len:] *= fade_out

	        sd.play(tone.astype(np.float32), SAMPLE_RATE)

	    except Exception:
	        # Best effort: keep going even if the beep cannot be played.
	        pass


	def _float_to_int16(samples):
	    """Scale float samples to int16 PCM, clipping to [-1.0, 1.0] first.

	    Without the clip, a sample above full scale would exceed the int16
	    range after multiplying by 32767 and corrupt the cast result.
	    """
	    return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)


	def _score_bar(score, width=30):
	    """Return a text meter of *width* cells filled in proportion to *score*.

	    The score is clamped to 0..1 so the bar always has exactly *width* cells.
	    """
	    filled = int(min(max(float(score), 0.0), 1.0) * width)
	    return "█" * filled + "░" * (width - filled)


	def run_detection(
	    model_path: str,
	    threshold: float = 0.5,
	    debounce_time: float = 1.0,
	    log_file: Optional[str] = None
	):
	    """
	    Run live wake-word detection on microphone input until Ctrl+C.

	    Audio is captured in CHUNK_SIZE-frame blocks at SAMPLE_RATE. The stream
	    callback only copies each block into a queue; inference, console output,
	    the beep and log writes all run in the calling thread.

	    Args:
	        model_path: Path to the ONNX model file. The file stem is used as the
	            key looked up in each prediction result.
	        threshold: Minimum score that counts as a detection.
	        debounce_time: Minimum number of seconds between two reported
	            detections.
	        log_file: Optional path; detections are appended to it when given.

	    Raises:
	        SystemExit: With code 1 if the model file does not exist or the model
	            cannot be loaded.
	        Exception: Any error raised while streaming is printed and re-raised.
	    """
	    # Function-scope import keeps this block self-contained.
	    import queue

	    # Make sure the model file exists before doing anything else.
	    if not os.path.exists(model_path):
	        print(f"❌ 모델 파일을 찾을 수 없습니다: {model_path}")
	        print(" 먼저 train_model.py를 실행하여 모델을 학습하세요.")
	        sys.exit(1)

	    # The prediction dict is looked up by the model file's stem.
	    model_name = Path(model_path).stem

	    print(f"\n📁 모델 로드 중: {model_path}")
	    print(f"🎯 모델 이름: {model_name}")
	    print(f"🔊 감지 임계값: {threshold}")
	    print(f"⏱️ Debounce 시간: {debounce_time}초")

	    # Load the openWakeWord model through the ONNX inference framework.
	    try:
	        oww_model = Model(
	            wakeword_models=[model_path],
	            inference_framework='onnx'
	        )
	        print("✅ 모델 로드 완료!")
	    except Exception as e:
	        print(f"❌ 모델 로드 실패: {e}")
	        sys.exit(1)

	    detection_count = 0
	    # -inf guarantees the first qualifying score is never debounced,
	    # whatever value the monotonic clock starts from.
	    last_detection_time = float("-inf")

	    # Blocks handed over from the audio callback to the main loop.
	    pending_blocks = queue.Queue()

	    def audio_callback(indata, frames, time_info, status):
	        # Keep the callback minimal: copy the block (the buffer is reused by
	        # the stream once the callback returns) and hand it to the main loop.
	        pending_blocks.put((indata.copy(), str(status) if status else ""))

	    log_handle = None
	    if log_file:
	        log_handle = open(log_file, 'a', encoding='utf-8')

	    try:
	        if log_handle:
	            log_handle.write(f"\n--- 세션 시작: {datetime.now()} ---\n")
	            print(f"📝 로그 파일: {log_file}")

	        print("\n" + "-"*60)
	        print("🎧 실시간 감지 시작! 호출어를 말해보세요.")
	        print("-"*60 + "\n")

	        with sd.InputStream(
	            samplerate=SAMPLE_RATE,
	            channels=1,
	            dtype='float32',
	            blocksize=CHUNK_SIZE,
	            callback=audio_callback
	        ):
	            print("(마이크 스트림 활성화됨)\n")

	            # Runs until Ctrl+C. The timeout keeps the wait interruptible.
	            while True:
	                try:
	                    block, status_text = pending_blocks.get(timeout=0.1)
	                except queue.Empty:
	                    continue

	                if status_text:
	                    print(f"⚠️ 오디오 상태: {status_text}")

	                # First channel, converted to int16 before prediction.
	                audio_data = _float_to_int16(block[:, 0])
	                prediction = oww_model.predict(audio_data)
	                score = prediction.get(model_name, 0)

	                # Monotonic clock: debounce is unaffected by wall-clock jumps.
	                now = time.monotonic()
	                if score >= threshold and (now - last_detection_time) > debounce_time:
	                    detection_count += 1
	                    last_detection_time = now

	                    timestamp = datetime.now().strftime("%H:%M:%S")
	                    print(f"\n🎯 [{timestamp}] DETECTED! (Score: {score:.3f}) - #{detection_count}")

	                    play_beep()

	                    if log_handle:
	                        log_handle.write(f"[{timestamp}] Score: {score:.3f}\n")
	                        log_handle.flush()

	                # Live meter, shown only for scores above 0.1.
	                if score > 0.1:
	                    print(f"\r[{_score_bar(score)}] {score:.3f}", end="", flush=True)

	    except KeyboardInterrupt:
	        print("\n\n" + "="*60)
	        print("🛑 감지 종료")
	        print("="*60)
	        print(f"📊 총 감지 횟수: {detection_count}회")

	        if log_handle:
	            log_handle.write(f"--- 세션 종료: {datetime.now()} ---\n")
	            log_handle.write(f"총 감지: {detection_count}회\n")
	            print(f"📝 로그 저장됨: {log_file}")

	    except Exception as e:
	        print(f"\n❌ 오류 발생: {e}")
	        raise

	    finally:
	        # Close the log on every exit path, not only the handled ones.
	        if log_handle is not None:
	            log_handle.close()


	def main(argv=None):
	    """
	    Command-line entry point: parse options, check the microphone, detect.

	    Args:
	        argv: Optional list of argument strings. When None (the default),
	            argparse reads the arguments from sys.argv.

	    Raises:
	        SystemExit: On invalid arguments (raised by argparse), or with code 1
	            when the microphone check fails.
	    """
	    parser = argparse.ArgumentParser(
	        description="실시간 호출어 감지 (마이크 입력)"
	    )
	    parser.add_argument(
	        "--model", "-m",
	        type=str,
	        default=DEFAULT_MODEL_PATH,
	        help=f"ONNX 모델 파일 경로 (기본값: {DEFAULT_MODEL_PATH})"
	    )
	    parser.add_argument(
	        "--threshold", "-t",
	        type=float,
	        default=DEFAULT_THRESHOLD,
	        help=f"감지 임계값 0~1 (기본값: {DEFAULT_THRESHOLD})"
	    )
	    parser.add_argument(
	        "--debounce", "-d",
	        type=float,
	        default=1.0,
	        help="연속 감지 방지 시간(초) (기본값: 1.0)"
	    )
	    parser.add_argument(
	        "--log", "-l",
	        type=str,
	        default=None,
	        help="감지 로그 파일 경로 (선택사항)"
	    )
	    parser.add_argument(
	        "--list-devices",
	        action="store_true",
	        help="사용 가능한 오디오 디바이스 목록 출력"
	    )

	    args = parser.parse_args(argv)

	    # --list-devices only prints the device table and returns.
	    if args.list_devices:
	        print("\n📋 사용 가능한 오디오 디바이스:")
	        print("-"*60)
	        print(sd.query_devices())
	        return

	    # The help text promises a 0~1 threshold, so enforce it here. The
	    # negated comparisons also reject NaN.
	    if not (0.0 <= args.threshold <= 1.0):
	        parser.error(
	            f"--threshold 값은 0과 1 사이여야 합니다 (입력값: {args.threshold})"
	        )
	    if not (args.debounce >= 0.0):
	        parser.error(
	            f"--debounce 값은 0 이상이어야 합니다 (입력값: {args.debounce})"
	        )

	    print_banner()

	    # Stop with a non-zero status when no usable microphone is found.
	    if not check_microphone_permission():
	        sys.exit(1)

	    run_detection(
	        model_path=args.model,
	        threshold=args.threshold,
	        debounce_time=args.debounce,
	        log_file=args.log
	    )


	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()