Spaces:
Running
Running
Commit ·
1056960
1
Parent(s): 35c6482
Add UTAU WebUI project with LFS support for voice files
Browse files- .gitignore +11 -0
- .python-version +1 -0
- COMPRESSION_REPORT.md +1 -0
- Makefile +168 -0
- README.md +211 -1
- app.py +650 -0
- compressed_utau_engine.py +208 -0
- pyproject.toml +32 -0
- requirements.txt +809 -0
- straycat.py +825 -0
- test_compressed_voicebank.py +158 -0
- utau_engine.py +467 -0
- uv.lock +0 -0
- voice/hanseol_CVC_compressed.h5 +3 -0
- voice/test_voice.sc.npz +3 -0
- voice/test_voice.wav +0 -0
- voice_data_converter.py +264 -0
.gitignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
.DS_Store
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
COMPRESSION_REPORT.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
Makefile
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# UTAU WebUI development automation.
# Every target below is a command, not a file, so all of them are declared phony.
.PHONY: help setup install check-deps compress run dev test status clean clean-all update shell info all

# Basic settings
PYTHON := uv run python
UV := uv
VOICEBANK_DIR := voice/hanseol CVC
COMPRESSED_FILE := voice/hanseol_CVC_compressed.h5
PORT := 7860
# Entry point of the web UI. This commit ships app.py (there is no webui.py).
APP := app.py

# Default target: list every target annotated with a "## description".
help: ## 도움말 출력
	@echo "🎵 UTAU WebUI - 개발 환경 자동화 도구"
	@echo ""
	@echo "📋 사용 가능한 명령어:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
	@echo ""
	@echo "🚀 빠른 시작:"
	@echo " 1. make setup # 개발 환경 설정"
	@echo " 2. make compress # 보이스뱅크 압축"
	@echo " 3. make run # 웹UI 실행"
	@echo ""

# Verify that uv is installed, sync dependencies and create the voice directory.
setup: ## 개발 환경 초기 설정
	@echo "🔧 개발 환경을 설정합니다..."
	@if ! command -v uv >/dev/null 2>&1; then \
		echo "❌ uv가 설치되지 않았습니다. https://docs.astral.sh/uv/ 에서 설치하세요."; \
		exit 1; \
	fi
	@echo "📦 의존성을 설치합니다..."
	$(UV) sync
	@echo "📁 필요한 디렉토리를 생성합니다..."
	@mkdir -p voice
	@echo "✅ 개발 환경 설정 완료!"

install: setup ## setup의 별칭

# Report tool versions and whether the compressed / original voicebanks exist.
check-deps: ## 의존성 및 환경 확인
	@echo "🔍 환경을 확인합니다..."
	@echo "UV 버전: $$($(UV) --version 2>/dev/null || echo '❌ uv 없음')"
	@echo "Python 버전: $$($(PYTHON) --version 2>/dev/null || echo '❌ Python 없음')"
	@if [ -f "$(COMPRESSED_FILE)" ]; then \
		echo "✅ 압축된 보이스뱅크: $(COMPRESSED_FILE)"; \
		$(PYTHON) -c "import h5py; f=h5py.File('$(COMPRESSED_FILE)', 'r'); print(f'📊 메타데이터: {dict(f[\"metadata\"].attrs)}')"; \
	else \
		echo "❌ 압축된 보이스뱅크가 없음: $(COMPRESSED_FILE)"; \
	fi
	@if [ -d "$(VOICEBANK_DIR)" ]; then \
		echo "✅ 원본 보이스뱅크: $(VOICEBANK_DIR) ($$(find "$(VOICEBANK_DIR)" -name "*.wav" | wc -l)개 WAV 파일)"; \
	else \
		echo "❌ 원본 보이스뱅크가 없음: $(VOICEBANK_DIR)"; \
	fi

# Convert the original voicebank directory into a single HDF5 file.
# The Python snippet exits non-zero when the converter reports failure, so the
# final success message is only printed after a successful conversion.
compress: ## 보이스뱅크를 HDF5 형태로 압축
	@echo "🗜️ 보이스뱅크를 압축합니다..."
	@if [ ! -d "$(VOICEBANK_DIR)" ]; then \
		echo "❌ 원본 보이스뱅크를 찾을 수 없습니다: $(VOICEBANK_DIR)"; \
		echo "📋 해결 방법:"; \
		echo " 1. hanseol CVC 보이스뱅크를 $(VOICEBANK_DIR) 에 배치"; \
		echo " 2. 또는 다른 보이스뱅크를 사용하려면:"; \
		echo " make compress VOICEBANK_DIR=your/voicebank/path"; \
		exit 1; \
	fi
	@echo "📁 원본 위치: $(VOICEBANK_DIR)"
	@echo "💾 압축 파일: $(COMPRESSED_FILE)"
	$(PYTHON) -c "\
	import sys; \
	from voice_data_converter import convert_voicebank_to_compressed_format; \
	success = convert_voicebank_to_compressed_format('$(VOICEBANK_DIR)', '$(COMPRESSED_FILE)'); \
	print('✅ 압축 완료!' if success else '❌ 압축 실패'); \
	sys.exit(0 if success else 1)"
	@echo "🎉 보이스뱅크 압축이 완료되었습니다!"

# Start the web UI; requires the compressed voicebank.
run: ## 웹UI 실행
	@echo "🚀 UTAU WebUI를 시작합니다..."
	@if [ ! -f "$(COMPRESSED_FILE)" ]; then \
		echo "❌ 압축된 보이스뱅크가 없습니다."; \
		echo "📋 먼저 다음 명령어를 실행하세요: make compress"; \
		exit 1; \
	fi
	@echo "🌐 웹 브라우저에서 http://localhost:$(PORT) 을 열어주세요"
	$(PYTHON) $(APP)

# Same as `run`, with GRADIO_AUTO_RELOAD=1 exported to the process.
# NOTE(review): `uv run` has no `--env` option as far as I know, so the variable
# is set through the shell environment instead — confirm against the uv CLI docs.
dev: ## 개발 모드로 실행 (auto-reload)
	@echo "🔧 개발 모드로 UTAU WebUI를 시작합니다..."
	@if [ ! -f "$(COMPRESSED_FILE)" ]; then \
		echo "❌ 압축된 보이스뱅크가 없습니다."; \
		echo "📋 먼저 다음 명령어를 실행하세요: make compress"; \
		exit 1; \
	fi
	@echo "🌐 웹 브라우저에서 http://localhost:$(PORT) 을 열어주세요"
	@echo "🔄 파일 변경 시 자동으로 재시작됩니다"
	GRADIO_AUTO_RELOAD=1 $(PYTHON) $(APP)

# Run the compressed-voicebank test script.
test: ## 압축된 보이스뱅크 테스트
	@echo "🧪 압축된 보이스뱅크를 테스트합니다..."
	@if [ ! -f "$(COMPRESSED_FILE)" ]; then \
		echo "❌ 압축된 보이스뱅크가 없습니다: $(COMPRESSED_FILE)"; \
		echo "📋 먼저 다음 명령어를 실행하세요: make compress"; \
		exit 1; \
	fi
	$(PYTHON) test_compressed_voicebank.py

# Print the environment report and a one-line readiness summary.
# $(MAKE) (not a literal `make`) keeps flags and command-line overrides such as
# VOICEBANK_DIR=... in effect for the nested invocation.
status: ## 현재 상태 확인
	@echo "📊 UTAU WebUI 상태"
	@echo "===================="
	@$(MAKE) --no-print-directory check-deps
	@echo ""
	@if [ -f "$(COMPRESSED_FILE)" ] && [ -d "$(VOICEBANK_DIR)" ]; then \
		echo "🎉 모든 준비가 완료되었습니다! 'make run'으로 시작하세요."; \
	elif [ -f "$(COMPRESSED_FILE)" ]; then \
		echo "✅ 압축된 보이스뱅크가 준비되었습니다! 'make run'으로 시작하세요."; \
	elif [ -d "$(VOICEBANK_DIR)" ]; then \
		echo "⚠️ 보이스뱅크가 있지만 압축되지 않았습니다. 'make compress'를 실행하세요."; \
	else \
		echo "❌ 보이스뱅크가 없습니다. 먼저 보이스뱅크를 준비하고 'make compress'를 실행하세요."; \
	fi

# Remove bytecode, caches and other temporary files (best effort).
clean: ## 임시 파일 및 캐시 정리
	@echo "🧹 임시 파일을 정리합니다..."
	@find . -type f -name "*.pyc" -delete 2>/dev/null || true
	@find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
	@find . -type f -name "*.tmp" -delete 2>/dev/null || true
	@find . -type f -name ".DS_Store" -delete 2>/dev/null || true
	@rm -rf .pytest_cache 2>/dev/null || true
	@echo "✅ 정리 완료!"

# Like `clean`, and additionally deletes the compressed voicebank after confirmation.
# The prompt uses printf + read because `read -p` is not available in every /bin/sh.
clean-all: clean ## 모든 생성된 파일 삭제 (압축 파일 포함)
	@echo "🗑️ 모든 생성된 파일을 삭제합니다..."
	@if [ -f "$(COMPRESSED_FILE)" ]; then \
		echo "⚠️ 압축된 보이스뱅크도 삭제됩니다: $(COMPRESSED_FILE)"; \
		printf '%s' "계속하시겠습니까? (y/N): "; \
		read confirm; \
		if [ "$$confirm" = "y" ] || [ "$$confirm" = "Y" ]; then \
			rm -f "$(COMPRESSED_FILE)"; \
			echo "✅ 모든 파일이 삭제되었습니다."; \
		else \
			echo "❌ 취소되었습니다."; \
		fi \
	else \
		echo "✅ 정리할 파일이 없습니다."; \
	fi

# Extra commands for developers
update: ## 의존성 업데이트
	@echo "📦 의존성을 업데이트합니다..."
	$(UV) sync --upgrade
	@echo "✅ 업데이트 완료!"

# NOTE(review): uv does not appear to provide a `shell` subcommand, so this starts
# the user's shell through `uv run` instead — confirm against the uv CLI docs.
shell: ## 프로젝트 쉘 진입
	@echo "🐚 프로젝트 쉘에 진입합니다..."
	$(UV) run $${SHELL:-/bin/sh}

# Print static project information and up to ten Python files.
info: ## 프로젝트 정보 출력
	@echo "📋 UTAU WebUI 프로젝트 정보"
	@echo "============================"
	@echo "프로젝트: UTAU WebUI"
	@echo "설명: 한국어 CVC 보이스뱅크를 사용한 웹 기반 UTAU 음성 합성기"
	@echo "기술 스택: Python, Gradio, HDF5, UV"
	@echo "포트: $(PORT)"
	@echo "보이스뱅크: $(VOICEBANK_DIR)"
	@echo "압축 파일: $(COMPRESSED_FILE)"
	@echo ""
	@echo "📁 디렉토리 구조:"
	@find . -maxdepth 2 -type f -name "*.py" | head -10
	@echo ""

# Full workflow
all: setup compress run ## 전체 설정 및 실행 (setup → compress → run)

# Make `help` the default target
.DEFAULT_GOAL := help
|
README.md
CHANGED
|
@@ -9,5 +9,215 @@ app_file: app.py
|
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
---
|
|
|
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
---
|
| 12 |
+
# 🎵 UTAU WebUI - 한국어 음성 합성기
|
| 13 |
|
| 14 |
+
피아노롤 기반의 웹 UTAU 음성 합성 시스템입니다. 한국어 CVC 보이스뱅크를 사용하여 자연스러운 한국어 음성을 합성할 수 있습니다.
|
| 15 |
+
|
| 16 |
+
## ✨ 특징
|
| 17 |
+
|
| 18 |
+
- 🎹 **직관적인 피아노롤 인터페이스**: 웹 브라우저에서 바로 사용 가능한 피아노롤 편집기
|
| 19 |
+
- 🇰🇷 **한국어 음성 합성**: hanseol CVC 보이스뱅크를 사용한 고품질 한국어 음성 합성
|
| 20 |
+
- 🗜️ **HDF5 압축 시스템**: 47.7MB → 33.9MB (29% 압축), 46개 파일 → 1개 파일로 최적화
|
| 21 |
+
- 🚀 **자동화된 워크플로우**: Makefile을 통한 원클릭 설정, 압축, 실행
|
| 22 |
+
- 🎵 **실시간 편집**: 노트 추가, 삭제, 가사 입력이 실시간으로 가능
|
| 23 |
+
- 🔊 **웨이브폼 시각화**: 합성된 음성의 웨이브폼을 피아노롤에서 바로 확인
|
| 24 |
+
- 🎤 **CVC 음소 시스템**: 585개의 한국어 CVC 음소로 자연스러운 발음 구현
|
| 25 |
+
- ☁️ **클라우드 최적화**: Gradio 및 Hugging Face Spaces 환경에 최적화
|
| 26 |
+
|
| 27 |
+
## 🎤 보이스뱅크 정보
|
| 28 |
+
|
| 29 |
+
- **보이스뱅크**: hanseol CVC (HDF5 압축)
|
| 30 |
+
- **CV (Character Voice)**: KUNGOM
|
| 31 |
+
- **UTAU**: KITANE 백한설
|
| 32 |
+
- **음소 수**: 585개 CVC 음소
|
| 33 |
+
- **언어**: 한국어
|
| 34 |
+
- **압축률**: 29% (원본 47.7MB → 압축 33.9MB)
|
| 35 |
+
|
| 36 |
+
## 🚀 빠른 시작
|
| 37 |
+
|
| 38 |
+
### 필요 조건
|
| 39 |
+
|
| 40 |
+
- Python 3.12+
|
| 41 |
+
- [uv](https://docs.astral.sh/uv/) (Python 패키지 관리자)
|
| 42 |
+
- GNU Make (자동화 스크립트용)
|
| 43 |
+
|
| 44 |
+
### uv 설치
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
# macOS/Linux
|
| 48 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 49 |
+
|
| 50 |
+
# Windows
|
| 51 |
+
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
### 🎯 한 번에 설정하고 실행하기
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
# 저장소 클론
|
| 58 |
+
git clone <repository-url>
|
| 59 |
+
cd utau-webui
|
| 60 |
+
|
| 61 |
+
# 모든 설정과 실행을 한 번에
|
| 62 |
+
make all
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
### 📋 단계별 실행
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# 1. 개발 환경 설정
|
| 69 |
+
make setup
|
| 70 |
+
|
| 71 |
+
# 2. 보이스뱅크 압축 (최초 1회만)
|
| 72 |
+
make compress
|
| 73 |
+
|
| 74 |
+
# 3. 웹UI 실행
|
| 75 |
+
make run
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 🔍 현재 상태 확인
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
# 프로젝트 상태 확인
|
| 82 |
+
make status
|
| 83 |
+
|
| 84 |
+
# 의존성 및 환경 확인
|
| 85 |
+
make check-deps
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
## 📋 Makefile 명령어
|
| 89 |
+
|
| 90 |
+
| 명령어 | 설명 |
|
| 91 |
+
|--------|------|
|
| 92 |
+
| `make help` | 사용 가능한 모든 명령어 표시 |
|
| 93 |
+
| `make setup` | 개발 환경 초기 설정 (의존성 설치) |
|
| 94 |
+
| `make compress` | 보이스뱅크를 HDF5 형태로 압축 |
|
| 95 |
+
| `make run` | 웹UI 실행 |
|
| 96 |
+
| `make dev` | 개발 모드로 실행 (auto-reload) |
|
| 97 |
+
| `make test` | 압축된 보이스뱅크 테스트 |
|
| 98 |
+
| `make status` | 현재 프로젝트 상태 확인 |
|
| 99 |
+
| `make clean` | 임시 파일 및 캐시 정리 |
|
| 100 |
+
| `make all` | 전체 설정 및 실행 (setup → compress → run) |
|
| 101 |
+
|
| 102 |
+
## 🎼 사용법
|
| 103 |
+
|
| 104 |
+
1. **노트 추가**: 피아노롤에서 원하는 위치를 클릭하여 노트 추가
|
| 105 |
+
2. **가사 입력**: 노트를 더블클릭하여 한국어 가사 입력
|
| 106 |
+
3. **노트 편집**: 드래그하여 노트 길이 조정, 위아래로 드래그하여 음높이 조정
|
| 107 |
+
4. **음성 합성**: "🎵 음성 합성" 버튼 클릭하여 음성 생성
|
| 108 |
+
5. **재생**: 생성된 음성을 바로 들어보거나 다운로드
|
| 109 |
+
|
| 110 |
+
### 지원하는 한국어 음소
|
| 111 |
+
|
| 112 |
+
- **기본 모음**: 아, 이, 우, 에, 오, 으, 어
|
| 113 |
+
- **자음+모음 조합**: 바, 다, 가, 하, 자, 카, 라, 마, 나, 파, 사, 타 등
|
| 114 |
+
- **복합 모음**: 야, 예, 여, 요, 유, 의, 와, 웨, 위, 워
|
| 115 |
+
- **도레미 음계**: 도, 레, 미, 파, 솔, 라, 시
|
| 116 |
+
|
| 117 |
+
## 🗜️ HDF5 압축 시스템
|
| 118 |
+
|
| 119 |
+
### 장점
|
| 120 |
+
- **파일 관리 최적화**: 46개 WAV 파일 → 1개 HDF5 파일
|
| 121 |
+
- **용량 최적화**: 29% 압축 효율 (47.7MB → 33.9MB)
|
| 122 |
+
- **성능 향상**: 더 빠른 로딩 및 배포
|
| 123 |
+
- **클라우드 친화적**: Hugging Face Spaces 등 클라우드 환경에 최적화
|
| 124 |
+
|
| 125 |
+
### 압축 과정
|
| 126 |
+
```bash
|
| 127 |
+
# 자동 압축
|
| 128 |
+
make compress
|
| 129 |
+
|
| 130 |
+
# 수동 압축
|
| 131 |
+
uv run python -c "from voice_data_converter import convert_voicebank_to_compressed_format; convert_voicebank_to_compressed_format('voice/hanseol CVC')"
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
## 🛠️ 기술 스택
|
| 135 |
+
|
| 136 |
+
- **Frontend**: Gradio + Custom PianoRoll Component
|
| 137 |
+
- **Backend**: Python
|
| 138 |
+
- **음성 합성**: UTAU Engine + Straycat Resampler
|
| 139 |
+
- **데이터 압축**: HDF5 (with gzip compression)
|
| 140 |
+
- **오디오 처리**: SoundFile, NumPy
|
| 141 |
+
- **패키지 관리**: uv
|
| 142 |
+
- **자동화**: GNU Make
|
| 143 |
+
|
| 144 |
+
## 🔧 개발하기
|
| 145 |
+
|
| 146 |
+
### 개발 환경 설정
|
| 147 |
+
```bash
|
| 148 |
+
# 전체 개발 환경 설정
|
| 149 |
+
make setup
|
| 150 |
+
|
| 151 |
+
# 개발 모드로 실행 (파일 변경 시 자동 재시작)
|
| 152 |
+
make dev
|
| 153 |
+
|
| 154 |
+
# 쉘 진입
|
| 155 |
+
make shell
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
### 의존성 관리
|
| 159 |
+
```bash
|
| 160 |
+
# 의존성 추가
|
| 161 |
+
uv add <package-name>
|
| 162 |
+
|
| 163 |
+
# 개발 의존성 추가
|
| 164 |
+
uv add --dev <package-name>
|
| 165 |
+
|
| 166 |
+
# 의존성 업데이트
|
| 167 |
+
make update
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
### 프로젝트 정보
|
| 171 |
+
```bash
|
| 172 |
+
# 프로젝트 정보 확인
|
| 173 |
+
make info
|
| 174 |
+
|
| 175 |
+
# 의존성 트리 확인
|
| 176 |
+
uv tree
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
## 🚨 문제 해결
|
| 180 |
+
|
| 181 |
+
### 압축된 보이스뱅크가 없는 경우
|
| 182 |
+
```bash
|
| 183 |
+
# 상태 확인
|
| 184 |
+
make status
|
| 185 |
+
|
| 186 |
+
# 보이스뱅크 압축
|
| 187 |
+
make compress
|
| 188 |
+
```
|
| 189 |
+
|
| 190 |
+
### 원본 보이스뱅크가 없는 경우
|
| 191 |
+
1. hanseol CVC 보이스뱅크를 `voice/hanseol CVC` 디렉토리에 배치
|
| 192 |
+
2. `make compress` 실행
|
| 193 |
+
|
| 194 |
+
### 환경 문제
|
| 195 |
+
```bash
|
| 196 |
+
# 환경 확인
|
| 197 |
+
make check-deps
|
| 198 |
+
|
| 199 |
+
# 의존성 재설치
|
| 200 |
+
make setup
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
## 📝 저작권 및 라이선스
|
| 204 |
+
|
| 205 |
+
### UTAU WebUI
|
| 206 |
+
본 프로젝트는 오픈소스 소프트웨어입니다.
|
| 207 |
+
|
| 208 |
+
### Straycat Resampler
|
| 209 |
+
본 프로젝트에서 사용하는 UTAU 리샘플러는 [straycat](https://github.com/UtaUtaUtau/straycat)을 기반으로 합니다.
|
| 210 |
+
|
| 211 |
+
**원본 저장소**: https://github.com/UtaUtaUtau/straycat
|
| 212 |
+
**라이선스**: MIT License
|
| 213 |
+
**저작권**: Copyright (c) UtaUtaUtau
|
| 214 |
+
|
| 215 |
+
> Yet another WORLD-based UTAU resampler.
|
| 216 |
+
|
| 217 |
+
MIT 라이선스에 따라 본 프로젝트에 포함되었으며, 원본 저작권 표시를 유지합니다.
|
| 218 |
+
|
| 219 |
+
### hanseol CVC 보이스뱅크
|
| 220 |
+
- **CV**: KUNGOM
|
| 221 |
+
- **UTAU**: KITANE 백한설
|
| 222 |
+
|
| 223 |
+
해당 보이스뱅크의 사용 권한에 대해서는 원 저작권자의 이용 약관을 따릅니다.
|
app.py
ADDED
|
@@ -0,0 +1,650 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import gradio_pianoroll as grp
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
import numpy as np
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from straycat import Resampler
|
| 9 |
+
import logging
|
| 10 |
+
import json
|
| 11 |
+
import base64
|
| 12 |
+
import io
|
| 13 |
+
import wave
|
| 14 |
+
from compressed_utau_engine import CompressedUTAUEngine
|
| 15 |
+
import os
|
| 16 |
+
|
| 17 |
+
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Only the compressed voicebank is used
utau_engine = None
USE_UTAU = False

# Path of the compressed hanseol CVC voicebank
compressed_hanseol_path = "voice/hanseol_CVC_compressed.h5"

# Load the compressed voicebank (required for synthesis)
if not os.path.exists(compressed_hanseol_path):
    # The file is missing: log it and print recovery instructions.
    logger.error(f"❌ 압축된 보이스뱅크 파일을 찾을 수 없음: {compressed_hanseol_path}")
    print(
        f"\n{'='*60}",
        "🚨 압축된 보이스뱅크 파일이 없습니다!",
        f"예상 위치: {compressed_hanseol_path}",
        "\n📋 해결 방법:",
        "1. 원본 보이스뱅크가 있다면 압축하세요:",
        " make compress",
        "2. 또는 수동으로 실행:",
        " uv run python -c \"from voice_data_converter import convert_voicebank_to_compressed_format; convert_voicebank_to_compressed_format('voice/hanseol CVC')\"",
        "\n3. 보이스뱅크 다운로드가 필요한 경우:",
        " - hanseol CVC 보이스뱅크를 voice/ 디렉토리에 배치",
        " - 그 후 위의 압축 명령어 실행",
        f"{'='*60}\n",
        sep="\n",
    )
    USE_UTAU = False
else:
    try:
        utau_engine = CompressedUTAUEngine(compressed_hanseol_path)
        USE_UTAU = True
        available_phonemes = utau_engine.get_available_phonemes()
        compression_info = utau_engine.get_compression_info()
        logger.info(f"✅ 압축된 hanseol CVC 보이스뱅크 로드 완료: {len(available_phonemes)}개 음소")
        logger.info(f"📊 압축율: {compression_info.get('compression_ratio', 0):.1f}%")
        logger.info(f"💾 압축된 파일 크기: {compression_info.get('compressed_size_bytes', 0) / (1024*1024):.1f} MB")
    except Exception as load_error:
        # The file exists but could not be loaded: report it and disable synthesis.
        logger.error(f"❌ 압축된 보이스뱅크 로드 실패: {load_error}")
        print(
            f"\n{'='*60}",
            "🚨 압축된 보이스뱅크 로드 실패!",
            f"파일 경로: {compressed_hanseol_path}",
            f"오류: {load_error}",
            "\n📋 해결 방법:",
            "1. 다음 명령어로 보이스뱅크를 압축하세요:",
            " make compress",
            "2. 또는 수동으로 실행:",
            " uv run python voice_data_converter.py",
            f"{'='*60}\n",
            sep="\n",
        )
        USE_UTAU = False

# Without a usable voicebank, continue in limited mode with an empty phoneme list
if not USE_UTAU:
    available_phonemes = []
    logger.warning("⚠️ 압축된 보이스뱅크 없이 제한된 모드로 실행됩니다.")
    logger.warning("⚠️ 음성 합성 기능을 사용하려면 먼저 보이스뱅크를 압축하세요.")
|
| 73 |
+
|
| 74 |
+
def audio_to_base64_wav(audio_data, sample_rate):
    """Encode audio samples as a base64 ``data:audio/wav`` URI.

    The samples are peak-normalized to the [-1, 1] range, quantized to
    16-bit little-endian PCM and wrapped in an in-memory WAV container.

    Args:
        audio_data: Audio samples as any array-like. A 1-D input is written
            as mono; a 2-D ``(frames, channels)`` input is written with one
            WAV channel per column instead of being mislabeled as mono.
        sample_rate: Frame rate written to the WAV header.

    Returns:
        A ``"data:audio/wav;base64,..."`` string, or ``None`` when
        ``audio_data`` is ``None`` or empty.
    """
    if audio_data is None or len(audio_data) == 0:
        return None

    # Accept lists/tuples as well as arrays; integer input is promoted so the
    # normalization below is done in floating point.
    samples = np.asarray(audio_data)
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)

    # Normalize audio data to [-1, 1] range (silent input is left untouched)
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples = samples / peak

    # Convert to 16-bit PCM; WAV data is little-endian regardless of platform
    pcm = (samples * 32767).astype("<i2")
    channel_count = pcm.shape[1] if pcm.ndim > 1 else 1

    # Create WAV file in memory
    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setnchannels(channel_count)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setframerate(sample_rate)
        # C-order bytes of a (frames, channels) array are already interleaved
        wav_file.writeframes(pcm.tobytes())

    # base64 encoding
    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f"data:audio/wav;base64,{encoded}"
|
| 100 |
+
|
| 101 |
+
def calculate_waveform_data(audio_data, pixels_per_beat, tempo, target_width=1000,
                            sample_rate=44100):
    """Calculate per-pixel min/max waveform points from audio data.

    Args:
        audio_data: Sequence of audio samples (indexable and sliceable).
        pixels_per_beat: Horizontal pixels per beat in the piano roll.
        tempo: Tempo in beats per minute.
        target_width: Unused; kept so existing callers keep working.
        sample_rate: Sample rate of ``audio_data`` in Hz. Defaults to 44100,
            the value that was previously hard-coded.

    Returns:
        A list of ``{'x': pixel, 'min': float, 'max': float}`` dicts, one per
        pixel column that contains at least one sample, or ``None`` when there
        is no audio or the tempo / pixels-per-beat yield no positive width.
    """
    if audio_data is None or len(audio_data) == 0:
        return None

    sample_count = len(audio_data)

    # Calculate total audio duration (seconds)
    audio_duration = sample_count / sample_rate

    # Calculate total pixel length (based on tempo and pixels per beat)
    total_pixels = (tempo / 60) * pixels_per_beat * audio_duration
    if not total_pixels > 0:
        # A zero/negative tempo or pixels_per_beat would otherwise divide by zero
        return None

    # Calculate samples per pixel
    samples_per_pixel = sample_count / total_pixels

    waveform_points = []

    # Calculate min/max values for each pixel
    for pixel in range(int(total_pixels)):
        start_sample = int(pixel * samples_per_pixel)
        if start_sample >= sample_count:
            break
        end_sample = min(int((pixel + 1) * samples_per_pixel), sample_count)

        # Columns narrower than one sample contain no data and are skipped
        if start_sample < end_sample:
            pixel_data = audio_data[start_sample:end_sample]
            waveform_points.append({
                'x': pixel,  # time position expressed as a pixel index
                'min': float(np.min(pixel_data)),
                'max': float(np.max(pixel_data)),
            })

    return waveform_points


def add_waveform_to_pianoroll(pianoroll_data, audio_data, sample_rate, tempo):
    """Return a copy of the piano roll data with audio and waveform info attached.

    Per the original author's notes, this mirrors the approach used in
    demo/app.py.

    Args:
        pianoroll_data: Piano roll dict (may be ``None`` or empty). It is
            shallow-copied; the input dict itself is not modified.
        audio_data: Synthesized audio samples.
        sample_rate: Sample rate of ``audio_data``; used for both the embedded
            WAV and the waveform pixel mapping.
        tempo: Tempo in beats per minute.

    Returns:
        The updated dict with ``audio_data`` (base64 WAV data URI),
        ``use_backend_audio``, and, when available, ``curve_data`` and
        ``segment_data`` keys.
    """
    updated_pianoroll = pianoroll_data.copy() if pianoroll_data else {}

    # Add backend audio data
    audio_base64 = audio_to_base64_wav(audio_data, sample_rate)
    updated_pianoroll['audio_data'] = audio_base64
    updated_pianoroll['use_backend_audio'] = True

    # Get pixels per beat from pianoroll data
    pixels_per_beat = updated_pianoroll.get('pixelsPerBeat', 80)

    # Calculate waveform data using the real sample rate of the audio
    waveform_data = calculate_waveform_data(
        audio_data, pixels_per_beat, tempo, sample_rate=sample_rate
    )

    curve_data = {}

    # Add waveform data to curve_data
    if waveform_data:
        curve_data['waveform_data'] = waveform_data
        print(f"Waveform data created: {len(waveform_data)} points")

    # Set curve data for piano roll
    if curve_data:
        updated_pianoroll['curve_data'] = curve_data

    # Add one segment per note
    if 'notes' in updated_pianoroll and updated_pianoroll['notes']:
        segment_data = []

        for i, note in enumerate(updated_pianoroll['notes']):
            start_seconds = note.get('startSeconds', 0)
            duration_seconds = note.get('durationSeconds', 0.5)

            segment_data.append({
                'start': start_seconds,
                'end': start_seconds + duration_seconds,
                'type': 'note',
                'value': note.get('lyric', f"Note_{i+1}"),
                'confidence': 0.95
            })

        updated_pianoroll['segment_data'] = segment_data

    # Detailed debugging output
    print(f"🔊 [add_waveform_to_pianoroll] Setting backend audio data:")
    print(f" - audio_data length: {len(audio_base64) if audio_base64 else 0}")
    print(f" - use_backend_audio: {updated_pianoroll['use_backend_audio']}")
    print(f" - waveform points: {len(waveform_data) if waveform_data else 0}")
    print(f" - Updated pianoroll keys: {list(updated_pianoroll.keys())}")

    return updated_pianoroll
|
| 201 |
+
|
| 202 |
+
def create_test_voice_sample():
    """Return the path of the synthetic test voice sample, creating it if missing.

    The ``voice`` directory is created when absent. If ``voice/test_voice.wav``
    does not exist yet, a one-second 440 Hz tone with three overtones is
    shaped by an ADSR envelope, band-pass filtered (300-3000 Hz), mixed with a
    little Gaussian noise, normalized to a peak of 0.8 and written to disk.

    Returns:
        ``Path`` of ``voice/test_voice.wav``.
    """
    out_dir = Path("voice")
    out_dir.mkdir(exist_ok=True)
    sample_path = out_dir / "test_voice.wav"

    # Nothing to do when the sample has already been generated
    if sample_path.exists():
        return sample_path

    from scipy import signal as scipy_signal

    duration = 1.0  # seconds
    sample_rate = 44100
    t = np.linspace(0, duration, int(sample_rate * duration), False)

    # Fundamental frequency (A4) plus weaker overtones for a less sterile tone
    fundamental = 440.0
    partials = [
        np.sin(2 * np.pi * fundamental * t) * 0.5,
        np.sin(2 * np.pi * fundamental * 2 * t) * 0.2,
        np.sin(2 * np.pi * fundamental * 3 * t) * 0.1,
        np.sin(2 * np.pi * fundamental * 4 * t) * 0.05,
    ]
    tone = sum(partials[1:], partials[0])

    # ADSR envelope: the four segments together cover the whole tone
    attack = int(0.05 * sample_rate)
    decay = int(0.1 * sample_rate)
    sustain_level = 0.7
    release = int(0.2 * sample_rate)
    sustain = len(tone) - attack - decay - release
    envelope = np.concatenate([
        np.linspace(0, 1, attack),
        np.linspace(1, sustain_level, decay),
        np.full(sustain, sustain_level),
        np.linspace(sustain_level, 0, release),
    ])
    tone = tone * envelope

    # Simple band-pass filter imitating the frequency range of a voice
    b, a = scipy_signal.butter(2, [300, 3000], btype='band', fs=sample_rate)
    tone = scipy_signal.filtfilt(b, a, tone)

    # A little noise makes the result sound less synthetic
    tone = tone + np.random.normal(0, 0.02, len(tone))

    # Normalize to a peak of 0.8
    tone = tone / np.max(np.abs(tone)) * 0.8

    sf.write(sample_path, tone, sample_rate)
    logging.info(f"테스트 음성 샘플 생성: {sample_path}")

    return sample_path
|
| 257 |
+
|
| 258 |
+
def midi_to_note_name(midi_note):
    """Convert a MIDI note number to a note name such as ``"C4"``.

    Args:
        midi_note: Integer MIDI note number.

    Returns:
        The pitch-class name (sharps only) followed by the octave number,
        where octave = ``midi_note // 12 - 1``.
    """
    pitch_names = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    octave_index, pitch_class = divmod(midi_note, 12)
    return f"{pitch_names[pitch_class]}{octave_index - 1}"
|
| 264 |
+
|
| 265 |
+
def _mix_note_audio(buffer, clip, start_sample, headroom):
    """Add ``clip`` into ``buffer`` at ``start_sample`` and return the buffer to keep using.

    If the clip would run past the end of ``buffer``, a larger zero-filled
    copy (clip end plus ``headroom`` samples) is built and returned instead;
    the buffer passed in is left untouched in that case.
    """
    end_sample = start_sample + len(clip)
    if end_sample > len(buffer):
        grown = np.zeros(end_sample + headroom)
        grown[:len(buffer)] = buffer
        buffer = grown
    buffer[start_sample:end_sample] += clip
    return buffer


def _compress_and_normalize(audio, threshold=0.7, ratio=4.0, peak=0.85):
    """Apply a simple hard-knee compressor above ``threshold`` and scale the peak to ``peak``.

    Audio whose absolute maximum is not greater than zero is returned unchanged.
    """
    abs_audio = np.abs(audio)
    if not np.max(abs_audio) > 0:
        return audio

    over_threshold = abs_audio > threshold
    compressed = audio.copy()
    # Samples above the threshold keep their sign; only the excess is divided by the ratio.
    compressed[over_threshold] = (
        np.sign(audio[over_threshold]) *
        (threshold + (abs_audio[over_threshold] - threshold) / ratio)
    )
    return compressed / np.max(np.abs(compressed)) * peak


def synthesize_notes(pianoroll_data, use_utau_engine):
    """Synthesize the piano-roll notes to audio and attach the waveform to the piano roll.

    Args:
        pianoroll_data: Piano-roll value dict. ``notes`` and ``tempo`` are read
            from it. Each note must provide ``pitch`` and may provide
            ``lyric``, ``velocity``, ``startSeconds``, ``durationSeconds`` and
            ``endSeconds``.
        use_utau_engine: Label of the selected engine. The UTAU engine is used
            when the label is a string starting with ``"UTAU 엔진"`` and the
            engine is available; any other value selects the basic engine.

    Returns:
        A ``(pianoroll, audio_path, status)`` tuple. ``audio_path`` is ``None``
        when nothing was synthesized; in that case ``pianoroll`` is the
        unmodified input.
    """
    if not pianoroll_data or not pianoroll_data.get('notes'):
        return pianoroll_data, None, "노트가 없습니다. 피아노롤에 노트를 추가하세요."

    try:
        notes = pianoroll_data['notes']

        # Default synthesis settings
        velocity_setting = 100
        volume_setting = 100
        use_vibrato = False
        vibrato_depth = 20

        # Tempo comes from the piano roll (default: 120)
        tempo = pianoroll_data.get('tempo', 120)
        logging.info(f"합성할 노트 수: {len(notes)}, 템포: {tempo} BPM (피아노롤에서 가져옴)")

        # One lyric per note; notes without a lyric fall back to the default phoneme
        lyrics = [note.get('lyric', '').strip() or "あ" for note in notes]
        logging.info(f"추출된 가사: {lyrics}")

        # A non-string label (e.g. None) selects the basic engine instead of
        # failing on .startswith and aborting the whole synthesis.
        use_utau = isinstance(use_utau_engine, str) and use_utau_engine.startswith("UTAU 엔진")
        if use_utau and USE_UTAU and utau_engine:
            audio_file, status = utau_engine.synthesize_sequence(
                notes=notes,
                lyrics=lyrics,
                tempo=tempo,
                volume=volume_setting
            )
            if not audio_file:
                return pianoroll_data, None, status

            # Load the rendered audio and attach its waveform to the piano roll
            audio_data, sample_rate = sf.read(audio_file)
            updated_pianoroll = add_waveform_to_pianoroll(
                pianoroll_data, audio_data, sample_rate, tempo
            )
            return updated_pianoroll, audio_file, status

        # Basic engine: resample the generated test voice for every note
        voice_sample = create_test_voice_sample()

        # Total length is taken from the note that ends last (seconds -> milliseconds)
        max_end_time_seconds = max(
            note.get('endSeconds', note.get('startSeconds', 0) + note.get('durationSeconds', 0.5))
            for note in notes
        )
        max_end_time = max_end_time_seconds * 1000
        sample_rate = 44100
        total_samples = int(max_end_time * sample_rate / 1000) + sample_rate  # one second of slack

        final_audio = np.zeros(total_samples)

        # Resampler flags do not depend on the note, so build them once
        flags = f'A{int(vibrato_depth)}' if use_vibrato else ''

        # Render each note on its own and mix it into the final buffer
        for i, note in enumerate(notes):
            try:
                pitch = note['pitch']
                start_seconds = note.get('startSeconds', 0)
                start_ms = start_seconds * 1000
                duration_ms = note.get('durationSeconds', 0.5) * 1000
                velocity = note.get('velocity', velocity_setting)

                note_name = midi_to_note_name(pitch)
                logging.info(f"노트 {i+1}: {note_name} (MIDI {pitch}), 시작: {start_seconds}s ({start_ms}ms), 길이: {duration_ms}ms")

                # Reserve a temporary output path for the resampler
                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
                    temp_output = temp_file.name

                try:
                    # Constructing the straycat Resampler is what renders the note to out_file
                    Resampler(
                        in_file=str(voice_sample),
                        out_file=temp_output,
                        pitch=note_name,
                        velocity=velocity,
                        length=max(duration_ms, 200),  # at least 200 ms
                        volume=volume_setting,
                        flags=flags,
                        offset=0,
                        consonant=20,
                        cutoff=0,
                        modulation=10,
                        tempo=f'!{int(tempo)}'
                    )

                    if os.path.exists(temp_output):
                        synth_audio, _ = sf.read(temp_output)

                        # Place the rendered note at its start position
                        start_sample = int(start_ms * sample_rate / 1000)
                        final_audio = _mix_note_audio(
                            final_audio,
                            synth_audio * (velocity / 100),
                            start_sample,
                            sample_rate,
                        )

                        logging.info(f"노트 {i+1} 합성 완료")

                except Exception as e:
                    # Best effort: a failed note is logged and skipped
                    logging.error(f"노트 {i+1} 합성 실패: {e}")
                    continue

                finally:
                    # Always remove the per-note temporary file
                    if os.path.exists(temp_output):
                        os.unlink(temp_output)

            except Exception as e:
                logging.error(f"노트 {i+1} 처리 실패: {e}")
                continue

        # Simple compressor plus peak normalisation on the final mix
        final_audio = _compress_and_normalize(final_audio)

        # Close the handle before writing so it cannot leak if the write fails
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as output_file:
            output_path = output_file.name
        sf.write(output_path, final_audio, sample_rate)

        # Attach the waveform of the final mix to the piano roll
        updated_pianoroll = add_waveform_to_pianoroll(
            pianoroll_data, final_audio, sample_rate, tempo
        )

        duration_sec = len(final_audio) / sample_rate
        logging.info(f"합성 완료: {len(notes)}개 노트, 총 길이: {duration_sec:.2f}초, 템포: {tempo} BPM")
        return updated_pianoroll, output_path, f"✅ 합성 완료: {len(notes)}개 노트, {duration_sec:.1f}초, 템포: {tempo} BPM"

    except Exception as e:
        error_msg = f"❌ 합성 중 오류 발생: {str(e)}"
        logging.error(error_msg)
        return pianoroll_data, None, error_msg
|
| 438 |
+
|
| 439 |
+
def create_example_melody():
    """Build the example melody payload (same layout as demo/app.py).

    Returns a complete piano-roll value holding four consecutive half-second
    notes, each 160 units long in ``start``/``duration``, at tempo 120.
    """
    # (MIDI pitch, lyric) per step: C4, D4, E4, G4
    steps = [(60, "도"), (62, "레"), (64, "미"), (67, "솔")]
    units_per_note = 160
    seconds_per_note = 0.5

    notes = []
    for index, (pitch, lyric) in enumerate(steps):
        begin_seconds = index * seconds_per_note
        notes.append({
            "id": f"note_{index}",
            "start": index * units_per_note,
            "duration": units_per_note,
            "pitch": pitch,
            "velocity": 100,
            "lyric": lyric,
            "startSeconds": begin_seconds,
            "durationSeconds": seconds_per_note,
            "endSeconds": begin_seconds + seconds_per_note,
        })

    # Full initial-value structure, matching demo/app.py
    initial_value = {
        "notes": notes,
        "tempo": 120,
        "timeSignature": {"numerator": 4, "denominator": 4},
        "editMode": "select",
        "snapSetting": "1/4",
        "pixelsPerBeat": 80,
        "curve_data": {},
        "use_backend_audio": True,
    }

    print("🎼 예제 멜로디 생성됨")
    return initial_value
|
| 503 |
+
|
| 504 |
+
# Gradio 인터페이스
|
| 505 |
+
with gr.Blocks(title="UTAU WebUI", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 UTAU WebUI - Gradio로 구현된 UTAU 음성 합성기")
    gr.Markdown("피아노롤에서 노트를 그리고 '음성 합성' 버튼을 클릭하여 음성을 생성하세요.")
    gr.Markdown("## 🎤 주의 사항")
    gr.Markdown("""
- 유닛 선택 알고리즘이 CVC 음원에 최적화 되지 않아서 발음 오류가 발생할 수 있습니다.
- 일부 UI 오류가 있으며 현재 해결 중 입니다.
- 템포가 적용되지 않는 문제가 있습니다. 120bpm을 유지하면서 사용해주세요.""")

    # The UTAU engine is offered only when it is enabled AND actually loaded;
    # every UTAU-specific section below uses this same condition.
    utau_available = bool(USE_UTAU and utau_engine)
    utau_engine_label = "UTAU 엔진 (hanseol CVC)"

    with gr.Row():
        with gr.Column(scale=3):
            # Initial piano-roll value, same structure as demo/app.py
            initial_pianoroll_value = {
                "notes": [],
                "tempo": 120,
                "timeSignature": {"numerator": 4, "denominator": 4},
                "editMode": "select",
                "snapSetting": "1/4",
                "pixelsPerBeat": 80,
                "curve_data": {},
                "use_backend_audio": True
            }

            pianoroll = grp.PianoRoll(
                width=1000,
                height=800,
                label="피아노롤 편집기",
                value=initial_pianoroll_value,
                elem_id="piano_roll_utau",  # unique element id
                use_backend_audio=True
            )

            with gr.Row():
                clear_btn = gr.Button("🗑️ 초기화", size="sm")
                example_btn = gr.Button("🎼 예제 멜로디", size="sm", variant="secondary")
                info_text = gr.Markdown("**사용법:** 클릭하여 노트 추가, 드래그하여 길이 조정, 더블클릭하여 가사 입력")

        with gr.Column(scale=1):
            # Engine selection
            gr.Markdown("### 🎤 음성 엔진")
            with gr.Group():
                # Build the engine choices from what is actually available
                engine_choices = []
                if utau_available:
                    engine_choices.append(utau_engine_label)
                engine_choices.append("기본 엔진")

                engine_radio = gr.Radio(
                    choices=engine_choices,
                    value=utau_engine_label if utau_available else "기본 엔진",
                    label="합성 엔진",
                    info="UTAU 엔진은 실제 보이스뱅크 사용"
                )

                # Lyrics are typed directly on the piano-roll notes
                gr.Markdown("**가사 입력**: 피아노롤에서 노트를 더블클릭하여 가사를 입력하세요.")

            synthesis_btn = gr.Button("🎵 음성 합성", variant="primary", size="lg")
            status_text = gr.Textbox(
                label="합성 상태",
                value="노트를 추가하고 합성 버튼을 클릭하세요.",
                interactive=False,
                lines=2
            )

            audio_output = gr.Audio(
                label="합성된 음성",
                visible=True
            )

            gr.Markdown("### 📊 보이스뱅크 정보")
            # Guard on the engine object too: this branch dereferences utau_engine.
            if utau_available:
                compression_info = utau_engine.get_compression_info()
                gr.Markdown(f"""
- **보이스뱅크:** hanseol CVC (압축된 HDF5 🗜️)
- **CV:** KUNGOM
- **UTAU:** KITANE 백한설
- **사용 가능한 음소:** {len(utau_engine.get_available_phonemes())}개
- **압축율:** {compression_info.get('compression_ratio', 0):.1f}%
- **용량:** {compression_info.get('compressed_size_bytes', 0) / (1024*1024):.1f} MB""")
            else:
                gr.Markdown("""
- **보이스뱅크:** ❌ 압축된 보이스뱅크 없음
- **상태:** 제한된 모드로 실행 중
- **해결책:** `make compress` 명령어로 보이스뱅크를 먼저 압축하세요.""")

    # Event handlers
    synthesis_btn.click(
        fn=synthesize_notes,
        inputs=[
            pianoroll,
            engine_radio
        ],
        outputs=[pianoroll, audio_output, status_text]
    )

    def clear_pianoroll():
        """Return a fresh, empty piano-roll value (same structure as demo/app.py)."""
        initial_data = {
            "notes": [],
            "tempo": 120,
            "timeSignature": {"numerator": 4, "denominator": 4},
            "editMode": "select",
            "snapSetting": "1/4",
            "pixelsPerBeat": 80,
            "curve_data": {},
            "use_backend_audio": True
        }
        print("🗑️ 피아노롤 초기화됨")
        return initial_data

    clear_btn.click(
        fn=clear_pianoroll,
        outputs=[pianoroll]
    )

    example_btn.click(
        fn=create_example_melody,
        outputs=[pianoroll]
    )

    # Initial setup: make sure the test voice sample exists
    create_test_voice_sample()

    # Playhead event handlers: print the event and report it in the status box
    def log_play_event(event_data=None):
        """Report a play event as a status message."""
        print("🔊 Play event triggered:", event_data)
        return f"재생 시작: {event_data if event_data else '재생 중'}"

    def log_pause_event(event_data=None):
        """Report a pause event as a status message."""
        print("🔊 Pause event triggered:", event_data)
        return f"일시정지: {event_data if event_data else '일시정지됨'}"

    def log_stop_event(event_data=None):
        """Report a stop event as a status message."""
        print("🔊 Stop event triggered:", event_data)
        return f"정지: {event_data if event_data else '정지됨'}"

    # Wire the playhead events to the status box
    pianoroll.play(log_play_event, outputs=status_text)
    pianoroll.pause(log_pause_event, outputs=status_text)
    pianoroll.stop(log_stop_event, outputs=status_text)


if __name__ == "__main__":
    demo.launch()
|
compressed_utau_engine.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tempfile
|
| 2 |
+
import numpy as np
|
| 3 |
+
import soundfile as sf
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Optional, Tuple, Union
|
| 6 |
+
import logging
|
| 7 |
+
from straycat import Resampler
|
| 8 |
+
from voice_data_converter import CompressedVoicebankManager, OtoEntry
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
class CompressedUTAUEngine:
    """UTAU-compatible singing synthesis engine backed by a compressed HDF5 voicebank."""

    # Simple Hangul syllable -> voicebank alias mapping. Built once at class
    # creation instead of on every lookup; the solfège syllables are included.
    _HANGUL_TO_PHONEME = {
        '가': 'ka', '나': 'na', '다': 'da', '라': 'ra', '마': 'ma',
        '바': 'ba', '사': 'sa', '아': 'a', '자': 'za', '차': 'cha',
        '카': 'ka', '타': 'ta', '파': 'pa', '하': 'ha',
        '거': 'ke', '너': 'ne', '더': 'de', '러': 're', '머': 'me',
        '버': 'be', '서': 'se', '어': 'e', '저': 'ze', '처': 'che',
        '커': 'ke', '터': 'te', '퍼': 'pe', '허': 'he',
        '고': 'ko', '노': 'no', '도': 'do', '로': 'ro', '모': 'mo',
        '보': 'bo', '소': 'so', '오': 'o', '조': 'zo', '초': 'cho',
        '코': 'ko', '토': 'to', '포': 'po', '호': 'ho',
        '구': 'ku', '누': 'nu', '두': 'du', '루': 'ru', '무': 'mu',
        '부': 'bu', '수': 'su', '우': 'u', '주': 'zu', '추': 'chu',
        '쿠': 'ku', '투': 'tu', '푸': 'pu', '후': 'hu',
        '기': 'ki', '니': 'ni', '디': 'di', '리': 'ri', '미': 'mi',
        '비': 'bi', '시': 'si', '이': 'i', '지': 'zi', '치': 'chi',
        '키': 'ki', '티': 'ti', '피': 'pi', '히': 'hi',
        '레': 're', '솔': 'so',
    }

    def __init__(self, compressed_voicebank_path: Union[str, Path]):
        """Open the compressed voicebank at ``compressed_voicebank_path``."""
        self.voicebank = CompressedVoicebankManager(compressed_voicebank_path)
        self.default_phoneme = "あ"  # fallback phoneme for empty or unknown lyrics
        logger.info("압축된 UTAU 엔진 초기화 완료")

    @staticmethod
    def _mix_into(buffer: np.ndarray, clip: np.ndarray, start_sample: int, headroom: int) -> np.ndarray:
        """Add ``clip`` into ``buffer`` at ``start_sample`` and return the buffer to keep using.

        If the clip would run past the end, a larger zero-filled copy (clip
        end plus ``headroom`` samples) is built and returned instead; the
        buffer passed in is left untouched in that case.
        """
        end_sample = start_sample + len(clip)
        if end_sample > len(buffer):
            grown = np.zeros(end_sample + headroom)
            grown[:len(buffer)] = buffer
            buffer = grown
        buffer[start_sample:end_sample] += clip
        return buffer

    def synthesize_sequence(self,
                            notes: List[Dict],
                            lyrics: List[str],
                            tempo: int = 120,
                            volume: int = 100) -> Tuple[Optional[str], str]:
        """Synthesize a note sequence with its lyrics into a temporary WAV file.

        Args:
            notes: Note dicts. ``pitch`` is required; ``startSeconds``,
                ``durationSeconds``, ``endSeconds`` and ``velocity`` are optional.
            lyrics: One lyric per note, in the same order as ``notes``.
            tempo: Tempo handed to the resampler.
            volume: Volume handed to the resampler.

        Returns:
            ``(path, status)``. ``path`` is ``None`` when the inputs are
            invalid or the synthesis as a whole failed. Individual notes that
            fail are logged and skipped.
        """
        if len(notes) != len(lyrics):
            return None, "노트와 가사의 개수가 일치하지 않습니다."

        if not notes:
            return None, "합성할 노트가 없습니다."

        try:
            # Total length is taken from the note that ends last
            max_end_time = max(
                note.get('endSeconds', note.get('startSeconds', 0) + note.get('durationSeconds', 0.5))
                for note in notes
            )

            sample_rate = 44100
            total_samples = int(max_end_time * sample_rate) + sample_rate  # one second of slack
            final_audio = np.zeros(total_samples)

            for i, (note, lyric) in enumerate(zip(notes, lyrics)):
                try:
                    phoneme = self._lyric_to_phoneme(lyric)

                    # Look up the oto entry for this phoneme
                    oto_entry = self.voicebank.get_sample_for_phoneme(phoneme)
                    if not oto_entry:
                        logger.warning(f"음소 '{phoneme}'에 해당하는 샘플을 찾을 수 없음")
                        continue

                    # Load the source audio from the compressed voicebank
                    audio_result = self.voicebank.get_audio_data(oto_entry.filename)
                    if not audio_result:
                        logger.warning(f"오디오 파일 로드 실패: {oto_entry.filename}")
                        continue

                    source_audio, source_sample_rate = audio_result

                    synth_audio = self._synthesize_note(
                        note, oto_entry, source_audio, source_sample_rate, tempo, volume
                    )

                    if synth_audio is not None:
                        # Place the rendered note at its start position, scaled by velocity
                        start_sample = int(note.get('startSeconds', 0) * sample_rate)
                        final_audio = self._mix_into(
                            final_audio,
                            synth_audio * (note.get('velocity', 100) / 100),
                            start_sample,
                            sample_rate,
                        )

                        logger.info(f"노트 {i+1} 합성 완료: {phoneme}")

                except Exception as e:
                    # Best effort: a failed note is logged and skipped
                    logger.error(f"노트 {i+1} 합성 실패: {e}")
                    continue

            # Peak-normalise the final mix
            peak = np.max(np.abs(final_audio))
            if peak > 0:
                final_audio = final_audio / peak * 0.85

            # Close the handle before writing so it cannot leak if the write fails
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as output_file:
                output_path = output_file.name
            sf.write(output_path, final_audio, sample_rate)

            duration_sec = len(final_audio) / sample_rate
            return output_path, f"✅ 압축된 보이스뱅크로 합성 완료: {len(notes)}개 노트, {duration_sec:.1f}초"

        except Exception as e:
            logger.error(f"시퀀스 합성 실패: {e}")
            return None, f"❌ 합성 실패: {str(e)}"

    def _lyric_to_phoneme(self, lyric: str) -> str:
        """Map a lyric to a phoneme alias.

        Known Hangul syllables are translated through the mapping table. Any
        other lyric is returned as-is when it is a key of the voicebank's
        ``oto_entries``; otherwise the default phoneme is returned.
        """
        lyric = lyric.strip()
        if not lyric:
            return self.default_phoneme

        mapped = self._HANGUL_TO_PHONEME.get(lyric)
        if mapped is not None:
            return mapped

        return lyric if lyric in self.voicebank.oto_entries else self.default_phoneme

    def _synthesize_note(self,
                         note: Dict,
                         oto_entry: "OtoEntry",
                         source_audio: np.ndarray,
                         source_sample_rate: int,
                         tempo: int,
                         volume: int) -> Optional[np.ndarray]:
        """Render a single note with the straycat resampler.

        The source audio is written to a temporary WAV file, resampled into a
        second temporary file, and read back. Both temporary files are removed
        on every exit path.

        Returns:
            The rendered audio, or ``None`` when rendering failed.
        """
        temp_input_name = None
        temp_output_name = None
        try:
            # Write the source audio to a temporary input file
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_input:
                temp_input_name = temp_input.name
            sf.write(temp_input_name, source_audio, source_sample_rate)

            # Reserve a temporary output path for the resampler
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output:
                temp_output_name = temp_output.name

            pitch = note['pitch']
            duration_ms = note.get('durationSeconds', 0.5) * 1000
            velocity = note.get('velocity', 100)

            note_name = self._midi_to_note_name(pitch)

            # Constructing the straycat Resampler is what renders the note to out_file
            Resampler(
                in_file=temp_input_name,
                out_file=temp_output_name,
                pitch=note_name,
                velocity=velocity,
                length=max(duration_ms, 200),  # at least 200 ms
                volume=volume,
                offset=oto_entry.offset,
                consonant=oto_entry.consonant,
                cutoff=oto_entry.cutoff,
                modulation=10,
                tempo=f'!{tempo}'
            )

            if not Path(temp_output_name).exists():
                logger.error(f"합성된 파일이 생성되지 않음: {temp_output_name}")
                return None

            synth_audio, _ = sf.read(temp_output_name)
            return synth_audio

        except Exception as e:
            logger.error(f"노트 합성 실패: {e}")
            return None

        finally:
            # Clean up on every path, including failures, so temp files never pile up
            for name in (temp_input_name, temp_output_name):
                if name is not None:
                    Path(name).unlink(missing_ok=True)

    def _midi_to_note_name(self, midi_note: int) -> str:
        """Convert a MIDI note number to a note name such as ``C4`` (MIDI 60)."""
        notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
        octave = (midi_note // 12) - 1
        note = notes[midi_note % 12]
        return f"{note}{octave}"

    def get_available_phonemes(self) -> List[str]:
        """Return the phonemes the voicebank reports as available."""
        return self.voicebank.list_available_phonemes()

    def get_compression_info(self) -> Dict[str, object]:
        """Return the voicebank's compression information."""
        return self.voicebank.get_compression_info()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "utau-webui"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "한국어 CVC 보이스뱅크를 사용한 웹 기반 UTAU 음성 합성기"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
authors = [
|
| 8 |
+
{ name = "UTAU WebUI Team" }
|
| 9 |
+
]
|
| 10 |
+
keywords = ["utau", "voice-synthesis", "korean", "music", "audio"]
|
| 11 |
+
classifiers = [
|
| 12 |
+
"Development Status :: 3 - Alpha",
|
| 13 |
+
"Intended Audience :: End Users/Desktop",
|
| 14 |
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
| 15 |
+
"License :: OSI Approved :: MIT License",
|
| 16 |
+
"Programming Language :: Python :: 3",
|
| 17 |
+
"Programming Language :: Python :: 3.12",
|
| 18 |
+
]
|
| 19 |
+
dependencies = [
|
| 20 |
+
"gradio>=5.33.1",
|
| 21 |
+
"gradio-pianoroll>=0.0.8",
|
| 22 |
+
"h5py>=3.10.0",
|
| 23 |
+
"librosa>=0.11.0",
|
| 24 |
+
"llvmlite>=0.44.0",
|
| 25 |
+
"numba>=0.61.2",
|
| 26 |
+
"numpy>=2.2.0",
|
| 27 |
+
"pyworld>=0.3.5",
|
| 28 |
+
"resampy>=0.4.3",
|
| 29 |
+
"scipy>=1.15.3",
|
| 30 |
+
"setuptools>=80.9.0",
|
| 31 |
+
"soundfile>=0.12.1",
|
| 32 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,809 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv export -o requirements.txt
|
| 3 |
+
aiofiles==24.1.0 \
|
| 4 |
+
--hash=sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c \
|
| 5 |
+
--hash=sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5
|
| 6 |
+
# via gradio
|
| 7 |
+
annotated-types==0.7.0 \
|
| 8 |
+
--hash=sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53 \
|
| 9 |
+
--hash=sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89
|
| 10 |
+
# via pydantic
|
| 11 |
+
anyio==4.9.0 \
|
| 12 |
+
--hash=sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028 \
|
| 13 |
+
--hash=sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c
|
| 14 |
+
# via
|
| 15 |
+
# gradio
|
| 16 |
+
# httpx
|
| 17 |
+
# starlette
|
| 18 |
+
audioop-lts==0.2.1 ; python_full_version >= '3.13' \
|
| 19 |
+
--hash=sha256:05da64e73837f88ee5c6217d732d2584cf638003ac72df124740460531e95e47 \
|
| 20 |
+
--hash=sha256:120678b208cca1158f0a12d667af592e067f7a50df9adc4dc8f6ad8d065a93fb \
|
| 21 |
+
--hash=sha256:161249db9343b3c9780ca92c0be0d1ccbfecdbccac6844f3d0d44b9c4a00a17f \
|
| 22 |
+
--hash=sha256:2aeb6f96f7f6da80354330470b9134d81b4cf544cdd1c549f2f45fe964d28059 \
|
| 23 |
+
--hash=sha256:2bdb3b7912ccd57ea53197943f1bbc67262dcf29802c4a6df79ec1c715d45a78 \
|
| 24 |
+
--hash=sha256:3827e3fce6fee4d69d96a3d00cd2ab07f3c0d844cb1e44e26f719b34a5b15455 \
|
| 25 |
+
--hash=sha256:4a8dd6a81770f6ecf019c4b6d659e000dc26571b273953cef7cd1d5ce2ff3ae6 \
|
| 26 |
+
--hash=sha256:534ce808e6bab6adb65548723c8cbe189a3379245db89b9d555c4210b4aaa9b6 \
|
| 27 |
+
--hash=sha256:54cd4520fc830b23c7d223693ed3e1b4d464997dd3abc7c15dce9a1f9bd76ab2 \
|
| 28 |
+
--hash=sha256:56b7a0a4dba8e353436f31a932f3045d108a67b5943b30f85a5563f4d8488d77 \
|
| 29 |
+
--hash=sha256:5b7b4ff9de7a44e0ad2618afdc2ac920b91f4a6d3509520ee65339d4acde5abf \
|
| 30 |
+
--hash=sha256:64562c5c771fb0a8b6262829b9b4f37a7b886c01b4d3ecdbae1d629717db08b4 \
|
| 31 |
+
--hash=sha256:6e899eb8874dc2413b11926b5fb3857ec0ab55222840e38016a6ba2ea9b7d5e3 \
|
| 32 |
+
--hash=sha256:72e37f416adb43b0ced93419de0122b42753ee74e87070777b53c5d2241e7fab \
|
| 33 |
+
--hash=sha256:78bfb3703388c780edf900be66e07de5a3d4105ca8e8720c5c4d67927e0b15d0 \
|
| 34 |
+
--hash=sha256:a351af79edefc2a1bd2234bfd8b339935f389209943043913a919df4b0f13300 \
|
| 35 |
+
--hash=sha256:c45317debeb64002e980077642afbd977773a25fa3dfd7ed0c84dccfc1fafcb0 \
|
| 36 |
+
--hash=sha256:c589f06407e8340e81962575fcffbba1e92671879a221186c3d4662de9fe804e \
|
| 37 |
+
--hash=sha256:d1cd3c0b6f2ca25c7d2b1c3adeecbe23e65689839ba73331ebc7d893fcda7ffe \
|
| 38 |
+
--hash=sha256:d2d5434717f33117f29b5691fbdf142d36573d751716249a288fbb96ba26a281 \
|
| 39 |
+
--hash=sha256:d2de9b6fb8b1cf9f03990b299a9112bfdf8b86b6987003ca9e8a6c4f56d39543 \
|
| 40 |
+
--hash=sha256:d6bd20c7a10abcb0fb3d8aaa7508c0bf3d40dfad7515c572014da4b979d3310a \
|
| 41 |
+
--hash=sha256:e175350da05d2087e12cea8e72a70a1a8b14a17e92ed2022952a4419689ede5e \
|
| 42 |
+
--hash=sha256:e1af3ff32b8c38a7d900382646e91f2fc515fd19dea37e9392275a5cbfdbff63 \
|
| 43 |
+
--hash=sha256:e81268da0baa880431b68b1308ab7257eb33f356e57a5f9b1f915dfb13dd1387 \
|
| 44 |
+
--hash=sha256:f0ed1ad9bd862539ea875fb339ecb18fcc4148f8d9908f4502df28f94d23491a \
|
| 45 |
+
--hash=sha256:f0f2f336aa2aee2bce0b0dcc32bbba9178995454c7b979cf6ce086a8801e14c7 \
|
| 46 |
+
--hash=sha256:f24865991b5ed4b038add5edbf424639d1358144f4e2a3e7a84bc6ba23e35074 \
|
| 47 |
+
--hash=sha256:f51bb55122a89f7a0817d7ac2319744b4640b5b446c4c3efcea5764ea99ae509 \
|
| 48 |
+
--hash=sha256:f626a01c0a186b08f7ff61431c01c055961ee28769591efa8800beadd27a2959 \
|
| 49 |
+
--hash=sha256:fbae5d6925d7c26e712f0beda5ed69ebb40e14212c185d129b8dfbfcc335eb48 \
|
| 50 |
+
--hash=sha256:fd1345ae99e17e6910f47ce7d52673c6a1a70820d78b67de1b7abb3af29c426a \
|
| 51 |
+
--hash=sha256:ff3f97b3372c97782e9c6d3d7fdbe83bce8f70de719605bd7ee1839cd1ab360a
|
| 52 |
+
# via
|
| 53 |
+
# gradio
|
| 54 |
+
# standard-aifc
|
| 55 |
+
# standard-sunau
|
| 56 |
+
audioread==3.0.1 \
|
| 57 |
+
--hash=sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33 \
|
| 58 |
+
--hash=sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d
|
| 59 |
+
# via librosa
|
| 60 |
+
certifi==2025.4.26 \
|
| 61 |
+
--hash=sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6 \
|
| 62 |
+
--hash=sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3
|
| 63 |
+
# via
|
| 64 |
+
# httpcore
|
| 65 |
+
# httpx
|
| 66 |
+
# requests
|
| 67 |
+
cffi==1.17.1 \
|
| 68 |
+
--hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \
|
| 69 |
+
--hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \
|
| 70 |
+
--hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \
|
| 71 |
+
--hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \
|
| 72 |
+
--hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \
|
| 73 |
+
--hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \
|
| 74 |
+
--hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \
|
| 75 |
+
--hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \
|
| 76 |
+
--hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \
|
| 77 |
+
--hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \
|
| 78 |
+
--hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \
|
| 79 |
+
--hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \
|
| 80 |
+
--hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \
|
| 81 |
+
--hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \
|
| 82 |
+
--hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \
|
| 83 |
+
--hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \
|
| 84 |
+
--hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \
|
| 85 |
+
--hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \
|
| 86 |
+
--hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \
|
| 87 |
+
--hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \
|
| 88 |
+
--hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \
|
| 89 |
+
--hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \
|
| 90 |
+
--hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99
|
| 91 |
+
# via soundfile
|
| 92 |
+
charset-normalizer==3.4.2 \
|
| 93 |
+
--hash=sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7 \
|
| 94 |
+
--hash=sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0 \
|
| 95 |
+
--hash=sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b \
|
| 96 |
+
--hash=sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff \
|
| 97 |
+
--hash=sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e \
|
| 98 |
+
--hash=sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148 \
|
| 99 |
+
--hash=sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a \
|
| 100 |
+
--hash=sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e \
|
| 101 |
+
--hash=sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63 \
|
| 102 |
+
--hash=sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c \
|
| 103 |
+
--hash=sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b \
|
| 104 |
+
--hash=sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0 \
|
| 105 |
+
--hash=sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0 \
|
| 106 |
+
--hash=sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1 \
|
| 107 |
+
--hash=sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981 \
|
| 108 |
+
--hash=sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c \
|
| 109 |
+
--hash=sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980 \
|
| 110 |
+
--hash=sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7 \
|
| 111 |
+
--hash=sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d \
|
| 112 |
+
--hash=sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3 \
|
| 113 |
+
--hash=sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd \
|
| 114 |
+
--hash=sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214 \
|
| 115 |
+
--hash=sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c \
|
| 116 |
+
--hash=sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f \
|
| 117 |
+
--hash=sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691 \
|
| 118 |
+
--hash=sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf \
|
| 119 |
+
--hash=sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b \
|
| 120 |
+
--hash=sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a
|
| 121 |
+
# via requests
|
| 122 |
+
click==8.2.1 ; sys_platform != 'emscripten' \
|
| 123 |
+
--hash=sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202 \
|
| 124 |
+
--hash=sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b
|
| 125 |
+
# via
|
| 126 |
+
# typer
|
| 127 |
+
# uvicorn
|
| 128 |
+
colorama==0.4.6 ; sys_platform == 'win32' \
|
| 129 |
+
--hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
|
| 130 |
+
--hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
|
| 131 |
+
# via
|
| 132 |
+
# click
|
| 133 |
+
# tqdm
|
| 134 |
+
decorator==5.2.1 \
|
| 135 |
+
--hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \
|
| 136 |
+
--hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a
|
| 137 |
+
# via librosa
|
| 138 |
+
fastapi==0.115.12 \
|
| 139 |
+
--hash=sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681 \
|
| 140 |
+
--hash=sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d
|
| 141 |
+
# via gradio
|
| 142 |
+
ffmpy==0.6.0 \
|
| 143 |
+
--hash=sha256:332dd93198a162db61e527e866a04578d3713e577bfe68f2ed26ba9d09dbc948 \
|
| 144 |
+
--hash=sha256:c8369bf45f8bd5285ebad94c4a789a79e7af86eded74c1f8c36eccf57aaea58c
|
| 145 |
+
# via gradio
|
| 146 |
+
filelock==3.18.0 \
|
| 147 |
+
--hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \
|
| 148 |
+
--hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de
|
| 149 |
+
# via huggingface-hub
|
| 150 |
+
fsspec==2025.5.1 \
|
| 151 |
+
--hash=sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462 \
|
| 152 |
+
--hash=sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475
|
| 153 |
+
# via
|
| 154 |
+
# gradio-client
|
| 155 |
+
# huggingface-hub
|
| 156 |
+
gradio==5.33.1 \
|
| 157 |
+
--hash=sha256:c4329b04280d62041fbf0113e94fb5c4d20e0555ce1ac69174bf98225350159b \
|
| 158 |
+
--hash=sha256:f74c737aa92fc02b4d7dca7e50ee13ddce548aa16c9fcbe907ceabf93722f94d
|
| 159 |
+
# via
|
| 160 |
+
# gradio-pianoroll
|
| 161 |
+
# utau-webui
|
| 162 |
+
gradio-client==1.10.3 \
|
| 163 |
+
--hash=sha256:941e7f8d9a160f88487e9780a3db2736a40ea2b8b69d53ffdb306e47ef658b76 \
|
| 164 |
+
--hash=sha256:9e99b88e47f05dc3b68e40a3f3f83819f8d0ddcd43466ad385fe42e137825774
|
| 165 |
+
# via gradio
|
| 166 |
+
gradio-pianoroll==0.0.8 \
|
| 167 |
+
--hash=sha256:26abd2c98ccb8bb30e8269324ca8675109a502e266c4e9c8bfff524d1a9c0218 \
|
| 168 |
+
--hash=sha256:f7ac6d27dab2873c35bba5041b94afc9159922be2a9cebe202e8a87f4ec79e86
|
| 169 |
+
# via utau-webui
|
| 170 |
+
groovy==0.1.2 \
|
| 171 |
+
--hash=sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083 \
|
| 172 |
+
--hash=sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64
|
| 173 |
+
# via gradio
|
| 174 |
+
h11==0.16.0 \
|
| 175 |
+
--hash=sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1 \
|
| 176 |
+
--hash=sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86
|
| 177 |
+
# via
|
| 178 |
+
# httpcore
|
| 179 |
+
# uvicorn
|
| 180 |
+
h5py==3.14.0 \
|
| 181 |
+
--hash=sha256:0cbd41f4e3761f150aa5b662df991868ca533872c95467216f2bec5fcad84882 \
|
| 182 |
+
--hash=sha256:2372116b2e0d5d3e5e705b7f663f7c8d96fa79a4052d250484ef91d24d6a08f4 \
|
| 183 |
+
--hash=sha256:554ef0ced3571366d4d383427c00c966c360e178b5fb5ee5bb31a435c424db0c \
|
| 184 |
+
--hash=sha256:6da62509b7e1d71a7d110478aa25d245dd32c8d9a1daee9d2a42dba8717b047a \
|
| 185 |
+
--hash=sha256:aa4b7bbce683379b7bf80aaba68e17e23396100336a8d500206520052be2f812 \
|
| 186 |
+
--hash=sha256:ae18e3de237a7a830adb76aaa68ad438d85fe6e19e0d99944a3ce46b772c69b3 \
|
| 187 |
+
--hash=sha256:bf4897d67e613ecf5bdfbdab39a1158a64df105827da70ea1d90243d796d367f \
|
| 188 |
+
--hash=sha256:d90e6445ab7c146d7f7981b11895d70bc1dd91278a4f9f9028bc0c95e4a53f13 \
|
| 189 |
+
--hash=sha256:e0045115d83272090b0717c555a31398c2c089b87d212ceba800d3dc5d952e23 \
|
| 190 |
+
--hash=sha256:e8cbaf6910fa3983c46172666b0b8da7b7bd90d764399ca983236f2400436eeb \
|
| 191 |
+
--hash=sha256:ef9603a501a04fcd0ba28dd8f0995303d26a77a980a1f9474b3417543d4c6174
|
| 192 |
+
# via utau-webui
|
| 193 |
+
hf-xet==1.1.3 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' \
|
| 194 |
+
--hash=sha256:30c575a5306f8e6fda37edb866762140a435037365eba7a17ce7bd0bc0216a8b \
|
| 195 |
+
--hash=sha256:7c1a6aa6abed1f696f8099aa9796ca04c9ee778a58728a115607de9cc4638ff1 \
|
| 196 |
+
--hash=sha256:8203f52827e3df65981984936654a5b390566336956f65765a8aa58c362bb841 \
|
| 197 |
+
--hash=sha256:a5f09b1dd24e6ff6bcedb4b0ddab2d81824098bb002cf8b4ffa780545fa348c3 \
|
| 198 |
+
--hash=sha256:b578ae5ac9c056296bb0df9d018e597c8dc6390c5266f35b5c44696003cde9f3 \
|
| 199 |
+
--hash=sha256:b788a61977fbe6b5186e66239e2a329a3f0b7e7ff50dad38984c0c74f44aeca1 \
|
| 200 |
+
--hash=sha256:c3b508b5f583a75641aebf732853deb058953370ce8184f5dabc49f803b0819b \
|
| 201 |
+
--hash=sha256:fd2da210856444a34aad8ada2fc12f70dabed7cc20f37e90754d1d9b43bc0534
|
| 202 |
+
# via huggingface-hub
|
| 203 |
+
httpcore==1.0.9 \
|
| 204 |
+
--hash=sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55 \
|
| 205 |
+
--hash=sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8
|
| 206 |
+
# via httpx
|
| 207 |
+
httpx==0.28.1 \
|
| 208 |
+
--hash=sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc \
|
| 209 |
+
--hash=sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad
|
| 210 |
+
# via
|
| 211 |
+
# gradio
|
| 212 |
+
# gradio-client
|
| 213 |
+
# safehttpx
|
| 214 |
+
huggingface-hub==0.32.6 \
|
| 215 |
+
--hash=sha256:32cde9558c965477556edca72352621def7fbc42e167aaf33f4cdb9af65bb28b \
|
| 216 |
+
--hash=sha256:8e960f23dc57519c6c2a0bbc7e9bc030eaa14e7f2d61f8e68fd3d025dabed2fa
|
| 217 |
+
# via
|
| 218 |
+
# gradio
|
| 219 |
+
# gradio-client
|
| 220 |
+
idna==3.10 \
|
| 221 |
+
--hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \
|
| 222 |
+
--hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
|
| 223 |
+
# via
|
| 224 |
+
# anyio
|
| 225 |
+
# httpx
|
| 226 |
+
# requests
|
| 227 |
+
jinja2==3.1.6 \
|
| 228 |
+
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
|
| 229 |
+
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
|
| 230 |
+
# via gradio
|
| 231 |
+
joblib==1.5.1 \
|
| 232 |
+
--hash=sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a \
|
| 233 |
+
--hash=sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444
|
| 234 |
+
# via
|
| 235 |
+
# librosa
|
| 236 |
+
# scikit-learn
|
| 237 |
+
lazy-loader==0.4 \
|
| 238 |
+
--hash=sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc \
|
| 239 |
+
--hash=sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1
|
| 240 |
+
# via librosa
|
| 241 |
+
librosa==0.11.0 \
|
| 242 |
+
--hash=sha256:0b6415c4fd68bff4c29288abe67c6d80b587e0e1e2cfb0aad23e4559504a7fa1 \
|
| 243 |
+
--hash=sha256:f5ed951ca189b375bbe2e33b2abd7e040ceeee302b9bbaeeffdfddb8d0ace908
|
| 244 |
+
# via utau-webui
|
| 245 |
+
llvmlite==0.44.0 \
|
| 246 |
+
--hash=sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4 \
|
| 247 |
+
--hash=sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad \
|
| 248 |
+
--hash=sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930 \
|
| 249 |
+
--hash=sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516 \
|
| 250 |
+
--hash=sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf \
|
| 251 |
+
--hash=sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db \
|
| 252 |
+
--hash=sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e \
|
| 253 |
+
--hash=sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc \
|
| 254 |
+
--hash=sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9 \
|
| 255 |
+
--hash=sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d \
|
| 256 |
+
--hash=sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1
|
| 257 |
+
# via
|
| 258 |
+
# numba
|
| 259 |
+
# utau-webui
|
| 260 |
+
markdown-it-py==3.0.0 ; sys_platform != 'emscripten' \
|
| 261 |
+
--hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \
|
| 262 |
+
--hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb
|
| 263 |
+
# via rich
|
| 264 |
+
markupsafe==3.0.2 \
|
| 265 |
+
--hash=sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30 \
|
| 266 |
+
--hash=sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9 \
|
| 267 |
+
--hash=sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396 \
|
| 268 |
+
--hash=sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028 \
|
| 269 |
+
--hash=sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557 \
|
| 270 |
+
--hash=sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a \
|
| 271 |
+
--hash=sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c \
|
| 272 |
+
--hash=sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c \
|
| 273 |
+
--hash=sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22 \
|
| 274 |
+
--hash=sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094 \
|
| 275 |
+
--hash=sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5 \
|
| 276 |
+
--hash=sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225 \
|
| 277 |
+
--hash=sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c \
|
| 278 |
+
--hash=sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87 \
|
| 279 |
+
--hash=sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf \
|
| 280 |
+
--hash=sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb \
|
| 281 |
+
--hash=sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48 \
|
| 282 |
+
--hash=sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c \
|
| 283 |
+
--hash=sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6 \
|
| 284 |
+
--hash=sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd \
|
| 285 |
+
--hash=sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1 \
|
| 286 |
+
--hash=sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d \
|
| 287 |
+
--hash=sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca \
|
| 288 |
+
--hash=sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a \
|
| 289 |
+
--hash=sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe \
|
| 290 |
+
--hash=sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8 \
|
| 291 |
+
--hash=sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f \
|
| 292 |
+
--hash=sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f \
|
| 293 |
+
--hash=sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0 \
|
| 294 |
+
--hash=sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79 \
|
| 295 |
+
--hash=sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430
|
| 296 |
+
# via
|
| 297 |
+
# gradio
|
| 298 |
+
# jinja2
|
| 299 |
+
mdurl==0.1.2 ; sys_platform != 'emscripten' \
|
| 300 |
+
--hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \
|
| 301 |
+
--hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba
|
| 302 |
+
# via markdown-it-py
|
| 303 |
+
msgpack==1.1.0 \
|
| 304 |
+
--hash=sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b \
|
| 305 |
+
--hash=sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf \
|
| 306 |
+
--hash=sha256:0907e1a7119b337971a689153665764adc34e89175f9a34793307d9def08e6ca \
|
| 307 |
+
--hash=sha256:0f92a83b84e7c0749e3f12821949d79485971f087604178026085f60ce109330 \
|
| 308 |
+
--hash=sha256:115a7af8ee9e8cddc10f87636767857e7e3717b7a2e97379dc2054712693e90f \
|
| 309 |
+
--hash=sha256:17fb65dd0bec285907f68b15734a993ad3fc94332b5bb21b0435846228de1f39 \
|
| 310 |
+
--hash=sha256:2137773500afa5494a61b1208619e3871f75f27b03bcfca7b3a7023284140247 \
|
| 311 |
+
--hash=sha256:398b713459fea610861c8a7b62a6fec1882759f308ae0795b5413ff6a160cf3c \
|
| 312 |
+
--hash=sha256:4676e5be1b472909b2ee6356ff425ebedf5142427842aa06b4dfd5117d1ca8a2 \
|
| 313 |
+
--hash=sha256:4a1964df7b81285d00a84da4e70cb1383f2e665e0f1f2a7027e683956d04b734 \
|
| 314 |
+
--hash=sha256:4b51405e36e075193bc051315dbf29168d6141ae2500ba8cd80a522964e31434 \
|
| 315 |
+
--hash=sha256:58dfc47f8b102da61e8949708b3eafc3504509a5728f8b4ddef84bd9e16ad420 \
|
| 316 |
+
--hash=sha256:59caf6a4ed0d164055ccff8fe31eddc0ebc07cf7326a2aaa0dbf7a4001cd823e \
|
| 317 |
+
--hash=sha256:5dbad74103df937e1325cc4bfeaf57713be0b4f15e1c2da43ccdd836393e2ea2 \
|
| 318 |
+
--hash=sha256:65553c9b6da8166e819a6aa90ad15288599b340f91d18f60b2061f402b9a4915 \
|
| 319 |
+
--hash=sha256:7a946a8992941fea80ed4beae6bff74ffd7ee129a90b4dd5cf9c476a30e9708d \
|
| 320 |
+
--hash=sha256:7c9a35ce2c2573bada929e0b7b3576de647b0defbd25f5139dcdaba0ae35a4cc \
|
| 321 |
+
--hash=sha256:a51abd48c6d8ac89e0cfd4fe177c61481aca2d5e7ba42044fd218cfd8ea9899f \
|
| 322 |
+
--hash=sha256:ad33e8400e4ec17ba782f7b9cf868977d867ed784a1f5f2ab46e7ba53b6e1e1b \
|
| 323 |
+
--hash=sha256:b4c01941fd2ff87c2a934ee6055bda4ed353a7846b8d4f341c428109e9fcde8c \
|
| 324 |
+
--hash=sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f \
|
| 325 |
+
--hash=sha256:d46cf9e3705ea9485687aa4001a76e44748b609d260af21c4ceea7f2212a501d \
|
| 326 |
+
--hash=sha256:dd432ccc2c72b914e4cb77afce64aab761c1137cc698be3984eee260bcb2896e
|
| 327 |
+
# via librosa
|
| 328 |
+
numba==0.61.2 \
|
| 329 |
+
--hash=sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2 \
|
| 330 |
+
--hash=sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154 \
|
| 331 |
+
--hash=sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd \
|
| 332 |
+
--hash=sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8 \
|
| 333 |
+
--hash=sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7 \
|
| 334 |
+
--hash=sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546 \
|
| 335 |
+
--hash=sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e \
|
| 336 |
+
--hash=sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140 \
|
| 337 |
+
--hash=sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d \
|
| 338 |
+
--hash=sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18 \
|
| 339 |
+
--hash=sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab
|
| 340 |
+
# via
|
| 341 |
+
# librosa
|
| 342 |
+
# resampy
|
| 343 |
+
# utau-webui
|
| 344 |
+
numpy==2.2.6 \
|
| 345 |
+
--hash=sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff \
|
| 346 |
+
--hash=sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84 \
|
| 347 |
+
--hash=sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6 \
|
| 348 |
+
--hash=sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f \
|
| 349 |
+
--hash=sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b \
|
| 350 |
+
--hash=sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49 \
|
| 351 |
+
--hash=sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571 \
|
| 352 |
+
--hash=sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff \
|
| 353 |
+
--hash=sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4 \
|
| 354 |
+
--hash=sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566 \
|
| 355 |
+
--hash=sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40 \
|
| 356 |
+
--hash=sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd \
|
| 357 |
+
--hash=sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06 \
|
| 358 |
+
--hash=sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282 \
|
| 359 |
+
--hash=sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3 \
|
| 360 |
+
--hash=sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1 \
|
| 361 |
+
--hash=sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c \
|
| 362 |
+
--hash=sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d \
|
| 363 |
+
--hash=sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2 \
|
| 364 |
+
--hash=sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c \
|
| 365 |
+
--hash=sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f \
|
| 366 |
+
--hash=sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd \
|
| 367 |
+
--hash=sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868 \
|
| 368 |
+
--hash=sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d \
|
| 369 |
+
--hash=sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87 \
|
| 370 |
+
--hash=sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa \
|
| 371 |
+
--hash=sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f \
|
| 372 |
+
--hash=sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda \
|
| 373 |
+
--hash=sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249 \
|
| 374 |
+
--hash=sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de \
|
| 375 |
+
--hash=sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8
|
| 376 |
+
# via
|
| 377 |
+
# gradio
|
| 378 |
+
# h5py
|
| 379 |
+
# librosa
|
| 380 |
+
# numba
|
| 381 |
+
# pandas
|
| 382 |
+
# pyworld
|
| 383 |
+
# resampy
|
| 384 |
+
# scikit-learn
|
| 385 |
+
# scipy
|
| 386 |
+
# soundfile
|
| 387 |
+
# soxr
|
| 388 |
+
# utau-webui
|
| 389 |
+
orjson==3.10.18 \
|
| 390 |
+
--hash=sha256:0315317601149c244cb3ecef246ef5861a64824ccbcb8018d32c66a60a84ffbc \
|
| 391 |
+
--hash=sha256:187ec33bbec58c76dbd4066340067d9ece6e10067bb0cc074a21ae3300caa84e \
|
| 392 |
+
--hash=sha256:1ebeda919725f9dbdb269f59bc94f861afbe2a27dce5608cdba2d92772364d1c \
|
| 393 |
+
--hash=sha256:22748de2a07fcc8781a70edb887abf801bb6142e6236123ff93d12d92db3d406 \
|
| 394 |
+
--hash=sha256:2d808e34ddb24fc29a4d4041dcfafbae13e129c93509b847b14432717d94b44f \
|
| 395 |
+
--hash=sha256:303565c67a6c7b1f194c94632a4a39918e067bd6176a48bec697393865ce4f06 \
|
| 396 |
+
--hash=sha256:356b076f1662c9813d5fa56db7d63ccceef4c271b1fb3dd522aca291375fcf17 \
|
| 397 |
+
--hash=sha256:3a83c9954a4107b9acd10291b7f12a6b29e35e8d43a414799906ea10e75438e6 \
|
| 398 |
+
--hash=sha256:3d600be83fe4514944500fa8c2a0a77099025ec6482e8087d7659e891f23058a \
|
| 399 |
+
--hash=sha256:50c15557afb7f6d63bc6d6348e0337a880a04eaa9cd7c9d569bcb4e760a24753 \
|
| 400 |
+
--hash=sha256:559eb40a70a7494cd5beab2d73657262a74a2c59aff2068fdba8f0424ec5b39d \
|
| 401 |
+
--hash=sha256:5adf5f4eed520a4959d29ea80192fa626ab9a20b2ea13f8f6dc58644f6927103 \
|
| 402 |
+
--hash=sha256:6612787e5b0756a171c7d81ba245ef63a3533a637c335aa7fcb8e665f4a0966f \
|
| 403 |
+
--hash=sha256:69c34b9441b863175cc6a01f2935de994025e773f814412030f269da4f7be147 \
|
| 404 |
+
--hash=sha256:7592bb48a214e18cd670974f289520f12b7aed1fa0b2e2616b8ed9e069e08595 \
|
| 405 |
+
--hash=sha256:7ac6bd7be0dcab5b702c9d43d25e70eb456dfd2e119d512447468f6405b4a69c \
|
| 406 |
+
--hash=sha256:86314fdb5053a2f5a5d881f03fca0219bfdf832912aa88d18676a5175c6916b5 \
|
| 407 |
+
--hash=sha256:8e4b2ae732431127171b875cb2668f883e1234711d3c147ffd69fe5be51a8012 \
|
| 408 |
+
--hash=sha256:9dca85398d6d093dd41dc0983cbf54ab8e6afd1c547b6b8a311643917fbf4e0c \
|
| 409 |
+
--hash=sha256:9f72f100cee8dde70100406d5c1abba515a7df926d4ed81e20a9730c062fe9ad \
|
| 410 |
+
--hash=sha256:ad8eacbb5d904d5591f27dee4031e2c1db43d559edb8f91778efd642d70e6bea \
|
| 411 |
+
--hash=sha256:aed411bcb68bf62e85588f2a7e03a6082cc42e5a2796e06e72a962d7c6310b52 \
|
| 412 |
+
--hash=sha256:bb70d489bc79b7519e5803e2cc4c72343c9dc1154258adf2f8925d0b60da7c58 \
|
| 413 |
+
--hash=sha256:c382a5c0b5931a5fc5405053d36c1ce3fd561694738626c77ae0b1dfc0242ca1 \
|
| 414 |
+
--hash=sha256:e0da26957e77e9e55a6c2ce2e7182a36a6f6b180ab7189315cb0995ec362e049 \
|
| 415 |
+
--hash=sha256:e8da3947d92123eda795b68228cafe2724815621fe35e8e320a9e9593a4bcd53 \
|
| 416 |
+
--hash=sha256:e9e86a6af31b92299b00736c89caf63816f70a4001e750bda179e15564d7a034 \
|
| 417 |
+
--hash=sha256:f3c29eb9a81e2fbc6fd7ddcfba3e101ba92eaff455b8d602bf7511088bbc0eae \
|
| 418 |
+
--hash=sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3 \
|
| 419 |
+
--hash=sha256:f872bef9f042734110642b7a11937440797ace8c87527de25e0c53558b579ccc \
|
| 420 |
+
--hash=sha256:f9f94cf6d3f9cd720d641f8399e390e7411487e493962213390d1ae45c7814fc
|
| 421 |
+
# via gradio
|
| 422 |
+
packaging==25.0 \
|
| 423 |
+
--hash=sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484 \
|
| 424 |
+
--hash=sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f
|
| 425 |
+
# via
|
| 426 |
+
# gradio
|
| 427 |
+
# gradio-client
|
| 428 |
+
# huggingface-hub
|
| 429 |
+
# lazy-loader
|
| 430 |
+
# pooch
|
| 431 |
+
pandas==2.3.0 \
|
| 432 |
+
--hash=sha256:094e271a15b579650ebf4c5155c05dcd2a14fd4fdd72cf4854b2f7ad31ea30be \
|
| 433 |
+
--hash=sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67 \
|
| 434 |
+
--hash=sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8 \
|
| 435 |
+
--hash=sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3 \
|
| 436 |
+
--hash=sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983 \
|
| 437 |
+
--hash=sha256:2eb4728a18dcd2908c7fccf74a982e241b467d178724545a48d0caf534b38ebf \
|
| 438 |
+
--hash=sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133 \
|
| 439 |
+
--hash=sha256:404d681c698e3c8a40a61d0cd9412cc7364ab9a9cc6e144ae2992e11a2e77a20 \
|
| 440 |
+
--hash=sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9 \
|
| 441 |
+
--hash=sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390 \
|
| 442 |
+
--hash=sha256:6021910b086b3ca756755e86ddc64e0ddafd5e58e076c72cb1585162e5ad259b \
|
| 443 |
+
--hash=sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c \
|
| 444 |
+
--hash=sha256:9ff730713d4c4f2f1c860e36c005c7cefc1c7c80c21c0688fd605aa43c9fcf09 \
|
| 445 |
+
--hash=sha256:b9d8c3187be7479ea5c3d30c32a5d73d62a621166675063b2edd21bc47614027 \
|
| 446 |
+
--hash=sha256:ba24af48643b12ffe49b27065d3babd52702d95ab70f50e1b34f71ca703e2c0d \
|
| 447 |
+
--hash=sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f \
|
| 448 |
+
--hash=sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249 \
|
| 449 |
+
--hash=sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd \
|
| 450 |
+
--hash=sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f \
|
| 451 |
+
--hash=sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042 \
|
| 452 |
+
--hash=sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575
|
| 453 |
+
# via gradio
|
| 454 |
+
pillow==11.2.1 \
|
| 455 |
+
--hash=sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91 \
|
| 456 |
+
--hash=sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4 \
|
| 457 |
+
--hash=sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941 \
|
| 458 |
+
--hash=sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f \
|
| 459 |
+
--hash=sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3 \
|
| 460 |
+
--hash=sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb \
|
| 461 |
+
--hash=sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681 \
|
| 462 |
+
--hash=sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d \
|
| 463 |
+
--hash=sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406 \
|
| 464 |
+
--hash=sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e \
|
| 465 |
+
--hash=sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d \
|
| 466 |
+
--hash=sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2 \
|
| 467 |
+
--hash=sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751 \
|
| 468 |
+
--hash=sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c \
|
| 469 |
+
--hash=sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c \
|
| 470 |
+
--hash=sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b \
|
| 471 |
+
--hash=sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691 \
|
| 472 |
+
--hash=sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14 \
|
| 473 |
+
--hash=sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b \
|
| 474 |
+
--hash=sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f \
|
| 475 |
+
--hash=sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0 \
|
| 476 |
+
--hash=sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22 \
|
| 477 |
+
--hash=sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16 \
|
| 478 |
+
--hash=sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7 \
|
| 479 |
+
--hash=sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6 \
|
| 480 |
+
--hash=sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155 \
|
| 481 |
+
--hash=sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830 \
|
| 482 |
+
--hash=sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4 \
|
| 483 |
+
--hash=sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1 \
|
| 484 |
+
--hash=sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443 \
|
| 485 |
+
--hash=sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd \
|
| 486 |
+
--hash=sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9 \
|
| 487 |
+
--hash=sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28 \
|
| 488 |
+
--hash=sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b
|
| 489 |
+
# via gradio
|
| 490 |
+
platformdirs==4.3.8 \
|
| 491 |
+
--hash=sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc \
|
| 492 |
+
--hash=sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4
|
| 493 |
+
# via pooch
|
| 494 |
+
pooch==1.8.2 \
|
| 495 |
+
--hash=sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47 \
|
| 496 |
+
--hash=sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10
|
| 497 |
+
# via librosa
|
| 498 |
+
pycparser==2.22 \
|
| 499 |
+
--hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \
|
| 500 |
+
--hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
|
| 501 |
+
# via cffi
|
| 502 |
+
pydantic==2.11.5 \
|
| 503 |
+
--hash=sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a \
|
| 504 |
+
--hash=sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7
|
| 505 |
+
# via
|
| 506 |
+
# fastapi
|
| 507 |
+
# gradio
|
| 508 |
+
pydantic-core==2.33.2 \
|
| 509 |
+
--hash=sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56 \
|
| 510 |
+
--hash=sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef \
|
| 511 |
+
--hash=sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a \
|
| 512 |
+
--hash=sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f \
|
| 513 |
+
--hash=sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916 \
|
| 514 |
+
--hash=sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a \
|
| 515 |
+
--hash=sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7 \
|
| 516 |
+
--hash=sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025 \
|
| 517 |
+
--hash=sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849 \
|
| 518 |
+
--hash=sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b \
|
| 519 |
+
--hash=sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e \
|
| 520 |
+
--hash=sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea \
|
| 521 |
+
--hash=sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac \
|
| 522 |
+
--hash=sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162 \
|
| 523 |
+
--hash=sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc \
|
| 524 |
+
--hash=sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1 \
|
| 525 |
+
--hash=sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5 \
|
| 526 |
+
--hash=sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88 \
|
| 527 |
+
--hash=sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290 \
|
| 528 |
+
--hash=sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d \
|
| 529 |
+
--hash=sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc \
|
| 530 |
+
--hash=sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9 \
|
| 531 |
+
--hash=sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9 \
|
| 532 |
+
--hash=sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f \
|
| 533 |
+
--hash=sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5 \
|
| 534 |
+
--hash=sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab \
|
| 535 |
+
--hash=sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1 \
|
| 536 |
+
--hash=sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9 \
|
| 537 |
+
--hash=sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011 \
|
| 538 |
+
--hash=sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6 \
|
| 539 |
+
--hash=sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2 \
|
| 540 |
+
--hash=sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6
|
| 541 |
+
# via pydantic
|
| 542 |
+
pydub==0.25.1 \
|
| 543 |
+
--hash=sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6 \
|
| 544 |
+
--hash=sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f
|
| 545 |
+
# via gradio
|
| 546 |
+
pygments==2.19.1 ; sys_platform != 'emscripten' \
|
| 547 |
+
--hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \
|
| 548 |
+
--hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c
|
| 549 |
+
# via rich
|
| 550 |
+
python-dateutil==2.9.0.post0 \
|
| 551 |
+
--hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
|
| 552 |
+
--hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
|
| 553 |
+
# via pandas
|
| 554 |
+
python-multipart==0.0.20 \
|
| 555 |
+
--hash=sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104 \
|
| 556 |
+
--hash=sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13
|
| 557 |
+
# via gradio
|
| 558 |
+
pytz==2025.2 \
|
| 559 |
+
--hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \
|
| 560 |
+
--hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00
|
| 561 |
+
# via pandas
|
| 562 |
+
pyworld==0.3.5 \
|
| 563 |
+
--hash=sha256:1b93e53cddb67a0e4faa34d6cf919ac6c662feb1c8c0ed901d71b595ab396aa3 \
|
| 564 |
+
--hash=sha256:59b48961c2ac34fb01efeb1a77d3eda69c41b676858cbc3a82dfb7602c0c762b \
|
| 565 |
+
--hash=sha256:860c5c3528f1dbc5c68fa71a16e3bb6990244619e5b9baf62952f3a6bfc6131c
|
| 566 |
+
# via utau-webui
|
| 567 |
+
pyyaml==6.0.2 \
|
| 568 |
+
--hash=sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48 \
|
| 569 |
+
--hash=sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133 \
|
| 570 |
+
--hash=sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484 \
|
| 571 |
+
--hash=sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5 \
|
| 572 |
+
--hash=sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc \
|
| 573 |
+
--hash=sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1 \
|
| 574 |
+
--hash=sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652 \
|
| 575 |
+
--hash=sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5 \
|
| 576 |
+
--hash=sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8 \
|
| 577 |
+
--hash=sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476 \
|
| 578 |
+
--hash=sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563 \
|
| 579 |
+
--hash=sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b \
|
| 580 |
+
--hash=sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425 \
|
| 581 |
+
--hash=sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183 \
|
| 582 |
+
--hash=sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab \
|
| 583 |
+
--hash=sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725 \
|
| 584 |
+
--hash=sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e \
|
| 585 |
+
--hash=sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4 \
|
| 586 |
+
--hash=sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba
|
| 587 |
+
# via
|
| 588 |
+
# gradio
|
| 589 |
+
# huggingface-hub
|
| 590 |
+
requests==2.32.4 \
|
| 591 |
+
--hash=sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c \
|
| 592 |
+
--hash=sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422
|
| 593 |
+
# via
|
| 594 |
+
# huggingface-hub
|
| 595 |
+
# pooch
|
| 596 |
+
resampy==0.4.3 \
|
| 597 |
+
--hash=sha256:a0d1c28398f0e55994b739650afef4e3974115edbe96cd4bb81968425e916e47 \
|
| 598 |
+
--hash=sha256:ad2ed64516b140a122d96704e32bc0f92b23f45419e8b8f478e5a05f83edcebd
|
| 599 |
+
# via utau-webui
|
| 600 |
+
rich==14.0.0 ; sys_platform != 'emscripten' \
|
| 601 |
+
--hash=sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0 \
|
| 602 |
+
--hash=sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725
|
| 603 |
+
# via typer
|
| 604 |
+
ruff==0.11.13 ; sys_platform != 'emscripten' \
|
| 605 |
+
--hash=sha256:1808b3ed53e1a777c2ef733aca9051dc9bf7c99b26ece15cb59a0320fbdbd629 \
|
| 606 |
+
--hash=sha256:26816a218ca6ef02142343fd24c70f7cd8c5aa6c203bca284407adf675984432 \
|
| 607 |
+
--hash=sha256:26fa247dc68d1d4e72c179e08889a25ac0c7ba4d78aecfc835d49cbfd60bf514 \
|
| 608 |
+
--hash=sha256:29c3189895a8a6a657b7af4e97d330c8a3afd2c9c8f46c81e2fc5a31866517e3 \
|
| 609 |
+
--hash=sha256:4a9ddd3ec62a9a89578c85842b836e4ac832d4a2e0bfaad3b02243f930ceafcc \
|
| 610 |
+
--hash=sha256:4bdfbf1240533f40042ec00c9e09a3aade6f8c10b6414cf11b519488d2635d46 \
|
| 611 |
+
--hash=sha256:4ffbc82d70424b275b089166310448051afdc6e914fdab90e08df66c43bb5ca9 \
|
| 612 |
+
--hash=sha256:51c3f95abd9331dc5b87c47ac7f376db5616041173826dfd556cfe3d4977f492 \
|
| 613 |
+
--hash=sha256:53b15a9dfdce029c842e9a5aebc3855e9ab7771395979ff85b7c1dedb53ddc2b \
|
| 614 |
+
--hash=sha256:55e4bc3a77842da33c16d55b32c6cac1ec5fb0fbec9c8c513bdce76c4f922165 \
|
| 615 |
+
--hash=sha256:633bf2c6f35678c56ec73189ba6fa19ff1c5e4807a78bf60ef487b9dd272cc71 \
|
| 616 |
+
--hash=sha256:6c51f93029d54a910d3d24f7dd0bb909e31b6cd989a5e4ac513f4eb41629f0dc \
|
| 617 |
+
--hash=sha256:96c27935418e4e8e77a26bb05962817f28b8ef3843a6c6cc49d8783b5507f250 \
|
| 618 |
+
--hash=sha256:ab153241400789138d13f362c43f7edecc0edfffce2afa6a68434000ecd8f69a \
|
| 619 |
+
--hash=sha256:aef9c9ed1b5ca28bb15c7eac83b8670cf3b20b478195bd49c8d756ba0a36cf48 \
|
| 620 |
+
--hash=sha256:b4385285e9179d608ff1d2fb9922062663c658605819a6876d8beef0c30b7f3b \
|
| 621 |
+
--hash=sha256:d237a496e0778d719efb05058c64d28b757c77824e04ffe8796c7436e26712b7 \
|
| 622 |
+
--hash=sha256:d28ce58b5ecf0f43c1b71edffabe6ed7f245d5336b17805803312ec9bc665933
|
| 623 |
+
# via gradio
|
| 624 |
+
safehttpx==0.1.6 \
|
| 625 |
+
--hash=sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c \
|
| 626 |
+
--hash=sha256:b356bfc82cee3a24c395b94a2dbeabbed60aff1aa5fa3b5fe97c4f2456ebce42
|
| 627 |
+
# via gradio
|
| 628 |
+
scikit-learn==1.7.0 \
|
| 629 |
+
--hash=sha256:014e07a23fe02e65f9392898143c542a50b6001dbe89cb867e19688e468d049b \
|
| 630 |
+
--hash=sha256:0521cb460426c56fee7e07f9365b0f45ec8ca7b2d696534ac98bfb85e7ae4775 \
|
| 631 |
+
--hash=sha256:0b2f8a0b1e73e9a08b7cc498bb2aeab36cdc1f571f8ab2b35c6e5d1c7115d97d \
|
| 632 |
+
--hash=sha256:126c09740a6f016e815ab985b21e3a0656835414521c81fc1a8da78b679bdb75 \
|
| 633 |
+
--hash=sha256:1babf2511e6ffd695da7a983b4e4d6de45dce39577b26b721610711081850906 \
|
| 634 |
+
--hash=sha256:317ca9f83acbde2883bd6bb27116a741bfcb371369706b4f9973cf30e9a03b0d \
|
| 635 |
+
--hash=sha256:34cc8d9d010d29fb2b7cbcd5ccc24ffdd80515f65fe9f1e4894ace36b267ce19 \
|
| 636 |
+
--hash=sha256:5abd2acff939d5bd4701283f009b01496832d50ddafa83c90125a4e41c33e314 \
|
| 637 |
+
--hash=sha256:5b7974f1f32bc586c90145df51130e02267e4b7e77cab76165c76cf43faca0d9 \
|
| 638 |
+
--hash=sha256:63017a5f9a74963d24aac7590287149a8d0f1a0799bbe7173c0d8ba1523293c0 \
|
| 639 |
+
--hash=sha256:9f39f6a811bf3f15177b66c82cbe0d7b1ebad9f190737dcdef77cfca1ea3c19c \
|
| 640 |
+
--hash=sha256:c01e869b15aec88e2cdb73d27f15bdbe03bce8e2fb43afbe77c45d399e73a5a3 \
|
| 641 |
+
--hash=sha256:c2c7243d34aaede0efca7a5a96d67fddaebb4ad7e14a70991b9abee9dc5c0379 \
|
| 642 |
+
--hash=sha256:e39d95a929b112047c25b775035c8c234c5ca67e681ce60d12413afb501129f7 \
|
| 643 |
+
--hash=sha256:e7e7ced20582d3a5516fb6f405fd1d254e1f5ce712bfef2589f51326af6346e8
|
| 644 |
+
# via librosa
|
| 645 |
+
scipy==1.15.3 \
|
| 646 |
+
--hash=sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477 \
|
| 647 |
+
--hash=sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c \
|
| 648 |
+
--hash=sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723 \
|
| 649 |
+
--hash=sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730 \
|
| 650 |
+
--hash=sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539 \
|
| 651 |
+
--hash=sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb \
|
| 652 |
+
--hash=sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6 \
|
| 653 |
+
--hash=sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49 \
|
| 654 |
+
--hash=sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759 \
|
| 655 |
+
--hash=sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8 \
|
| 656 |
+
--hash=sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4 \
|
| 657 |
+
--hash=sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e \
|
| 658 |
+
--hash=sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed \
|
| 659 |
+
--hash=sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5 \
|
| 660 |
+
--hash=sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5 \
|
| 661 |
+
--hash=sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019 \
|
| 662 |
+
--hash=sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e \
|
| 663 |
+
--hash=sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca \
|
| 664 |
+
--hash=sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825 \
|
| 665 |
+
--hash=sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62 \
|
| 666 |
+
--hash=sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb \
|
| 667 |
+
--hash=sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb \
|
| 668 |
+
--hash=sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163 \
|
| 669 |
+
--hash=sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45 \
|
| 670 |
+
--hash=sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7 \
|
| 671 |
+
--hash=sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11 \
|
| 672 |
+
--hash=sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf \
|
| 673 |
+
--hash=sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126
|
| 674 |
+
# via
|
| 675 |
+
# librosa
|
| 676 |
+
# scikit-learn
|
| 677 |
+
# utau-webui
|
| 678 |
+
semantic-version==2.10.0 \
|
| 679 |
+
--hash=sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c \
|
| 680 |
+
--hash=sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177
|
| 681 |
+
# via gradio
|
| 682 |
+
setuptools==80.9.0 \
|
| 683 |
+
--hash=sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922 \
|
| 684 |
+
--hash=sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c
|
| 685 |
+
# via utau-webui
|
| 686 |
+
shellingham==1.5.4 ; sys_platform != 'emscripten' \
|
| 687 |
+
--hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \
|
| 688 |
+
--hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de
|
| 689 |
+
# via typer
|
| 690 |
+
six==1.17.0 \
|
| 691 |
+
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
|
| 692 |
+
--hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
|
| 693 |
+
# via python-dateutil
|
| 694 |
+
sniffio==1.3.1 \
|
| 695 |
+
--hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \
|
| 696 |
+
--hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc
|
| 697 |
+
# via anyio
|
| 698 |
+
soundfile==0.13.1 \
|
| 699 |
+
--hash=sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618 \
|
| 700 |
+
--hash=sha256:1e70a05a0626524a69e9f0f4dd2ec174b4e9567f4d8b6c11d38b5c289be36ee9 \
|
| 701 |
+
--hash=sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593 \
|
| 702 |
+
--hash=sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33 \
|
| 703 |
+
--hash=sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb \
|
| 704 |
+
--hash=sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445 \
|
| 705 |
+
--hash=sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b \
|
| 706 |
+
--hash=sha256:c734564fab7c5ddf8e9be5bf70bab68042cd17e9c214c06e365e20d64f9a69d5
|
| 707 |
+
# via
|
| 708 |
+
# librosa
|
| 709 |
+
# utau-webui
|
| 710 |
+
soxr==0.5.0.post1 \
|
| 711 |
+
--hash=sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32 \
|
| 712 |
+
--hash=sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73 \
|
| 713 |
+
--hash=sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc \
|
| 714 |
+
--hash=sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6 \
|
| 715 |
+
--hash=sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1 \
|
| 716 |
+
--hash=sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31
|
| 717 |
+
# via librosa
|
| 718 |
+
standard-aifc==3.13.0 ; python_full_version >= '3.13' \
|
| 719 |
+
--hash=sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43 \
|
| 720 |
+
--hash=sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66
|
| 721 |
+
# via librosa
|
| 722 |
+
standard-chunk==3.13.0 ; python_full_version >= '3.13' \
|
| 723 |
+
--hash=sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c \
|
| 724 |
+
--hash=sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654
|
| 725 |
+
# via standard-aifc
|
| 726 |
+
standard-sunau==3.13.0 ; python_full_version >= '3.13' \
|
| 727 |
+
--hash=sha256:53af624a9529c41062f4c2fd33837f297f3baa196b0cfceffea6555654602622 \
|
| 728 |
+
--hash=sha256:b319a1ac95a09a2378a8442f403c66f4fd4b36616d6df6ae82b8e536ee790908
|
| 729 |
+
# via librosa
|
| 730 |
+
starlette==0.46.2 \
|
| 731 |
+
--hash=sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35 \
|
| 732 |
+
--hash=sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5
|
| 733 |
+
# via
|
| 734 |
+
# fastapi
|
| 735 |
+
# gradio
|
| 736 |
+
threadpoolctl==3.6.0 \
|
| 737 |
+
--hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \
|
| 738 |
+
--hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e
|
| 739 |
+
# via scikit-learn
|
| 740 |
+
tomlkit==0.13.3 \
|
| 741 |
+
--hash=sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1 \
|
| 742 |
+
--hash=sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0
|
| 743 |
+
# via gradio
|
| 744 |
+
tqdm==4.67.1 \
|
| 745 |
+
--hash=sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2 \
|
| 746 |
+
--hash=sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2
|
| 747 |
+
# via huggingface-hub
|
| 748 |
+
typer==0.16.0 ; sys_platform != 'emscripten' \
|
| 749 |
+
--hash=sha256:1f79bed11d4d02d4310e3c1b7ba594183bcedb0ac73b27a9e5f28f6fb5b98855 \
|
| 750 |
+
--hash=sha256:af377ffaee1dbe37ae9440cb4e8f11686ea5ce4e9bae01b84ae7c63b87f1dd3b
|
| 751 |
+
# via gradio
|
| 752 |
+
typing-extensions==4.14.0 \
|
| 753 |
+
--hash=sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4 \
|
| 754 |
+
--hash=sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af
|
| 755 |
+
# via
|
| 756 |
+
# anyio
|
| 757 |
+
# fastapi
|
| 758 |
+
# gradio
|
| 759 |
+
# gradio-client
|
| 760 |
+
# huggingface-hub
|
| 761 |
+
# librosa
|
| 762 |
+
# pydantic
|
| 763 |
+
# pydantic-core
|
| 764 |
+
# typer
|
| 765 |
+
# typing-inspection
|
| 766 |
+
typing-inspection==0.4.1 \
|
| 767 |
+
--hash=sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51 \
|
| 768 |
+
--hash=sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28
|
| 769 |
+
# via pydantic
|
| 770 |
+
tzdata==2025.2 \
|
| 771 |
+
--hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \
|
| 772 |
+
--hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9
|
| 773 |
+
# via pandas
|
| 774 |
+
urllib3==2.4.0 \
|
| 775 |
+
--hash=sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466 \
|
| 776 |
+
--hash=sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813
|
| 777 |
+
# via
|
| 778 |
+
# gradio
|
| 779 |
+
# requests
|
| 780 |
+
uvicorn==0.34.3 ; sys_platform != 'emscripten' \
|
| 781 |
+
--hash=sha256:16246631db62bdfbf069b0645177d6e8a77ba950cfedbfd093acef9444e4d885 \
|
| 782 |
+
--hash=sha256:35919a9a979d7a59334b6b10e05d77c1d0d574c50e0fc98b8b1a0f165708b55a
|
| 783 |
+
# via gradio
|
| 784 |
+
websockets==15.0.1 \
|
| 785 |
+
--hash=sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2 \
|
| 786 |
+
--hash=sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5 \
|
| 787 |
+
--hash=sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8 \
|
| 788 |
+
--hash=sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375 \
|
| 789 |
+
--hash=sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597 \
|
| 790 |
+
--hash=sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f \
|
| 791 |
+
--hash=sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3 \
|
| 792 |
+
--hash=sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4 \
|
| 793 |
+
--hash=sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665 \
|
| 794 |
+
--hash=sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22 \
|
| 795 |
+
--hash=sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675 \
|
| 796 |
+
--hash=sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4 \
|
| 797 |
+
--hash=sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65 \
|
| 798 |
+
--hash=sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151 \
|
| 799 |
+
--hash=sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d \
|
| 800 |
+
--hash=sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee \
|
| 801 |
+
--hash=sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa \
|
| 802 |
+
--hash=sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9 \
|
| 803 |
+
--hash=sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe \
|
| 804 |
+
--hash=sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561 \
|
| 805 |
+
--hash=sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215 \
|
| 806 |
+
--hash=sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931 \
|
| 807 |
+
--hash=sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f \
|
| 808 |
+
--hash=sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7
|
| 809 |
+
# via gradio-client
|
straycat.py
ADDED
|
@@ -0,0 +1,825 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
straycat - Yet another WORLD-based UTAU resampler.
|
| 4 |
+
|
| 5 |
+
Original source: https://github.com/UtaUtaUtau/straycat
|
| 6 |
+
Copyright (c) UtaUtaUtau
|
| 7 |
+
Licensed under MIT License
|
| 8 |
+
|
| 9 |
+
This file is part of the straycat project and is used under the terms
|
| 10 |
+
of the MIT License. See the original repository for full license text.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
logging.basicConfig(format='%(message)s', level=logging.INFO)
|
| 15 |
+
import sys
|
| 16 |
+
import os
|
| 17 |
+
import pyworld as world # Vocoder
|
| 18 |
+
import numpy as np # Numpy <3
|
| 19 |
+
from numba import njit, vectorize, float64, optional # JIT compilation stuff (and ufuncs)
|
| 20 |
+
import soundfile as sf # WAV read + write
|
| 21 |
+
import scipy.signal as signal # for filtering
|
| 22 |
+
import scipy.interpolate as interp # Interpolator for feats
|
| 23 |
+
import scipy.ndimage as ndimage
|
| 24 |
+
import resampy # Resampler (as in sampling rate stuff)
|
| 25 |
+
from pathlib import Path # path manipulation
|
| 26 |
+
import re
|
| 27 |
+
|
| 28 |
+
# Program identification and command-line help text.
version = '0.4.0'
help_string = '''usage: straycat in_file out_file pitch velocity [flags] [offset] [length] [consonant] [cutoff] [volume] [modulation] [tempo] [pitch_string]

Resamples using the WORLD Vocoder.

arguments:
\tin_file\t\tPath to input file.
\tout_file\tPath to output file.
\tpitch\t\tThe pitch to render on.
\tvelocity\tThe consonant velocity of the render.

optional arguments:
\tflags\t\tThe flags of the render.
\toffset\t\tThe offset from the start of the render area of the sample. (default: 0)
\tlength\t\tThe length of the stretched area in milliseconds. (default: 1000)
\tconsonant\tThe unstretched area of the render in milliseconds. (default: 0)
\tcutoff\t\tThe cutoff from the end or from the offset for the render area of the sample. (default: 0)
\tvolume\t\tThe volume of the render in percentage. (default: 100)
\tmodulation\tThe pitch modulation of the render in percentage. (default: 0)
\ttempo\t\tThe tempo of the render. Needs to have a ! at the start. (default: !100)
\tpitch_string\tThe UTAU pitchbend parameter written in Base64 with RLE encoding. (default: AA)'''

# Semitone offset of every note name within an octave (C = 0 ... B = 11).
notes = {
    name: semitone
    for semitone, name in enumerate(
        ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    )
}
# Splits a note name such as "A#4" or "C-1" into (name, octave).
note_re = re.compile(r'([A-G]#?)(-?\d+)')
# Sampling rate used for all analysis, synthesis and output.
default_fs = 44100
# FFT size WORLD's CheapTrick uses at default_fs with the default F0 floor.
fft_size = world.get_cheaptrick_fft_size(default_fs, world.default_f0_floor)
# Suffix of the cached feature file stored next to each input sample.
cache_ext = '.sc.npz'

# F0 search range: WORLD's default floor with a raised ceiling.
f0_floor = world.default_f0_floor
f0_ceil = 1760

# Recognised flag names. Longer names are listed before the single letters
# they contain so the regex alternation tries them first.
flags = ['fe', 'fl', 'fo', 'fv', 'fp', 've', 'vo', 'g', 't', 'A', 'B', 'G', 'P', 'S', 'p', 'R', 'D', 'C']
# Matches one flag name followed by an optional signed integer value.
flag_re = re.compile('({})([+-]?\\d+)?'.format('|'.join(flags)))
|
| 65 |
+
|
| 66 |
+
# Utility functions
|
| 67 |
+
@vectorize([float64(float64, float64, float64)], nopython=True)
def smoothstep(edge0, edge1, x):
    """GLSL-style smoothstep compiled as an element-wise ufunc.

    ``x`` is rescaled so that ``edge0`` maps to 0 and ``edge1`` maps to 1,
    clamped to [0, 1], and passed through the polynomial ``3t^2 - 2t^3``.
    """
    t = (x - edge0) / (edge1 - edge0)
    if t <= 0:
        # Clamped low end: the polynomial is 0 at t = 0.
        return 0.0
    if t >= 1:
        # Clamped high end: the polynomial is 1 at t = 1.
        return 1.0
    return 3*t*t - 2*t*t*t
|
| 76 |
+
|
| 77 |
+
@vectorize([float64(float64, float64, float64)], nopython=True)
def clip(x, x_min, x_max):
    """Element-wise clamp of ``x`` to the interval [x_min, x_max].

    The lower bound is tested first, so it wins if the bounds are inverted.
    """
    return x_min if x < x_min else (x_max if x > x_max else x)
|
| 85 |
+
|
| 86 |
+
@vectorize([float64(float64, float64)], nopython=True)
def bias(x, a):
    """Element-wise Schlick bias curve of ``x`` with bias amount ``a``.

    ``a == 0`` and ``a == 1`` are handled explicitly (returning 0 and 1
    respectively); the former avoids evaluating ``1 / a``.
    """
    if a == 0:
        return 0.0
    if a == 1:
        return 1.0
    slope = 1 / a - 2
    return x / (slope * (1 - x) + 1)
|
| 94 |
+
|
| 95 |
+
def highpass(x, fs=44100, cutoff=3000, order=1):
    """Zero-phase Butterworth highpass filter.

    The filter is applied forwards and backwards by ``sosfiltfilt``, so the
    effective order is twice ``order``.

    Parameters
    ----------
    x : ndarray
        Signal to filter.

    fs : float
        Sampling rate of ``x`` in Hz.

    cutoff : float
        Cutoff frequency in Hz.

    order : int
        Order of the single-pass Butterworth design.

    Returns
    -------
    ndarray
        The filtered signal.
    """
    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    normalized_cutoff = cutoff / (0.5 * fs)
    sections = signal.butter(order, normalized_cutoff, btype='high', output='sos')
    return signal.sosfiltfilt(sections, x)
|
| 101 |
+
|
| 102 |
+
def lowpass(x, fs=44100, cutoff=16000, order=1):
    """Zero-phase Butterworth lowpass filter.

    The filter is applied forwards and backwards by ``sosfiltfilt``, so the
    effective order is twice ``order``.

    Parameters
    ----------
    x : ndarray
        Signal to filter.

    fs : float
        Sampling rate of ``x`` in Hz.

    cutoff : float
        Cutoff frequency in Hz.

    order : int
        Order of the single-pass Butterworth design.

    Returns
    -------
    ndarray
        The filtered signal.
    """
    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    normalized_cutoff = cutoff / (0.5 * fs)
    sections = signal.butter(order, normalized_cutoff, btype='low', output='sos')
    return signal.sosfiltfilt(sections, x)
|
| 108 |
+
|
| 109 |
+
# Pitch string interpreter
|
| 110 |
+
def to_uint6(b64):
    """Convert one Base64 character to an unsigned 6-bit integer.

    Uses the standard Base64 alphabet: ``A-Z`` -> 0-25, ``a-z`` -> 26-51,
    ``0-9`` -> 52-61, ``+`` -> 62 and ``/`` -> 63.

    Parameters
    ----------
    b64 : str
        The Base64 character.

    Returns
    -------
    int
        The value of the Base64 character (0 to 63).

    Raises
    ------
    ValueError
        If the character is not part of the Base64 alphabet.
    """
    c = ord(b64)  # Convert based on ASCII mapping
    # Each range is bounded on both sides so that neighbouring punctuation
    # (e.g. ':' or '[') is rejected instead of decoding to a bogus value.
    if 65 <= c <= 90:  # 'A'-'Z'
        return c - 65
    if 97 <= c <= 122:  # 'a'-'z'
        return c - 71
    if 48 <= c <= 57:  # '0'-'9'
        return c + 4
    if c == 43:  # '+'
        return 62
    if c == 47:  # '/'
        return 63
    raise ValueError(f'Invalid Base64 character: {b64!r}')
|
| 136 |
+
|
| 137 |
+
def to_int12(b64):
    """Convert two Base64 characters to a signed 12-bit integer.

    Parameters
    ----------
    b64 : str
        The Base64 string; its first two characters are used.

    Returns
    -------
    int
        The two characters interpreted as a two's-complement 12-bit
        integer (-2048 to 2047).
    """
    high, low = to_uint6(b64[0]), to_uint6(b64[1])
    raw = (high << 6) | low  # first character holds the upper six bits
    if raw & 0x800:
        # Bit 11 is the sign bit: wrap into the negative range.
        raw -= 4096
    return raw
|
| 155 |
+
|
| 156 |
+
def to_int12_stream(b64):
    """Convert a Base64 string to a list of signed 12-bit integers.

    Parameters
    ----------
    b64 : str
        The Base64 string, read two characters at a time.

    Returns
    -------
    list
        One signed 12-bit integer for every pair of characters.
    """
    return [to_int12(b64[pos:pos + 2]) for pos in range(0, len(b64), 2)]
|
| 173 |
+
|
| 174 |
+
def pitch_string_to_cents(x):
    """Convert UTAU's pitchbend argument to an ndarray of pitch offsets in cents.

    The argument is split on ``#`` into alternating fields: Base64-encoded
    12-bit values, then a repeat count for the last decoded value, and so
    on. A trailing data field without a repeat count is decoded as is.

    Parameters
    ----------
    x : str
        The pitchbend argument.

    Returns
    -------
    ndarray
        The pitch offsets in cents. All zeros if every decoded value is
        identical; otherwise the decoded values followed by a single
        trailing zero. Empty if the argument decodes to no values.

    Raises
    ------
    ValueError
        If a repeat count is not an integer.
    """
    fields = x.split('#')  # Split RLE encoding
    res = []
    for i in range(0, len(fields), 2):
        res.extend(to_int12_stream(fields[i]))
        # A repeat count needs a previously decoded value to repeat; skip
        # it when nothing has been decoded yet instead of indexing res[-1].
        if i + 1 < len(fields) and res:
            res.extend([res[-1]] * int(fields[i + 1]))

    res = np.array(res, dtype=np.int32)
    if res.size == 0:
        # Nothing decoded (e.g. empty argument): no pitchbend data at all.
        return np.zeros(0)
    if np.all(res == res[0]):
        return np.zeros(res.shape)
    return np.concatenate([res, np.zeros(1)])
|
| 205 |
+
|
| 206 |
+
# Pitch conversion
|
| 207 |
+
def note_to_midi(x):
    """Convert a note name such as ``'A4'`` or ``'C#-1'`` to a MIDI note number.

    Parameters
    ----------
    x : str
        Note name: a letter ``A``-``G``, an optional ``#`` and an octave.

    Returns
    -------
    int
        The MIDI note number (``C-1`` is 0, ``A4`` is 69).

    Raises
    ------
    ValueError
        If ``x`` does not start with a valid note name.
    """
    match = note_re.match(x)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'Invalid note name: {x!r}')
    note, octave = match.group(1, 2)
    return (int(octave) + 1) * 12 + notes[note]
|
| 212 |
+
|
| 213 |
+
def midi_to_hz(x):
    """Convert MIDI note numbers to Hertz (equal temperament, A4 = 69 = 440 Hz).

    Works on scalars and on numpy arrays.
    """
    semitones_from_a4 = x - 69
    return 440 * np.exp2(semitones_from_a4 / 12)
|
| 216 |
+
|
| 217 |
+
##def hz_to_midi(x):
|
| 218 |
+
## return 12 * np.log2(x / 440) + 69
|
| 219 |
+
|
| 220 |
+
# WAV read/write
|
| 221 |
+
def read_wav(loc):
    """Read an audio file supported by soundfile as mono audio at ``default_fs``.

    If ``loc`` does not exist, the same path is retried with the extension
    of every format soundfile reports as available.

    Parameters
    ----------
    loc : str or path-like
        Input audio file.

    Returns
    -------
    ndarray
        Audio data, mixed down to mono and resampled to ``default_fs``
        when needed.

    Raises
    ------
    FileNotFoundError
        If neither ``loc`` nor any alternative extension exists.
    """
    if isinstance(loc, (str, os.PathLike)):  # make sure input is a Path
        loc = Path(loc)

    source = loc
    if not source.exists():
        # Look for the same file name with another supported extension,
        # keeping ``loc`` itself untouched while searching.
        for ext in sf.available_formats():
            candidate = loc.with_suffix('.' + ext.lower())
            if candidate.exists():
                source = candidate
                break
        else:
            raise FileNotFoundError("No supported audio file was found.")

    x, fs = sf.read(source)
    if x.ndim == 2:
        # Average all channels... Probably not too good for formats bigger than stereo
        x = np.mean(x, axis=1)

    if fs != default_fs:
        x = resampy.resample(x, fs, default_fs)

    return x
|
| 257 |
+
|
| 258 |
+
def save_wav(loc, x):
    """Write audio data to a 16-bit PCM file at ``default_fs``.

    Parameters
    ----------
    loc : str or file
        Output WAV file.

    x : ndarray
        Audio data to write.

    Returns
    -------
    None
    """
    sf.write(loc, x, samplerate=default_fs, subtype='PCM_16')
|
| 274 |
+
|
| 275 |
+
# Processing WORLD things
|
| 276 |
+
@njit(float64(float64[:], optional(float64), optional(float64)))
def _jit_base_frq(f0, f0_min, f0_max):
    """Weighted average of the F0 values that lie within [f0_min, f0_max].

    Each in-range frame is weighted by ``2 ** (-q * q)``, where ``q`` is the
    local F0 slope (one-sided difference at the ends, central difference
    elsewhere), so flat regions dominate the average. Missing limits fall
    back to the module-level ``f0_floor`` / ``f0_ceil``. Returns 0 when no
    frame is in range.
    """
    q = 0.0
    avg_frq = 0.0
    tally = 0.0
    N = len(f0)

    if f0_min is None:
        f0_min = f0_floor

    if f0_max is None:
        f0_max = f0_ceil

    for i in range(N):
        if f0[i] >= f0_min and f0[i] <= f0_max:
            if N == 1:
                # A single frame has no neighbour to difference against;
                # reading f0[1] here would be out of bounds.
                q = 0.0
            elif i < 1:
                q = f0[i+1] - f0[i]
            elif i == N - 1:
                q = f0[i] - f0[i-1]
            else:
                q = (f0[i+1] - f0[i-1]) / 2
            weight = 2 ** (-q * q)
            avg_frq += f0[i] * weight
            tally += weight

    if tally > 0:
        avg_frq /= tally
    return avg_frq
|
| 304 |
+
|
| 305 |
+
def base_frq(f0, f0_min=None, f0_max=None):
    """Get average F0 with a stronger bias on flatter areas.

    Parameters
    ----------
    f0 : list or ndarray
        Array of F0 values.

    f0_min : float, optional
        Lower F0 limit.

    f0_max : float, optional
        Upper F0 limit.

    Returns
    -------
    float
        Average F0.
    """
    # The compiled helper only accepts 1-D float64 arrays, so coerce lists
    # and other dtypes here (a no-op for float64 ndarrays).
    f0 = np.asarray(f0, dtype=np.float64)
    return _jit_base_frq(f0, f0_min, f0_max)
|
| 325 |
+
|
| 326 |
+
class Resampler:
    """
    A class for the UTAU resampling process.

    Attributes
    ----------
    in_file : pathlib.Path
        Path to input file.

    out_file : str or pathlib.Path
        Path to output file. ``'nul'`` skips rendering.

    pitch : int
        The pitch of the note as a MIDI note number.

    velocity : float
        The consonant velocity of the note.

    flags : dict
        Parsed flags, mapping flag name to its integer value (``None`` when
        the flag was given without a value).

    offset : float
        The offset from the start for the render area of the sample, in milliseconds.

    length : int
        The length of the stretched area in milliseconds.

    consonant : float
        The unstretched area of the render, in milliseconds.

    cutoff : float
        The cutoff from the end or from the offset for the render area of the sample, in milliseconds.

    volume : float
        The volume of the note in percentage.

    modulation : float
        The modulation of the note in percentage.

    tempo : float
        The tempo of the note.

    pitchbend : ndarray
        The decoded UTAU pitchbend parameter, in cents.

    Methods
    -------
    render(self):
        The rendering workflow. Immediately starts when class is initialized.

    get_features(self):
        Gets the WORLD features either from a cached file or generating it if it doesn't exist.

    generate_features(self, features_path):
        Generates WORLD features and saves it for later.

    resample(self, features):
        Renders a WAV file using the passed WORLD features.
    """
    def __init__(self, in_file, out_file, pitch, velocity, flags='', offset=0, length=1000, consonant=0, cutoff=0, volume=100, modulation=0, tempo='!100', pitch_string='AA'):
        """Initializes the renderer and immediately starts it.

        Parameters
        ---------
        in_file : str
            Path to input file.

        out_file : str
            Path to output file.

        pitch : str
            The pitch of the note.

        velocity : str or float
            The consonant velocity of the note.

        flags : str
            The flags of the note.

        offset : str or float
            The offset from the start for the render area of the sample.

        length : str or int
            The length of the stretched area in milliseconds.

        consonant : str or float
            The unstretched area of the render.

        cutoff : str or float
            The cutoff from the end or from the offset for the render area of the sample.

        volume : str or float
            The volume of the note in percentage.

        modulation : str or float
            The modulation of the note in percentage.

        tempo : str
            The tempo of the note, normally prefixed with ``!``.

        pitch_string : str
            The UTAU pitchbend parameter.
        """
        self.in_file = Path(in_file)
        self.out_file = out_file
        self.pitch = note_to_midi(pitch)
        self.velocity = float(velocity)
        self.flags = {k : int(v) if v else None for k, v in flag_re.findall(flags.replace('/', ''))}
        self.offset = float(offset)
        self.length = int(length)
        self.consonant = float(consonant)
        self.cutoff = float(cutoff)
        self.volume = float(volume)
        self.modulation = float(modulation)
        # Strip the '!' marker explicitly so a tempo given without it does
        # not silently lose its first digit.
        self.tempo = float(str(tempo).lstrip('!'))
        self.pitchbend = pitch_string_to_cents(pitch_string)

        self.render()

    def render(self):
        """The rendering workflow. Immediately starts when class is initialized.

        Parameters
        ----------
        None
        """
        features = self.get_features()
        self.resample(features)

    def get_features(self):
        """Gets the WORLD features either from a cached file or generating it if it doesn't exist.

        The cache lives next to the input file, with the input's suffix
        replaced by ``cache_ext``. The ``G`` flag forces regeneration.

        Parameters
        ----------
        None

        Returns
        -------
        features : dict
            A dictionary of the F0, MGC, BAP, and average F0.
        """
        # Setup cache path file
        features_path = self.in_file.with_suffix(cache_ext)

        if 'G' in self.flags:
            logging.info('G flag exists. Forcing feature generation.')
            return self.generate_features(features_path)

        if features_path.exists():
            # Load if it exists
            logging.info(f'Reading {features_path.name}.')
            # Copy the arrays out so the archive's file handle is closed
            # instead of staying open for the lifetime of the render.
            with np.load(features_path) as cached:
                return {key: cached[key] for key in cached.files}

        # Generate if not
        logging.info(f'{features_path.name} not found. Generating features.')
        return self.generate_features(features_path)

    def generate_features(self, features_path):
        """Generates WORLD features and saves it for later.

        Parameters
        ----------
        features_path : str or file
            The path for caching the features.

        Returns
        -------
        features : dict
            A dictionary of the F0, MGC, BAP, and average F0.
        """
        x = read_wav(self.in_file)

        # Check if audio is long enough
        min_samples = int(default_fs * 0.1)  # at least 100 ms
        if len(x) < min_samples:
            logging.warning(f'Audio too short ({len(x)} samples < {min_samples}). Padding with zeros.')
            # Pad with silence to guarantee the minimum length
            x = np.pad(x, (0, min_samples - len(x)), mode='constant', constant_values=0)

        logging.info('Generating F0.')
        f0, t = world.harvest(x, default_fs, f0_floor=f0_floor, f0_ceil=f0_ceil)
        base_f0 = base_frq(f0)

        logging.info('Generating spectral envelope.')
        sp = world.cheaptrick(x, f0, t, default_fs)
        mgc = world.code_spectral_envelope(sp, default_fs, 64)

        logging.info('Generating aperiodicity.')
        ap = world.d4c(x, f0, t, default_fs, threshold=0.25)
        bap = world.code_aperiodicity(ap, default_fs)

        logging.info('Saving features.')

        features = {'base' : base_f0, 'f0' : f0, 'mgc' : mgc, 'bap' : bap}
        np.savez_compressed(features_path, **features)

        return features

    def resample(self, features):
        """Renders a WAV file using the passed WORLD features.

        The features are decoded, time-stretched to the requested length,
        re-pitched from the note pitch plus pitchbend, modified according
        to the flags, synthesized with WORLD and written to ``out_file``.

        Parameters
        ----------
        features : dict
            A dictionary of the F0, MGC, BAP, and average F0.

        Returns
        -------
        None
        """
        if self.out_file == 'nul':
            logging.info('Null output file. Skipping...')
            return

        self.out_file = Path(self.out_file)

        # Convert percentages to decimal
        vel = np.exp2(1 - self.velocity / 100) # convel is more a multiplier...
        vol = self.volume / 100
        mod = self.modulation / 100

        logging.info('Decoding WORLD features.')
        # Recalculate spectral envelope and aperiodicity
        sp = world.decode_spectral_envelope(features['mgc'], default_fs, fft_size)
        ap = world.decode_aperiodicity(features['bap'], default_fs, fft_size)

        # Turn F0 to offset map for modulation
        base_f0 = features['base']
        # Work on a copy so the caller's feature array is not modified.
        f0 = np.array(features['f0'])
        f0[f0 == 0] = base_f0
        f0_off = f0 - base_f0

        # Calculate temporal positions
        t_area = np.arange(len(f0)) * 0.005

        logging.info('Calculating timing.') # use seconds instead of 5ms terms cuz someone gave me negative offsets </3
        start = self.offset / 1000 # start time
        end = self.cutoff / 1000 # end time
        if self.cutoff < 0: # deal with relative end time
            end = start - end
        else:
            end = t_area[-1] - end
        con = start + self.consonant / 1000 # consonant

        logging.info('Preparing interpolators.')
        # Check if we have enough data points for interpolation
        if len(t_area) < 2 or len(f0_off) < 2:
            logging.error(f'Insufficient data for interpolation: t_area={len(t_area)}, f0_off={len(f0_off)}')
            # Create a minimal valid signal
            if len(t_area) < 2:
                t_area = np.array([0.0, 0.01]) # 10ms minimum
            if len(f0_off) < 2:
                f0_off = np.array([0.0, 0.0])
            if len(sp) < 2:
                sp = np.repeat(sp[:1], 2, axis=0) if len(sp) == 1 else np.zeros((2, fft_size//2+1))
            if len(ap) < 2:
                ap = np.repeat(ap[:1], 2, axis=0) if len(ap) == 1 else np.zeros((2, fft_size//2+1))

        # Make interpolators to render new areas
        f0_off_interp = interp.UnivariateSpline(t_area, f0_off, s=0, ext='const')
        sp_interp = interp.Akima1DInterpolator(t_area, sp)
        ap_interp = interp.Akima1DInterpolator(t_area, ap)

        # Make new temporal positions array for stretching
        # temporal positions of the unstretched area. can be stretched because of velocity
        # (a negative consonant length yields no frames instead of an error)
        n_consonant = max(int(vel * self.consonant / 5), 0)
        t_consonant = np.linspace(start, con, num=n_consonant, endpoint=False)
        # stretched area only needs to stretch if the length required is longer than the stretch area
        length_req = self.length / 1000
        stretch_length = end - con
        if stretch_length > length_req:
            # position of consonant in the temporal positions array; clamped
            # so a negative consonant time cannot wrap around to the end
            con_idx = max(int(200 * con), 0)
            len_idx = int(200 * length_req) # length of length required by 5ms frames
            t_stretch = t_area[con_idx:con_idx+len_idx]
        else:
            t_stretch = np.linspace(con, end, num=int(200 * length_req))

        t_render = clip(np.concatenate([t_consonant, t_stretch]), 0, t_area[-1]) # concatenate and clip for interpolation
        con = len(t_consonant) # new placement of the consonant, now in 5ms frame terms...
        # Time of the consonant boundary in the rendered timeline. Equal to
        # t[con] below, but also valid when the stretched area is empty.
        con_time = con * 0.005

        logging.info('Interpolating WORLD features.')
        # Interpolate render area
        f0_off_render = f0_off_interp(t_render)
        sp_render = sp_interp(t_render)
        ap_render = clip(ap_interp(t_render), 0, 1) # aperiodicity freaks out if not within [0, 1] range

        # Calculate new temporal positions for tuning
        t = np.arange(len(sp_render)) * 0.005

        logging.info('Calculating pitch.')
        # Calculate pitch in MIDI note number terms
        pitch = self.pitchbend / 100 + self.pitch
        t_pitch = 60 * np.arange(len(pitch)) / (self.tempo * 96)

        # Check if we have enough pitch data points
        if len(pitch) < 2 or len(t_pitch) < 2:
            logging.warning(f'Insufficient pitch data: len(pitch)={len(pitch)}, len(t_pitch)={len(t_pitch)}')
            # Create minimal pitch data
            if len(pitch) < 2:
                pitch = np.array([self.pitch, self.pitch])
            if len(t_pitch) < 2:
                t_pitch = np.array([0.0, 0.01])

        pitch_interp = interp.Akima1DInterpolator(t_pitch, pitch)
        pitch_render = pitch_interp(clip(t, 0, t_pitch[-1]))

        logging.info('Checking flags.')
        # Flag interpretation area
        ### BEFORE HZ CONVERSION FLAGS ###
        # Pitch offset flag
        if 't' in self.flags:
            pitch_render += self.flags['t'] / 100

        # Convert pitch to Hertz and add F0 offset for modulation
        f0_render = midi_to_hz(pitch_render) + f0_off_render * mod

        ### BEFORE RENDER FLAGS ###
        # Vocal Fry flag
        if 'fe' in self.flags:
            logging.info('Adding vocal fry.')
            fry = self.flags['fe'] / 1000
            fry_len = 0.075
            fry_offset = 0
            fry_pitch = f0_floor
            if 'fl' in self.flags: # check length flag
                fry_len = max(self.flags['fl'] / 1000, 0.001)

            if 'fo' in self.flags:
                fry_offset = self.flags['fo'] / 1000

            if 'fp' in self.flags:
                fry_pitch = max(self.flags['fp'], 0)

            # Prepare envelope
            t_fry = t - con_time - fry_offset # temporal positions centered around the consonant shifted by offset
            amt = smoothstep(-fry - fry_len / 2, -fry + fry_len / 2, t_fry) * smoothstep(fry_len / 2, -fry_len / 2, t_fry) #fry envelope

            f0_render = f0_render * (1 - amt) + fry_pitch * amt # mix low F0 for fry

        # Gender/Formant shift flag
        if 'g' in self.flags:
            logging.info('Shifting formants.')
            gender = np.exp2(self.flags['g'] / 120)

            freq_x = np.linspace(0, 1, fft_size // 2 + 1) # map spectral envelope by frequency instead of time
            sp_render_interp = interp.Akima1DInterpolator(freq_x, sp_render, axis=1)

            # stretch spectral envelope depending on gender
            freq_x = clip(np.linspace(0, gender, fft_size // 2 + 1), 0, 1) # clip axis because Akima1DInterpolator doesn't extrapolate (or even just extend)
            sp_render = sp_render_interp(freq_x).copy(order='C')

        # map unvoicedness (kinda like voisona huskiness)
        husk = np.mean(ap_render, axis=1)

        # Breathiness flag
        if 'B' in self.flags:
            breath = self.flags['B']
            if breath <= 50: # Raise power to flatten smaller areas and keep max aperiodicity
                logging.info('Lowering breathiness.')
                breath = breath / 100
                ap_render = bias(ap_render, breath)
                ap_render[np.isclose(husk, 1),:] = 1 # make sure unvoiced areas stay unvoiced... only happens if breathiness is 0 but too much if statements
        else:
            breath = 0

        # Distortion flag
        if 'D' in self.flags:
            logging.info('Adding distortion.')
            distortion_amount = clip(self.flags['D'], 0, 100)
            # Scaling can push aperiodicity past 1, so clamp it back into
            # the [0, 1] range the synthesizer needs.
            ap_render = clip(ap_render * (distortion_amount / 10), 0, 1)
            f0_render = f0_render + np.random.normal(0, distortion_amount, len(f0_render))

        # Coarseness flag
        if 'C' in self.flags:
            logging.info('Adding coarseness.')
            # Every sixth frame is forced to 60 Hz; the flag's value is
            # currently not used, only its presence.
            f0_render[::6] = 60

        # Peak compressor flag
        flag_peak = self.flags.get('P', 86)

        if flag_peak > 0:
            # Threshold as a fraction of the loudest frame. Floored so that
            # P >= 100 cannot produce a zero or negative divisor below.
            peak = max(1 - flag_peak / 100, 0.01)

            rms = np.sqrt(2 * np.sum(sp_render, axis=1) / fft_size ** 2 + 0.000001) # get RMS.. i'm not sure if this is right but i think it's fine
            rms_peak = np.max(rms)
            rms_norm = rms / (peak * rms_peak)

            comp = np.zeros(rms_norm.shape)
            comp[rms_norm >= 1] = rms_norm[rms_norm >= 1] - 1
            comp = (1 - peak) * comp / np.max(comp)
            comp = 1 - comp

            comp = ndimage.gaussian_filter1d(comp, 6)

            # Apply the per-frame gain across all frequency bins.
            comp = comp[:, np.newaxis]
            sp_render *= comp
            ap_render *= comp

        # remove pitch in areas with max aperiodicity
        f0_render[np.isclose(husk, 1)] = 0
        render = world.synthesize(f0_render, sp_render, ap_render, default_fs)

        ### AFTER RENDER FLAGS ###
        # Max aperiodicity flag
        if 'S' in self.flags:
            amt = clip(self.flags['S'] / 100, 0, 1)
            render_ap = world.synthesize(f0_render, sp_render, np.ones(ap_render.shape), default_fs)
            render = render * (1 - amt) + render_ap * amt

        if breath > 50: # mix max breathiness signal
            logging.info('Raising breathiness.')
            breath = clip((breath - 50) / 50, 0, 1)
            render_breath = world.synthesize(f0_render, sp_render * np.square(ap_render), np.ones(ap_render.shape), default_fs) # apply band AP on regular specgram, max out ap

            render = render * (1 - breath) + render_breath * breath # Mix signals

        t_sample = np.arange(len(render)) / default_fs # temporal position per sample
        if 'fe' in self.flags:
            fry = self.flags['fe'] / 1000
            fry_len = 0.05
            fry_offset = 0
            fry_vol = 0.1
            if 'fl' in self.flags: # check length flag
                fry_len = max(self.flags['fl'] / 1000, 0.001)

            if 'fo' in self.flags:
                fry_offset = self.flags['fo'] / 1000

            if 'fv' in self.flags:
                fry_vol = clip(self.flags['fv'] / 100, 0, 1)

            # Prepare envelope
            t_fry = t_sample - con_time - fry_offset # temporal positions centered around the consonant shifted by offset
            amt = smoothstep(-fry - fry_len / 2, -fry + fry_len / 2, t_fry) * smoothstep(fry_len / 2, -fry_len / 2, t_fry) #fry envelope
            env = 1 - amt + fry_vol * amt

            render_hp = highpass(render, cutoff=300) # add a highpass through the fry area
            render = render * (1 - amt) + render_hp * amt
            render *= env

        # Fix voicing flag
        if 've' in self.flags:
            logging.info('Fixing voicing.')
            end_breath = self.flags['ve'] / 1000
            render_breath = world.synthesize(f0_render, sp_render * np.square(ap_render), np.ones(ap_render.shape), default_fs) # apply band AP on regular specgram, max out ap

            offset = 0
            if 'vo' in self.flags: # check offset flag
                offset = self.flags['vo'] / 1000
            logging.info(offset)

            amt = smoothstep(-end_breath / 2, end_breath / 2, t_sample - con_time - offset) # smoothstep with consonant at 0.5
            render = render * (1 - amt) + render_breath * amt # mix sample based on envelope

        normalize = self.flags.get('p', 6)

        if normalize >= 0:
            # Normalize on the absolute peak so a dominant negative excursion
            # cannot overshoot, and leave silent renders untouched rather
            # than dividing by zero.
            peak_amp = np.max(np.abs(render)) if render.size else 0
            if peak_amp > 0:
                render = render / peak_amp * (10 ** (-normalize / 20))

        ### AFTER PEAK NORMALIZATION ###
        # Tremolo flag
        if 'A' in self.flags:
            logging.info('Adding tremolo.')
            tremolo = self.flags['A'] / 100

            pitch_sample = pitch_interp(clip(t_sample, 0, t_pitch[-1])) # probably bad because of how low the sampling rate is for the pitch
            pitch_smooth = lowpass(pitch_sample, cutoff=8, order=16)
            vibrato = highpass(pitch_smooth, cutoff=4, order=16)

            amt = np.maximum(tremolo * vibrato + 1, 0)
            render = render * amt
        # Growl flag
        if 'R' in self.flags:
            logging.info('Adding tremolo growl flag.')
            depth = clip(self.flags['R'] / 100, 0, 1)

            rate = 75 # modulation frequency in Hz

            sine_wave = np.sin(2 * np.pi * rate * t_sample)

            render = render * (2 - depth * sine_wave) / 2

        render *= vol # volume
        save_wav(self.out_file, render)
|
| 815 |
+
|
| 816 |
+
if __name__ == '__main__':
    import inspect

    logging.info(f'straycat {version}')
    cli_args = sys.argv[1:]
    # Validate the argument count against Resampler's signature up front.
    # This shows the usage text only for a wrong number of arguments, while
    # a TypeError raised during the render itself still surfaces with its
    # traceback instead of being reported as a usage problem.
    try:
        inspect.signature(Resampler).bind(*cli_args)
    except TypeError:
        logging.info(help_string)
    else:
        Resampler(*cli_args)
|
test_compressed_voicebank.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
압축된 보이스뱅크 테스트 스크립트
|
| 4 |
+
압축 전후의 성능과 정확성을 비교합니다.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import time
|
| 8 |
+
import numpy as np
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from utau_engine import UTAUEngine
|
| 11 |
+
from compressed_utau_engine import CompressedUTAUEngine
|
| 12 |
+
import logging
|
| 13 |
+
|
| 14 |
+
logging.basicConfig(level=logging.INFO)
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
def test_voicebank_comparison():
    """Compare the original voicebank with its compressed HDF5 counterpart.

    Prints, in order: load times, compression statistics, phoneme coverage,
    synthesis times for a three-note sequence, and deployment
    recommendations. The function always returns ``None``; it stops early
    (after printing a message) when the compressed voicebank file is missing
    or cannot be loaded. Sections that need the original voicebank are
    skipped when it is absent or fails to load.
    """
    # Path configuration
    original_path = "voice/hanseol CVC"
    compressed_path = "voice/hanseol_CVC_compressed.h5"

    print("🔍 보이스뱅크 비교 테스트 시작")
    print("=" * 50)

    # Nothing to compare without the compressed voicebank
    if not Path(compressed_path).exists():
        print("❌ 압축된 보이스뱅크를 찾을 수 없습니다.")
        print("먼저 voice_data_converter.py를 실행하세요.")
        return

    # 1. Load-time comparison
    print("\n📊 1. 로딩 시간 비교")

    # ``None`` means "original not available"; this replaces the former
    # float('inf') sentinel so the ratio below is only computed on real data.
    original_engine = None
    original_load_time = None

    if Path(original_path).exists():
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        try:
            original_engine = UTAUEngine(original_path)
            original_load_time = time.perf_counter() - start_time
            print(f" 원본 보이스뱅크 로딩: {original_load_time:.2f}초")
        except Exception as e:
            print(f" 원본 로딩 실패: {e}")
            original_engine = None
            original_load_time = None
    else:
        print(" 원본 보이스뱅크 없음")

    # Load the compressed version
    start_time = time.perf_counter()
    try:
        compressed_engine = CompressedUTAUEngine(compressed_path)
        compressed_load_time = time.perf_counter() - start_time
        print(f" 압축된 보이스뱅크 로딩: {compressed_load_time:.2f}초")

        # Guard the divisor: a zero elapsed time must not abort the report.
        if original_load_time is not None and compressed_load_time > 0:
            speedup = original_load_time / compressed_load_time
            print(f" 로딩 속도 개선: {speedup:.1f}배")

    except Exception as e:
        print(f" 압축된 버전 로딩 실패: {e}")
        return

    # 2. Compression statistics
    print("\n📊 2. 압축 정보")
    compression_info = compressed_engine.get_compression_info()
    print(f" 원본 크기: {compression_info['original_size_mb']:.1f} MB")
    print(f" 압축 크기: {compression_info['compressed_size_mb']:.1f} MB")
    print(f" 압축율: {compression_info['compression_ratio']:.1f}%")

    # 3. Phoneme counts
    print("\n📊 3. 음소 정보")
    compressed_phonemes = compressed_engine.get_available_phonemes()
    print(f" 압축된 버전 음소 수: {len(compressed_phonemes)}개")

    if original_engine is not None:
        original_phonemes = original_engine.get_available_phonemes()
        print(f" 원본 음소 수: {len(original_phonemes)}개")

        # Share of original phonemes that survived compression; skipped for
        # an empty original set, where the ratio is undefined.
        original_set = set(original_phonemes)
        compressed_set = set(compressed_phonemes)
        if original_set:
            match_rate = len(original_set & compressed_set) / len(original_set) * 100
            print(f" 음소 일치도: {match_rate:.1f}%")

    # 4. Synthesis benchmark
    print("\n📊 4. 합성 성능 테스트")

    # Three consecutive half-second notes: C4, E4, G4
    test_notes = [
        {
            "pitch": pitch,
            "startSeconds": start,
            "durationSeconds": 0.5,
            "endSeconds": start + 0.5,
            "velocity": 100,
        }
        for pitch, start in ((60, 0.0), (64, 0.5), (67, 1.0))
    ]

    test_lyrics = ["도", "미", "솔"]

    # Synthesis with the compressed voicebank
    start_time = time.perf_counter()
    _, compressed_status = compressed_engine.synthesize_sequence(
        test_notes, test_lyrics, tempo=120, volume=100
    )
    compressed_synth_time = time.perf_counter() - start_time

    print(f" 압축된 버전 합성 시간: {compressed_synth_time:.2f}초")
    print(f" 압축된 버전 상태: {compressed_status}")

    # Synthesis with the original voicebank (when available)
    if original_engine is not None:
        start_time = time.perf_counter()
        _, original_status = original_engine.synthesize_sequence(
            test_notes, test_lyrics, tempo=120, volume=100
        )
        original_synth_time = time.perf_counter() - start_time

        print(f" 원본 합성 시간: {original_synth_time:.2f}초")
        print(f" 원본 상태: {original_status}")

        # The divisor is the compressed time, so that is the value to guard.
        if compressed_synth_time > 0:
            speedup = original_synth_time / compressed_synth_time
            print(f" 합성 속도 개선: {speedup:.1f}배")

    # 5. Recommendations
    print("\n💡 5. 권장사항")
    print(" ✅ HDF5 압축 방식의 장점:")
    print(" - 단일 파일로 관리 용이")
    print(" - 높은 압축율로 저장공간 절약")
    print(" - 빠른 랜덤 액세스")
    print(" - Hugging Face Spaces 최적화")
    print(" ✅ Gradio/HF Spaces 배포 시:")
    print(" - 압축된 .h5 파일만 업로드")
    print(" - 원본 WAV 파일들은 제외")
    print(" - 빠른 앱 시작 시간")
    print(" - 낮은 스토리지 비용")
|
| 156 |
+
|
| 157 |
+
if __name__ == "__main__":
    # Run the comparison only when executed as a script, not on import.
    test_voicebank_comparison()
|
utau_engine.py
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import logging
|
| 4 |
+
import tempfile
|
| 5 |
+
import numpy as np
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Dict, List, Optional, Tuple, Union
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from straycat import Resampler
|
| 11 |
+
|
| 12 |
+
# 로깅 설정
|
| 13 |
+
logging.basicConfig(level=logging.INFO)
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
@dataclass
class OtoEntry:
    """A single record of a UTAU ``oto.ini`` file."""
    filename: str        # WAV file name
    alias: str           # alias (pronunciation)
    offset: float        # offset (ms)
    consonant: float     # consonant length (ms)
    cutoff: float        # cutoff (ms)
    preutterance: float  # pre-utterance (ms)
    overlap: float       # overlap (ms)

    @property
    def is_breath(self) -> bool:
        """Whether the alias is marked as a breath/silence entry ('-' or '*' prefix)."""
        return self.alias.startswith(('-', '*'))

    @property
    def clean_alias(self) -> str:
        """The alias with its leading marker removed, if it has one."""
        # Two-character markers come first so the separating space is
        # stripped together with the marker.
        for marker in ('- ', '* ', '-', '*'):
            if self.alias.startswith(marker):
                return self.alias[len(marker):]
        return self.alias
|
| 45 |
+
|
| 46 |
+
class VoicebankManager:
    """Load and index a UTAU voicebank directory.

    On construction the directory is scanned for ``*.wav`` files (top level
    plus one level of sub-folders) and its top-level ``oto.ini`` is parsed
    into ``OtoEntry`` objects stored in ``oto_entries`` keyed by alias.
    """

    # Encodings tried in order when reading oto.ini. UTF-8 goes first because
    # it is strictly validated: legacy-encoded text almost never decodes as
    # UTF-8, whereas UTF-8 bytes can decode "successfully" as Shift_JIS and
    # yield garbage aliases. 'utf-8-sig' additionally drops a leading BOM.
    _OTO_ENCODINGS = ('utf-8-sig', 'shift_jis', 'cp932', 'euc-jp')

    def __init__(self, voicebank_path: Union[str, Path]):
        self.voicebank_path = Path(voicebank_path)
        self.oto_entries: Dict[str, OtoEntry] = {}
        self.wav_files: Dict[str, Path] = {}
        self.load_voicebank()

    def load_voicebank(self):
        """Index the WAV files and parse ``oto.ini``.

        Raises:
            FileNotFoundError: if the voicebank directory or its ``oto.ini``
                does not exist.
        """
        if not self.voicebank_path.exists():
            raise FileNotFoundError(f"보이스뱅크 경로를 찾을 수 없습니다: {self.voicebank_path}")

        # Locate oto.ini
        oto_file = self.voicebank_path / "oto.ini"
        if not oto_file.exists():
            raise FileNotFoundError(f"oto.ini 파일을 찾을 수 없습니다: {oto_file}")

        self._index_wav_files()
        self._parse_oto_ini(oto_file)

        logger.info(f"보이스뱅크 로드 완료: {len(self.oto_entries)}개 엔트리, {len(self.wav_files)}개 WAV 파일")

    def _index_wav_files(self):
        """Map WAV file names to their paths (top level + direct sub-folders)."""
        for wav_file in self.voicebank_path.glob("*.wav"):
            self.wav_files[wav_file.name] = wav_file

        # Sub-folders are visited in sorted order so that, when the same file
        # name exists in several folders, the winner is deterministic.
        for subfolder in sorted(self.voicebank_path.iterdir()):
            if subfolder.is_dir():
                for wav_file in subfolder.glob("*.wav"):
                    self.wav_files[wav_file.name] = wav_file

    def _read_oto_text(self, oto_file: Path) -> str:
        """Return the decoded text of *oto_file* using the first encoding that fits."""
        for encoding in self._OTO_ENCODINGS:
            try:
                content = oto_file.read_text(encoding=encoding)
            except UnicodeDecodeError:
                continue
            logger.info(f"oto.ini를 {encoding} 인코딩으로 읽었습니다.")
            return content
        raise Exception("oto.ini 파일을 읽을 수 없습니다. 인코딩 문제가 있을 수 있습니다.")

    def _parse_oto_ini(self, oto_file: Path):
        """Parse every line of ``oto.ini``.

        Lines that fail to parse are logged and skipped. A failure to read
        the file at all is logged and re-raised.
        """
        try:
            content = self._read_oto_text(oto_file)

            for line_num, line in enumerate(content.strip().split('\n'), 1):
                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    self._parse_oto_line(line)
                except Exception as e:
                    logger.warning(f"oto.ini {line_num}번째 줄 파싱 실패: {e}")
                    continue

        except Exception as e:
            logger.error(f"oto.ini 파싱 실패: {e}")
            raise

    def _parse_oto_line(self, line: str):
        """Parse one ``oto.ini`` line and register the resulting entry.

        Expected format:
        ``filename=alias,offset,consonant,cutoff,preutterance,overlap``.
        Lines without ``=`` are ignored; lines with a wrong field count or a
        non-numeric value are logged and skipped.
        """
        if '=' not in line:
            return

        filename, params = line.split('=', 1)
        parts = params.split(',')

        if len(parts) != 6:
            logger.warning(f"잘못된 oto.ini 형식: {line}")
            return

        try:
            # Blank numeric fields are read as 0 instead of discarding the
            # whole entry.
            offset, consonant, cutoff, preutterance, overlap = (
                float(field) if field.strip() else 0.0 for field in parts[1:]
            )
        except ValueError as e:
            logger.warning(f"oto.ini 파라미터 파싱 실패: {line} - {e}")
            return

        # A blank alias falls back to the file name without extension;
        # otherwise every alias-less entry would overwrite the same '' key.
        alias = parts[0] or Path(filename).stem

        self.oto_entries[alias] = OtoEntry(
            filename=filename,
            alias=alias,
            offset=offset,
            consonant=consonant,
            cutoff=cutoff,
            preutterance=preutterance,
            overlap=overlap,
        )

    def get_sample_for_phoneme(self, phoneme: str) -> Optional["OtoEntry"]:
        """Return the entry for *phoneme*, or ``None`` when there is none.

        An exact alias match wins. Otherwise entries whose ``clean_alias``
        equals *phoneme* are considered, preferring non-breath entries.
        """
        exact = self.oto_entries.get(phoneme)
        if exact is not None:
            return exact

        candidates = [
            entry for entry in self.oto_entries.values()
            if entry.clean_alias == phoneme
        ]
        for entry in candidates:
            if not entry.is_breath:
                return entry
        return candidates[0] if candidates else None

    def get_wav_path(self, filename: str) -> Optional[Path]:
        """Return the indexed path of *filename*, or ``None`` if it is unknown."""
        return self.wav_files.get(filename)

    def list_available_phonemes(self) -> List[str]:
        """Return the distinct clean aliases of all entries (unordered)."""
        return list({entry.clean_alias for entry in self.oto_entries.values()})
|
| 182 |
+
|
| 183 |
+
class UTAUEngine:
    """UTAU-compatible singing-voice synthesis engine.

    Maps lyrics to voicebank phonemes, renders each note with the straycat
    ``Resampler`` and mixes the rendered notes into a single WAV file.
    """

    # Hangul syllable -> voicebank alias (written for the hanseol CVC bank).
    # NOTE(review): the original dict literal listed several keys more than
    # once; only the last occurrence took effect. The table below reproduces
    # that effective mapping exactly, which is why '라' -> 'la' while
    # '리' -> 'ri', and '파' -> 'fa' while '피' -> 'pi' (the solfege row came
    # last). Confirm against the voicebank whether 'ra' / 'pa' were intended.
    _KOREAN_MAP = {
        # Plain vowels
        '아': 'a', '이': 'i', '우': 'u', '에': 'e', '오': 'o', '으': 'eu', '어': 'eo',
        # Consonant + vowel
        '바': 'ba', '비': 'bi', '부': 'bu', '베': 'be', '보': 'bo', '브': 'beu', '버': 'beo',
        '다': 'da', '디': 'di', '두': 'du', '데': 'de', '도': 'do', '드': 'deu', '더': 'deo',
        '가': 'ga', '기': 'gi', '구': 'gu', '게': 'ge', '고': 'go', '그': 'geu', '거': 'geo',
        '하': 'ha', '히': 'hi', '후': 'hu', '헤': 'he', '호': 'ho', '흐': 'heu', '허': 'heo',
        '자': 'ja', '지': 'ji', '주': 'ju', '제': 'je', '조': 'jo', '즈': 'jeu', '저': 'jeo',
        '카': 'ka', '키': 'ki', '쿠': 'ku', '케': 'ke', '코': 'ko', '크': 'keu', '커': 'keo',
        '마': 'ma', '미': 'mi', '무': 'mu', '메': 'me', '모': 'mo', '므': 'meu', '머': 'meo',
        '나': 'na', '니': 'ni', '누': 'nu', '네': 'ne', '노': 'no', '느': 'neu', '너': 'neo',
        '파': 'fa', '피': 'pi', '푸': 'pu', '페': 'pe', '포': 'po', '프': 'peu', '퍼': 'peo',
        '라': 'la', '리': 'ri', '루': 'ru', '레': 're', '로': 'ro', '르': 'reu', '러': 'reo',
        '사': 'sa', '시': 'si', '수': 'su', '세': 'se', '소': 'so', '스': 'seu', '서': 'seo',
        '타': 'ta', '티': 'ti', '투': 'tu', '테': 'te', '토': 'to', '트': 'teu', '터': 'teo',
        # Compound vowels
        '야': 'ya', '예': 'ye', '여': 'yeo', '요': 'yo', '유': 'yu', '의': 'eui',
        '와': 'wa', '웨': 'we', '위': 'wi', '워': 'weo',
        # Solfege syllable not covered above
        '솔': 'so',
    }

    # Romaji -> hiragana (Japanese voicebanks)
    _ROMAJI_MAP = {
        'a': 'あ', 'i': 'い', 'u': 'う', 'e': 'え', 'o': 'お',
        'ka': 'か', 'ki': 'き', 'ku': 'く', 'ke': 'け', 'ko': 'こ',
        'sa': 'さ', 'shi': 'し', 'su': 'す', 'se': 'せ', 'so': 'そ',
        'ta': 'た', 'chi': 'ち', 'tsu': 'つ', 'te': 'て', 'to': 'と',
        'na': 'な', 'ni': 'に', 'nu': 'ぬ', 'ne': 'ね', 'no': 'の',
        'ha': 'は', 'hi': 'ひ', 'fu': 'ふ', 'he': 'へ', 'ho': 'ほ',
        'ma': 'ま', 'mi': 'み', 'mu': 'む', 'me': 'め', 'mo': 'も',
        'ya': 'や', 'yu': 'ゆ', 'yo': 'よ',
        'ra': 'ら', 'ri': 'り', 'ru': 'る', 're': 'れ', 'ro': 'ろ',
        'wa': 'わ', 'wo': 'を', 'n': 'ん',
    }

    _NOTE_NAMES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    _SAMPLE_RATE = 44100   # sample rate of the mixed output (Hz)
    _MIN_NOTE_MS = 200     # notes shorter than this are stretched to it

    def __init__(self, voicebank_path: Union[str, Path]):
        self.voicebank = VoicebankManager(voicebank_path)
        self.default_phoneme = "あ"  # fallback phoneme

    def synthesize_sequence(self,
                            notes: List[Dict],
                            lyrics: List[str],
                            tempo: int = 120,
                            volume: int = 100) -> Tuple[Optional[str], str]:
        """Synthesize a note sequence with one lyric per note.

        Args:
            notes: note dicts; the keys read here are ``pitch``,
                ``startSeconds``, ``durationSeconds``, ``endSeconds`` and
                ``velocity``.
            lyrics: one lyric per note, same length as *notes*.
            tempo: tempo forwarded to the resampler.
            volume: volume forwarded to the resampler.

        Returns:
            ``(wav_path, status_message)``. ``wav_path`` is the path of a
            temporary WAV file that is not deleted here, or ``None`` when
            nothing was synthesized or an error occurred. Errors are
            reported through the message, not raised.
        """
        if len(notes) != len(lyrics):
            return None, "노트와 가사의 개수가 일치하지 않습니다."

        if not notes:
            return None, "노트가 없습니다."

        try:
            sample_rate = self._SAMPLE_RATE

            # Total length: latest note end (seconds -> ms) plus one second
            # of tail room.
            max_end_time_seconds = max(
                note.get('endSeconds', note.get('startSeconds', 0) + note.get('durationSeconds', 0.5))
                for note in notes
            )
            max_end_time = max_end_time_seconds * 1000
            total_samples = int(max_end_time * sample_rate / 1000) + sample_rate
            final_audio = np.zeros(total_samples)

            synthesized_count = 0

            for index, (note, lyric) in enumerate(zip(notes, lyrics), start=1):
                try:
                    phoneme = self._lyric_to_phoneme(lyric)

                    # Look the sample up in the voicebank
                    oto_entry = self.voicebank.get_sample_for_phoneme(phoneme)
                    if not oto_entry:
                        logger.warning(f"음소 '{phoneme}' (가사: '{lyric}')에 해당하는 샘플을 찾을 수 없습니다.")
                        continue

                    wav_path = self.voicebank.get_wav_path(oto_entry.filename)
                    if not wav_path or not wav_path.exists():
                        logger.warning(f"WAV 파일을 찾을 수 없습니다: {oto_entry.filename}")
                        continue

                    synth_audio = self._synthesize_note(note, oto_entry, wav_path, tempo, volume)
                    if synth_audio is None:
                        continue

                    # Clamp to 0: a negative start would otherwise index the
                    # buffer from its end.
                    start_sample = max(int(note.get('startSeconds', 0) * sample_rate), 0)
                    final_audio = self._mix_into(final_audio, synth_audio, start_sample, sample_rate)

                    synthesized_count += 1
                    logger.info(f"노트 {index} 합성 완료: {lyric} -> {phoneme}")

                except Exception as e:
                    # One failing note must not abort the whole sequence.
                    logger.error(f"노트 {index} 합성 실패: {e}")
                    continue

            if synthesized_count == 0:
                return None, "합성된 노트가 없습니다."

            # Peak-normalize the mix to 0.8 full scale
            peak = np.max(np.abs(final_audio))
            if peak > 0:
                final_audio = final_audio / peak * 0.8

            # Reserve a temp file name and close the handle *before* writing,
            # so the file is not opened twice at once.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as output_file:
                output_path = output_file.name
            sf.write(output_path, final_audio, sample_rate)

            duration_sec = len(final_audio) / sample_rate
            return output_path, f"✅ UTAU 합성 완료: {synthesized_count}/{len(notes)}개 노트, {duration_sec:.1f}초"

        except Exception as e:
            error_msg = f"❌ UTAU 합성 중 오류: {str(e)}"
            logger.error(error_msg)
            return None, error_msg

    @staticmethod
    def _mix_into(buffer: np.ndarray, clip: np.ndarray, start: int, padding: int) -> np.ndarray:
        """Add *clip* into *buffer* at *start*; grow the buffer (plus *padding*) if needed."""
        end = start + len(clip)
        if end > len(buffer):
            grown = np.zeros(end + padding)
            grown[:len(buffer)] = buffer
            buffer = grown
        buffer[start:end] += clip
        return buffer

    def _lyric_to_phoneme(self, lyric: str) -> str:
        """Convert a lyric to a voicebank phoneme (Korean and Japanese tables).

        Lookup order: Korean syllable table, romaji table (case-insensitive),
        then the voicebank's own phoneme list. Blank or unknown lyrics yield
        ``self.default_phoneme``.
        """
        lyric = lyric.strip()

        if not lyric:
            return self.default_phoneme

        if lyric in self._KOREAN_MAP:
            return self._KOREAN_MAP[lyric]

        lyric_lower = lyric.lower()
        if lyric_lower in self._ROMAJI_MAP:
            return self._ROMAJI_MAP[lyric_lower]

        # The lyric may already be a phoneme of the voicebank
        if lyric in self.voicebank.list_available_phonemes():
            return lyric

        logger.warning(f"알 수 없는 가사: '{lyric}', 기본 음소 '{self.default_phoneme}' 사용")
        return self.default_phoneme

    def _synthesize_note(self,
                         note: Dict,
                         oto_entry: "OtoEntry",
                         wav_path: Path,
                         tempo: int,
                         volume: int) -> Optional[np.ndarray]:
        """Render one note with the resampler.

        Returns the rendered samples, or ``None`` on any failure (failures
        are logged, never raised).
        """
        try:
            note_name = self._midi_to_note_name(note['pitch'])

            # Note length in ms, with a lower bound
            duration = max(note.get('durationSeconds', 0.5) * 1000, self._MIN_NOTE_MS)

            # Only the unique name is needed; the resampler writes the file.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
                temp_output = temp_file.name

            try:
                # Sanitize the oto parameters
                offset = max(oto_entry.offset, 0)  # no negative offsets
                consonant = max(oto_entry.consonant if oto_entry.consonant > 0 else 50, 10)  # at least 10 ms
                cutoff = max(oto_entry.cutoff if oto_entry.cutoff > 0 else 0, 0)

                # Fit offset and consonant inside the actual WAV length
                try:
                    info = sf.info(wav_path)
                    wav_duration_ms = (info.frames / info.samplerate) * 1000

                    if offset >= wav_duration_ms - 100:  # keep 100 ms of margin
                        offset = max(0, wav_duration_ms - 200)
                        logger.warning(f"오프셋이 너무 큽니다. {offset}ms로 조정했습니다.")

                    max_consonant = min(duration / 2, wav_duration_ms - offset - 50)
                    consonant = min(consonant, max_consonant)

                except Exception as e:
                    # Best effort: continue with the unadjusted parameters.
                    logger.warning(f"WAV 파일 정보 확인 실패: {e}")

                # Enforce minimum lengths (the clamp above can go below 10 ms)
                if consonant < 10:
                    consonant = 10
                if duration < consonant + 50:
                    duration = consonant + 50

                logger.info(f"합성 파라미터: offset={offset:.1f}ms, consonant={consonant:.1f}ms, duration={duration:.1f}ms")

                # Render with straycat; the result is written to temp_output.
                Resampler(
                    in_file=str(wav_path),
                    out_file=temp_output,
                    pitch=note_name,
                    velocity=note.get('velocity', 100),
                    length=duration,
                    volume=volume,
                    flags='',
                    offset=offset,
                    consonant=consonant,
                    cutoff=cutoff,
                    modulation=0,
                    tempo=f'!{tempo}'
                )

                if not os.path.exists(temp_output):
                    logger.warning("합성 결과 파일이 생성되지 않았습니다.")
                    return None

                synth_audio, _ = sf.read(temp_output)

                if len(synth_audio) == 0:
                    logger.warning("합성된 오디오가 비어있습니다.")
                    return None

                return synth_audio

            except Exception as e:
                logger.error(f"straycat 합성 실패: {e}")
                return None

            finally:
                # Remove the temp file; a failed removal is not fatal.
                try:
                    os.unlink(temp_output)
                except OSError:
                    pass

        except Exception as e:
            logger.error(f"노트 합성 실패: {e}")
            return None

    def _midi_to_note_name(self, midi_note: int) -> str:
        """Convert a MIDI note number to a note name, e.g. ``60`` -> ``'C4'``."""
        octave = (midi_note // 12) - 1
        return f"{self._NOTE_NAMES[midi_note % 12]}{octave}"

    def get_available_phonemes(self) -> List[str]:
        """Return the phonemes available in the loaded voicebank."""
        return self.voicebank.list_available_phonemes()
|
| 448 |
+
|
| 449 |
+
# 테스트 함수
|
| 450 |
+
def test_utau_engine():
    """Smoke-test the engine against the hanseol CVC voicebank.

    Returns the loaded engine, or ``None`` when anything fails (the failure
    is printed, not raised).
    """
    try:
        engine = UTAUEngine("voice/hanseol CVC")
        print("hanseol CVC 보이스뱅크 로드 완료!")

        phonemes = engine.get_available_phonemes()
        print(f"사용 가능한 음소: {len(phonemes)}개")
        print(f"첫 10개 음소: {phonemes[:10]}")
    except Exception as e:
        print(f"UTAU 엔진 테스트 실패: {e}")
        return None

    return engine
|
| 465 |
+
|
| 466 |
+
if __name__ == "__main__":
    # Run the smoke test only when executed as a script, not on import.
    test_utau_engine()
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
voice/hanseol_CVC_compressed.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c9292e72186f3701d906307e393bb8ca6b0342cde6e883caae513daf2eff61
|
| 3 |
+
size 35548060
|
voice/test_voice.sc.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dd4f9cdc03422f3febdf9787cb6f41592b51b11ff9ced53e7baf67e844b5858
|
| 3 |
+
size 108682
|
voice/test_voice.wav
ADDED
|
Binary file (88.2 kB). View file
|
|
|
voice_data_converter.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import h5py
|
| 2 |
+
import numpy as np
|
| 3 |
+
import soundfile as sf
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import logging
|
| 6 |
+
import json
|
| 7 |
+
import gzip
|
| 8 |
+
from typing import Dict, List, Optional, Tuple, Union
|
| 9 |
+
import shutil
|
| 10 |
+
from utau_engine import VoicebankManager, OtoEntry
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
class VoiceDataCompressor:
    """Convert a UTAU voicebank directory into a single compressed HDF5 file.

    File layout written by :meth:`convert_voicebank_to_hdf5`:

    * ``metadata/oto_data`` -- gzip-compressed JSON of all oto.ini entries,
      stored as a ``uint8`` array; size statistics are stored as attributes
      of ``metadata``.
    * ``audio_data/<name>/audio`` -- mono ``float32`` samples per WAV file,
      with ``sample_rate``, ``duration`` and ``original_filename`` attributes.
    """

    def __init__(self, output_path: str = "voice_data.h5"):
        self.output_path = Path(output_path)
        self.compression = 'gzip'   # HDF5 compression filter
        self.compression_opts = 6   # compression level (0-9)

    def convert_voicebank_to_hdf5(self, voicebank_path: Union[str, Path]) -> bool:
        """Convert the voicebank at *voicebank_path* into ``self.output_path``.

        WAV files that cannot be read are logged and skipped.

        Returns:
            ``True`` on success, ``False`` if the conversion failed as a
            whole (the error is logged, not raised).
        """
        try:
            voicebank_path = Path(voicebank_path)
            logger.info(f"보이스뱅크 변환 시작: {voicebank_path}")

            # Load the existing data through VoicebankManager
            vb_manager = VoicebankManager(voicebank_path)

            with h5py.File(self.output_path, 'w') as h5file:
                meta_group = h5file.create_group('metadata')

                # oto.ini entries, keyed by alias
                oto_data = {
                    alias: {
                        'filename': entry.filename,
                        'alias': entry.alias,
                        'offset': entry.offset,
                        'consonant': entry.consonant,
                        'cutoff': entry.cutoff,
                        'preutterance': entry.preutterance,
                        'overlap': entry.overlap,
                    }
                    for alias, entry in vb_manager.oto_entries.items()
                }

                # Serialized as JSON, gzip-compressed, stored as raw bytes
                oto_json = json.dumps(oto_data, ensure_ascii=False, indent=2)
                oto_compressed = gzip.compress(oto_json.encode('utf-8'))
                meta_group.create_dataset('oto_data', data=np.frombuffer(oto_compressed, dtype=np.uint8))

                # Voicebank information
                meta_group.attrs['voicebank_name'] = voicebank_path.name
                meta_group.attrs['total_entries'] = len(vb_manager.oto_entries)
                meta_group.attrs['total_wav_files'] = len(vb_manager.wav_files)

                audio_group = h5file.create_group('audio_data')
                total_original_size = 0

                for filename, wav_path in vb_manager.wav_files.items():
                    try:
                        audio_data, sample_rate = sf.read(wav_path)

                        total_original_size += wav_path.stat().st_size

                        # Stereo -> mono by averaging the channels
                        if len(audio_data.shape) > 1:
                            audio_data = np.mean(audio_data, axis=1)

                        # One group per file. The group name must be derived
                        # exactly as the reader derives it (``.replace``),
                        # so it is deliberately left unchanged.
                        file_group = audio_group.create_group(filename.replace('.wav', ''))

                        file_group.create_dataset(
                            'audio',
                            data=audio_data.astype(np.float32),
                            compression=self.compression,
                            compression_opts=self.compression_opts,
                            shuffle=True,     # byte shuffle improves the compression ratio
                            fletcher32=True   # per-chunk checksum
                        )

                        file_group.attrs['sample_rate'] = sample_rate
                        file_group.attrs['duration'] = len(audio_data) / sample_rate
                        file_group.attrs['original_filename'] = filename

                        logger.info(f"변환 완료: {filename} ({len(audio_data)} samples)")

                    except Exception as e:
                        logger.error(f"파일 처리 실패 {wav_path}: {e}")
                        continue

                # Flush first: until then the size on disk does not reflect
                # the datasets written above. The attributes added below are
                # not included in the measured size.
                h5file.flush()
                compressed_size = self.output_path.stat().st_size

                # No WAV processed means no meaningful ratio; report 0
                # instead of dividing by zero.
                if total_original_size > 0:
                    compression_ratio = (1 - compressed_size / total_original_size) * 100
                else:
                    compression_ratio = 0.0

                meta_group.attrs['original_size_bytes'] = total_original_size
                meta_group.attrs['compressed_size_bytes'] = compressed_size
                meta_group.attrs['compression_ratio_percent'] = compression_ratio

                logger.info("변환 완료!")
                logger.info(f"원본 크기: {total_original_size / (1024*1024):.1f} MB")
                logger.info(f"압축 크기: {compressed_size / (1024*1024):.1f} MB")
                logger.info(f"압축율: {compression_ratio:.1f}%")

            return True

        except Exception as e:
            logger.error(f"HDF5 변환 실패: {e}")
            return False
|
| 124 |
+
|
| 125 |
+
class CompressedVoicebankManager:
    """Read-only access to a voicebank stored in a compressed HDF5 file.

    The oto entries are loaded from the file's ``metadata`` group when the
    manager is constructed.  Audio arrays are read lazily from the
    ``audio_data`` group and kept in a small in-memory LRU cache.
    """

    def __init__(self, hdf5_path: Union[str, Path]):
        """Bind the manager to *hdf5_path* and load its oto metadata.

        Args:
            hdf5_path: Location of the compressed voicebank file.

        Raises:
            FileNotFoundError: If *hdf5_path* does not exist.
            Exception: Whatever ``load_metadata`` re-raises when the file
                cannot be read.
        """
        self.hdf5_path = Path(hdf5_path)
        self.oto_entries: Dict[str, OtoEntry] = {}
        # Dict insertion order doubles as recency order: the first key is
        # always the least recently used entry.
        self._audio_cache: Dict[str, Tuple[np.ndarray, int]] = {}
        self.cache_size_limit = 50  # maximum number of audio files kept in memory

        if not self.hdf5_path.exists():
            raise FileNotFoundError(f"압축된 보이스뱅크를 찾을 수 없습니다: {hdf5_path}")

        self.load_metadata()

    def load_metadata(self):
        """Load the oto entries from the HDF5 ``metadata`` group.

        ``metadata/oto_data`` is read as raw bytes, gzip-decompressed, decoded
        as UTF-8 and parsed as a JSON object mapping each alias to a dict of
        ``OtoEntry`` fields.  ``self.oto_entries`` is replaced only after every
        entry was built, so a failed load leaves the previous entries intact
        and a reload never keeps stale aliases.

        Raises:
            Exception: Any error raised while reading or decoding the file is
                logged and re-raised unchanged.
        """
        try:
            with h5py.File(self.hdf5_path, 'r') as h5file:
                meta = h5file['metadata']

                # Decode the embedded oto.ini data.
                oto_compressed = meta['oto_data'][:]
                oto_json = gzip.decompress(oto_compressed.tobytes()).decode('utf-8')
                oto_data = json.loads(oto_json)

                # Convert the plain dicts into OtoEntry objects.
                self.oto_entries = {
                    alias: OtoEntry(
                        filename=data['filename'],
                        alias=data['alias'],
                        offset=data['offset'],
                        consonant=data['consonant'],
                        cutoff=data['cutoff'],
                        preutterance=data['preutterance'],
                        overlap=data['overlap'],
                    )
                    for alias, data in oto_data.items()
                }

                # Log a short summary of what was loaded.
                logger.info(f"압축된 보이스뱅크 로드: {meta.attrs['voicebank_name']}")
                logger.info(f"총 {meta.attrs['total_entries']}개 엔트리")
                logger.info(f"압축율: {meta.attrs['compression_ratio_percent']:.1f}%")

        except Exception as e:
            logger.error(f"메타데이터 로드 실패: {e}")
            raise

    def _cache_store(self, key: str, value: Tuple[np.ndarray, int]) -> None:
        """Insert *value* into the audio cache, evicting LRU entries first."""
        if self.cache_size_limit <= 0:
            # Caching disabled: nothing to evict and nothing to keep.
            return
        # The limit is >= 1 here, so the cache is non-empty whenever the loop
        # condition holds and ``next(iter(...))`` cannot be exhausted.
        while len(self._audio_cache) >= self.cache_size_limit:
            del self._audio_cache[next(iter(self._audio_cache))]
        self._audio_cache[key] = value

    def get_audio_data(self, filename: str) -> Optional[Tuple[np.ndarray, int]]:
        """Return ``(samples, sample_rate)`` for *filename*, using the cache.

        Args:
            filename: Name of the source audio file.  Every ``'.wav'``
                occurrence is removed to form the HDF5 group name, mirroring
                how the groups are named when the file is written.

        Returns:
            A tuple of the stored audio array and the sample rate as ``int``,
            or ``None`` when the file has no group under ``audio_data`` or
            reading fails (the failure is logged).
        """
        base_filename = filename.replace('.wav', '')

        # Cache hit: re-insert the key so it becomes the most recently used.
        if base_filename in self._audio_cache:
            cached = self._audio_cache.pop(base_filename)
            self._audio_cache[base_filename] = cached
            return cached

        try:
            with h5py.File(self.hdf5_path, 'r') as h5file:
                audio_root = h5file['audio_data']
                if base_filename not in audio_root:
                    return None

                file_group = audio_root[base_filename]
                audio_data = file_group['audio'][:]
                result = (audio_data, int(file_group.attrs['sample_rate']))

        except Exception as e:
            logger.error(f"오디오 데이터 로드 실패 {filename}: {e}")
            return None

        self._cache_store(base_filename, result)
        return result

    def get_sample_for_phoneme(self, phoneme: str) -> Optional["OtoEntry"]:
        """Find the oto entry to use for *phoneme*.

        An exact alias match wins.  Otherwise entries whose ``clean_alias``
        equals *phoneme* are considered, preferring the first one that is not
        flagged ``is_breath``.

        Returns:
            The selected entry, or ``None`` when nothing matches.
        """
        if phoneme in self.oto_entries:
            return self.oto_entries[phoneme]

        candidates = [
            entry for entry in self.oto_entries.values()
            if entry.clean_alias == phoneme
        ]
        if not candidates:
            return None

        # Prefer a non-breath sample; fall back to the first candidate.
        return next((c for c in candidates if not c.is_breath), candidates[0])

    def list_available_phonemes(self) -> List[str]:
        """Return the distinct ``clean_alias`` values, in unspecified order."""
        return list({entry.clean_alias for entry in self.oto_entries.values()})

    def get_compression_info(self) -> Dict[str, object]:
        """Return the compression statistics stored in the HDF5 metadata.

        Returns:
            A dict with the keys ``voicebank_name``, ``total_entries``,
            ``original_size_mb``, ``compressed_size_mb``,
            ``compression_ratio`` and ``file_path``; an empty dict when the
            file or one of its attributes cannot be read (the failure is
            logged).
        """
        bytes_per_mb = 1024 * 1024
        try:
            with h5py.File(self.hdf5_path, 'r') as h5file:
                attrs = h5file['metadata'].attrs
                return {
                    'voicebank_name': attrs['voicebank_name'],
                    'total_entries': attrs['total_entries'],
                    'original_size_mb': attrs['original_size_bytes'] / bytes_per_mb,
                    'compressed_size_mb': attrs['compressed_size_bytes'] / bytes_per_mb,
                    'compression_ratio': attrs['compression_ratio_percent'],
                    'file_path': str(self.hdf5_path),
                }
        except Exception as e:
            logger.error(f"압축 정보 로드 실패: {e}")
            return {}
|
| 243 |
+
|
| 244 |
+
def convert_voicebank_to_compressed_format(voicebank_path: str, output_path: str = None) -> bool:
    """Convenience wrapper that compresses a voicebank into an HDF5 file.

    Args:
        voicebank_path: Path of the voicebank to convert.
        output_path: Destination file.  When ``None`` it defaults to
            ``voice/<name>_compressed.h5``, where ``<name>`` is the last
            component of *voicebank_path* with spaces turned into underscores.

    Returns:
        Whatever ``VoiceDataCompressor.convert_voicebank_to_hdf5`` returns
        (annotated here as ``bool``).
    """
    target = output_path
    if target is None:
        # Derive a space-free file name from the voicebank's directory name.
        safe_name = "_".join(Path(voicebank_path).name.split(" "))
        target = f"voice/{safe_name}_compressed.h5"

    return VoiceDataCompressor(target).convert_voicebank_to_hdf5(voicebank_path)
|
| 252 |
+
|
| 253 |
+
if __name__ == "__main__":
    # Manual smoke test: convert the bundled voicebank, then re-open the
    # result and print its statistics.
    if not convert_voicebank_to_compressed_format("voice/hanseol CVC"):
        print("❌ 보이스뱅크 압축 실패!")
    else:
        print("✅ 보이스뱅크 압축 변환 완료!")

        # Read the compressed file back to confirm that it loads.
        manager = CompressedVoicebankManager("voice/hanseol_CVC_compressed.h5")
        info = manager.get_compression_info()
        print(f"📊 압축 정보: {info}")
        phoneme_count = len(manager.list_available_phonemes())
        print(f"🎤 사용 가능한 음소: {phoneme_count}개")
|