import logging
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import soundfile as sf

from straycat import Resampler
from voice_data_converter import CompressedVoicebankManager, OtoEntry

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

class CompressedUTAUEngine:
    """UTAU-compatible voice synthesis engine that uses a compressed HDF5 voicebank.

    Attributes:
        voicebank: ``CompressedVoicebankManager`` opened on the path given to
            the constructor; it is queried for oto entries and audio data.
        default_phoneme: Phoneme returned when a lyric is empty or cannot be
            resolved.
    """

    # Sample rate (Hz) of the mix buffer and of the WAV file that is written.
    _SAMPLE_RATE = 44100
    # Peak amplitude the finished mix is normalised to.
    _PEAK_LEVEL = 0.85
    # Shortest note length (ms) that is handed to the resampler.
    _MIN_NOTE_MS = 200
    # Pitch-class names indexed by ``midi_note % 12``.
    _NOTE_NAMES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    # Hangul syllable -> phoneme alias (simple one-syllable mapping). Built
    # once here instead of on every call; the solfege syllables that are not
    # already covered by the vowel rows are listed last.
    _HANGUL_TO_PHONEME = {
        '가': 'ka', '나': 'na', '다': 'da', '라': 'ra', '마': 'ma',
        '바': 'ba', '사': 'sa', '아': 'a', '자': 'za', '차': 'cha',
        '카': 'ka', '타': 'ta', '파': 'pa', '하': 'ha',
        '거': 'ke', '너': 'ne', '더': 'de', '러': 're', '머': 'me',
        '버': 'be', '서': 'se', '어': 'e', '저': 'ze', '처': 'che',
        '커': 'ke', '터': 'te', '퍼': 'pe', '허': 'he',
        '고': 'ko', '노': 'no', '도': 'do', '로': 'ro', '모': 'mo',
        '보': 'bo', '소': 'so', '오': 'o', '조': 'zo', '초': 'cho',
        '코': 'ko', '토': 'to', '포': 'po', '호': 'ho',
        '구': 'ku', '누': 'nu', '두': 'du', '루': 'ru', '무': 'mu',
        '부': 'bu', '수': 'su', '우': 'u', '주': 'zu', '추': 'chu',
        '쿠': 'ku', '투': 'tu', '푸': 'pu', '후': 'hu',
        '기': 'ki', '니': 'ni', '디': 'di', '리': 'ri', '미': 'mi',
        '비': 'bi', '시': 'si', '이': 'i', '지': 'zi', '치': 'chi',
        '키': 'ki', '티': 'ti', '피': 'pi', '히': 'hi',
        '레': 're', '솔': 'so',
    }

    def __init__(self, compressed_voicebank_path: Union[str, Path]):
        """Open the compressed voicebank at *compressed_voicebank_path*."""
        self.voicebank = CompressedVoicebankManager(compressed_voicebank_path)
        self.default_phoneme = "あ"  # fallback phoneme
        logger.info("압축된 UTAU 엔진 초기화 완료")

    def synthesize_sequence(self,
                            notes: List[Dict],
                            lyrics: List[str],
                            tempo: int = 120,
                            volume: int = 100) -> Tuple[Optional[str], str]:
        """Synthesize a note sequence with its lyrics into a temporary WAV file.

        Args:
            notes: One dict per note. Keys read here: ``startSeconds``
                (default 0), ``durationSeconds`` (default 0.5), ``endSeconds``
                (default start + duration) and ``velocity`` (default 100);
                ``pitch`` is read by ``_synthesize_note``.
            lyrics: One lyric per note, in the same order as *notes*.
            tempo: Passed through to the resampler.
            volume: Passed through to the resampler.

        Returns:
            ``(path, message)``. ``path`` is the name of the written WAV file,
            or ``None`` when the inputs are rejected or synthesis fails;
            ``message`` is a user-facing status string. The caller owns the
            returned file and is responsible for deleting it.

        Notes that cannot be rendered are logged and skipped; they do not
        abort the sequence.
        """
        if len(notes) != len(lyrics):
            return None, "노트와 가사의 개수가 일치하지 않습니다."

        if not notes:
            return None, "합성할 노트가 없습니다."

        sample_rate = self._SAMPLE_RATE
        try:
            # Size the buffer from the latest note end, plus one second of tail.
            max_end_time = max(
                note.get('endSeconds',
                         note.get('startSeconds', 0) + note.get('durationSeconds', 0.5))
                for note in notes
            )
            final_audio = np.zeros(int(max_end_time * sample_rate) + sample_rate)

            for number, (note, lyric) in enumerate(zip(notes, lyrics), start=1):
                try:
                    phoneme = self._lyric_to_phoneme(lyric)

                    oto_entry = self.voicebank.get_sample_for_phoneme(phoneme)
                    if not oto_entry:
                        logger.warning(f"음소 '{phoneme}'에 해당하는 샘플을 찾을 수 없음")
                        continue

                    # Audio comes out of the compressed voicebank, not from disk.
                    audio_result = self.voicebank.get_audio_data(oto_entry.filename)
                    if not audio_result:
                        logger.warning(f"오디오 파일 로드 실패: {oto_entry.filename}")
                        continue
                    source_audio, source_sample_rate = audio_result

                    synth_audio = self._synthesize_note(
                        note, oto_entry, source_audio, source_sample_rate, tempo, volume
                    )
                    if synth_audio is None:
                        continue

                    # NOTE(review): the clip is mixed without resampling, so
                    # this assumes the resampler output is already at
                    # _SAMPLE_RATE - confirm.
                    final_audio = self._mix_into(
                        final_audio,
                        synth_audio,
                        int(note.get('startSeconds', 0) * sample_rate),
                        note.get('velocity', 100) / 100,
                        sample_rate,
                    )
                    logger.info(f"노트 {number} 합성 완료: {phoneme}")

                except Exception as e:
                    # Best effort: one bad note must not abort the sequence.
                    logger.error(f"노트 {number} 합성 실패: {e}")

            # Normalise the mix; an all-zero buffer is left untouched.
            peak = np.max(np.abs(final_audio))
            if peak > 0:
                final_audio = final_audio / peak * self._PEAK_LEVEL

            # Reserve a name and close the handle before writing: a temp file
            # that is still open cannot be reopened by name on Windows.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as handle:
                output_name = handle.name
            try:
                sf.write(output_name, final_audio, sample_rate)
            except Exception:
                # Do not leave an unusable file behind when the write fails.
                Path(output_name).unlink(missing_ok=True)
                raise

            duration_sec = len(final_audio) / sample_rate
            return output_name, f"✅ 압축된 보이스뱅크로 합성 완료: {len(notes)}개 노트, {duration_sec:.1f}초"

        except Exception as e:
            logger.error(f"시퀀스 합성 실패: {e}")
            return None, f"❌ 합성 실패: {str(e)}"

    @staticmethod
    def _mix_into(buffer: np.ndarray,
                  clip: np.ndarray,
                  start: int,
                  gain: float,
                  headroom: int) -> np.ndarray:
        """Add ``clip * gain`` into *buffer* at sample *start* and return the buffer.

        When the clip runs past the end, a larger zero-filled buffer
        (clip end + *headroom* samples) is allocated, the old content is
        copied in, and that new buffer is returned.

        Raises:
            ValueError: If *start* is negative, or (from NumPy) if the clip
                shape cannot be added to the 1-D buffer slice.
        """
        if start < 0:
            raise ValueError(f"노트 시작 위치가 음수입니다: {start}")

        end = start + len(clip)
        if end > len(buffer):
            grown = np.zeros(end + headroom)
            grown[:len(buffer)] = buffer
            buffer = grown
        buffer[start:end] += clip * gain
        return buffer

    def _lyric_to_phoneme(self, lyric: str) -> str:
        """Convert a lyric to a phoneme alias.

        Surrounding whitespace is ignored. A lyric found in the Hangul table
        is mapped through it; any other lyric is returned as-is when it is a
        key of ``self.voicebank.oto_entries``. Empty or unknown lyrics yield
        ``self.default_phoneme``.
        """
        lyric = lyric.strip()
        if not lyric:
            return self.default_phoneme

        mapped = self._HANGUL_TO_PHONEME.get(lyric)
        if mapped is not None:
            return mapped

        return lyric if lyric in self.voicebank.oto_entries else self.default_phoneme

    def _synthesize_note(self,
                         note: Dict,
                         oto_entry: OtoEntry,
                         source_audio: np.ndarray,
                         source_sample_rate: int,
                         tempo: int,
                         volume: int) -> Optional[np.ndarray]:
        """Render a single note from voicebank audio through the straycat resampler.

        The source audio is written to a temporary WAV file, the resampler is
        run on it, and the resampler's output file is read back. Both
        temporary files are removed on every exit path.

        Args:
            note: Note dict; ``pitch`` is required, ``durationSeconds``
                (default 0.5) and ``velocity`` (default 100) are optional.
            oto_entry: Supplies ``offset``, ``consonant`` and ``cutoff``.
            source_audio: Samples of the voicebank recording.
            source_sample_rate: Sample rate of *source_audio*.
            tempo: Forwarded to the resampler as ``'!<tempo>'``.
            volume: Forwarded to the resampler.

        Returns:
            The rendered samples, or ``None`` when rendering fails (the
            failure is logged).
        """
        in_name = None
        out_name = None
        try:
            # Reserve both names and close the handles before anything writes
            # to them; an open temp file cannot be reopened by name on Windows.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_in:
                in_name = tmp_in.name
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_out:
                out_name = tmp_out.name

            sf.write(in_name, source_audio, source_sample_rate)

            pitch = note['pitch']
            duration_ms = note.get('durationSeconds', 0.5) * 1000
            velocity = note.get('velocity', 100)

            # The resampler takes the pitch as a note name, not a MIDI number.
            note_name = self._midi_to_note_name(pitch)

            # The instance itself is not needed: this code relies on the
            # constructor performing the render, since the output file is
            # read immediately afterwards.
            Resampler(
                in_file=in_name,
                out_file=out_name,
                pitch=note_name,
                velocity=velocity,
                length=max(duration_ms, self._MIN_NOTE_MS),
                volume=volume,
                offset=oto_entry.offset,
                consonant=oto_entry.consonant,
                cutoff=oto_entry.cutoff,
                modulation=10,
                tempo=f'!{tempo}'
            )

            # The output file was pre-created above, so mere existence proves
            # nothing; an empty file means nothing was written to it.
            out_path = Path(out_name)
            if not out_path.exists() or out_path.stat().st_size == 0:
                logger.error(f"합성된 파일이 생성되지 않음: {out_name}")
                return None

            synth_audio, _ = sf.read(out_name)
            return synth_audio

        except Exception as e:
            logger.error(f"노트 합성 실패: {e}")
            return None

        finally:
            # Clean up on success and on failure alike.
            for name in (in_name, out_name):
                if name is not None:
                    Path(name).unlink(missing_ok=True)

    def _midi_to_note_name(self, midi_note: int) -> str:
        """Convert a MIDI note number to a note name such as ``'C4'``.

        MIDI 60 maps to ``'C4'``. Non-integer numbers (e.g. ``60.0``) are
        rounded to the nearest semitone first.
        """
        octave_index, pitch_class = divmod(int(round(midi_note)), 12)
        return f"{self._NOTE_NAMES[pitch_class]}{octave_index - 1}"

    def get_available_phonemes(self) -> List[str]:
        """Return the phonemes the voicebank reports as available."""
        return self.voicebank.list_available_phonemes()

    def get_compression_info(self) -> Dict[str, Any]:
        """Return the voicebank's compression information."""
        return self.voicebank.get_compression_info()