Spaces:

minjune121
/

boolook_model

Running

File size: 28,913 Bytes

a4acd69
 
a8f1c4e
46301e8
 
 
 
 
a4acd69
 
4feabfc
 
062c63f
8a1d76f
46301e8
 
a4acd69
 
f428a77
a8f1c4e
a4acd69
5f89afc
a4acd69
bc11a33
a4acd69
5f89afc
a4acd69
 
 
 
5f89afc
 
4feabfc
a4acd69
0ce3b2b
a4acd69
46301e8
 
 
 
 
f428a77
a4acd69
46301e8
 
 
 
 
 
8a1d76f
46301e8
4a3afe3
bc11a33
a4acd69
a8f1c4e
 
 
 
 
 
 
 
46301e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4acd69
4a3afe3
bc11a33
5f89afc
bc11a33
a4acd69
bc11a33
a8f1c4e
a4acd69
5f89afc
bc11a33
4a3afe3
bc11a33
4a3afe3
bc11a33
5f89afc
bc11a33
a4acd69
 
5f89afc
a8f1c4e
4a3afe3
bc11a33
4a3afe3
ca68191
4a3afe3
a4acd69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f89afc
 
 
 
 
 
 
a8f1c4e
5f89afc
 
4a3afe3
a4acd69
46301e8
 
 
 
 
 
 
a4acd69
 
 
 
 
 
 
 
 
 
 
 
5f89afc
a4acd69
 
 
 
 
 
 
5f89afc
 
 
b9098cc
a4acd69
0ce3b2b
a4acd69
a8f1c4e
a4acd69
a8f1c4e
 
 
a4acd69
4a3afe3
a8f1c4e
a4acd69
5f89afc
a8f1c4e
4a3afe3
5f89afc
4a3afe3
a8f1c4e
a4acd69
5f89afc
a4acd69
5f89afc
 
 
4a3afe3
5f89afc
4a3afe3
5f89afc
a8f1c4e
a4acd69
4a3afe3
5f89afc
 
 
 
a8f1c4e
5f89afc
 
a8f1c4e
 
 
 
5f89afc
a8f1c4e
 
 
5f89afc
 
 
4a3afe3
5f89afc
4a3afe3
5f89afc
bc11a33
5f89afc
 
a8f1c4e
5f89afc
a8f1c4e
5f89afc
a8f1c4e
bc11a33
4a3afe3
a8f1c4e
 
 
 
 
 
0ce3b2b
4a3afe3
a4acd69
a8f1c4e
a4acd69
 
bc11a33
a4acd69
5f89afc
a4acd69
5f89afc
 
a4acd69
5f89afc
f428a77
a4acd69
 
5f89afc
 
4a3afe3
a4acd69
 
 
 
 
 
 
 
 
 
 
 
 
f428a77
46301e8
 
 
 
 
 
5f89afc
46301e8
 
 
 
 
 
 
 
 
 
a4acd69
f428a77
5f89afc
 
 
 
a8f1c4e
5f89afc
 
41a5227
5f89afc
 
 
 
a4acd69
46301e8
a4acd69
 
 
845db5f
a8f1c4e
 
 
 
 
 
bc11a33
a4acd69
5f89afc
 
845db5f
a8f1c4e
5f89afc
 
 
 
 
 
a8f1c4e
5f89afc
 
 
a8f1c4e
 
5f89afc
a8f1c4e
 
 
5f89afc
a8f1c4e
 
 
5f89afc
 
 
 
 
4a3afe3
5f89afc
a4acd69
46301e8
f428a77
 
 
845db5f
f428a77
 
 
 
4a3afe3
f428a77
 
845db5f
f428a77
 
 
 
 
 
 
 
 
 
 
 
 
46301e8
a4acd69
5f89afc
a4acd69
5f89afc
a4acd69
 
 
a8f1c4e
a4acd69
 
6a318c2
a8f1c4e
a4acd69
 
 
 
 
f428a77
a4acd69
5f89afc
 
 
a8f1c4e
 
 
 
 
5f89afc
 
 
 
 
 
 
 
 
 
 
4a3afe3
a4acd69
f428a77
6a318c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4acd69
 
46301e8
bc11a33
a4acd69
 
 
46301e8
a4acd69
 
 
a8f1c4e
5f89afc
a8f1c4e
5f89afc
a4acd69
bc11a33
4a3afe3
0ce3b2b
46301e8
a4acd69
5f89afc
a4acd69
bc11a33
a8f1c4e
6a318c2
ca68191
6a318c2
bc11a33
6a318c2
a4acd69
8a1d76f
6a318c2
 
 
 
 
 
 
 
 
 
 
5f89afc
6a318c2
5f89afc
a4acd69
 
 
 
bc11a33
4a3afe3
a4acd69
845db5f
6a318c2
a4acd69
845db5f
46301e8
a4acd69
845db5f
 
a4acd69
6a318c2
a4acd69
 
4a3afe3
6a318c2
a4acd69
b9098cc
a8f1c4e
6a318c2
 
a8f1c4e
46301e8
f428a77
 
 
46301e8
a4acd69
46301e8
5f89afc
46301e8
a4acd69
 
6a318c2
a4acd69
062c63f
bc11a33
46301e8
6a318c2
 
 
 
 
46301e8
 
a4acd69
bc11a33
f428a77
46301e8
f428a77
 
 
a4acd69
46301e8
a8f1c4e
46301e8
5f89afc
a4acd69
6a318c2
 
 
 
 
 
 
 
 
 
 
 
bc11a33
a4acd69
0ce3b2b
6a318c2
062c63f
bc11a33
 
46301e8

"""
Boolook - 음성 기반 감정 분석 책 추천 (HuggingFace Spaces)
수정사항:
  - final_emotion_model_v3.pth (ResNet-SE + BiLSTM + Attention) 커스텀 모델 통합
  - superb/wav2vec2-base-superb-er 대신 커스텀 모델로 음성 감정 분류
  - 모델 클래스 정의 (SEBlock, ResBlock, AttentionPooling, EmotionResNet) 포함
  - Mel-spectrogram 전처리 + TTA(n_tta=8) 추론 + temperature scaling 적용
  - 4클래스(Angry/Happy/Neutral/Sad) → 한국어 감정 레이블 매핑
"""

import gradio as gr
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import csv
import json
import threading
import warnings
import logging
from pathlib import Path
from datetime import datetime
from collections import defaultdict
from typing import Dict, List, Tuple
from transformers import pipeline as hf_pipeline
from sentence_transformers import SentenceTransformer, util as sbert_util

# Silence every Python warning and enable INFO-level logging for the app.
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ============================================================
# Configuration
# ============================================================
# All paths are relative to the process working directory.
BOOK_DB_PATH        = Path("book_db_final.csv")
FEEDBACK_PATH       = Path("user_feedback.csv")
SBERT_CACHE_PATH    = Path("book_embeddings.pkl")
EMOTION_MODEL_PATH  = Path("final_emotion_model_v3.pth")
# NOTE(review): SAMPLE_RATE is not referenced anywhere else in the code
# visible here; audio is processed at whatever rate it arrives with.
SAMPLE_RATE         = 16000
MAX_EMBEDDING_BATCH = 128

# Mel-spectrogram parameters (must match the values used at training time)
N_MELS    = 64
N_FFT     = 1024
HOP_LEN   = 512
MAX_FRAMES = 128   # fixed length of the time axis

# `device` is the integer index handed to the HF pipeline (0 = first GPU,
# -1 = CPU); `torch_device` is the equivalent torch.device for raw tensors.
device = 0 if torch.cuda.is_available() else -1
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"디바이스: {'GPU' if device == 0 else 'CPU'}")

# ============================================================
# Global state (filled in by the background loader)
# ============================================================
# Placeholders until load_book_data() publishes the real values under
# `_data_lock` and flips `_data_ready` to True.
df              = pd.DataFrame()
book_embeddings = torch.tensor([])
_data_ready     = False
_data_lock      = threading.Lock()

# ============================================================
# ① 커스텀 감정 모델 아키텍처 정의
# ============================================================

class SEBlock(nn.Module):
    """Squeeze-and-Excitation channel gate.

    Averages each channel over its spatial extent, passes the resulting
    vector through a two-layer bottleneck (Linear -> ReLU -> Linear ->
    Sigmoid, no biases) and rescales the input channels by the output.

    Args:
        channels: Number of input/output channels.
        reduction: Divisor applied to ``channels`` to size the bottleneck.
    """

    def __init__(self, channels: int, reduction: int = 16):
        super().__init__()
        bottleneck = channels // reduction
        gate_layers = [
            nn.Linear(channels, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channels, bias=False),
            nn.Sigmoid(),
        ]
        self.excitation = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` (B, C, H, W) scaled per channel by the learned gate."""
        batch, n_ch, _height, _width = x.shape
        squeezed = x.mean(dim=(2, 3))           # global average pool -> (B, C)
        gate = self.excitation(squeezed)        # (B, C), values in (0, 1)
        return x * gate.view(batch, n_ch, 1, 1)


class ResBlock(nn.Module):
    """ResNet basic block (two 3x3 convolutions) with an SE gate.

    The main branch is conv-BN-ReLU-conv-BN followed by ``SEBlock``; it is
    added to the shortcut branch and passed through a final ReLU. The
    shortcut is the identity unless the stride or channel count changes, in
    which case it is a strided 1x1 convolution plus batch norm.

    Args:
        in_ch: Input channel count.
        out_ch: Output channel count.
        stride: Stride of the first convolution (and of the projection).
    """

    def __init__(self, in_ch: int, out_ch: int, stride: int = 1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_ch, out_ch, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.se = SEBlock(out_ch, reduction=max(1, out_ch // 16))

        needs_projection = stride != 1 or in_ch != out_ch
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_ch),
            )
        else:
            # Empty Sequential acts as the identity mapping.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        main = self.conv1(x)
        main = F.relu(self.bn1(main), inplace=True)
        main = self.conv2(main)
        main = self.se(self.bn2(main))
        skip = self.shortcut(x)
        return F.relu(main + skip, inplace=True)


class AttentionPooling(nn.Module):
    """Attention-weighted pooling over the time axis.

    A single linear layer scores every time step; the scores are
    soft-maxed across time and used as weights for a sum of the inputs.

    Args:
        hidden: Feature size of each time step.
    """

    def __init__(self, hidden: int):
        super().__init__()
        self.attn = nn.Linear(hidden, 1)

    def forward(self, x):
        """Collapse ``x`` of shape (B, T, H) to (B, H)."""
        step_scores = self.attn(x)                   # (B, T, 1)
        step_weights = F.softmax(step_scores, dim=1)  # normalised over T
        weighted = step_weights * x
        return torch.sum(weighted, dim=1)


class EmotionResNet(nn.Module):
    """ResNet-SE encoder + 2-layer BiLSTM + attention pooling + classifier.

    Input:  Mel-spectrogram batch of shape (B, 1, N_MELS, T).
    Output: logits of shape (B, num_classes).

    The submodule attribute names and their creation order define the
    state-dict layout, so they must stay in sync with the checkpoint.

    Args:
        num_classes: Number of emotion classes to predict.
    """

    def __init__(self, num_classes: int = 4):
        super().__init__()

        # Convolutional stem: 7x7, stride 2, followed by batch norm.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
        )

        # Three residual stages; the 2nd and 3rd halve the resolution.
        self.layer1 = nn.Sequential(ResBlock(64, 64), ResBlock(64, 64))
        self.layer2 = nn.Sequential(ResBlock(64, 128, stride=2), ResBlock(128, 128))
        self.layer3 = nn.Sequential(ResBlock(128, 256, stride=2), ResBlock(256, 256))

        # Two-layer bidirectional LSTM over the time axis (output width 512).
        self.bilstm = nn.LSTM(
            input_size=256,
            hidden_size=256,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
            dropout=0.3,
        )

        # Collapses the LSTM output sequence to a single 512-d vector.
        self.attention = AttentionPooling(hidden=512)

        # Classification head.
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Map a spectrogram batch to per-class logits."""
        # Convolutional feature extractor.
        feats = F.relu(self.conv1(x), inplace=True)
        for stage in (self.layer1, self.layer2, self.layer3):
            feats = stage(feats)

        # (B, C, H, W): average out the frequency axis, then put time first.
        seq = feats.mean(dim=2)         # (B, C, W)
        seq = seq.permute(0, 2, 1)      # (B, T, C)

        # Temporal modelling.
        seq, _state = self.bilstm(seq)  # (B, T, 512)

        # Pool over time and classify.
        pooled = self.attention(seq)    # (B, 512)
        return self.classifier(pooled)


# ============================================================
# ② 커스텀 감정 모델 로드
# ============================================================
# Module-level state of the custom speech-emotion model. These are the
# defaults; _load_emotion_model() overwrites them from the checkpoint.
# `_emotion_model` staying None means "no model available" to the callers.
_emotion_model      = None
_emotion_classes    = ["Angry", "Happy", "Neutral", "Sad"]
_emotion_label_enc  = None
_emotion_temp       = 1.0   # divisor applied to the logits before softmax
_emotion_n_tta      = 1     # number of test-time-augmentation passes

def _load_emotion_model():
    """Load the custom speech-emotion checkpoint into the module globals.

    Reads ``EMOTION_MODEL_PATH`` and, on success, publishes the model (moved
    to ``torch_device`` and switched to eval mode) together with the class
    list, label encoder, softmax temperature and TTA count stored in the
    checkpoint. On any failure the error is logged and every global keeps
    its previous value, so ``_emotion_model`` stays ``None``.
    """
    global _emotion_model, _emotion_classes, _emotion_label_enc, _emotion_temp, _emotion_n_tta
    if not EMOTION_MODEL_PATH.exists():
        logger.error(f"{EMOTION_MODEL_PATH} 파일이 없습니다. 커스텀 감정 모델을 사용하지 않습니다.")
        return

    try:
        # NOTE(security): weights_only=False unpickles arbitrary Python
        # objects from the checkpoint; only load files from a trusted source.
        ckpt = torch.load(EMOTION_MODEL_PATH, map_location="cpu", weights_only=False)

        classes   = [str(c) for c in ckpt.get("classes", _emotion_classes)]
        label_enc = ckpt.get("label_encoder", None)
        temp      = float(ckpt.get("temperature", 1.0))
        n_tta     = int(ckpt.get("n_tta", 1))

        # The logits are divided by the temperature at inference time; a
        # zero, negative or NaN value would produce garbage probabilities.
        if not temp > 0:
            logger.warning(f"temperature 값이 유효하지 않아 1.0으로 대체합니다: {temp}")
            temp = 1.0

        model = EmotionResNet(num_classes=len(classes))
        model.load_state_dict(ckpt["model_state_dict"])
        model.to(torch_device)
        model.eval()
    except Exception as e:
        logger.error(f"커스텀 감정 모델 로드 실패: {e}")
        return

    # Publish only after everything above succeeded, so a failed load never
    # leaves the globals half-updated.
    _emotion_classes   = classes
    _emotion_label_enc = label_enc
    _emotion_temp      = temp
    _emotion_n_tta     = n_tta
    _emotion_model     = model

    # The metric may be missing (or not numeric); formatting it must never
    # turn a successful load into a reported failure.
    val_f1 = ckpt.get("best_val_f1", "N/A")
    try:
        val_f1_text = f"{float(val_f1):.4f}"
    except (TypeError, ValueError):
        val_f1_text = str(val_f1)

    logger.info(
        f"커스텀 감정 모델 로드 완료 | "
        f"클래스: {_emotion_classes} | "
        f"val_acc: {ckpt.get('val_accuracy', 'N/A')} | "
        f"val_f1: {val_f1_text} | "
        f"temp: {_emotion_temp} | TTA: {_emotion_n_tta}"
    )

_load_emotion_model()

# ============================================================
# ③ Mel-spectrogram 전처리
# ============================================================
def _compute_melspec(y: np.ndarray, sr: int) -> torch.Tensor:
    """Convert a mono waveform into a normalised log-mel tensor.

    Uses librosa when it can be imported; otherwise falls back to a rough
    torch-only approximation (STFT power summed into rectangular bands).
    The spectrogram is standardised to zero mean / unit variance and then
    zero-padded or truncated to ``MAX_FRAMES`` along the time axis.

    NOTE(review): the waveform is analysed at the incoming rate ``sr``; no
    resampling to ``SAMPLE_RATE`` happens here. Confirm this matches how the
    model was trained.

    Args:
        y: 1-D float waveform.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        Tensor of shape (1, 1, N_MELS, MAX_FRAMES) on ``torch_device``.
    """
    try:
        import librosa
        mel = librosa.feature.melspectrogram(
            y=y, sr=sr,
            n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LEN,
        )
        mel_db = librosa.power_to_db(mel, ref=np.max)
    except ImportError:
        # Fallback without librosa: plain torch STFT.
        wav = torch.tensor(y, dtype=torch.float32)
        # torch.stft pads N_FFT // 2 samples by reflection on both sides,
        # which requires the signal to be longer than that; zero-pad very
        # short clips so they do not raise.
        min_len = N_FFT // 2 + 1
        if wav.shape[-1] < min_len:
            wav = F.pad(wav, (0, min_len - wav.shape[-1]))
        window = torch.hann_window(N_FFT)
        stft = torch.stft(wav, N_FFT, HOP_LEN, return_complex=True, window=window)
        power = stft.abs() ** 2                         # (freq, T)
        # Crude filterbank: N_MELS equal-width, non-overlapping rectangular
        # bands on the linear frequency axis (not a true mel scale).
        band = power.shape[0] // N_MELS
        mel_fb = torch.zeros(N_MELS, power.shape[0])
        for m in range(N_MELS):
            mel_fb[m, m * band:(m + 1) * band] = 1.0
        mel = mel_fb @ power                            # (N_MELS, T)
        mel_db = (mel + 1e-6).log().numpy()

    # Standardise over the whole spectrogram.
    mel_db = (mel_db - mel_db.mean()) / (mel_db.std() + 1e-6)

    # Pad or crop the time axis to the fixed length.
    n_frames = mel_db.shape[1]
    if n_frames < MAX_FRAMES:
        mel_db = np.pad(mel_db, ((0, 0), (0, MAX_FRAMES - n_frames)), mode="constant")
    else:
        mel_db = mel_db[:, :MAX_FRAMES]

    # Add batch and channel axes: (1, 1, N_MELS, MAX_FRAMES).
    tensor = torch.tensor(mel_db, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
    return tensor.to(torch_device)


# ============================================================
# ④ TTA 추론
# ============================================================
def _tta_augment(spec: torch.Tensor) -> torch.Tensor:
    """Return ``spec`` circularly shifted along its last axis.

    The shift is drawn uniformly from ``[-MAX_FRAMES // 8, MAX_FRAMES // 8)``
    (upper bound exclusive) and is used as a cheap test-time augmentation.
    """
    lower = -MAX_FRAMES // 8
    upper = MAX_FRAMES // 8
    offset = np.random.randint(lower, upper)
    return spec.roll(offset, dims=-1)


def _infer_emotion_model(y: np.ndarray, sr: int) -> Dict[str, float]:
    """Run the custom model and return a probability per class.

    The keys are the model's own (English) class labels. The first pass
    uses the unmodified spectrogram and every further TTA pass a randomly
    shifted copy; the logits are averaged, divided by the temperature and
    soft-maxed. If the model is not loaded, or inference raises, every
    class maps to 0.0.
    """
    if _emotion_model is None:
        return dict.fromkeys(_emotion_classes, 0.0)

    try:
        base_spec = _compute_melspec(y, sr)   # (1, 1, N_MELS, T)
        n_passes = max(1, _emotion_n_tta)

        with torch.no_grad():
            per_pass = [
                _emotion_model(base_spec if k == 0 else _tta_augment(base_spec))
                for k in range(n_passes)
            ]                                  # each entry: (1, num_classes)

        mean_logits = torch.stack(per_pass).mean(dim=0)      # (1, C)
        scaled = mean_logits / _emotion_temp                  # temperature scaling
        class_probs = torch.softmax(scaled, dim=-1)[0].cpu().numpy()

        return {name: float(p) for name, p in zip(_emotion_classes, class_probs)}

    except Exception as e:
        logger.error(f"커스텀 모델 추론 실패: {e}")
        return dict.fromkeys(_emotion_classes, 0.0)


# ============================================================
# 모델 로딩 (STT, SBERT)
# ============================================================
# Both models are loaded eagerly at import time. A failure is logged and
# leaves the corresponding global as None; the functions that use the
# models check for None before calling them.
logger.info("모델 로딩 중...")

# Speech-to-text pipeline.
stt_model = None
try:
    stt_model = hf_pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        device=device,
        chunk_length_s=30,
    )
    logger.info("STT 모델 로드 완료 (whisper-small)")
except Exception as e:
    logger.error(f"STT 로드 실패: {e}")

# Sentence encoder used for both text-emotion scoring and book retrieval.
sbert_model = None
try:
    sbert_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
    sbert_model.max_seq_length = 384
    if torch.cuda.is_available():
        sbert_model = sbert_model.to("cuda")
    logger.info("SBERT 모델 로드 완료")
except Exception as e:
    logger.error(f"SBERT 로드 실패: {e}")

logger.info("모델 로딩 완료!")

# ============================================================
# 감정 레이블 & 설명
# ============================================================
# Eight Korean emotion labels, each with a short natural-language
# description. The descriptions (not the labels) are what gets embedded and
# compared against the user's utterance.
_EMOTION_DESCS = {
    "기쁨": "행복하고 즐겁고 유쾌한 기분",
    "신뢰": "따뜻하고 안정적이며 가족과 우정 같은 유대감",
    "공포": "무섭고 긴장되며 스릴 있는 공포와 불안",
    "놀람": "반전과 충격, 예상치 못한 경이로움",
    "슬픔": "슬프고 외롭고 이별과 상실의 감정",
    "혐오": "부조리와 불평등, 위선에 대한 비판과 풍자",
    "분노": "분노와 저항, 투쟁과 갈등",
    "기대": "성장과 도전, 모험과 희망",
}
_EMOTION_LABELS = list(_EMOTION_DESCS.keys())

# Description embeddings, row-aligned with _EMOTION_LABELS. Stays None when
# the sentence encoder is unavailable or encoding fails.
_LABEL_EMBS = None
if sbert_model:
    try:
        _LABEL_EMBS = sbert_model.encode(
            list(_EMOTION_DESCS.values()),
            convert_to_tensor=True,
            show_progress_bar=False,
        )
    except Exception as e:
        logger.error(f"감정 레이블 임베딩 실패: {e}")

# Custom model's English labels -> Korean emotion labels. Only four of the
# eight Korean labels can receive audio evidence through this map.
_CUSTOM_LABEL_MAP: Dict[str, str] = {
    "Happy":   "기쁨",
    "Sad":     "슬픔",
    "Angry":   "분노",
    "Neutral": "신뢰",
}

# Substrings that add a fixed bonus to an emotion's text score when found
# in the utterance. There is no entry for "혐오".
_KEYWORD_BOOSTS = {
    "슬픔": ["슬프", "우울", "눈물", "힘들", "외로"],
    "분노": ["화나", "짜증", "열받", "빡치", "억울"],
    "기쁨": ["행복", "좋다", "기쁘", "즐겁", "신나"],
    "공포": ["무섭", "두렵", "걱정", "불안"],
    "놀람": ["놀랐", "깜짝", "충격"],
    "신뢰": ["믿음", "사랑", "따뜻", "고마"],
    "기대": ["기대", "희망", "설레"],
}

# ============================================================
# 세션 피드백
# ============================================================
class SessionFeedback:
    """In-memory tally of accepted / rejected recommendations per emotion."""

    def __init__(self):
        self.accepted_counts = defaultdict(int)
        self.rejected_counts = defaultdict(int)

    def score_multiplier(self, emotion: str) -> float:
        """Return the score multiplier for ``emotion``, clamped to [0.5, 2.0].

        Starts at 1.0, gains 0.1 per accepted and loses 0.1 per rejected
        recommendation. Looking up an unseen emotion does not create an
        entry in either counter.
        """
        n_accepted = self.accepted_counts.get(emotion, 0)
        n_rejected = self.rejected_counts.get(emotion, 0)
        raw = 1.0 + (0.1 * n_accepted) - (0.1 * n_rejected)
        if raw < 0.5:
            return 0.5
        if raw > 2.0:
            return 2.0
        return raw


_session = SessionFeedback()

# ============================================================
# 도서 데이터 로드 (백그라운드 전용)
# ============================================================
def load_book_data():
    """Load the book catalogue and its embeddings, then publish them.

    Intended to run in a background thread. Steps:

    1. Read ``BOOK_DB_PATH`` (empty cells become ``""``).
    2. Load the ISBN -> vector cache from ``SBERT_CACHE_PATH`` if present.
    3. Encode ``title + " " + content`` (first 500 characters) for every
       book whose ISBN is not cached, in batches of
       ``MAX_EMBEDDING_BATCH``, and write the cache back to disk.
    4. Stack one vector per row; books without a vector get a zero vector
       whose width is taken from the cache or the encoder.
    5. Store the frame and matrix in the module globals under
       ``_data_lock`` and set ``_data_ready``.

    If the CSV is missing, unreadable or has no ``isbn`` column the function
    logs the error and returns without setting ``_data_ready``.
    """
    global df, book_embeddings, _data_ready

    if not BOOK_DB_PATH.exists():
        logger.error(f"{BOOK_DB_PATH} 파일이 없습니다.")
        return

    try:
        _df = pd.read_csv(BOOK_DB_PATH, encoding="utf-8-sig").fillna("")
        logger.info(f"{len(_df)}권 로드 완료")
    except Exception as e:
        logger.error(f"CSV 로드 실패: {e}")
        return

    # Without this guard a missing column would raise KeyError outside any
    # try block and silently kill the loader thread.
    if "isbn" not in _df.columns:
        logger.error(f"{BOOK_DB_PATH}에 isbn 컬럼이 없습니다.")
        return

    # One cache key per row, in row order.
    isbn_keys = [str(v) for v in _df["isbn"].tolist()]

    emb_cache = {}
    if SBERT_CACHE_PATH.exists():
        try:
            # NOTE(security): pickle.load executes code embedded in the file;
            # the cache must only ever be a file this app wrote itself.
            with open(SBERT_CACHE_PATH, "rb") as f:
                loaded = pickle.load(f)
            if not isinstance(loaded, dict):
                raise TypeError(f"unexpected cache type: {type(loaded).__name__}")
            emb_cache = loaded
            logger.info(f"임베딩 캐시: {len(emb_cache)}개")
        except Exception as e:
            logger.warning(f"캐시 로드 실패: {e}")

    # Row positions whose ISBN has no cached vector yet.
    missing = [pos for pos, key in enumerate(isbn_keys) if key not in emb_cache]
    if missing and sbert_model is not None:
        logger.info(f"신규 임베딩 계산: {len(missing)}권")
        try:
            titles = _df["title"].tolist()
            contents = _df["content"].tolist()
            for start in range(0, len(missing), MAX_EMBEDDING_BATCH):
                batch = missing[start:start + MAX_EMBEDDING_BATCH]
                texts = [
                    (str(titles[pos]) + " " + str(contents[pos]))[:500]
                    for pos in batch
                ]
                vecs = sbert_model.encode(
                    texts, convert_to_tensor=False, show_progress_bar=False,
                    batch_size=MAX_EMBEDDING_BATCH,
                )
                for pos, vec in zip(batch, vecs):
                    emb_cache[isbn_keys[pos]] = vec
                if (start // MAX_EMBEDDING_BATCH) % 10 == 0:
                    logger.info(f"  진행: {start}/{len(missing)}")

            with open(SBERT_CACHE_PATH, "wb") as f:
                pickle.dump(emb_cache, f)
            logger.info("임베딩 저장 완료")
        except Exception as e:
            logger.error(f"임베딩 계산 실패: {e}")

    try:
        # The zero-vector placeholder must have the same width as the real
        # embeddings, otherwise np.stack fails as soon as one book is
        # missing. Take the width from the data instead of hard-coding it.
        emb_dim = next(
            (int(np.asarray(v).shape[-1]) for v in emb_cache.values()), None
        )
        if emb_dim is None and sbert_model is not None:
            emb_dim = sbert_model.get_sentence_embedding_dimension()
        if emb_dim is None:
            raise ValueError("embedding dimension unknown (no cache, no encoder)")

        placeholder = np.zeros(emb_dim, dtype=np.float32)
        emb_matrix = np.stack([emb_cache.get(key, placeholder) for key in isbn_keys])
        _book_emb = torch.tensor(emb_matrix, dtype=torch.float32)
        if torch.cuda.is_available():
            _book_emb = _book_emb.to("cuda")
    except Exception as e:
        logger.error(f"임베딩 행렬 생성 실패: {e}")
        _book_emb = torch.tensor([])

    # Publish atomically so readers never see a frame/matrix mismatch.
    with _data_lock:
        df              = _df
        book_embeddings = _book_emb
        _data_ready     = True

    logger.info("백그라운드 데이터 로드 완료!")

threading.Thread(target=load_book_data, daemon=True).start()

# ============================================================
# 감정 분석
# ============================================================
def text_emotion_scores(text: str) -> Dict[str, float]:
    """Score ``text`` against each emotion label.

    Each label's score is the cosine similarity between the text embedding
    and the label's description embedding, floored at 0. A label whose
    keyword list contains a substring of the text gets a one-off +0.15
    bonus. When the total is positive the scores are scaled to sum to 1.

    Returns all zeros when ``text`` is empty or the encoder / label
    embeddings are unavailable.
    """
    scores = {emo: 0.0 for emo in _EMOTION_LABELS}
    if not text or not sbert_model or _LABEL_EMBS is None:
        return scores

    try:
        user_emb   = sbert_model.encode(text, convert_to_tensor=True, show_progress_bar=False)
        cos_scores = sbert_util.cos_sim(user_emb, _LABEL_EMBS)[0]
        for i, label in enumerate(_EMOTION_LABELS):
            # Cosine similarity can be negative. Negative entries would make
            # the sum-normalisation below meaningless (the total can shrink
            # towards zero or change sign), so treat them as "no evidence".
            scores[label] = max(0.0, float(cos_scores[i].item()))
    except Exception as e:
        logger.error(f"텍스트 감정 실패: {e}")

    # Keyword bonus: at most one bonus per emotion, however many keywords hit.
    text_lower = text.lower()
    for emotion, keywords in _KEYWORD_BOOSTS.items():
        if any(kw in text_lower for kw in keywords):
            scores[emotion] += 0.15

    total = sum(scores.values())
    if total > 0:
        scores = {k: v / total for k, v in scores.items()}
    return scores


def audio_emotion_scores(y: np.ndarray, sr: int) -> Dict[str, float]:
    """Return speech-emotion scores keyed by the Korean emotion labels.

    Runs the custom model and moves each English class probability onto
    its Korean label via ``_CUSTOM_LABEL_MAP``. Labels without a mapped
    class stay at 0.0, and classes without a mapping are ignored. If the
    model yields nothing but zeros, every label is 0.0.
    """
    mapped = dict.fromkeys(_EMOTION_LABELS, 0.0)

    raw_probs = _infer_emotion_model(y, sr)   # e.g. {"Happy": 0.6, "Sad": 0.2, ...}
    if not raw_probs or not any(v != 0 for v in raw_probs.values()):
        return mapped

    for english, probability in raw_probs.items():
        korean = _CUSTOM_LABEL_MAP.get(english)
        if not korean or korean not in mapped:
            continue
        mapped[korean] += probability

    return mapped


def fused_emotion(t_scores: Dict[str, float], a_scores: Dict[str, float]) -> Tuple[str, Dict[str, float]]:
    """Blend text and audio scores and pick the strongest emotion.

    If every audio score is zero the text scores are used unchanged (the
    same dict object is returned). Otherwise the audio scores are divided
    by their maximum and mixed 60 % text / 40 % audio for each label in
    ``_EMOTION_LABELS``.

    Returns:
        ``(top_emotion, combined_scores)``; on ties the first label wins.
    """
    audio_is_silent = not any(v != 0 for v in a_scores.values())

    if audio_is_silent:
        combined = t_scores
    else:
        peak = max(a_scores.values()) or 1.0
        scaled_audio = {label: value / peak for label, value in a_scores.items()}
        combined = {}
        for label in _EMOTION_LABELS:
            combined[label] = (t_scores[label] * 0.6) + (scaled_audio[label] * 0.4)

    winner = max(combined, key=lambda label: combined.get(label))
    return winner, combined


# ============================================================
# 추천
# ============================================================
def get_recommendations(user_input: str, emotion: str, top_n: int = 3) -> List[Dict]:
    """Return the ``top_n`` best-matching books for an utterance.

    Score per book = cosine similarity between the utterance and the book
    embedding, multiplied by the session multiplier for ``emotion``, plus
    0.1 x the stored feedback weight for ``(emotion, title)``. Scores are
    rounded to three decimals and ranked in descending order; ties keep
    catalogue order.

    Only the winning rows are materialised as dicts, so the per-request
    cost no longer includes building a record for every book.

    Returns:
        A list of dicts with ``isbn``, ``title``, ``author``,
        ``publisher``, ``content`` (first 150 characters), ``img_url`` and
        ``score``. Empty if data is not ready or anything fails.
    """
    # Snapshot the shared state so the frame and matrix belong together.
    with _data_lock:
        ready = _data_ready
        _df   = df
        _emb  = book_embeddings

    if not ready or sbert_model is None or _df.empty or len(_emb) == 0:
        return []

    try:
        session_w = _session.score_multiplier(emotion)
        user_vec  = sbert_model.encode(user_input, convert_to_tensor=True, show_progress_bar=False)
        cos_sims  = sbert_util.cos_sim(user_vec, _emb)[0].cpu().numpy()

        # Guard against a frame / matrix length mismatch.
        n_books = min(len(_df), len(cos_sims))
        finals = (cos_sims[:n_books].astype(np.float64) * session_w).tolist()

        # Feedback boosts apply only to titles rated under this emotion.
        fb_weights = _load_feedback_weights()
        boosts = {
            title: weight * 0.1
            for (fb_emotion, title), weight in fb_weights.items()
            if fb_emotion == emotion
        }
        if boosts:
            for pos, title in enumerate(_df["title"].tolist()[:n_books]):
                boost = boosts.get(str(title))
                if boost:
                    finals[pos] += boost

        scores = [round(value, 3) for value in finals]
        # sorted() is stable, so equal scores keep catalogue order.
        ranked = sorted(range(n_books), key=scores.__getitem__, reverse=True)[:top_n]

        results = []
        for pos in ranked:
            row = _df.iloc[pos]
            results.append({
                "isbn":      str(row.get("isbn", "")),
                "title":     str(row.get("title", "")),
                "author":    str(row.get("author", "-")),
                "publisher": str(row.get("publisher", "-")),
                "content":   str(row.get("content", ""))[:150],
                "img_url":   str(row.get("img_url", "")),
                "score":     scores[pos],
            })
        return results
    except Exception as e:
        logger.error(f"추천 실패: {e}")
        return []


# ============================================================
# 추천 결과 → JSON 렌더링
# ============================================================
def _render_books_json(user_input: str, emotion: str, combined: Dict[str, float], books: List[Dict]) -> str:
    """Serialise an analysis result to pretty-printed JSON.

    With no books the payload is a single ``error`` entry. Otherwise it
    holds the utterance, the chosen emotion, that emotion's combined score
    rounded to three decimals (0.0 if absent) and the books reduced to
    their public fields.
    """
    if not books:
        failure = {"error": "추천할 책을 찾지 못했습니다."}
        return json.dumps(failure, ensure_ascii=False, indent=2)

    # Fields exposed to the client, in output order; anything else on a
    # book record is dropped.
    public_fields = ("isbn", "title", "author", "publisher", "content", "img_url")
    book_entries = []
    for book in books:
        book_entries.append({field: book[field] for field in public_fields})

    payload = {
        "user_input":    user_input,
        "emotion":       emotion,
        "emotion_score": round(combined.get(emotion, 0.0), 3),
        "recommendation_books": book_entries,
    }
    return json.dumps(payload, ensure_ascii=False, indent=2)


# ============================================================
# 피드백
# ============================================================
def _load_feedback_weights() -> Dict[Tuple[str, str], float]:
    """Aggregate the feedback CSV into a weight per ``(emotion, title)``.

    Every accepted row (``accepted == 1``) adds 1.0 and every other row
    subtracts 0.5. A non-numeric ``accepted`` cell counts as not accepted
    instead of discarding the whole file.

    Returns:
        The weight mapping; empty if the file is missing or unreadable
        (the read error is logged).
    """
    if not FEEDBACK_PATH.exists():
        return {}

    try:
        fb_df = pd.read_csv(FEEDBACK_PATH, encoding="utf-8-sig", on_bad_lines="skip")
    except Exception as e:
        logger.warning(f"피드백 로드 실패: {e}")
        return {}

    n_rows = len(fb_df)

    def _column(name, default):
        # Mirror row.get(name, default) for a column that may be absent.
        return fb_df[name].tolist() if name in fb_df.columns else [default] * n_rows

    emotions = _column("emotion", "")
    titles = _column("title", "")
    if "accepted" in fb_df.columns:
        accepted_flags = (
            pd.to_numeric(fb_df["accepted"], errors="coerce").fillna(0).tolist()
        )
    else:
        accepted_flags = [0] * n_rows

    weights: Dict[Tuple[str, str], float] = {}
    for emo, title, flag in zip(emotions, titles, accepted_flags):
        key = (str(emo), str(title))
        weights[key] = weights.get(key, 0) + (1.0 if int(flag) == 1 else -0.5)
    return weights


def save_feedback_csv(isbn: str, title: str, emotion: str, accepted: int, rank: int):
    """Append one feedback row to the CSV and update the session tally.

    Line breaks in the title are replaced by spaces so a record always
    stays on one line. The header is written when the file does not exist
    yet or exists but is empty. The session counters are only updated
    after the row was written; any failure is logged, not raised.

    Args:
        isbn: Book identifier.
        title: Book title (coerced to ``str``; callers may pass a value
            taken straight from the catalogue frame).
        emotion: Emotion label the recommendation was made for.
        accepted: 1 if the user accepted the book, anything else otherwise.
        rank: Position of the book in the recommendation list.
    """
    try:
        clean_title = str(title).replace("\n", " ").replace("\r", " ")
        needs_header = (
            not FEEDBACK_PATH.exists() or FEEDBACK_PATH.stat().st_size == 0
        )
        data = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "isbn":      isbn,
            "title":     clean_title,
            "emotion":   emotion,
            "accepted":  accepted,
            "rank":      rank,
        }
        pd.DataFrame([data]).to_csv(
            FEEDBACK_PATH, mode="a", index=False,
            header=needs_header,
            encoding="utf-8-sig", quoting=csv.QUOTE_ALL,
        )
        if accepted == 1:
            _session.accepted_counts[emotion] += 1
        else:
            _session.rejected_counts[emotion] += 1
    except Exception as e:
        logger.error(f"피드백 저장 실패: {e}")


def api_feedback(feedback_data) -> str:
    """Store client feedback and return a JSON status string.

    Expected payload (a JSON string or an already-parsed dict), keyed by
    ISBN:

        {
          "9788901234567": {"emotion": "기쁨", "accepted": 1, "rank": 1},
          "9788907654321": {"emotion": "기쁨", "accepted": 0, "rank": 2}
        }

    The title of each book is looked up in the catalogue; when the
    catalogue is not loaded yet or the ISBN is unknown, the ISBN itself is
    stored as the title so the feedback is not lost.

    Returns:
        ``{"status": "ok", "saved": [...]}`` on success, otherwise
        ``{"error": "..."}``; both as pretty-printed JSON.
    """
    try:
        data = json.loads(feedback_data) if isinstance(feedback_data, str) else feedback_data
        if not isinstance(data, dict):
            raise ValueError("feedback payload must be an object keyed by ISBN")

        with _data_lock:
            _df = df

        # Convert the ISBN column once, not once per feedback item. Before
        # the background load finishes the frame has no columns at all.
        has_catalogue = "isbn" in _df.columns and "title" in _df.columns
        isbn_col = _df["isbn"].astype(str) if has_catalogue else None

        saved = []
        for isbn, info in data.items():
            emotion  = str(info.get("emotion", ""))
            accepted = int(info.get("accepted", 0))
            rank     = int(info.get("rank", 1))

            title = isbn
            if isbn_col is not None:
                matches = _df.loc[isbn_col == str(isbn), "title"]
                if not matches.empty:
                    title = matches.iloc[0]

            save_feedback_csv(isbn, title, emotion, accepted, rank)
            saved.append(isbn)

        return json.dumps({"status": "ok", "saved": saved}, ensure_ascii=False, indent=2)
    except Exception as e:
        logger.error(f"API 피드백 실패: {e}")
        return json.dumps({"error": str(e)}, ensure_ascii=False, indent=2)

def get_feedback_stats() -> str:
    """Summarise the feedback CSV as Markdown.

    The first line is the total number of feedback rows, followed by one
    bullet per emotion with its row count and acceptance rate. Returns a
    placeholder message when there is no feedback yet, or an error message
    when the file cannot be processed.
    """
    no_feedback = "아직 피드백이 없습니다."
    if not FEEDBACK_PATH.exists():
        return no_feedback

    try:
        fb_df = pd.read_csv(FEEDBACK_PATH, encoding="utf-8-sig", on_bad_lines="skip")
        total = len(fb_df)
        if total == 0:
            return no_feedback

        per_emotion = fb_df.groupby("emotion")["accepted"].agg(["count", "sum"])
        report = [f"**총 피드백: {total}건**\n"]
        for emo, n_rows, n_accepted in zip(
            per_emotion.index, per_emotion["count"], per_emotion["sum"]
        ):
            count = int(n_rows)
            accepted = int(n_accepted)
            if count > 0:
                rate = accepted / count * 100
            else:
                rate = 0
            report.append(f"- {emo}: {count}건 (수락률 {rate:.0f}%)")
        return "\n".join(report)
    except Exception as e:
        return f"통계 로드 실패: {e}"


# ============================================================
# 메인 처리
# ============================================================
def process_voice(audio_input):
    """Run the full pipeline: audio -> text -> emotion -> books.

    Args:
        audio_input: Either a path to an audio file (``str``) or a
            ``(sample_rate, samples)`` tuple as delivered by a numpy-typed
            audio component. ``None`` means nothing was recorded.

    Returns:
        ``(json_text, books)``, where ``json_text`` is the pretty-printed
        result or an ``{"error": ...}`` payload and ``books`` is the list of
        recommendation dicts (empty on any error).
    """
    if not _data_ready:
        return json.dumps({"error": "도서 데이터 로딩 중입니다. 잠시 후 다시 시도해주세요."}, ensure_ascii=False, indent=2), []
    if audio_input is None:
        return json.dumps({"error": "음성을 입력해주세요."}, ensure_ascii=False, indent=2), []
    if stt_model is None:
        return json.dumps({"error": "STT 모델이 로드되지 않았습니다."}, ensure_ascii=False, indent=2), []

    try:
        # File path (str) versus (sample_rate, ndarray) tuple.
        if isinstance(audio_input, str):
            import soundfile as sf
            y, sr = sf.read(audio_input)
        else:
            sr, y = audio_input  # numpy fallback (e.g. microphone)

        y = np.asarray(y).astype(np.float32)
        # Downmix for BOTH input kinds: multi-channel arrays arrive as
        # (samples, channels) and the models below expect a 1-D signal.
        if y.ndim > 1:
            y = y.mean(axis=1)

        if len(y) == 0:
            return json.dumps({"error": "음성이 너무 짧습니다."}, ensure_ascii=False, indent=2), []

        # Peak-normalise to [-1, 1]; skipped for pure silence.
        max_v = np.max(np.abs(y))
        if max_v > 0:
            y = y / max_v

        stt_result = stt_model({"sampling_rate": sr, "raw": y})
        user_input = stt_result["text"].strip()

        if not user_input:
            return json.dumps({"error": "음성이 인식되지 않았습니다."}, ensure_ascii=False, indent=2), []

        t_scores = text_emotion_scores(user_input)
        a_scores = audio_emotion_scores(y, sr)         # custom speech model
        top_label, combined = fused_emotion(t_scores, a_scores)
        books      = get_recommendations(user_input, top_label, top_n=3)
        books_json = _render_books_json(user_input, top_label, combined, books)

        return books_json, books

    except Exception as e:
        logger.error(f"처리 오류: {e}")
        return json.dumps({"error": str(e)}, ensure_ascii=False, indent=2), []


def run_analysis(audio):
    """UI callback: forward ``audio`` to ``process_voice``.

    Returns the same ``(json_text, books)`` pair, which the button wiring
    maps onto the JSON view and the books state.
    """
    result_json, recommended = process_voice(audio)
    return result_json, recommended


# ============================================================
# Gradio UI
# ============================================================
# Declarative UI definition. Components are created in layout order and the
# event wiring at the bottom refers to them, so statement order matters.
with gr.Blocks(theme=gr.themes.Soft(), title="Boolook") as demo:
    gr.Markdown("""
    # Boolook — 음성 기반 감정 분석 책 추천
    당신의 감정을 말로 표현하면, AI가 딱 맞는 책을 추천해드립니다.
    **사용법:** 마이크로 감정 표현 → 분석하기
    """)

    # Holds the most recent recommendation list (second output of the
    # analyse callback).
    state_books = gr.State([])

    with gr.Row():
        # Left column: audio input and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### 음성 입력")
            # type="filepath": the callback receives a path string.
            audio_in    = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="마이크 또는 파일 업로드",
            )
            analyze_btn = gr.Button("분석하기", variant="primary", size="lg")
            gr.Markdown("예: '오늘 너무 슬퍼요', '행복한 기분이에요'")

        # Right column: read-only JSON view of the result.
        with gr.Column(scale=1):
            out_books_json = gr.Code(
                label="분석 결과 & 추천 도서",
                language="json",
                interactive=False,
            )

    # Collapsible feedback statistics, refreshed on demand.
    with gr.Accordion("통계", open=False):
        stats_md    = gr.Markdown("새로고침을 눌러주세요.")
        refresh_btn = gr.Button("통계 새로고침")
        refresh_btn.click(fn=get_feedback_stats, outputs=stats_md)

    # Feedback API endpoint (client-only; hidden from the UI)
    with gr.Row(visible=False):
        fb_api_in  = gr.Textbox()
        fb_api_out = gr.Textbox()
        fb_api_btn = gr.Button()
        fb_api_btn.click(
            fn=api_feedback,
            inputs=fb_api_in,
            outputs=fb_api_out,
            api_name="feedback",
        )

    # Main action: analyse the recording and show / store the result.
    analyze_btn.click(
        fn=run_analysis,
        inputs=audio_in,
        outputs=[out_books_json, state_books],
    )

if __name__ == "__main__":
    demo.launch()