Spaces:

ayaka68
/

voice2place

Sleeping

App Files Files Community

ayaka68 committed on Sep 5, 2025

Commit

7cc64b6

verified ·

1 Parent(s): e204bd0

Update app.py

Browse files

Files changed (1) hide show

app.py +384 -159

app.py CHANGED Viewed

@@ -1,26 +1,54 @@
 # =========================
-# app.py (AIモデル搭載版)
 # =========================
 import os
-import io
-import uuid
-import datetime as dt
-import csv
-import base64
-import random
 import warnings
-# --- 警告の抑制 ---
 warnings.filterwarnings('ignore')
-# --- ライブラリのインポート ---
 import numpy as np
 import soundfile as sf
 import streamlit as st
 from audiorecorder import audiorecorder
 from pydub import AudioSegment
 import torch
-from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
 # =========================
 # 架空の場所データ
@@ -46,12 +74,11 @@ REASON_TAGS = ["静けさ","緑","水辺","発散","創作","交流","体験","
 # =========================
 # AIモデル関連の関数
 # =========================
 @st.cache_resource
 def load_model():
-    """AIモデルをロードしてStreamlitのキャッシュに保存"""
     try:
-        model_name = "Mizuiro-inc/emotion2vec-base-japanese"
         with st.spinner('AIモデルを初回ロード中... (数分かかる場合があります)'):
             feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
@@ -60,248 +87,446 @@ def load_model():
         return feature_extractor, model
     except Exception as e:
         st.error(f"モデルのロードに失敗しました: {e}")
-        st.stop()
-def predict_emotion(audio_bytes):
-    """音声データからAIが感情を予測する"""
     try:
-        feature_extractor, model = load_model()
-        # 音声データを16kHzのWAV形式に変換
         wav_bytes_16k = to_wav_bytes(audio_bytes, target_sr=16000)
         y, sr = sf.read(io.BytesIO(wav_bytes_16k), dtype="float32")
-        # 音声が長すぎる場合は最初の30秒のみ使用
-        max_duration = 30  # 秒
         max_samples = sr * max_duration
         if len(y) > max_samples:
             y = y[:max_samples]
             st.warning("音声が30秒を超えているため、最初の30秒のみを分析します")
-        # 特徴量を抽出し、PyTorchテンソルに変換
         inputs = feature_extractor(y, sampling_rate=sr, return_tensors="pt", padding=True)
-        # AIモデルで予測を実行
         with torch.no_grad():
             logits = model(**inputs).logits
-        # 最も確率の高い感情ラベルを取得
         predicted_id = torch.argmax(logits, dim=-1).item()
         predicted_label = model.config.id2label[predicted_id]
-        # 各感情の確率も計算 (表示用)
         probabilities = torch.softmax(logits, dim=-1)[0]
         all_scores = {model.config.id2label[i]: prob.item() for i, prob in enumerate(probabilities)}
-        return predicted_label, all_scores
     except Exception as e:
-        st.error(f"感情予測中にエラーが発生しました: {e}")
-        return "neutral", {"neutral": 1.0}
 # =========================
-# 汎用関数
 # =========================
 def to_wav_bytes(any_bytes: bytes, target_sr=16000, mono=True) -> bytes:
-    """様々な形式の音声をWAV形式のbytesに変換"""
-    if not any_bytes:
-        st.error("音声が空です。")
         st.stop()
     try:
         seg = AudioSegment.from_file(io.BytesIO(any_bytes))
-        if mono:
-            seg = seg.set_channels(1)
-        if target_sr:
-            seg = seg.set_frame_rate(target_sr)
-        buf = io.BytesIO()
-        seg.export(buf, format="wav")
-        return buf.getvalue()
     except Exception as e:
-        st.error(f"音声ファイルを処理できませんでした: {e}")
         st.stop()
 def audio_player_bytes(b: bytes, mime="audio/wav"):
-    """音声データをUIに表示するためのHTMLを生成"""
-    if not b:
         return
     b64 = base64.b64encode(b).decode("utf-8")
     st.markdown(
-        f'<audio controls preload="metadata" style="width:100%">'
-        f'<source src="data:{mime};base64,{b64}" type="{mime}">'
-        f'</audio>',
-        unsafe_allow_html=True
     )
-def score_places_by_ai(emo_label, top_k=4):
-    """AIの感情ラベルに基づいて場所を推薦する"""
-    # emotion2vec-base-japaneseの実際のラベルに対応
-    label_to_emo_key = {
-        'happy': ['joy', 'surprise'],
-        'sad': ['calm', 'joy'],
-        'angry': ['release', 'calm'],
-        'neutral': ['calm', 'surprise', 'joy'],
-        'surprise': ['surprise', 'joy'],
-        'disgust': ['release', 'calm'],
-        'fear': ['calm', 'release']
     }
-    priors = label_to_emo_key.get(emo_label, ['calm', 'joy'])
     scored = []
     for p in PLACES:
         base = 0.5
-        if p["emo_key"] == priors[0]:
-            base += 0.5
-        if len(priors) > 1 and p["emo_key"] == priors[1]:
-            base += 0.25
-        scored.append((base + random.uniform(-0.02, 0.02), p))
     scored.sort(key=lambda x: x[0], reverse=True)
-    # 多様性を確保するロジック
-    candidates = [p for _, p in scored]
     picked, seen = [], set()
     for p in candidates:
-        if p["emo_key"] not in seen:
-            picked.append(p)
-            seen.add(p["emo_key"])
-        if len(picked) >= top_k:
-            break
     if len(picked) < top_k:
         for p in candidates:
-            if p not in picked:
                 picked.append(p)
-            if len(picked) >= top_k:
-                break
     return picked
 # =========================
-# メイン処理（ここから実行開始）
 # =========================
 def main():
     st.set_page_config(page_title="Voice→Place Recommender", page_icon="🎙️", layout="centered")
-    st.title("🎙️ 声の感情で『架空の場所』をレコメンド (AI版)")
     st.caption("録音→AI感情推定→上位スポット→評価→CSV保存（匿名）")
-    # ---- Session state 初期化 ----
-    for key, default in [("wav_bytes", None), ("recs", None), ("emo_label", None), ("scores", None), ("rec_key", 0)]:
-        if key not in st.session_state:
-            st.session_state[key] = default
-    # ---- 1) 録音 / アップロード ----
     st.subheader("1) 録音またはアップロード")
-    tab_rec, tab_upload = st.tabs(["🎤 録音する", "📁 ファイルを使う"])
     with tab_rec:
         audio = audiorecorder("録音開始 ▶", "録音停止 ■", key=f"rec_{st.session_state['rec_key']}")
         if len(audio) > 0:
-            buf = io.BytesIO()
-            audio.export(buf, format="wav")
             st.session_state["wav_bytes"] = buf.getvalue()
-            audio_player_bytes(st.session_state["wav_bytes"])
         if st.button("🧹 クリアして新しく録音", use_container_width=True):
-            for k in ["wav_bytes", "recs", "emo_label", "scores"]:
                 st.session_state[k] = None
             st.session_state["rec_key"] += 1
             st.rerun()
     with tab_upload:
-        up = st.file_uploader("WAV/MP3/M4A を選択", type=["wav", "mp3", "m4a"])
-        if up:
-            st.session_state["wav_bytes"] = up.read()
-            audio_player_bytes(st.session_state["wav_bytes"])
-    # ---- 2) 同意 ----
     st.subheader("2) 同意")
-    consent = st.radio("研究利用の同意", ["保存しない（体験のみ）", "匿名で保存する"], horizontal=True)
-    # ---- 推定 & レコメンド実行 ----
-    if st.button("🔍 AIで推定 & レコメンド", type="primary", use_container_width=True,
                  disabled=(st.session_state["wav_bytes"] is None)):
-        with st.spinner('AIが感情を分析中...🤖'):
             raw_bytes = st.session_state["wav_bytes"]
-            emo_label, all_scores = predict_emotion(raw_bytes)
-            st.session_state.update({
-                "emo_label": emo_label,
-                "scores": all_scores,
-                "recs": score_places_by_ai(emo_label)
-            })
         st.success("分析が完了しました！")
-    # ---- 結果表示 ----
-    if st.session_state.get("recs"):
-        emo_label = st.session_state["emo_label"]
         scores = st.session_state["scores"]
         recs = st.session_state["recs"]
-        st.subheader("分析結果")
         # 感情の日本語表示
         emotion_japanese = {
-            'happy': '😊 喜び',
-            'sad': '😢 悲しみ',
-            'angry': '😠 怒り',
-            'neutral': '😐 中立',
-            'surprise': '😲 驚き',
-            'disgust': '😤 嫌悪',
-            'fear': '😨 恐怖'
         }
-        col1, col2 = st.columns([0.6, 0.4])
-        with col1:
-            display_emotion = emotion_japanese.get(emo_label, emo_label)
-            st.success(f"**AIの推定感情: {display_emotion}**")
-            # スコアを日本語で表示
-            japanese_scores = {}
-            for label, score in scores.items():
-                jp_label = emotion_japanese.get(label, label)
-                japanese_scores[jp_label] = score
-            st.write("感情スコアの詳細:")
-            st.bar_chart(japanese_scores)
-        with col2:
-            st.write("この感情におすすめの場所:")
-            if recs:
-                st.image(recs[0]["image"], use_container_width=True)
-                st.markdown(f"**{recs[0]['name']}**")
-                st.caption(f"タグ: {', '.join(recs[0]['tags'])}")
         st.subheader("3) おすすめ（上位4件）")
         cols = st.columns(4)
         for i, p in enumerate(recs[:4]):
-            with cols[i]:
-                if "image" in p:
                     st.image(p["image"], use_container_width=True)
                 st.markdown(f"**{p['name']}**")
                 st.caption(f"タグ: {', '.join(p['tags'])}")
-        # ---- 4) 評価入力 & 5) 保存 ----
-        with st.form("feedback_form"):
-            st.subheader("4) 評価")
-            choice_name = st.selectbox("第一候補を選んでください", [p["name"] for p in recs[:4]])
-            rating_like = st.slider("行ってみたい度（★）", 1, 5, 4)
-            rating_vibe = st.slider("気分に合う度（🎯）", 1, 5, 4)
-            reasons = st.multiselect("理由タグ（1–3個）", REASON_TAGS, max_selections=3)
-            comment = st.text_input("ひとことコメント（任意・20字）", max_chars=20)
-            if st.form_submit_button("💾 ログ保存", use_container_width=True):
-                st.info("ログ保存機能は現在開発中です。")
-    # ---- フッター ----
     st.divider()
-    if st.button("▶ 次の人を試す（状態をクリア）", use_container_width=True):
-        for k in ["wav_bytes", "recs", "emo_label", "scores"]:
-            if st.session_state.get(k):
-                st.session_state[k] = None
         st.session_state["rec_key"] += 1
         st.rerun()
-# =========================
 # エントリーポイント
-# =========================
 if __name__ == "__main__":
     main()

 # =========================
+# streamlit_app.py 日本語AIモデル版
 # =========================
 import os
+import tempfile
 import warnings
+import logging
+# Raise the matplotlib loggers to ERROR (per the original note: to suppress Fontconfig warnings)
+logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)
+logging.getLogger('matplotlib').setLevel(logging.ERROR)
+# Suppress every Python warning
 warnings.filterwarnings('ignore')
+# Permission / cache workarounds
+os.environ["STREAMLIT_BROWSER_GATHERUSAGESTATS"] = "false"
+os.environ["NUMBA_DISABLE_JIT"] = "1"
+# Point Matplotlib at a freshly created temporary config directory
+mpl_config_dir = tempfile.mkdtemp()
+os.environ["MPLCONFIGDIR"] = mpl_config_dir
+# Write a matplotlibrc into that directory (Agg backend, DejaVu Sans font)
+matplotlibrc_path = os.path.join(mpl_config_dir, 'matplotlibrc')
+with open(matplotlibrc_path, 'w') as f:
+    f.write("""
+backend: Agg
+font.family: sans-serif
+font.sans-serif: DejaVu Sans
+axes.unicode_minus: False
+""")
+# その他のインポート
+import io, uuid, datetime as dt, csv, base64, json, random
 import numpy as np
 import soundfile as sf
 import streamlit as st
 from audiorecorder import audiorecorder
 from pydub import AudioSegment
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib import rcParams
 import torch
+from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+# フォント設定
+rcParams["font.family"] = "DejaVu Sans"
+rcParams["axes.unicode_minus"] = False
 # =========================
 # 架空の場所データ
 # =========================
 # AIモデル関連の関数
 # =========================
 @st.cache_resource
 def load_model():
+    """日本語音声感情認識モデルをロード"""
     try:
+        model_name = "imprt/kushinada-hubert-base-jtes-er"
         with st.spinner('AIモデルを初回ロード中... (数分かかる場合があります)'):
             feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
         return feature_extractor, model
     except Exception as e:
         st.error(f"モデルのロードに失敗しました: {e}")
+        st.info("音声特徴量ベースの分析に切り替えます")
+        return None, None
+def predict_emotion_ai(audio_bytes):
+    """Predict an emotion label from audio with the pretrained classifier.
+
+    Falls back to ``predict_emotion_features`` when ``load_model`` returns
+    ``(None, None)`` or when anything in the model path raises.
+
+    Args:
+        audio_bytes: Encoded audio in any container ``to_wav_bytes`` accepts.
+
+    Returns:
+        ``(label, scores, "AI")`` where ``scores`` maps every label in
+        ``model.config.id2label`` to its softmax probability, or whatever
+        ``predict_emotion_features`` returns on the fallback path.
+    """
+    feature_extractor, model = load_model()
+    if feature_extractor is None or model is None:
+        # Model unavailable: fall back to the feature-based estimate
+        return predict_emotion_features(audio_bytes)
     try:
+        # Convert the audio to 16 kHz WAV before decoding
         wav_bytes_16k = to_wav_bytes(audio_bytes, target_sr=16000)
         y, sr = sf.read(io.BytesIO(wav_bytes_16k), dtype="float32")
+        # Only the first 30 seconds are analysed
+        max_duration = 30
         max_samples = sr * max_duration
         if len(y) > max_samples:
             y = y[:max_samples]
             st.warning("音声が30秒を超えているため、最初の30秒のみを分析します")
+        # Feature extraction and inference (no gradients needed)
         inputs = feature_extractor(y, sampling_rate=sr, return_tensors="pt", padding=True)
         with torch.no_grad():
             logits = model(**inputs).logits
+        # Top-1 label
         predicted_id = torch.argmax(logits, dim=-1).item()
         predicted_label = model.config.id2label[predicted_id]
+        # Per-label probabilities
         probabilities = torch.softmax(logits, dim=-1)[0]
         all_scores = {model.config.id2label[i]: prob.item() for i, prob in enumerate(probabilities)}
+        return predicted_label, all_scores, "AI"
     except Exception as e:
+        # Deliberate best-effort: report the failure and use the fallback estimator
+        st.warning(f"AI予測中にエラーが発生しました: {e}")
+        return predict_emotion_features(audio_bytes)
 # =========================
+# 音声特徴量ベースの関数（フォールバック用）
 # =========================
+def extract_features(y, sr):
+    """Extract coarse acoustic features from a waveform.
+
+    Args:
+        y: NumPy array of audio samples (assumed 1-D mono; the callers in
+            this file decode audio produced by ``to_wav_bytes``).
+        sr: Sampling rate in Hz.
+
+    Returns:
+        dict with float values under the keys ``"f0_mean"``,
+        ``"energy_mean"``, ``"spec_centroid"``, ``"zcr_mean"`` and
+        ``"duration"`` (seconds, measured after trimming). All values are
+        0.0 when ``y`` is empty.
+    """
+    abs_y = np.abs(y)
+    if abs_y.size == 0:
+        # No samples at all: .max() below would raise, so report "nothing"
+        return {
+            "f0_mean": 0.0,
+            "energy_mean": 0.0,
+            "spec_centroid": 0.0,
+            "zcr_mean": 0.0,
+            "duration": 0.0
+        }
+    # Rough trim: keep the span between the first and last sample above 1% of peak
+    thr = 0.01 * (abs_y.max() + 1e-9)
+    idx = np.where(abs_y > thr)[0]
+    if idx.size >= 2:
+        y = y[idx[0]:idx[-1]+1]
+    # RMS energy
+    energy_mean = float(np.sqrt(np.mean(y**2) + 1e-12))
+    # Spectral centroid over the whole (Hann-windowed when long enough) signal
+    n = len(y)
+    win = np.hanning(n) if n >= 512 else np.ones_like(y)
+    y_win = y * win
+    spec = np.fft.rfft(y_win)
+    mag = np.abs(spec) + 1e-12
+    freqs = np.fft.rfftfreq(len(y_win), d=1.0/sr)
+    sc_mean = float((freqs * mag).sum() / mag.sum())
+    # Zero-crossing rate (fraction of adjacent sample pairs that change sign)
+    zc = (y[:-1] * y[1:] < 0).astype(np.float32)
+    zcr_mean = float(zc.mean()) if zc.size else 0.0
+    # F0 from the autocorrelation peak inside the 80-600 Hz lag band.
+    # Only the lags in that band are evaluated; a full np.correlate over the
+    # whole clip is quadratic in its length.
+    fmin, fmax = 80.0, 600.0
+    f0_est = 0.0
+    if n >= int(sr / fmin) + 2:
+        lmin = max(1, int(sr / fmax))
+        lmax = min(n - 1, int(sr / fmin))
+        if lmax > lmin:
+            band = np.array([np.dot(y[:n - lag], y[lag:]) for lag in range(lmin, lmax)])
+            lag = lmin + int(np.argmax(band))
+            f0_est = float(sr / lag)
+    return {
+        "f0_mean": float(f0_est),
+        "energy_mean": energy_mean,
+        "spec_centroid": sc_mean,
+        "zcr_mean": zcr_mean,
+        "duration": len(y)/sr
+    }
+def predict_emotion_features(audio_bytes):
+    """Heuristic emotion estimate from acoustic features (fallback path).
+
+    Args:
+        audio_bytes: Encoded audio in any container ``to_wav_bytes`` accepts.
+
+    Returns:
+        ``(label, scores, "Features")``. ``scores`` holds pseudo-scores for
+        "happiness", "anger", "sadness" and "neutral": 0.7 on the chosen
+        label plus 0.3 added to "neutral".
+    """
+    wav_16k = to_wav_bytes(audio_bytes, target_sr=16000)
+    samples, rate = sf.read(io.BytesIO(wav_16k), dtype="float32")
+    features = extract_features(samples, rate)
+    pitch = features["f0_mean"]
+    energy = features["energy_mean"]
+    zcr = features["zcr_mean"]
+    # Squash hand-tuned linear combinations into (-1, 1) arousal / valence
+    arousal = float(np.tanh(160*energy + 4*zcr))
+    valence = float(np.tanh(((pitch-170)/120) + 15*energy))
+    # Quadrant rules; everything not matched below stays "neutral"
+    label = "neutral"
+    if valence >= 0.22:
+        if arousal >= 0.22:
+            label = "happiness"
+    elif valence < 0.10:
+        if arousal >= 0.30:
+            label = "anger"
+        elif arousal < 0.18:
+            label = "sadness"
+    # Pseudo-scores: zero everywhere, then weight the winner and "neutral"
+    scores = dict.fromkeys(["happiness", "anger", "sadness", "neutral"], 0.0)
+    scores[label] = 0.7
+    scores["neutral"] += 0.3
+    return label, scores, "Features"
+# =========================
+# 共通関数
+# =========================
 def to_wav_bytes(any_bytes: bytes, target_sr=16000, mono=True) -> bytes:
+    """Decode audio bytes and re-encode them as WAV.
+
+    Args:
+        any_bytes: Encoded audio in any format ``AudioSegment.from_file`` can read.
+        target_sr: Frame rate to resample to; skipped when falsy.
+        mono: When true, downmix to a single channel.
+
+    Returns:
+        The WAV-encoded bytes.
+
+    Shows a Streamlit error and calls ``st.stop()`` when the input is empty
+    or cannot be decoded.
+    """
+    if not any_bytes or len(any_bytes) == 0:
+        st.error("音声が空です。録音やアップロードを確認してください。")
         st.stop()
     try:
         seg = AudioSegment.from_file(io.BytesIO(any_bytes))
     except Exception as e:
+        st.error(f"音声を読み込めませんでした: {e}")
         st.stop()
+    if mono: seg = seg.set_channels(1)
+    if target_sr: seg = seg.set_frame_rate(target_sr)
+    # Export into an in-memory buffer rather than a file on disk
+    buf = io.BytesIO()
+    seg.export(buf, format="wav")
+    return buf.getvalue()
 def audio_player_bytes(b: bytes, mime="audio/wav"):
+    """Render an HTML ``<audio>`` player for the given bytes.
+
+    The bytes are embedded as a base64 ``data:`` URI; nothing is rendered
+    when ``b`` is empty or None.
+
+    Args:
+        b: Raw audio bytes to embed.
+        mime: MIME type written into the ``<source>`` element.
+    """
+    if not b:
         return
     b64 = base64.b64encode(b).decode("utf-8")
     st.markdown(
+        f"""
+        <audio controls preload="metadata" style="width:100%">
+          <source src="data:{mime};base64,{b64}" type="{mime}">
+          Your browser does not support the audio element.
+        </audio>
+        """,
+        unsafe_allow_html=True,
     )
+def score_places(emo_label, top_k=4, diversity=True):
+    """Recommend places for an emotion label.
+
+    Each entry of ``PLACES`` starts at 0.5, gains 0.5 when its ``emo_key``
+    equals the first prior for the label and 0.25 when it equals the second,
+    plus a random jitter in [-0.02, 0.02].
+
+    Args:
+        emo_label: Emotion label; unknown labels use a default prior list.
+        top_k: Number of places to return.
+        diversity: When true, reorder so distinct ``emo_key`` values come first.
+
+    Returns:
+        A list of at most ``top_k`` place dicts.
+    """
+    # Mapping from JTES emotion labels to place emo_keys, in priority order
+    EMO_MAP_PRIORS = {
+        "happiness": ["joy", "surprise"],
+        "anger": ["release", "calm"],
+        "sadness": ["calm", "joy"],
+        "neutral": ["calm", "surprise", "joy"],
+        # For feature-based labels (per the original note)
+        "joy": ["joy","surprise"],
+        "calm": ["calm","joy"],
+        "surprise": ["surprise","joy"],
+        "release": ["release","calm"],
     }
+    priors = EMO_MAP_PRIORS.get(emo_label, ["calm","joy","surprise"])
     scored = []
     for p in PLACES:
         base = 0.5
+        if p["emo_key"] == priors[0]: base += 0.5
+        if len(priors) > 1 and p["emo_key"] == priors[1]: base += 0.25
+        jitter = random.uniform(-0.02, 0.02)
+        scored.append((base + jitter, p))
     scored.sort(key=lambda x: x[0], reverse=True)
+    # The pool is cut to the best max(top_k, 4) entries before any diversification
+    candidates = [p for _, p in scored[:max(top_k, 4)]]
+    if not diversity:
+        return candidates[:top_k]
+    # Diversify: first pass takes one place per emo_key, in score order
     picked, seen = [], set()
     for p in candidates:
+        k = p["emo_key"]
+        if k not in seen:
+            picked.append(p); seen.add(k)
+        if len(picked) >= top_k: break
     if len(picked) < top_k:
         for p in candidates:
+            # Second pass: top up with the remaining candidates
+            if p not in picked:
                 picked.append(p)
+            if len(picked) >= top_k: break
     return picked
 # =========================
+# 感情マップ描画
+# =========================
+def plot_emotion_map(emotion_label, scores, method="AI"):
+    """Draw the emotion scores as a bar chart and a pie chart side by side.
+
+    Args:
+        emotion_label: The predicted label, shown in the pie chart title.
+        scores: Mapping of emotion label to score.
+        method: Text shown in the bar chart title.
+
+    Returns:
+        The Matplotlib figure holding both charts.
+    """
+    jp_names = {
+        'happiness': '😊 喜び',
+        'anger': '😠 怒り',
+        'sadness': '😢 悲しみ',
+        'neutral': '😐 中立'
+    }
+    palette = {
+        'happiness': '#FF6B6B',
+        'anger': '#FFA94D',
+        'sadness': '#868E96',
+        'neutral': '#51CF66'
+    }
+    fallback_color = '#74C0FC'
+    fig, (bar_ax, pie_ax) = plt.subplots(1, 2, figsize=(12, 5), dpi=150)
+    # Left panel: one bar per label, in the order the scores dict yields them
+    bar_labels = [jp_names.get(name, name) for name in scores]
+    bar_values = list(scores.values())
+    bar_colors = [palette.get(name, fallback_color) for name in scores]
+    rects = bar_ax.bar(bar_labels, bar_values, color=bar_colors, alpha=0.8)
+    bar_ax.set_ylim(0, 1)
+    bar_ax.set_ylabel('Score', fontsize=12)
+    bar_ax.set_title(f'Emotion Scores ({method})', fontsize=14, fontweight='bold')
+    bar_ax.grid(axis='y', alpha=0.3)
+    # Print each value just above its bar
+    for rect, val in zip(rects, bar_values):
+        bar_ax.text(rect.get_x() + rect.get_width()/2., rect.get_height() + 0.01,
+                    f'{val:.2f}', ha='center', va='bottom', fontsize=10)
+    # Right panel: pie of the labels whose score exceeds 0.05
+    shown = [(name, val) for name, val in scores.items() if val > 0.05]
+    pie_ax.pie([val for _, val in shown],
+               labels=[jp_names.get(name, name) for name, _ in shown],
+               colors=[palette.get(name, fallback_color) for name, _ in shown],
+               autopct='%1.0f%%', startangle=90,
+               textprops={'fontsize': 11})
+    # Put the predicted emotion in the title
+    pie_ax.set_title(f'Result: {jp_names.get(emotion_label, emotion_label)}', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    return fig
+# =========================
+# メイン処理
 # =========================
 def main():
+    """Run the Streamlit page: capture audio, estimate emotion, show places.
+
+    Flow: record or upload audio, choose consent and analysis method,
+    estimate the emotion, store the result in ``st.session_state``, then
+    render the chart, the top four places and the rating widgets.
+    """
     st.set_page_config(page_title="Voice→Place Recommender", page_icon="🎙️", layout="centered")
+    st.title("🎙️ 声の感情で『架空の場所』をレコメンド")
     st.caption("録音→AI感情推定→上位スポット→評価→CSV保存（匿名）")
+    # Initialise session state keys that are not set yet
+    for key, default in [
+        ("wav_bytes", None), ("recs", None), ("feat", None),
+        ("emotion_label", None), ("scores", None), ("method", None),
+        ("rec_key", 0),
+    ]:
+        if key not in st.session_state: st.session_state[key] = default
+    # UI: record / upload
     st.subheader("1) 録音またはアップロード")
+    # Guidance for HTTP 403 errors on file upload
+    # NOTE(review): st.warning is used as a context manager here; confirm the
+    # nested markdown actually renders inside the alert as intended.
+    with st.warning("⚠️ ファイルアップロードで403エラーが出る場合"):
+        st.markdown("""
+        **推奨方法：録音機能を使用してください**
+        1. 🎤 **録音する**タブを使用
+        2. PCやスマホで音声を再生しながら録音
+        3. または直接マイクに向かって話す
+        """)
+    tab_rec, tab_upload = st.tabs(["🎤 録音する（推奨）", "📁 ファイルを使う"])
     with tab_rec:
         audio = audiorecorder("録音開始 ▶", "録音停止 ■", key=f"rec_{st.session_state['rec_key']}")
         if len(audio) > 0:
+            buf = io.BytesIO(); audio.export(buf, format="wav")
             st.session_state["wav_bytes"] = buf.getvalue()
+            audio_player_bytes(st.session_state["wav_bytes"], mime="audio/wav")
+            st.caption(f"録音サイズ: {len(st.session_state['wav_bytes']) / 1024:.1f} KB")
         if st.button("🧹 クリアして新しく録音", use_container_width=True):
+            # Changing rec_key below gives the recorder widget a new key on rerun
+            for k in ["wav_bytes","recs","feat","emotion_label","scores","method"]:
                 st.session_state[k] = None
             st.session_state["rec_key"] += 1
             st.rerun()
     with tab_upload:
+        uploaded_file = st.file_uploader(
+            "音声ファイルを選択（WAV推奨）",
+            type=["wav", "mp3", "m4a"],
+            accept_multiple_files=False
+        )
+        if uploaded_file is not None:
+            try:
+                bytes_data = uploaded_file.getvalue()
+                st.session_state["wav_bytes"] = bytes_data
+                st.success(f"✅ ファイル読み込み成功: {uploaded_file.name}")
+                st.caption(f"ファイルサイズ: {len(bytes_data) / 1024:.1f} KB")
+                # NOTE(review): mime is fixed to audio/wav although mp3/m4a
+                # uploads are accepted above; confirm playback for those.
+                audio_player_bytes(bytes_data, mime="audio/wav")
+            except Exception as e:
+                st.error(f"❌ ファイル読み込みエラー")
+                st.exception(e)
+                st.info("💡 代わりに録音機能をお試しください")
+    # UI: consent
     st.subheader("2) 同意")
+    consent = st.radio("研究利用の同意（匿名IDで特徴量と評価を保存します）",
+                       ["保存しない（体験のみ）", "匿名で保存する"], horizontal=True)
+    # save_audio is read here but not used again in this function
+    save_audio = st.checkbox("音声ファイルも保存する（任意）", value=False)
+    # Choice of analysis method
+    analysis_method = st.radio("分析方法",
+                               ["AIモデル（推奨）", "音声特徴量ベース"],
+                               horizontal=True)
+    # Estimate & recommend (button disabled until audio is available)
+    if st.button("🔍 推定 & レコメンド", type="primary", use_container_width=True,
                  disabled=(st.session_state["wav_bytes"] is None)):
+        with st.spinner('感情を分析中...'):
             raw_bytes = st.session_state["wav_bytes"]
+            if analysis_method == "AIモデル（推奨）":
+                emotion_label, scores, method = predict_emotion_ai(raw_bytes)
+            else:
+                emotion_label, scores, method = predict_emotion_features(raw_bytes)
+            # Keep the results in session state
+            st.session_state["emotion_label"] = emotion_label
+            st.session_state["scores"] = scores
+            st.session_state["method"] = method
+            st.session_state["recs"] = score_places(emotion_label, top_k=4, diversity=True)
         st.success("分析が完了しました！")
+    # Results (shown once an estimate exists)
+    if st.session_state["recs"] is not None:
+        emotion_label = st.session_state["emotion_label"]
         scores = st.session_state["scores"]
+        method = st.session_state["method"]
         recs = st.session_state["recs"]
         # 感情の日本語表示
         emotion_japanese = {
+            'happiness': '喜び',
+            'anger': '怒り',
+            'sadness': '悲しみ',
+            'neutral': '中立',
+            'joy': '喜び',
+            'calm': '落ち着き',
+            'surprise': '驚き',
+            'release': '発散'
         }
+        display_emotion = emotion_japanese.get(emotion_label, emotion_label)
+        st.success(f"推定感情: **{display_emotion}**")
+        # One-line explanation per emotion label
+        emotion_explanations = {
+            "happiness": "喜びや楽しさを感じています",
+            "joy": "喜びや楽しさを感じています",
+            "calm": "落ち着いて穏やかな状態です",
+            "surprise": "驚きや興奮を感じています",
+            "anger": "怒りやイライラを感じています",
+            "sadness": "悲しみや元気のない状態です",
+            "neutral": "特に強い感情はない中立状態です",
+            "release": "発散や解放を求めています"
+        }
+        if emotion_label in emotion_explanations:
+            st.info(f"💡 {emotion_explanations[emotion_label]}")
+        # Emotion chart
+        st.subheader("感情分析結果")
+        fig = plot_emotion_map(emotion_label, scores, method)
+        st.pyplot(fig, clear_figure=True)
+        # Recommendations (top 4)
         st.subheader("3) おすすめ（上位4件）")
         cols = st.columns(4)
         for i, p in enumerate(recs[:4]):
+            with cols[i % 4]:
+                if "image" in p:
                     st.image(p["image"], use_container_width=True)
                 st.markdown(f"**{p['name']}**")
                 st.caption(f"タグ: {', '.join(p['tags'])}")
+        # Rating inputs (the values collected here are not used again in this function)
+        st.subheader("4) 評価")
+        choice_name = st.selectbox("第一候補を選んでください", [p["name"] for p in recs[:4]])
+        rating_like = st.slider("行ってみたい度（★）", 1, 5, 4)
+        rating_vibe = st.slider("気分に合う度（🎯）", 1, 5, 4)
+        reasons = st.multiselect("理由タグ（1–3個）", REASON_TAGS, max_selections=3)
+        comment = st.text_input("ひとことコメント（任意・20字）", max_chars=20)
+        # Save: both branches only show a message; nothing is written
+        if st.button("💾 ログ保存", use_container_width=True):
+            consent_research = (consent == "匿名で保存する")
+            if not consent_research:
+                st.info("体験のみモードです。研究ログは保存しません。")
+            else:
+                st.success("保存機能は開発中です。")
     st.divider()
+    if st.button("▶ 次の人を録音する（状態をクリア）", use_container_width=True):
+        # "feat" is not in this list, unlike the clear button in the record tab
+        for k in ["wav_bytes","recs","emotion_label","scores","method"]:
+            st.session_state[k] = None
         st.session_state["rec_key"] += 1
         st.rerun()
 # エントリーポイント
 if __name__ == "__main__":
     main()