Spaces:

ayaka68
/

voice2place

Sleeping

App Files Files Community

ayaka68 committed on Sep 4, 2025

Commit

8a8b25f

verified ·

1 Parent(s): c17f977

Upload 10 files

Browse files

Files changed (11) hide show

.gitattributes +6 -0
.streamlit:config.toml +2 -0
app.py +249 -0
hf.yaml +2 -0
images/aqua_museum.png +3 -0
images/lib_silent.png +3 -0
images/roof_garden.png +3 -0
images/shade_bol.png +3 -0
images/silent_atlier.png +3 -0
images/wind_root.png +3 -0
requirements.txt.txt +8 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/aqua_museum.png filter=lfs diff=lfs merge=lfs -text
+images/lib_silent.png filter=lfs diff=lfs merge=lfs -text
+images/roof_garden.png filter=lfs diff=lfs merge=lfs -text
+images/shade_bol.png filter=lfs diff=lfs merge=lfs -text
+images/silent_atlier.png filter=lfs diff=lfs merge=lfs -text
+images/wind_root.png filter=lfs diff=lfs merge=lfs -text

.streamlit:config.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [browser]
2	+ gatherUsageStats = false

app.py ADDED Viewed

	@@ -0,0 +1,249 @@

+import io, uuid, datetime as dt, csv
+import numpy as np
+import librosa, soundfile as sf
+import streamlit as st
+from audiorecorder import audiorecorder
+from pydub import AudioSegment
+st.set_page_config(page_title="Voice→Place Recommender", page_icon="🎙️", layout="centered")
+st.title("🎙️ 声の感情で『架空の場所』をレコメンド")
+st.caption("録音→感情推定（Arousal/Valence）→上位3スポット→評価→CSV保存（匿名）")
+# =========================
+# 架空の場所データ
+# =========================
+PLACES = [
+    {"place_id":"lib_silent", "name":"無音図書館",
+     "tags":["静けさ","集中","屋内"], "emo_key":"calm",
+     "image":"images/lib_silent.jpg"},
+    {"place_id":"aqua_museum", "name":"深海ガラス館",
+     "tags":["発見","学習","ひんやり","屋内"], "emo_key":"surprise",
+     "image":"images/aqua_museum.jpg"},
+    {"place_id":"roof_garden", "name":"雨上がりの屋上庭園",
+     "tags":["開放","共有","屋外","緑"], "emo_key":"joy",
+     "image":"images/roof_garden.jpg"},
+    {"place_id":"boulder_warehouse", "name":"影のボルダリング倉庫",
+	"tags":["発散","身体活動","屋内"], "emo_key":"release",
+	"image":"images/shade_bol.jpg"},
+    {"place_id":"atelier_mono", "name":"静寂のアトリエ",
+	"tags":["創作","集中","屋内"], "emo_key":"calm",
+	"image":"images/silent_atlier.jpg"},
+    {"place_id":"wind_birch", "name":"風鳴りの白樺道",
+	"tags":["自然","散歩","屋外","緑"], "emo_key":"joy",
+	"image":"images/wind_root.jpg"}
+]
+REASON_TAGS = ["静けさ","緑","水辺","発散","創作","交流","体験","学習","屋内","屋外","没入","回復"]
+# =========================
+# 特徴量抽出・推定ロジック
+# =========================
+def extract_features(y, sr):
+    yt, _ = librosa.effects.trim(y, top_db=30)
+    f0, _, _ = librosa.pyin(yt, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))
+    f0_mean = np.nanmean(f0); f0_med = np.nanmedian(f0)
+    rms = librosa.feature.rms(y=yt).flatten(); energy_mean = float(np.mean(rms))
+    spec_cent = librosa.feature.spectral_centroid(y=yt, sr=sr).flatten(); sc_mean = float(np.mean(spec_cent))
+    zcr = librosa.feature.zero_crossing_rate(yt).flatten(); zcr_mean = float(np.mean(zcr))
+    return {
+        "f0_mean": float(f0_mean if not np.isnan(f0_mean) else 0.0),
+        "f0_med":  float(f0_med  if not np.isnan(f0_med)  else 0.0),
+        "energy_mean": energy_mean,
+        "spec_centroid": sc_mean,
+        "zcr_mean": zcr_mean,
+        "duration": len(yt)/sr
+    }
+def av_from_features(feat):
+    f0 = feat["f0_mean"]; en = feat["energy_mean"]; z = feat["zcr_mean"]
+    arousal = float(np.tanh((en*200) + (z*5)))
+    valence = float(np.tanh(((f0-170)/120) + en*30))
+    return arousal, valence
+def label_from_av(arousal, valence):
+    if valence >= 0.15 and arousal >= 0.15: return "joy"
+    if valence >= 0.15 and arousal < 0.15:  return "calm"
+    if valence < 0.15 and arousal >= 0.25: return "arousal_high_neg"
+    if arousal >= 0.15:                    return "surprise"
+    return "neutral"
+EMO_MAP_PRIORS = {
+    "joy": ["joy","surprise"], "calm": ["calm","joy"],
+    "surprise": ["surprise","joy"], "arousal_high_neg": ["release","surprise"],
+    "neutral": ["calm","joy","surprise"]
+}
+def score_places(emo_label):
+    priors = EMO_MAP_PRIORS.get(emo_label, ["calm","joy","surprise"])
+    scored = []
+    for p in PLACES:
+        base = 0.5
+        if p["emo_key"] == priors[0]: base += 0.5
+        if len(priors) > 1 and p["emo_key"] == priors[1]: base += 0.25
+        scored.append((base, p))
+    scored.sort(key=lambda x: x[0], reverse=True)
+    return [p for _, p in scored][:3]
+# =========================
+# ログ保存
+# =========================
+def ensure_logs():
+    import os
+    os.makedirs("logs", exist_ok=True)
+    path = "logs/oc_sessions.csv"
+    if not os.path.exists(path):
+        with open(path, "w", newline="", encoding="utf-8") as f:
+            csv.writer(f).writerow([
+                "session_id","ts","consent_research","save_audio",
+                "f0_mean","energy_mean","spec_centroid","zcr_mean","duration",
+                "arousal","valence","emo_label",
+                "exposed_ids","choice_id","rating_like","rating_vibe","reason_tags","comment"
+            ])
+    return path
+def append_log(row_dict):
+    path = ensure_logs()
+    with open(path, "a", newline="", encoding="utf-8") as f:
+        csv.writer(f).writerow([
+            row_dict.get("session_id"), row_dict.get("ts"),
+            row_dict.get("consent_research"), row_dict.get("save_audio"),
+            row_dict.get("f0_mean"), row_dict.get("energy_mean"),
+            row_dict.get("spec_centroid"), row_dict.get("zcr_mean"),
+            row_dict.get("duration"),
+            row_dict.get("arousal"), row_dict.get("valence"), row_dict.get("emo_label"),
+            ",".join(row_dict.get("exposed_ids", [])),
+            row_dict.get("choice_id"),
+            row_dict.get("rating_like"), row_dict.get("rating_vibe"),
+            "|".join(row_dict.get("reason_tags", [])),
+            row_dict.get("comment","")
+        ])
+# =========================
+# 音声をWAVに正規化
+# =========================
+def to_wav_bytes(any_bytes: bytes, target_sr=16000, mono=True) -> bytes:
+    if not any_bytes or len(any_bytes) == 0:
+        st.error("音声が空です。録音やアップロードを確認してください。"); st.stop()
+    try:
+        seg = AudioSegment.from_file(io.BytesIO(any_bytes))
+    except Exception as e:
+        st.error(f"音声を読み込めませんでした: {e}"); st.stop()
+    if mono: seg = seg.set_channels(1)
+    if target_sr: seg = seg.set_frame_rate(target_sr)
+    buf = io.BytesIO(); seg.export(buf, format="wav"); return buf.getvalue()
+# =========================
+# Session state 初期化
+# =========================
+for key, default in [
+    ("wav_bytes", None), ("recs", None), ("feat", None),
+    ("arousal", None), ("valence", None), ("emo_label", None)
+]:
+    if key not in st.session_state: st.session_state[key] = default
+# =========================
+# UI: record / upload
+# =========================
+# Both tabs write into the same st.session_state["wav_bytes"] slot. The upload
+# tab runs second, so when both hold audio on a rerun the uploaded bytes win.
+st.subheader("1) 録音またはアップロード")
+tab_rec, tab_upload = st.tabs(["🎤 録音する", "📁 ファイルを使う"])
+with tab_rec:
+    audio = audiorecorder("録音開始 ▶", "録音停止 ■")
+    if len(audio) > 0:
+        # Export the recording to WAV bytes and offer it for playback.
+        buf = io.BytesIO(); audio.export(buf, format="wav")
+        st.session_state["wav_bytes"] = buf.getvalue()
+        st.audio(st.session_state["wav_bytes"], format="audio/wav")
+        st.caption(f"録音サイズ: {len(st.session_state['wav_bytes'])} bytes")
+with tab_upload:
+    up = st.file_uploader("WAV/MP3/M4A を選択", type=["wav","mp3","m4a"])
+    if up is not None:
+        # Uploaded bytes are stored as-is (possibly MP3/M4A, not WAV).
+        st.session_state["wav_bytes"] = up.read()
+        st.audio(st.session_state["wav_bytes"])
+        st.caption(f"アップロードサイズ: {len(st.session_state['wav_bytes'])} bytes")
+# =========================
+# UI: consent
+# =========================
+# `consent` holds the selected option string; the first option (experience
+# only, nothing saved) is the default. `save_audio` is an opt-in checkbox,
+# off by default.
+st.subheader("2) 同意")
+consent = st.radio("研究利用の同意（匿名IDで特徴量と評価を保存します）",
+                   ["保存しない（体験のみ）", "匿名で保存する"], horizontal=True)
+save_audio = st.checkbox("音声ファイルも保存する（任意）", value=False)
+# =========================
+# Estimate & recommend
+# =========================
+# The button stays disabled until some audio has been recorded or uploaded.
+if st.button("🔍 推定 & レコメンド", type="primary", use_container_width=True,
+             disabled=(st.session_state["wav_bytes"] is None)):
+    raw_bytes = st.session_state["wav_bytes"]
+    # Normalise whatever was captured to 16 kHz mono WAV before analysis.
+    wav_bytes_fixed = to_wav_bytes(raw_bytes, target_sr=16000, mono=True)
+    try:
+        y, sr = librosa.load(io.BytesIO(wav_bytes_fixed), sr=16000, mono=True)
+    except Exception as e:
+        st.error(f"音声読み込みでエラー: {e}"); st.stop()
+    feat = extract_features(y, sr)
+    arousal, valence = av_from_features(feat)
+    emo_label = label_from_av(arousal, valence)
+    # Persist results in session state so they survive the rerun triggered by
+    # later widget interaction.
+    st.session_state["feat"] = feat
+    st.session_state["arousal"] = arousal
+    st.session_state["valence"] = valence
+    st.session_state["emo_label"] = emo_label
+    st.session_state["recs"] = score_places(emo_label)
+# 表示（推定が完了していれば出す）
+if st.session_state["recs"] is not None:
+    feat = st.session_state["feat"]; arousal = st.session_state["arousal"]
+    valence = st.session_state["valence"]; emo_label = st.session_state["emo_label"]
+    recs = st.session_state["recs"]
+    st.success(f"推定感情: **{emo_label}**  | Arousal: {arousal:.2f} / Valence: {valence:.2f}")
+    st.caption(f"F0_mean={feat['f0_mean']:.1f} Hz, Energy={feat['energy_mean']:.4f}, ZCR={feat['zcr_mean']:.3f}")
+    st.subheader("3) おすすめ（上位3件）")
+    cols = st.columns(3)
+    for i, p in enumerate(recs):
+        with cols[i]:
+            st.markdown(f"**{p['name']}**")
+            st.caption(f"タグ: {', '.join(p['tags'])}")
+    # =========================
+    # 4) 評価入力
+    # =========================
+    st.subheader("4) 評価")
+    choice_name = st.selectbox("第一候補を選んでください", [p["name"] for p in recs])
+    rating_like = st.slider("行ってみたい度（★）", 1, 5, 4)
+    rating_vibe = st.slider("気分に合う度（🎯）", 1, 5, 4)
+    reasons = st.multiselect("理由タグ（1–3個）", REASON_TAGS, max_selections=3)
+    comment = st.text_input("ひとことコメント（任意・20字）", max_chars=20)
+    # =========================
+    # 5) 保存
+    # =========================
+    if st.button("💾 ログ保存", use_container_width=True):
+        consent_research = (consent == "匿名で保存する")
+        if not consent_research:
+            st.info("体験のみモードです。研究ログは保存しません。")
+        else:
+            exposed_ids = [p["place_id"] for p in recs]
+            choice_id = next(p["place_id"] for p in recs if p["name"] == choice_name)
+            row = {
+                "session_id": f"oc-{uuid.uuid4().hex[:8]}",
+                "ts": dt.datetime.now().isoformat(timespec="seconds"),
+                "consent_research": consent_research,
+                "save_audio": (save_audio and consent_research),
+                "f0_mean": feat["f0_mean"], "energy_mean": feat["energy_mean"],
+                "spec_centroid": feat["spec_centroid"], "zcr_mean": feat["zcr_mean"],
+                "duration": feat["duration"],
+                "arousal": arousal, "valence": valence, "emo_label": emo_label,
+                "exposed_ids": exposed_ids, "choice_id": choice_id,
+                "rating_like": rating_like, "rating_vibe": rating_vibe,
+                "reason_tags": reasons, "comment": comment,
+            }
+            append_log(row)
+            if row["save_audio"]:
+                import os; os.makedirs("logs", exist_ok=True)
+                with open(f"logs/{row['session_id']}.wav", "wb") as f:
+                    f.write(st.session_state["wav_bytes"])
+            st.success("保存しました（logs/oc_sessions.csv）。")