sound-classify / app.py
Isk5434's picture
Upload app.py with huggingface_hub
8471ef2 verified
# -*- coding: utf-8 -*-
import numpy as np
import librosa
import gradio as gr
from dataclasses import dataclass
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
"""音声分類.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1_wgDxDY2B0aYe_zbc1yZ603NBXlRc6Xe
"""
# ESN
@dataclass
class ESNConfig:
n_res: int = 500
spectral_radius: float = 0.9
leaking_rate: float = 0.3
input_scale: float = 0.5
sparsity: float = 0.95
ridge_alpha: float = 1e-3
seed: int = 42
class ESNClassifier:
def __init__(self, cfg: ESNConfig, n_in: int, n_classes: int):
self.cfg = cfg
self.n_in = n_in
self.n_classes = n_classes
self.rng = np.random.default_rng(cfg.seed)
self.W_in = (self.rng.uniform(-1, 1, (cfg.n_res, n_in)) * cfg.input_scale).astype(np.float32)
W = self.rng.uniform(-1, 1, (cfg.n_res, cfg.n_res)).astype(np.float32)
mask = self.rng.uniform(0, 1, W.shape) < cfg.sparsity
W[mask] = 0.0
v = self.rng.normal(size=(cfg.n_res,)).astype(np.float32)
v /= np.linalg.norm(v) + 1e-12
for _ in range(50):
v = W @ v
v /= np.linalg.norm(v) + 1e-12
approx = np.linalg.norm(W @ v)
if approx > 0:
W *= cfg.spectral_radius / approx
self.W_res = W
self.scaler = StandardScaler()
self.readout = Ridge(alpha=cfg.ridge_alpha)
self.reset_state()
def reset_state(self):
self.x = np.zeros(self.cfg.n_res, dtype=np.float32)
def step(self, u):
pre = self.W_in @ u + self.W_res @ self.x
x_new = np.tanh(pre)
a = self.cfg.leaking_rate
self.x = (1 - a) * self.x + a * x_new
return self.x
def sequence_feature(self, U):
self.reset_state()
states = []
for u in U:
states.append(self.step(u))
states = np.stack(states)
feat = np.concatenate([states.mean(0), states[-1]])
return feat
def fit(self, X_list, y_list):
feats = np.stack([self.sequence_feature(U) for U in X_list])
feats = self.scaler.fit_transform(feats)
Y = np.eye(self.n_classes)[y_list]
self.readout.fit(feats, Y)
def predict_proba_sequence(self, U):
feat = self.sequence_feature(U)[None]
feat = self.scaler.transform(feat)
y = self.readout.predict(feat)[0]
y -= np.max(y)
return np.exp(y) / (np.sum(np.exp(y)) + 1e-12)
def predict_step_proba(self, u):
x = self.step(u)
feat = np.concatenate([x, x])[None]
feat = self.scaler.transform(feat)
y = self.readout.predict(feat)[0]
y -= np.max(y)
return np.exp(y) / (np.sum(np.exp(y)) + 1e-12)
# Audio -> MFCC
SR = 16000
N_MFCC = 13
HOP = int(0.01 * SR)
WIN = int(0.025 * SR)
def _mono_float32(y):
y = y.astype(np.float32)
if y.ndim > 1:
y = y.mean(axis=1)
return y
def normalize_audio_tuple(audio):
# audio: (sr, y)
sr, y = audio
y = _mono_float32(y)
if sr != SR:
y = librosa.resample(y, orig_sr=sr, target_sr=SR)
sr = SR
y /= (np.max(np.abs(y)) + 1e-9)
return (sr, y)
def audio_to_sequence(audio):
sr, y = normalize_audio_tuple(audio)
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC, hop_length=HOP, n_fft=WIN)
return mfcc.T.astype(np.float32)
def chunk_to_seq(chunk):
if chunk is None:
return None
sr, y = chunk
if y is None:
return None
y = _mono_float32(y)
if len(y) < WIN:
return None
return audio_to_sequence(chunk)
# Data store (replay + relabel)
# each item: {"audio": (sr,y), "U": mfcc_seq, "label": str}
DATA = []
LABELS = []
MODEL = None
def probs_dict_from_p(p):
if p is None or len(LABELS) == 0:
return {}
return {LABELS[i]: float(p[i]) for i in range(min(len(LABELS), len(p)))}
def dataset_table():
# compact rows: idx, label, sec
rows = []
for i, item in enumerate(DATA):
sr, y = item["audio"]
dur = (len(y) / sr) if (sr and y is not None) else 0.0
rows.append([i, item["label"], round(dur, 2)])
return rows
def dataset_stats_text():
counts = {l: 0 for l in LABELS}
for item in DATA:
lab = item["label"]
counts[lab] = counts.get(lab, 0) + 1
parts = [f"{l}:{counts.get(l,0)}" for l in LABELS] if LABELS else ["(no labels)"]
return f"n={len(DATA)} | " + " ".join(parts)
# Training
def train_random(n_trials):
global MODEL
if len(LABELS) < 2:
return "ラベル不足(2種類以上)"
if len(DATA) < 6:
return "データ不足(目安: 合計6以上、各ラベル3以上)"
X = [d["U"] for d in DATA]
y = [LABELS.index(d["label"]) for d in DATA]
try:
X_tr, X_val, y_tr, y_val = train_test_split(
X, y, test_size=0.3, random_state=0, stratify=y
)
except Exception:
X_tr, X_val, y_tr, y_val = train_test_split(
X, y, test_size=0.3, random_state=0
)
best_acc = -1.0
best_model = None
best_cfg = None
for _ in range(int(n_trials)):
cfg = ESNConfig(
n_res=int(np.random.choice([200, 500, 800])),
spectral_radius=float(np.random.uniform(0.6, 1.2)),
leaking_rate=float(np.random.uniform(0.1, 0.8)),
input_scale=float(np.random.uniform(0.1, 1.0)),
ridge_alpha=float(10 ** np.random.uniform(-5, -2)),
sparsity=float(np.random.uniform(0.8, 0.98)),
seed=int(np.random.randint(0, 10_000_000)),
)
model = ESNClassifier(cfg, N_MFCC, len(LABELS))
model.fit(X_tr, y_tr)
preds = []
for U in X_val:
p = model.predict_proba_sequence(U)
preds.append(int(np.argmax(p)))
acc = accuracy_score(y_val, preds)
if acc > best_acc:
best_acc = acc
best_model = model
best_cfg = cfg
MODEL = best_model
return f"val acc: {best_acc:.3f} | cfg: {best_cfg}"
# Streaming inference
def stream_predict(chunk, state):
global MODEL
if state is None:
state = {"active": False}
if MODEL is None:
state["active"] = False
return "未学習", {}, state
U = chunk_to_seq(chunk)
if U is None:
if state.get("active", False):
MODEL.reset_state()
state["active"] = False
return "...", {}, state
if not state.get("active", False):
MODEL.reset_state()
state["active"] = True
p = None
for u in U:
p = MODEL.predict_step_proba(u)
pred = LABELS[int(np.argmax(p))]
return pred, probs_dict_from_p(p), state
# UI callbacks
def add_label_cb(label):
label = (label or "").strip()
if not label:
return gr.update(), dataset_table(), gr.update()
if label not in LABELS:
LABELS.append(label)
return gr.update(choices=LABELS, value=label), dataset_table(), gr.update(choices=LABELS)
def add_sample_cb(audio, label):
label = (label or "").strip()
if label not in LABELS:
return dataset_table(), gr.update(value=None)
if audio is None:
return dataset_table(), gr.update(value=None)
audio_n = normalize_audio_tuple(audio)
U = audio_to_sequence(audio_n)
if U is None or len(U) < 5:
return dataset_table(), gr.update(value=None)
DATA.append({"audio": audio_n, "U": U, "label": label})
return dataset_table(), gr.update(value=None)
def auto_add_sample_cb(audio, label):
"""録音完了(stop_recording)時に自動でDATAに追加"""
label = (label or "").strip()
if label not in LABELS:
return dataset_table(), gr.update(value=None), "⚠ ラベルを先に選択してください"
if audio is None:
return dataset_table(), gr.update(value=None), "待機中..."
audio_n = normalize_audio_tuple(audio)
U = audio_to_sequence(audio_n)
if U is None or len(U) < 5:
return dataset_table(), gr.update(value=None), "⚠ 音声が短すぎます(もう少し話してください)"
DATA.append({"audio": audio_n, "U": U, "label": label})
return dataset_table(), gr.update(value=None), f"✓ 保存完了! (idx={len(DATA)-1}, label={label})"
def undo_last_cb():
if len(DATA) == 0:
return dataset_table()
DATA.pop()
return dataset_table()
def reset_all_cb():
global MODEL
DATA.clear()
LABELS.clear()
MODEL = None
return (
dataset_table(),
gr.update(choices=[], value=None),
gr.update(choices=[], value=None),
gr.update(value=None),
None,
)
def clear_rec_cb():
return gr.update(value=None)
def on_select_row(evt: gr.SelectData):
# evt.index: (row, col) or row index depending on component
# For Dataframe select, evt.index is (row, col)
if evt is None or evt.index is None:
return None, gr.update(value=None), "未選択"
row = evt.index[0] if isinstance(evt.index, (tuple, list)) else int(evt.index)
if row < 0 or row >= len(DATA):
return None, gr.update(value=None), "範囲外"
item = DATA[row]
audio = item["audio"]
lab = item["label"]
return audio, gr.update(value=lab), f"selected idx={row}"
def apply_relabel_cb(table, new_label):
# table is not trusted as source of truth; we use selected_idx in state ideally,
# but keep simple: infer currently selected by last "selected idx=..."
# -> we will pass selected index via State.
return "内部: use state idx"
def relabel_selected_cb(selected_idx, new_label):
new_label = (new_label or "").strip()
if selected_idx is None or selected_idx < 0 or selected_idx >= len(DATA):
return dataset_table()
if new_label not in LABELS:
return dataset_table()
DATA[selected_idx]["label"] = new_label
return dataset_table()
def delete_selected_cb(selected_idx):
if selected_idx is None or selected_idx < 0 or selected_idx >= len(DATA):
return dataset_table(), None, gr.update(value=None)
DATA.pop(selected_idx)
return dataset_table(), None, gr.update(value=None)
# Compact UI — Copy.py風フェミニン・パステルデザイン + 写真風ブルー→グリーングラデーション
HEAD = """
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, viewport-fit=cover, user-scalable=no">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@300;400;500;600&display=swap" rel="stylesheet">
<script>
/* ── 無音自動停止(Gradio公式 .stop-button/.record-button セレクタ使用) ── */
(function(){
const SILENCE_THRESH = 0.01;
const SPEECH_THRESH = 0.015;
const SILENCE_MS = 1500;
let ctx, analyser, src, silStart, spoke, on;
function stat(m){ var e=document.getElementById('rec_status_js'); if(e) e.textContent=m; }
function stop(){
on=false;
try{src&&src.disconnect()}catch(e){}
try{ctx&&ctx.close()}catch(e){}
src=null;ctx=null;analyser=null;
}
function monitor(stream){
stop();
on=true; spoke=false; silStart=null;
ctx=new(window.AudioContext||window.webkitAudioContext)();
analyser=ctx.createAnalyser(); analyser.fftSize=512;
src=ctx.createMediaStreamSource(stream); src.connect(analyser);
var buf=new Float32Array(analyser.fftSize);
function tick(){
if(!on)return;
analyser.getFloatTimeDomainData(buf);
var s=0; for(var i=0;i<buf.length;i++) s+=buf[i]*buf[i];
var rms=Math.sqrt(s/buf.length);
if(rms>SPEECH_THRESH){
spoke=true; silStart=null;
stat('録音中... 🎙️');
} else if(spoke && rms<SILENCE_THRESH){
if(!silStart) silStart=Date.now();
var left=Math.max(0,SILENCE_MS-(Date.now()-silStart));
stat('無音検出中... あと'+Math.ceil(left/1000)+'秒');
if(left<=0){
stat('自動停止 & 保存中...');
/* Gradio公式: .stop-button で停止ボタンを取得 */
var btn=document.querySelector('.stop-button');
console.log('[auto-stop] .stop-button found:', btn);
if(btn){ btn.click(); }
stop();
return;
}
} else if(!spoke){
stat('待機中... 話してください 🎤');
}
requestAnimationFrame(tick);
}
tick();
}
var orig=navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
navigator.mediaDevices.getUserMedia=function(c){
return orig(c).then(function(stream){
if(c&&c.audio){
console.log('[auto-stop] getUserMedia intercepted, starting monitor');
monitor(stream);
stream.getAudioTracks().forEach(function(t){t.addEventListener('ended',stop);});
}
return stream;
});
};
})();
</script>
"""
CSS = """
/* ========================================
グローバル: フェミニン・パステルテーマ
写真風 淡いブルー→ミントグリーン グラデーション
======================================== */
html {
scroll-behavior: smooth !important;
-webkit-overflow-scrolling: touch !important;
}
.gradio-container {
background: linear-gradient(175deg,
#c5dff0 0%,
#b8e0e8 15%,
#b2e5d8 30%,
#c8ecd0 50%,
#d5f0cc 70%,
#e0f4d8 85%,
#e8f6e0 100%) !important;
color: #1a1a1a !important;
font-family: 'Inter', 'Hiragino Kaku Gothic ProN', 'Noto Sans JP', -apple-system, BlinkMacSystemFont, sans-serif !important;
font-weight: 400 !important;
max-width: 100% !important;
padding: 0 !important;
min-height: 100vh !important;
}
footer { display: none !important; }
/* ========================================
ヘッダーヒーローセクション
======================================== */
.hero-section {
text-align: center !important;
padding: 48px 24px 32px 24px !important;
}
.hero-section h1 {
font-family: 'Cormorant Garamond', 'Georgia', 'Times New Roman', serif !important;
font-size: clamp(24px, 7vw, 38px) !important;
font-weight: 400 !important;
letter-spacing: 0.1em !important;
color: #2a3a2a !important;
text-transform: uppercase !important;
margin: 0 0 8px 0 !important;
line-height: 1.2 !important;
}
.hero-section p {
font-size: clamp(12px, 3vw, 14px) !important;
color: #4a5a4a !important;
letter-spacing: 0.04em !important;
margin: 0 !important;
}
/* ========================================
タブナビゲーション: ピル型パステル
======================================== */
div.tab-nav {
background: rgba(255,255,255,0.75) !important;
border: 1px solid rgba(0,0,0,0.06) !important;
border-radius: 22px !important;
padding: 5px !important;
margin: 16px 16px 20px 16px !important;
display: flex !important;
justify-content: center !important;
gap: 3px !important;
box-shadow: 0 2px 12px rgba(100,140,120,0.08) !important;
backdrop-filter: blur(8px) !important;
-webkit-backdrop-filter: blur(8px) !important;
}
div.tab-nav button {
background: transparent !important;
color: #3a4a3a !important;
border: none !important;
border-radius: 18px !important;
padding: 10px 16px !important;
font-family: 'Cormorant Garamond', 'Georgia', 'Times New Roman', serif !important;
font-size: 13px !important;
font-weight: 500 !important;
transition: all 0.25s ease !important;
letter-spacing: 0.1em !important;
}
div.tab-nav button.selected {
background: rgba(178,216,210,0.4) !important;
color: #1a2a1a !important;
box-shadow: 0 1px 8px rgba(178,216,210,0.25) !important;
}
/* ========================================
タブコンテンツ: 二重フレーム(Copy.py風)
======================================== */
.tabitem {
background: transparent !important;
border: none !important;
}
.tabitem > div {
position: relative !important;
background: rgba(255,255,255,0.92) !important;
border-radius: 0 !important;
padding: 32px 20px !important;
margin: 16px 18px 28px 18px !important;
border: 1.25px solid #8aaa8a !important;
box-shadow: 8px 8px 0px 0px #c0d8c0 !important;
backdrop-filter: blur(4px) !important;
-webkit-backdrop-filter: blur(4px) !important;
animation: softFadeIn 0.5s ease forwards;
}
/* ========================================
ラベル: セリフ体
======================================== */
label span, .label-wrap span {
color: #2a3a2a !important;
font-family: 'Cormorant Garamond', 'Georgia', 'Times New Roman', serif !important;
font-weight: 500 !important;
font-size: 13px !important;
letter-spacing: 0.06em !important;
}
/* ========================================
テキスト入力
======================================== */
input[type="text"], textarea, select {
background: rgba(255,255,255,0.8) !important;
border: 1px solid rgba(138,170,138,0.35) !important;
border-radius: 0 !important;
color: #1a1a1a !important;
font-weight: 400 !important;
}
input[type="text"]:focus, textarea:focus {
border-color: rgba(138,170,138,0.6) !important;
box-shadow: 0 0 0 3px rgba(178,216,210,0.2) !important;
outline: none !important;
}
/* Number input: シャープスタイル */
input[type="number"] {
background: #ffffff !important;
border: 1.25px solid #8aaa8a !important;
border-radius: 0 !important;
color: #1a1a1a !important;
font-family: 'Cormorant Garamond', 'Georgia', serif !important;
font-weight: 500 !important;
font-size: 13px !important;
text-align: center !important;
padding: 4px 6px !important;
box-shadow: 3px 3px 0px 0px #c0d8c0 !important;
outline: none !important;
-moz-appearance: textfield !important;
}
input[type="number"]:focus {
border-color: #6a8a6a !important;
box-shadow: 4px 4px 0px 0px #a8c8a8 !important;
}
input[type="number"]::-webkit-inner-spin-button,
input[type="number"]::-webkit-outer-spin-button {
-webkit-appearance: none !important;
margin: 0 !important;
}
/* ========================================
Dropdown: Copy.py風シャープスタイル
======================================== */
[data-testid="dropdown"] {
background: transparent !important;
border: none !important;
box-shadow: none !important;
border-radius: 0 !important;
}
[data-testid="dropdown"] > div,
[data-testid="dropdown"] .wrap,
[data-testid="dropdown"] .wrap-inner,
[data-testid="dropdown"] .secondary-wrap,
[data-testid="dropdown"] input,
[data-testid="dropdown"] .multiselect {
background: #ffffff !important;
border: none !important;
border-radius: 0 !important;
box-shadow: none !important;
outline: none !important;
}
[data-testid="dropdown"] .wrap,
[data-testid="dropdown"] .secondary-wrap {
border: 1px solid #5a7a5a !important;
box-shadow: 3px 3px 0px 0px #c0d8c0 !important;
padding: 8px 10px !important;
}
[data-testid="dropdown"] *:not(ul):not(ul *) {
background: #ffffff !important;
color: #1a1a1a !important;
border-radius: 0 !important;
}
ul.options, ul.options li, .options, .options .item,
.secondary-wrap .item, .secondary-wrap ul li {
background: #2a3a2a !important;
color: #ffffff !important;
border-radius: 0 !important;
}
ul.options li:hover, .options .item:hover, .secondary-wrap .item:hover {
background: #3a4a3a !important;
color: #ffffff !important;
}
/* ========================================
Slider: ラグジュアリー仕様
======================================== */
input[type="range"] {
-webkit-appearance: none !important;
appearance: none !important;
height: 3px !important;
background: linear-gradient(90deg,
#b2d8e8 0%,
#b2e0d4 50%,
#c8e8c0 100%) !important;
border-radius: 0 !important;
outline: none !important;
cursor: pointer !important;
overflow: visible !important;
margin: 12px 0 !important;
}
input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none !important;
appearance: none !important;
width: 14px !important;
height: 14px !important;
background: #3a5a3a !important;
border: 1.5px solid #8aaa8a !important;
border-radius: 0 !important;
transform: rotate(45deg) !important;
cursor: pointer !important;
box-shadow: 2px 2px 4px rgba(0,0,0,0.15) !important;
margin-top: -6px !important;
position: relative !important;
}
input[type="range"]::-moz-range-thumb {
width: 14px !important;
height: 14px !important;
background: #3a5a3a !important;
border: 1.5px solid #8aaa8a !important;
border-radius: 0 !important;
transform: rotate(45deg) !important;
cursor: pointer !important;
}
input[type="range"]::-webkit-slider-runnable-track {
height: 3px !important;
background: linear-gradient(90deg,
#b2d8e8 0%,
#b2e0d4 50%,
#c8e8c0 100%) !important;
border-radius: 0 !important;
overflow: visible !important;
}
[data-testid="slider"] {
background: rgba(255,255,255,0.6) !important;
border: 1.25px solid #a8c8a8 !important;
border-radius: 0 !important;
padding: 12px 14px 16px 14px !important;
box-shadow: 3px 3px 0px 0px #c0d8c0 !important;
overflow: visible !important;
}
[data-testid="slider"] > div,
[data-testid="slider"] .wrap,
[data-testid="slider"] .wrap-inner {
overflow: visible !important;
}
[data-testid="slider"] .label-wrap span,
[data-testid="slider"] label span {
font-family: 'Cormorant Garamond', 'Georgia', serif !important;
font-size: 11px !important;
letter-spacing: 0.12em !important;
text-transform: uppercase !important;
color: #4a6a4a !important;
}
[data-testid="slider"] button {
border-radius: 0 !important;
border: 1.25px solid #a8c8a8 !important;
background: #f0f5f0 !important;
padding: 4px 6px !important;
}
/* ========================================
Radioボタン: ダイヤモンド◆スタイル
======================================== */
.diamond-radio,
.diamond-radio *:not(input):not(span) {
background: transparent !important;
background-color: transparent !important;
box-shadow: none !important;
border-color: transparent !important;
}
.diamond-radio {
border: 2px solid transparent !important;
border-image: linear-gradient(135deg, #b2d8e8 0%, #b2e0d4 50%, #c8e8c0 100%) 1 !important;
border-radius: 0 !important;
padding: 12px 14px !important;
}
.diamond-radio label,
.diamond-radio .wrap label,
.diamond-radio label.svelte-1kcyvh9 {
display: flex !important;
align-items: center !important;
gap: 10px !important;
padding: 10px 12px !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
border-bottom: 1px solid rgba(138,170,138,0.15) !important;
font-family: 'Cormorant Garamond', 'Georgia', serif !important;
font-size: 14px !important;
font-weight: 500 !important;
letter-spacing: 0.08em !important;
}
.diamond-radio label:last-child {
border-bottom: none !important;
}
.diamond-radio label:hover {
background: rgba(178,216,210,0.15) !important;
}
/* ラジオ丸→ダイヤモンド: あらゆるinput[type=radio]をカバー */
.diamond-radio input[type="radio"],
.diamond-radio input[type="radio"]::before,
.diamond-radio input[type="radio"]::after {
-webkit-appearance: none !important;
appearance: none !important;
border-radius: 0 !important;
}
.diamond-radio input[type="radio"] {
width: 14px !important;
height: 14px !important;
min-width: 14px !important;
border: 1.5px solid #8aaa8a !important;
background: #ffffff !important;
transform: rotate(45deg) !important;
cursor: pointer !important;
transition: all 0.25s ease !important;
box-shadow: 1px 1px 3px rgba(0,0,0,0.1) !important;
margin: 0 4px 0 0 !important;
padding: 0 !important;
flex-shrink: 0 !important;
}
.diamond-radio input[type="radio"]:checked {
background: linear-gradient(135deg, #a0cfe0 0%, #90d4c8 50%, #b0e0a0 100%) !important;
border-color: #7abaa0 !important;
box-shadow: 2px 2px 4px rgba(0,0,0,0.2) !important;
}
/* Gradio 6: 丸いsvg/spanインジケーターも上書き */
.diamond-radio .radio-circle,
.diamond-radio [class*="radio"] span:first-child,
.diamond-radio .item > div:first-child,
.diamond-radio .choice > div:first-child {
width: 14px !important;
height: 14px !important;
min-width: 14px !important;
border: 1.5px solid #8aaa8a !important;
border-radius: 0 !important;
background: #ffffff !important;
transform: rotate(45deg) !important;
box-shadow: 1px 1px 3px rgba(0,0,0,0.1) !important;
transition: all 0.25s ease !important;
}
.diamond-radio .selected .radio-circle,
.diamond-radio .selected [class*="radio"] span:first-child,
.diamond-radio .selected .item > div:first-child,
.diamond-radio .selected .choice > div:first-child {
background: linear-gradient(135deg, #a0cfe0 0%, #90d4c8 50%, #b0e0a0 100%) !important;
border-color: #7abaa0 !important;
box-shadow: 2px 2px 4px rgba(0,0,0,0.2) !important;
}
/* ========================================
Gradioボタン: VIEW MORE風 / セリフ体 / シャープ
======================================== */
button[class*="primary"], button[class*="secondary"],
button.lg {
border-radius: 0 !important;
font-family: 'Cormorant Garamond', 'Georgia', 'Times New Roman', 'YuMincho', serif !important;
font-weight: 500 !important;
font-size: 13px !important;
letter-spacing: 0.18em !important;
text-transform: uppercase !important;
padding: 15px 24px !important;
transition: all 0.3s ease !important;
touch-action: manipulation !important;
-webkit-tap-highlight-color: transparent !important;
}
/* Primary: ダーク背景 */
button[class*="primary"] {
background: #3a5a3a !important;
color: #d8e8d8 !important;
border: 1px solid #3a5a3a !important;
box-shadow: none !important;
}
button[class*="primary"]:hover {
background: #4a6a4a !important;
}
button[class*="primary"]:active {
background: #5a7a5a !important;
}
/* Secondary: 白背景 + 細線ボーダー */
button[class*="secondary"] {
background: #ffffff !important;
color: #4a5a4a !important;
border: 1px solid #8aaa8a !important;
box-shadow: none !important;
}
button[class*="secondary"]:hover {
background: #f5faf5 !important;
border-color: #6a8a6a !important;
}
button[class*="secondary"]:active {
background: #eef5ee !important;
}
/* ========================================
Textbox / Markdown
======================================== */
textarea {
background: rgba(255,255,255,0.7) !important;
color: #1a1a1a !important;
border-radius: 0 !important;
border: 1px solid #a8c8a8 !important;
font-family: 'SF Mono', 'Fira Code', ui-monospace, monospace !important;
font-size: 12px !important;
}
.prose, .markdown-text, .md {
color: #1a1a1a !important;
}
.prose h2, .prose h3 {
font-family: 'Cormorant Garamond', 'Georgia', 'Times New Roman', serif !important;
color: #2a3a2a !important;
font-weight: 400 !important;
letter-spacing: 0.08em !important;
}
/* ========================================
JSON表示
======================================== */
.json-holder, [data-testid="json"] {
background: rgba(255,255,255,0.5) !important;
border-radius: 0 !important;
border: 1px solid rgba(138,170,138,0.2) !important;
}
/* ========================================
Dataframe
======================================== */
.dataframe, table {
border-radius: 0 !important;
}
.dataframe th {
background: rgba(178,216,210,0.2) !important;
font-family: 'Cormorant Garamond', 'Georgia', serif !important;
font-weight: 500 !important;
letter-spacing: 0.06em !important;
}
/* ========================================
Audio コンポーネント
======================================== */
.audio-container, [data-testid="audio"] {
border-radius: 0 !important;
border: 1px solid #a8c8a8 !important;
}
/* ========================================
Label (確率表示)
======================================== */
[data-testid="label"] {
border-radius: 0 !important;
}
/* ========================================
セクションタイトル
======================================== */
.section-title {
text-align: center !important;
padding: 8px 16px !important;
}
.section-title h3 {
font-family: 'Cormorant Garamond', 'Georgia', 'Times New Roman', serif !important;
font-size: clamp(13px, 3.5vw, 16px) !important;
font-weight: 400 !important;
color: #4a6a4a !important;
letter-spacing: 0.15em !important;
text-transform: uppercase !important;
margin: 0 !important;
}
/* ========================================
アニメーション
======================================== */
@keyframes softFadeIn {
from { opacity: 0; transform: translateY(12px); }
to { opacity: 1; transform: translateY(0); }
}
/* ========================================
自動録音: ステータス表示
======================================== */
.rec-status {
text-align: center;
padding: 8px 12px;
font-family: 'Cormorant Garamond', 'Georgia', serif;
font-size: 14px;
color: #4a6a4a;
letter-spacing: 0.06em;
}
@keyframes recPulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.6; }
}
/* ========================================
スクロールバー
======================================== */
::-webkit-scrollbar { width: 4px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb {
background: rgba(138,170,138,0.3);
border-radius: 4px;
}
/* ========================================
Gradio内部padding/border補正
======================================== */
.block {
border: none !important;
background: transparent !important;
padding: 0 !important;
}
.form {
background: transparent !important;
border: none !important;
}
.container {
background: transparent !important;
}
.tabs {
background: transparent !important;
}
/* ========================================
テキスト色: 全体黒、Primaryボタンのみ白
======================================== */
* {
color: #1a1a1a;
}
button[class*="primary"],
button[class*="primary"] * {
color: #ffffff !important;
}
/* ========================================
レスポンシブ: PC = 中央固定幅
======================================== */
@media (min-width: 768px) {
.gradio-container > .main,
.gradio-container > div > .main {
max-width: 520px !important;
margin: 0 auto !important;
}
.tabitem > div {
margin: 16px auto 28px auto !important;
max-width: 480px !important;
}
div.tab-nav {
max-width: 480px !important;
margin: 16px auto 20px auto !important;
}
}
/* ========================================
スマホ特化
======================================== */
@media (max-width: 767px) {
div.tab-nav {
margin: 12px 12px 16px 12px !important;
padding: 4px !important;
}
div.tab-nav button {
padding: 9px 10px !important;
font-size: 12px !important;
}
.tabitem > div {
margin: 12px 12px 24px 12px !important;
padding: 24px 14px !important;
box-shadow: 6px 6px 0px 0px #c0d8c0 !important;
}
button[class*="primary"], button[class*="secondary"],
button.lg {
width: 100% !important;
padding: 14px 18px !important;
font-size: 14px !important;
}
label span, .label-wrap span {
font-size: 12px !important;
}
input[type="text"], input[type="number"], textarea, select {
font-size: 16px !important;
}
}
"""
with gr.Blocks() as demo:
# ヒーローセクション + リセットボタン
gr.HTML("""
<div class="hero-section">
<h1>Sound Classify</h1>
<p>Streaming ESN &#183; Record &#183; Learn &#183; Predict</p>
</div>
""")
reset_btn = gr.Button("Reset All", size="sm")
with gr.Tabs():
# ── 収集タブ(ラベル管理 + 録音 + データ一覧を統合) ──
with gr.Tab("収集"):
# ラベル追加
gr.Markdown("### ラベル追加")
with gr.Row():
label_box = gr.Textbox(label="新ラベル", placeholder="例: yes", scale=3)
add_btn = gr.Button("追加", size="lg", scale=1)
# 録音 & サンプル追加(自動停止)
gr.Markdown("### 録音(自動停止)")
label_dd = gr.Radio(choices=LABELS, label="ラベル選択", interactive=True, elem_classes=["diamond-radio"])
with gr.Column(elem_id="auto_rec_area"):
audio_rec = gr.Audio(sources=["microphone"], type="numpy", label="マイク(録音→自動停止→自動保存)")
gr.HTML('<div id="rec_status_js" class="rec-status">待機中... 録音ボタンを押してください</div>')
rec_status_md = gr.Markdown("", elem_classes=["rec-status"])
undo_btn = gr.Button("Undo", size="lg")
# データ一覧 & 編集
gr.Markdown("### データ一覧")
table = gr.Dataframe(
headers=["idx", "label", "sec"],
value=dataset_table(),
datatype=["number", "str", "number"],
row_count=(6, "dynamic"),
column_count=(3, "fixed"),
interactive=False,
elem_id="data_table"
)
selected_idx_state = gr.State(None)
replay_audio = gr.Audio(type="numpy", label="選択した音声(再生)", interactive=False)
relabel_dd = gr.Radio(choices=LABELS, label="ラベル修正", interactive=True, elem_classes=["diamond-radio"])
with gr.Row():
relabel_btn = gr.Button("ラベル更新", size="lg")
del_btn = gr.Button("削除", size="lg")
# ── 学習タブ ──
with gr.Tab("学習"):
trials = gr.Slider(3, 30, value=8, step=1, label="Trials")
train_btn = gr.Button("学習", variant="primary", size="lg")
train_log = gr.Textbox(label="学習ログ", interactive=False, lines=2)
# ── 推論タブ ──
with gr.Tab("推論"):
stream_audio = gr.Audio(sources=["microphone"], streaming=True, type="numpy", label="入力")
st = gr.State(None)
pred_box = gr.Textbox(label="推定", interactive=False)
prob_box = gr.Label(label="確率", num_top_classes=10)
stream_audio.stream(stream_predict, inputs=[stream_audio, st], outputs=[pred_box, prob_box, st])
# wiring
add_btn.click(add_label_cb, inputs=[label_box], outputs=[label_dd, table, relabel_dd])
undo_btn.click(undo_last_cb, inputs=[], outputs=[table])
reset_btn.click(reset_all_cb, inputs=[], outputs=[table, label_dd, relabel_dd, audio_rec, selected_idx_state])
# 録音完了時に自動保存(stop_recording = 停止ボタン押下)
audio_rec.stop_recording(auto_add_sample_cb, inputs=[audio_rec, label_dd], outputs=[table, audio_rec, rec_status_md])
# select row -> update state + replay + relabel dropdown value
def _select_and_store(evt: gr.SelectData):
if evt is None or evt.index is None:
return None, None, gr.update(value=None)
row = evt.index[0] if isinstance(evt.index, (tuple, list)) else int(evt.index)
if row < 0 or row >= len(DATA):
return None, None, gr.update(value=None)
item = DATA[row]
return row, item["audio"], gr.update(value=item["label"])
table.select(_select_and_store, inputs=None, outputs=[selected_idx_state, replay_audio, relabel_dd])
relabel_btn.click(relabel_selected_cb, inputs=[selected_idx_state, relabel_dd], outputs=[table])
del_btn.click(delete_selected_cb, inputs=[selected_idx_state], outputs=[table, selected_idx_state, relabel_dd])
train_btn.click(train_random, inputs=[trials], outputs=[train_log])
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
css=CSS,
head=HEAD,
theme=gr.themes.Base(
text_size=gr.themes.sizes.text_md,
font=[gr.themes.GoogleFont("Inter"), gr.themes.GoogleFont("Noto Sans JP")],
).set(
body_text_color="#1a1a1a",
body_text_color_subdued="#4a5a4a",
block_label_text_color="#2a3a2a",
block_title_text_color="#1a1a1a",
checkbox_label_text_color="#1a1a1a",
table_text_color="#1a1a1a",
link_text_color="#3a5a3a",
color_accent_soft="#c8e0d0",
input_background_fill="#ffffff",
input_background_fill_dark="#ffffff",
input_border_color="#8aaa8a",
input_border_color_dark="#8aaa8a",
),
)