Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -263,88 +263,21 @@ def add_sample_cb(audio, label):
|
|
| 263 |
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 264 |
return dataset_table(), gr.update(value=None)
|
| 265 |
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
return {"chunks": [], "speech_detected": False, "silence_count": 0, "saved": False, "status": "待機中"}
|
| 274 |
-
|
| 275 |
-
def rec_stream_cb(chunk, label, rec_state):
|
| 276 |
-
"""ストリーミング録音: 音声蓄積 + 無音検出 → 自動保存"""
|
| 277 |
-
if rec_state is None:
|
| 278 |
-
rec_state = _new_rec_state()
|
| 279 |
-
|
| 280 |
-
if rec_state.get("saved", False):
|
| 281 |
-
# 前回保存済み → リセット
|
| 282 |
-
rec_state = _new_rec_state()
|
| 283 |
-
|
| 284 |
-
if chunk is None:
|
| 285 |
-
return rec_state, dataset_table(), "待機中... マイクを開始してください"
|
| 286 |
-
|
| 287 |
-
sr, y = chunk
|
| 288 |
-
if y is None or len(y) < 10:
|
| 289 |
-
return rec_state, gr.update(), rec_state.get("status", "")
|
| 290 |
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
if rms > SILENCE_THRESH:
|
| 297 |
-
# 音声あり
|
| 298 |
-
rec_state["chunks"].append((sr, y.copy()))
|
| 299 |
-
rec_state["speech_detected"] = True
|
| 300 |
-
rec_state["silence_count"] = 0
|
| 301 |
-
n = len(rec_state["chunks"])
|
| 302 |
-
rec_state["status"] = f"録音中... 🎙️ ({n} chunks)"
|
| 303 |
-
else:
|
| 304 |
-
# 無音
|
| 305 |
-
if rec_state["speech_detected"]:
|
| 306 |
-
rec_state["silence_count"] = rec_state.get("silence_count", 0) + 1
|
| 307 |
-
remaining = max(0, SILENCE_CHUNKS_NEEDED - rec_state["silence_count"])
|
| 308 |
-
rec_state["status"] = f"無音検出中... あと{remaining}で自動保存"
|
| 309 |
-
|
| 310 |
-
if rec_state["silence_count"] >= SILENCE_CHUNKS_NEEDED:
|
| 311 |
-
# 十分な音声があれば保存
|
| 312 |
-
if len(rec_state["chunks"]) >= MIN_SPEECH_CHUNKS:
|
| 313 |
-
label = (label or "").strip()
|
| 314 |
-
if label in LABELS:
|
| 315 |
-
# チャンクを結合
|
| 316 |
-
all_y = []
|
| 317 |
-
final_sr = SR
|
| 318 |
-
for s, y_chunk in rec_state["chunks"]:
|
| 319 |
-
if s != SR:
|
| 320 |
-
y_chunk = librosa.resample(y_chunk, orig_sr=s, target_sr=SR)
|
| 321 |
-
all_y.append(y_chunk)
|
| 322 |
-
full_y = np.concatenate(all_y).astype(np.float32)
|
| 323 |
-
full_y /= (np.max(np.abs(full_y)) + 1e-9)
|
| 324 |
-
audio_n = (SR, full_y)
|
| 325 |
-
U = audio_to_sequence(audio_n)
|
| 326 |
-
if U is not None and len(U) >= 5:
|
| 327 |
-
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 328 |
-
rec_state["status"] = f"✓ 自動保存完了 (idx={len(DATA)-1})"
|
| 329 |
-
else:
|
| 330 |
-
rec_state["status"] = "音声が短すぎます"
|
| 331 |
-
else:
|
| 332 |
-
rec_state["status"] = "ラベルを選択してください"
|
| 333 |
-
|
| 334 |
-
rec_state["saved"] = True
|
| 335 |
-
rec_state["chunks"] = []
|
| 336 |
-
rec_state["speech_detected"] = False
|
| 337 |
-
rec_state["silence_count"] = 0
|
| 338 |
-
return rec_state, dataset_table(), rec_state["status"]
|
| 339 |
-
else:
|
| 340 |
-
rec_state["status"] = "音声が短すぎます。もう一度話してください"
|
| 341 |
-
rec_state["chunks"] = []
|
| 342 |
-
rec_state["speech_detected"] = False
|
| 343 |
-
rec_state["silence_count"] = 0
|
| 344 |
-
else:
|
| 345 |
-
rec_state["status"] = "待機中... 話してください 🎤"
|
| 346 |
-
|
| 347 |
-
return rec_state, gr.update(), rec_state.get("status", "")
|
| 348 |
|
| 349 |
def undo_last_cb():
|
| 350 |
if len(DATA) == 0:
|
|
@@ -408,6 +341,106 @@ HEAD = """
|
|
| 408 |
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 409 |
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 410 |
<link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@300;400;500;600&display=swap" rel="stylesheet">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
"""
|
| 412 |
|
| 413 |
CSS = """
|
|
@@ -1040,12 +1073,10 @@ with gr.Blocks() as demo:
|
|
| 1040 |
# 録音 & サンプル追加(自動停止)
|
| 1041 |
gr.Markdown("### 録音(自動停止)")
|
| 1042 |
label_dd = gr.Radio(choices=LABELS, label="ラベル選択", interactive=True, elem_classes=["diamond-radio"])
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
audio_rec = gr.Audio(type="numpy", visible=False)
|
| 1048 |
-
add_sample_btn = gr.Button("追加", variant="primary", size="lg", visible=False)
|
| 1049 |
undo_btn = gr.Button("Undo", size="lg")
|
| 1050 |
|
| 1051 |
# データ一覧 & 編集
|
|
@@ -1082,12 +1113,11 @@ with gr.Blocks() as demo:
|
|
| 1082 |
|
| 1083 |
# wiring
|
| 1084 |
add_btn.click(add_label_cb, inputs=[label_box], outputs=[label_dd, table, relabel_dd])
|
| 1085 |
-
add_sample_btn.click(add_sample_cb, inputs=[audio_rec, label_dd], outputs=[table, audio_rec])
|
| 1086 |
undo_btn.click(undo_last_cb, inputs=[], outputs=[table])
|
| 1087 |
reset_btn.click(reset_all_cb, inputs=[], outputs=[table, label_dd, relabel_dd, audio_rec, selected_idx_state])
|
| 1088 |
|
| 1089 |
-
#
|
| 1090 |
-
|
| 1091 |
|
| 1092 |
# select row -> update state + replay + relabel dropdown value
|
| 1093 |
def _select_and_store(evt: gr.SelectData):
|
|
|
|
| 263 |
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 264 |
return dataset_table(), gr.update(value=None)
|
| 265 |
|
| 266 |
+
def auto_add_sample_cb(audio, label):
|
| 267 |
+
"""録音完了時に自動でDATAに追加"""
|
| 268 |
+
label = (label or "").strip()
|
| 269 |
+
if label not in LABELS:
|
| 270 |
+
return dataset_table(), gr.update(value=None), "⚠ ラベルを選択してください"
|
| 271 |
+
if audio is None:
|
| 272 |
+
return dataset_table(), gr.update(value=None), "待機中..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
+
audio_n = normalize_audio_tuple(audio)
|
| 275 |
+
U = audio_to_sequence(audio_n)
|
| 276 |
+
if U is None or len(U) < 5:
|
| 277 |
+
return dataset_table(), gr.update(value=None), "⚠ 音声が短すぎます"
|
| 278 |
|
| 279 |
+
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 280 |
+
return dataset_table(), gr.update(value=None), f"✓ 保存完了 (idx={len(DATA)-1})"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
def undo_last_cb():
|
| 283 |
if len(DATA) == 0:
|
|
|
|
| 341 |
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 342 |
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 343 |
<link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@300;400;500;600&display=swap" rel="stylesheet">
|
| 344 |
+
<script>
|
| 345 |
+
/* ── 無音自動停止: Web Audio API で音量監視 → 停止ボタン自動クリック ── */
|
| 346 |
+
(function(){
|
| 347 |
+
const SILENCE_THRESHOLD = 0.01;
|
| 348 |
+
const SILENCE_MS = 1500;
|
| 349 |
+
const SPEECH_THRESHOLD = 0.015;
|
| 350 |
+
let audioCtx, analyser, srcNode, silenceStart, speechDetected, monitoring;
|
| 351 |
+
|
| 352 |
+
function getRecArea() {
|
| 353 |
+
return document.getElementById('auto_rec_area');
|
| 354 |
+
}
|
| 355 |
+
function getStopBtn() {
|
| 356 |
+
const area = getRecArea();
|
| 357 |
+
if (!area) return null;
|
| 358 |
+
/* Gradio 6: 録音中は stop ボタン(■)が出る。aria-label で探す */
|
| 359 |
+
let btn = area.querySelector('button[aria-label="Stop recording"]');
|
| 360 |
+
if (btn) return btn;
|
| 361 |
+
btn = area.querySelector('button[aria-label="停止"]');
|
| 362 |
+
if (btn) return btn;
|
| 363 |
+
/* フォールバック: 録音中に表示される赤い■ボタンを探す */
|
| 364 |
+
const btns = area.querySelectorAll('button');
|
| 365 |
+
for (const b of btns) {
|
| 366 |
+
const svg = b.querySelector('svg');
|
| 367 |
+
if (svg) {
|
| 368 |
+
const rect = svg.querySelector('rect');
|
| 369 |
+
if (rect) return b; /* ■アイコンがある = stop */
|
| 370 |
+
}
|
| 371 |
+
}
|
| 372 |
+
return null;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
function startMonitoring(stream) {
|
| 376 |
+
if (monitoring) return;
|
| 377 |
+
monitoring = true;
|
| 378 |
+
speechDetected = false;
|
| 379 |
+
silenceStart = null;
|
| 380 |
+
audioCtx = new (window.AudioContext || window.webkitAudioContext)();
|
| 381 |
+
analyser = audioCtx.createAnalyser();
|
| 382 |
+
analyser.fftSize = 512;
|
| 383 |
+
srcNode = audioCtx.createMediaStreamSource(stream);
|
| 384 |
+
srcNode.connect(analyser);
|
| 385 |
+
const buf = new Float32Array(analyser.fftSize);
|
| 386 |
+
const statusEl = document.getElementById('rec_status_js');
|
| 387 |
+
|
| 388 |
+
function tick() {
|
| 389 |
+
if (!monitoring) return;
|
| 390 |
+
analyser.getFloatTimeDomainData(buf);
|
| 391 |
+
let sum = 0;
|
| 392 |
+
for (let i = 0; i < buf.length; i++) sum += buf[i] * buf[i];
|
| 393 |
+
const rms = Math.sqrt(sum / buf.length);
|
| 394 |
+
|
| 395 |
+
if (rms > SPEECH_THRESHOLD) {
|
| 396 |
+
speechDetected = true;
|
| 397 |
+
silenceStart = null;
|
| 398 |
+
if (statusEl) statusEl.textContent = '録音中... 🎙️';
|
| 399 |
+
} else if (speechDetected && rms < SILENCE_THRESHOLD) {
|
| 400 |
+
if (!silenceStart) silenceStart = Date.now();
|
| 401 |
+
const elapsed = Date.now() - silenceStart;
|
| 402 |
+
const remain = Math.max(0, Math.ceil((SILENCE_MS - elapsed) / 1000));
|
| 403 |
+
if (statusEl) statusEl.textContent = '無音検出中... あと' + remain + '秒';
|
| 404 |
+
if (elapsed >= SILENCE_MS) {
|
| 405 |
+
if (statusEl) statusEl.textContent = '自動停止...';
|
| 406 |
+
const stopBtn = getStopBtn();
|
| 407 |
+
if (stopBtn) stopBtn.click();
|
| 408 |
+
stopMonitoring();
|
| 409 |
+
return;
|
| 410 |
+
}
|
| 411 |
+
} else {
|
| 412 |
+
if (statusEl) statusEl.textContent = '待機中... 話してください 🎤';
|
| 413 |
+
}
|
| 414 |
+
requestAnimationFrame(tick);
|
| 415 |
+
}
|
| 416 |
+
tick();
|
| 417 |
+
}
|
| 418 |
+
|
| 419 |
+
function stopMonitoring() {
|
| 420 |
+
monitoring = false;
|
| 421 |
+
if (srcNode) { try { srcNode.disconnect(); } catch(e){} }
|
| 422 |
+
if (audioCtx) { try { audioCtx.close(); } catch(e){} }
|
| 423 |
+
srcNode = null; audioCtx = null; analyser = null;
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
/* MediaStream を横取り: getUserMedia を wrap */
|
| 427 |
+
const origGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
|
| 428 |
+
navigator.mediaDevices.getUserMedia = function(constraints) {
|
| 429 |
+
return origGetUserMedia(constraints).then(function(stream) {
|
| 430 |
+
if (constraints && constraints.audio) {
|
| 431 |
+
const area = getRecArea();
|
| 432 |
+
if (area) {
|
| 433 |
+
startMonitoring(stream);
|
| 434 |
+
stream.getAudioTracks().forEach(function(track) {
|
| 435 |
+
track.addEventListener('ended', stopMonitoring);
|
| 436 |
+
});
|
| 437 |
+
}
|
| 438 |
+
}
|
| 439 |
+
return stream;
|
| 440 |
+
});
|
| 441 |
+
};
|
| 442 |
+
})();
|
| 443 |
+
</script>
|
| 444 |
"""
|
| 445 |
|
| 446 |
CSS = """
|
|
|
|
| 1073 |
# 録音 & サンプル追加(自動停止)
|
| 1074 |
gr.Markdown("### 録音(自動停止)")
|
| 1075 |
label_dd = gr.Radio(choices=LABELS, label="ラベル選択", interactive=True, elem_classes=["diamond-radio"])
|
| 1076 |
+
with gr.Column(elem_id="auto_rec_area"):
|
| 1077 |
+
audio_rec = gr.Audio(sources=["microphone"], type="numpy", label="マイク(録音→自動停止→自動保存)")
|
| 1078 |
+
gr.HTML('<div id="rec_status_js" class="rec-status">待機中... 録音ボタンを押してください</div>')
|
| 1079 |
+
rec_status_md = gr.Markdown("", elem_classes=["rec-status"])
|
|
|
|
|
|
|
| 1080 |
undo_btn = gr.Button("Undo", size="lg")
|
| 1081 |
|
| 1082 |
# データ一覧 & 編集
|
|
|
|
| 1113 |
|
| 1114 |
# wiring
|
| 1115 |
add_btn.click(add_label_cb, inputs=[label_box], outputs=[label_dd, table, relabel_dd])
|
|
|
|
| 1116 |
undo_btn.click(undo_last_cb, inputs=[], outputs=[table])
|
| 1117 |
reset_btn.click(reset_all_cb, inputs=[], outputs=[table, label_dd, relabel_dd, audio_rec, selected_idx_state])
|
| 1118 |
|
| 1119 |
+
# 録音完了(停止)時に自動保存
|
| 1120 |
+
audio_rec.stop_recording(auto_add_sample_cb, inputs=[audio_rec, label_dd], outputs=[table, audio_rec, rec_status_md])
|
| 1121 |
|
| 1122 |
# select row -> update state + replay + relabel dropdown value
|
| 1123 |
def _select_and_store(evt: gr.SelectData):
|