Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -263,6 +263,89 @@ def add_sample_cb(audio, label):
|
|
| 263 |
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 264 |
return dataset_table(), gr.update(value=None)
|
| 265 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
def undo_last_cb():
|
| 267 |
if len(DATA) == 0:
|
| 268 |
return dataset_table()
|
|
@@ -325,212 +408,6 @@ HEAD = """
|
|
| 325 |
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 326 |
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 327 |
<link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@300;400;500;600&display=swap" rel="stylesheet">
|
| 328 |
-
<script>
|
| 329 |
-
// ── Auto-stop recording with silence detection ──
|
| 330 |
-
window._autoRec = {
|
| 331 |
-
mediaStream: null,
|
| 332 |
-
audioCtx: null,
|
| 333 |
-
analyser: null,
|
| 334 |
-
scriptNode: null,
|
| 335 |
-
chunks: [],
|
| 336 |
-
isRecording: false,
|
| 337 |
-
speechDetected: false,
|
| 338 |
-
silenceStart: 0,
|
| 339 |
-
SILENCE_THRESH: 0.015,
|
| 340 |
-
SILENCE_DURATION: 1.5,
|
| 341 |
-
MIN_SPEECH_DURATION: 0.3,
|
| 342 |
-
speechStart: 0,
|
| 343 |
-
|
| 344 |
-
async start() {
|
| 345 |
-
if (this.isRecording) return;
|
| 346 |
-
try {
|
| 347 |
-
this.mediaStream = await navigator.mediaDevices.getUserMedia({audio: true});
|
| 348 |
-
} catch(e) {
|
| 349 |
-
const el = document.getElementById('rec_status');
|
| 350 |
-
if (el) el.textContent = 'マイク許可が必要です';
|
| 351 |
-
return;
|
| 352 |
-
}
|
| 353 |
-
this.audioCtx = new (window.AudioContext || window.webkitAudioContext)({sampleRate: 16000});
|
| 354 |
-
const source = this.audioCtx.createMediaStreamSource(this.mediaStream);
|
| 355 |
-
this.analyser = this.audioCtx.createAnalyser();
|
| 356 |
-
this.analyser.fftSize = 2048;
|
| 357 |
-
source.connect(this.analyser);
|
| 358 |
-
|
| 359 |
-
// ScriptProcessorNode to capture raw PCM
|
| 360 |
-
this.scriptNode = this.audioCtx.createScriptProcessor(4096, 1, 1);
|
| 361 |
-
this.chunks = [];
|
| 362 |
-
this.isRecording = true;
|
| 363 |
-
this.speechDetected = false;
|
| 364 |
-
this.silenceStart = 0;
|
| 365 |
-
this.speechStart = 0;
|
| 366 |
-
|
| 367 |
-
const self = this;
|
| 368 |
-
this.scriptNode.onaudioprocess = function(e) {
|
| 369 |
-
if (!self.isRecording) return;
|
| 370 |
-
const input = e.inputBuffer.getChannelData(0);
|
| 371 |
-
const buf = new Float32Array(input.length);
|
| 372 |
-
buf.set(input);
|
| 373 |
-
self.chunks.push(buf);
|
| 374 |
-
|
| 375 |
-
// RMS calculation
|
| 376 |
-
let sum = 0;
|
| 377 |
-
for (let i = 0; i < input.length; i++) sum += input[i] * input[i];
|
| 378 |
-
const rms = Math.sqrt(sum / input.length);
|
| 379 |
-
const now = self.audioCtx.currentTime;
|
| 380 |
-
|
| 381 |
-
// Update status
|
| 382 |
-
const el = document.getElementById('rec_status');
|
| 383 |
-
const bar = document.getElementById('rec_level');
|
| 384 |
-
if (bar) bar.style.width = Math.min(rms * 500, 100) + '%';
|
| 385 |
-
|
| 386 |
-
if (rms > self.SILENCE_THRESH) {
|
| 387 |
-
// Speech detected
|
| 388 |
-
if (!self.speechDetected) {
|
| 389 |
-
self.speechDetected = true;
|
| 390 |
-
self.speechStart = now;
|
| 391 |
-
}
|
| 392 |
-
self.silenceStart = 0;
|
| 393 |
-
if (el) el.textContent = '録音中... 🎙️';
|
| 394 |
-
} else {
|
| 395 |
-
// Silence
|
| 396 |
-
if (self.speechDetected) {
|
| 397 |
-
const speechDur = now - self.speechStart;
|
| 398 |
-
if (speechDur >= self.MIN_SPEECH_DURATION) {
|
| 399 |
-
if (self.silenceStart === 0) {
|
| 400 |
-
self.silenceStart = now;
|
| 401 |
-
} else if (now - self.silenceStart >= self.SILENCE_DURATION) {
|
| 402 |
-
// Auto stop!
|
| 403 |
-
if (el) el.textContent = '保存中...';
|
| 404 |
-
self.stop(true);
|
| 405 |
-
return;
|
| 406 |
-
}
|
| 407 |
-
}
|
| 408 |
-
if (el && self.silenceStart > 0) {
|
| 409 |
-
const remaining = self.SILENCE_DURATION - (now - self.silenceStart);
|
| 410 |
-
el.textContent = '無音検出中... ' + remaining.toFixed(1) + 's';
|
| 411 |
-
}
|
| 412 |
-
} else {
|
| 413 |
-
if (el) el.textContent = '待機中... 話してください';
|
| 414 |
-
}
|
| 415 |
-
}
|
| 416 |
-
};
|
| 417 |
-
|
| 418 |
-
source.connect(this.scriptNode);
|
| 419 |
-
this.scriptNode.connect(this.audioCtx.destination);
|
| 420 |
-
|
| 421 |
-
const el = document.getElementById('rec_status');
|
| 422 |
-
if (el) el.textContent = '待機中... 話してください';
|
| 423 |
-
const btn = document.getElementById('btn_auto_rec');
|
| 424 |
-
if (btn) {
|
| 425 |
-
btn.textContent = '録音中...';
|
| 426 |
-
btn.classList.add('recording');
|
| 427 |
-
}
|
| 428 |
-
},
|
| 429 |
-
|
| 430 |
-
stop(autoSave) {
|
| 431 |
-
if (!this.isRecording) return;
|
| 432 |
-
this.isRecording = false;
|
| 433 |
-
|
| 434 |
-
// Stop media
|
| 435 |
-
if (this.scriptNode) { this.scriptNode.disconnect(); this.scriptNode = null; }
|
| 436 |
-
if (this.mediaStream) { this.mediaStream.getTracks().forEach(t => t.stop()); this.mediaStream = null; }
|
| 437 |
-
|
| 438 |
-
const btn = document.getElementById('btn_auto_rec');
|
| 439 |
-
if (btn) {
|
| 440 |
-
btn.textContent = '録音開始';
|
| 441 |
-
btn.classList.remove('recording');
|
| 442 |
-
}
|
| 443 |
-
|
| 444 |
-
if (autoSave && this.chunks.length > 0 && this.speechDetected) {
|
| 445 |
-
// Concatenate chunks
|
| 446 |
-
let totalLen = 0;
|
| 447 |
-
for (const c of this.chunks) totalLen += c.length;
|
| 448 |
-
const fullAudio = new Float32Array(totalLen);
|
| 449 |
-
let offset = 0;
|
| 450 |
-
for (const c of this.chunks) { fullAudio.set(c, offset); offset += c.length; }
|
| 451 |
-
|
| 452 |
-
// Trim trailing silence (remove last SILENCE_DURATION worth of samples)
|
| 453 |
-
const sr = this.audioCtx ? this.audioCtx.sampleRate : 16000;
|
| 454 |
-
const trimSamples = Math.floor(this.SILENCE_DURATION * sr);
|
| 455 |
-
const trimmedLen = Math.max(sr, totalLen - trimSamples);
|
| 456 |
-
const trimmed = fullAudio.slice(0, trimmedLen);
|
| 457 |
-
|
| 458 |
-
// Convert to WAV blob
|
| 459 |
-
const wav = this._encodeWAV(trimmed, sr);
|
| 460 |
-
const blob = new Blob([wav], {type: 'audio/wav'});
|
| 461 |
-
|
| 462 |
-
// Set to Gradio hidden audio input
|
| 463 |
-
this._setGradioAudio(blob);
|
| 464 |
-
}
|
| 465 |
-
|
| 466 |
-
if (this.audioCtx) { this.audioCtx.close(); this.audioCtx = null; }
|
| 467 |
-
this.chunks = [];
|
| 468 |
-
|
| 469 |
-
const el = document.getElementById('rec_status');
|
| 470 |
-
if (el) el.textContent = autoSave ? '自動保存完了 ✓' : '停止';
|
| 471 |
-
},
|
| 472 |
-
|
| 473 |
-
_encodeWAV(samples, sampleRate) {
|
| 474 |
-
const buffer = new ArrayBuffer(44 + samples.length * 2);
|
| 475 |
-
const view = new DataView(buffer);
|
| 476 |
-
function writeStr(o, s) { for (let i = 0; i < s.length; i++) view.setUint8(o+i, s.charCodeAt(i)); }
|
| 477 |
-
writeStr(0, 'RIFF');
|
| 478 |
-
view.setUint32(4, 36 + samples.length * 2, true);
|
| 479 |
-
writeStr(8, 'WAVE');
|
| 480 |
-
writeStr(12, 'fmt ');
|
| 481 |
-
view.setUint32(16, 16, true);
|
| 482 |
-
view.setUint16(20, 1, true);
|
| 483 |
-
view.setUint16(22, 1, true);
|
| 484 |
-
view.setUint32(24, sampleRate, true);
|
| 485 |
-
view.setUint32(28, sampleRate * 2, true);
|
| 486 |
-
view.setUint16(32, 2, true);
|
| 487 |
-
view.setUint16(34, 16, true);
|
| 488 |
-
writeStr(36, 'data');
|
| 489 |
-
view.setUint32(40, samples.length * 2, true);
|
| 490 |
-
for (let i = 0; i < samples.length; i++) {
|
| 491 |
-
let s = Math.max(-1, Math.min(1, samples[i]));
|
| 492 |
-
view.setInt16(44 + i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
|
| 493 |
-
}
|
| 494 |
-
return buffer;
|
| 495 |
-
},
|
| 496 |
-
|
| 497 |
-
_setGradioAudio(blob) {
|
| 498 |
-
// Find the hidden audio input and set the file
|
| 499 |
-
const url = URL.createObjectURL(blob);
|
| 500 |
-
const hiddenAudio = document.querySelector('#hidden_audio_input audio, #hidden_audio_input input[type="file"]');
|
| 501 |
-
|
| 502 |
-
// Use DataTransfer to programmatically set file on Gradio's file input
|
| 503 |
-
const file = new File([blob], 'recording.wav', {type: 'audio/wav'});
|
| 504 |
-
const dt = new DataTransfer();
|
| 505 |
-
dt.items.add(file);
|
| 506 |
-
|
| 507 |
-
// Find file input inside hidden audio component
|
| 508 |
-
const container = document.getElementById('hidden_audio_input');
|
| 509 |
-
if (!container) return;
|
| 510 |
-
|
| 511 |
-
// Gradio 6: Upload via drag event or input change
|
| 512 |
-
const fileInput = container.querySelector('input[type="file"]');
|
| 513 |
-
if (fileInput) {
|
| 514 |
-
fileInput.files = dt.files;
|
| 515 |
-
fileInput.dispatchEvent(new Event('change', {bubbles: true}));
|
| 516 |
-
// Auto-click submit after short delay
|
| 517 |
-
setTimeout(() => {
|
| 518 |
-
const submitBtn = document.getElementById('btn_auto_submit');
|
| 519 |
-
if (submitBtn) submitBtn.click();
|
| 520 |
-
}, 500);
|
| 521 |
-
return;
|
| 522 |
-
}
|
| 523 |
-
|
| 524 |
-
// Fallback: dispatch drop event
|
| 525 |
-
const dropEvent = new DragEvent('drop', {bubbles: true, dataTransfer: dt});
|
| 526 |
-
container.dispatchEvent(dropEvent);
|
| 527 |
-
setTimeout(() => {
|
| 528 |
-
const submitBtn = document.getElementById('btn_auto_submit');
|
| 529 |
-
if (submitBtn) submitBtn.click();
|
| 530 |
-
}, 500);
|
| 531 |
-
}
|
| 532 |
-
};
|
| 533 |
-
</script>
|
| 534 |
"""
|
| 535 |
|
| 536 |
CSS = """
|
|
@@ -1033,64 +910,20 @@ textarea {
|
|
| 1033 |
}
|
| 1034 |
|
| 1035 |
/* ========================================
|
| 1036 |
-
自動録音:
|
| 1037 |
======================================== */
|
| 1038 |
-
.
|
| 1039 |
text-align: center;
|
| 1040 |
-
padding:
|
| 1041 |
-
}
|
| 1042 |
-
#btn_auto_rec {
|
| 1043 |
-
display: inline-block;
|
| 1044 |
-
padding: 16px 40px;
|
| 1045 |
-
border: 2px solid transparent;
|
| 1046 |
-
border-image: linear-gradient(135deg, #b2d8e8 0%, #b2e0d4 50%, #c8e8c0 100%) 1;
|
| 1047 |
-
background: transparent;
|
| 1048 |
font-family: 'Cormorant Garamond', 'Georgia', serif;
|
| 1049 |
-
font-size:
|
| 1050 |
-
|
| 1051 |
-
letter-spacing: 0.
|
| 1052 |
-
text-transform: uppercase;
|
| 1053 |
-
color: #2a3a2a;
|
| 1054 |
-
cursor: pointer;
|
| 1055 |
-
transition: all 0.3s ease;
|
| 1056 |
-
touch-action: manipulation;
|
| 1057 |
-
}
|
| 1058 |
-
#btn_auto_rec:hover {
|
| 1059 |
-
background: rgba(178,216,210,0.15);
|
| 1060 |
-
}
|
| 1061 |
-
#btn_auto_rec.recording {
|
| 1062 |
-
border-image: none;
|
| 1063 |
-
border-color: #d45050;
|
| 1064 |
-
color: #d45050;
|
| 1065 |
-
animation: recPulse 1.2s ease infinite;
|
| 1066 |
}
|
| 1067 |
@keyframes recPulse {
|
| 1068 |
0%, 100% { opacity: 1; }
|
| 1069 |
50% { opacity: 0.6; }
|
| 1070 |
}
|
| 1071 |
-
.rec-level-wrap {
|
| 1072 |
-
margin: 12px auto 0;
|
| 1073 |
-
width: 80%;
|
| 1074 |
-
max-width: 280px;
|
| 1075 |
-
height: 4px;
|
| 1076 |
-
background: rgba(138,170,138,0.2);
|
| 1077 |
-
border-radius: 0;
|
| 1078 |
-
overflow: hidden;
|
| 1079 |
-
}
|
| 1080 |
-
.rec-level-bar {
|
| 1081 |
-
height: 100%;
|
| 1082 |
-
width: 0%;
|
| 1083 |
-
background: linear-gradient(90deg, #b2d8e8, #90d4c8, #b0e0a0);
|
| 1084 |
-
transition: width 0.08s ease;
|
| 1085 |
-
}
|
| 1086 |
-
#rec_status {
|
| 1087 |
-
display: block;
|
| 1088 |
-
margin-top: 8px;
|
| 1089 |
-
font-family: 'Cormorant Garamond', 'Georgia', serif;
|
| 1090 |
-
font-size: 13px;
|
| 1091 |
-
color: #4a6a4a;
|
| 1092 |
-
letter-spacing: 0.06em;
|
| 1093 |
-
}
|
| 1094 |
|
| 1095 |
/* ========================================
|
| 1096 |
スクロールバー
|
|
@@ -1204,28 +1037,16 @@ with gr.Blocks() as demo:
|
|
| 1204 |
label_box = gr.Textbox(label="新ラベル", placeholder="例: yes", scale=3)
|
| 1205 |
add_btn = gr.Button("追加", size="lg", scale=1)
|
| 1206 |
|
| 1207 |
-
# 録音 & サンプル追加
|
| 1208 |
-
gr.Markdown("### 録音")
|
| 1209 |
label_dd = gr.Radio(choices=LABELS, label="ラベル選択", interactive=True, elem_classes=["diamond-radio"])
|
| 1210 |
-
|
| 1211 |
-
|
| 1212 |
-
gr.
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
|
| 1217 |
-
</button>
|
| 1218 |
-
<div class="rec-level-wrap"><div class="rec-level-bar" id="rec_level"></div></div>
|
| 1219 |
-
<span id="rec_status">ラベルを選択して録音開始</span>
|
| 1220 |
-
</div>
|
| 1221 |
-
""")
|
| 1222 |
-
|
| 1223 |
-
# 隠し音声入力(JSからWAVを受け取る)
|
| 1224 |
-
audio_rec = gr.Audio(type="numpy", label="録音データ", visible=False, elem_id="hidden_audio_input")
|
| 1225 |
-
add_sample_btn = gr.Button("自動保存", variant="primary", size="lg", visible=False, elem_id="btn_auto_submit")
|
| 1226 |
-
with gr.Row():
|
| 1227 |
-
undo_btn = gr.Button("Undo", size="lg")
|
| 1228 |
-
rerec_btn = gr.Button("Clear", size="lg")
|
| 1229 |
|
| 1230 |
# データ一覧 & 編集
|
| 1231 |
gr.Markdown("### データ一覧")
|
|
@@ -1234,7 +1055,7 @@ with gr.Blocks() as demo:
|
|
| 1234 |
value=dataset_table(),
|
| 1235 |
datatype=["number", "str", "number"],
|
| 1236 |
row_count=(6, "dynamic"),
|
| 1237 |
-
|
| 1238 |
interactive=False,
|
| 1239 |
elem_id="data_table"
|
| 1240 |
)
|
|
@@ -1263,9 +1084,11 @@ with gr.Blocks() as demo:
|
|
| 1263 |
add_btn.click(add_label_cb, inputs=[label_box], outputs=[label_dd, table, relabel_dd])
|
| 1264 |
add_sample_btn.click(add_sample_cb, inputs=[audio_rec, label_dd], outputs=[table, audio_rec])
|
| 1265 |
undo_btn.click(undo_last_cb, inputs=[], outputs=[table])
|
| 1266 |
-
rerec_btn.click(clear_rec_cb, inputs=[], outputs=[audio_rec])
|
| 1267 |
reset_btn.click(reset_all_cb, inputs=[], outputs=[table, label_dd, relabel_dd, audio_rec, selected_idx_state])
|
| 1268 |
|
|
|
|
|
|
|
|
|
|
| 1269 |
# select row -> update state + replay + relabel dropdown value
|
| 1270 |
def _select_and_store(evt: gr.SelectData):
|
| 1271 |
if evt is None or evt.index is None:
|
|
|
|
| 263 |
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 264 |
return dataset_table(), gr.update(value=None)
|
| 265 |
|
| 266 |
+
# ── 自動録音: ストリーミング蓄積 + 無音検出 ──
|
| 267 |
+
|
| 268 |
+
SILENCE_THRESH = 0.012
|
| 269 |
+
SILENCE_CHUNKS_NEEDED = 3 # 約1.5秒(stream_every=0.5sなので 0.5×3=1.5s)
|
| 270 |
+
MIN_SPEECH_CHUNKS = 2 # 最低2チャンク分の音声
|
| 271 |
+
|
| 272 |
+
def _new_rec_state():
|
| 273 |
+
return {"chunks": [], "speech_detected": False, "silence_count": 0, "saved": False, "status": "待機中"}
|
| 274 |
+
|
| 275 |
+
def rec_stream_cb(chunk, label, rec_state):
|
| 276 |
+
"""ストリーミング録音: 音声蓄積 + 無音検出 → 自動保存"""
|
| 277 |
+
if rec_state is None:
|
| 278 |
+
rec_state = _new_rec_state()
|
| 279 |
+
|
| 280 |
+
if rec_state.get("saved", False):
|
| 281 |
+
# 前回保存済み → リセット
|
| 282 |
+
rec_state = _new_rec_state()
|
| 283 |
+
|
| 284 |
+
if chunk is None:
|
| 285 |
+
return rec_state, dataset_table(), "待機中... マイクを開始してください"
|
| 286 |
+
|
| 287 |
+
sr, y = chunk
|
| 288 |
+
if y is None or len(y) < 10:
|
| 289 |
+
return rec_state, gr.update(), rec_state.get("status", "")
|
| 290 |
+
|
| 291 |
+
y = _mono_float32(y)
|
| 292 |
+
|
| 293 |
+
# RMS計算
|
| 294 |
+
rms = float(np.sqrt(np.mean(y ** 2)))
|
| 295 |
+
|
| 296 |
+
if rms > SILENCE_THRESH:
|
| 297 |
+
# 音声あり
|
| 298 |
+
rec_state["chunks"].append((sr, y.copy()))
|
| 299 |
+
rec_state["speech_detected"] = True
|
| 300 |
+
rec_state["silence_count"] = 0
|
| 301 |
+
n = len(rec_state["chunks"])
|
| 302 |
+
rec_state["status"] = f"録音中... 🎙️ ({n} chunks)"
|
| 303 |
+
else:
|
| 304 |
+
# 無音
|
| 305 |
+
if rec_state["speech_detected"]:
|
| 306 |
+
rec_state["silence_count"] = rec_state.get("silence_count", 0) + 1
|
| 307 |
+
remaining = max(0, SILENCE_CHUNKS_NEEDED - rec_state["silence_count"])
|
| 308 |
+
rec_state["status"] = f"無音検出中... あと{remaining}で自動保存"
|
| 309 |
+
|
| 310 |
+
if rec_state["silence_count"] >= SILENCE_CHUNKS_NEEDED:
|
| 311 |
+
# 十分な音声があれば保存
|
| 312 |
+
if len(rec_state["chunks"]) >= MIN_SPEECH_CHUNKS:
|
| 313 |
+
label = (label or "").strip()
|
| 314 |
+
if label in LABELS:
|
| 315 |
+
# チャンクを結合
|
| 316 |
+
all_y = []
|
| 317 |
+
final_sr = SR
|
| 318 |
+
for s, y_chunk in rec_state["chunks"]:
|
| 319 |
+
if s != SR:
|
| 320 |
+
y_chunk = librosa.resample(y_chunk, orig_sr=s, target_sr=SR)
|
| 321 |
+
all_y.append(y_chunk)
|
| 322 |
+
full_y = np.concatenate(all_y).astype(np.float32)
|
| 323 |
+
full_y /= (np.max(np.abs(full_y)) + 1e-9)
|
| 324 |
+
audio_n = (SR, full_y)
|
| 325 |
+
U = audio_to_sequence(audio_n)
|
| 326 |
+
if U is not None and len(U) >= 5:
|
| 327 |
+
DATA.append({"audio": audio_n, "U": U, "label": label})
|
| 328 |
+
rec_state["status"] = f"✓ 自動保存完了 (idx={len(DATA)-1})"
|
| 329 |
+
else:
|
| 330 |
+
rec_state["status"] = "音声が短すぎます"
|
| 331 |
+
else:
|
| 332 |
+
rec_state["status"] = "ラベルを選択してください"
|
| 333 |
+
|
| 334 |
+
rec_state["saved"] = True
|
| 335 |
+
rec_state["chunks"] = []
|
| 336 |
+
rec_state["speech_detected"] = False
|
| 337 |
+
rec_state["silence_count"] = 0
|
| 338 |
+
return rec_state, dataset_table(), rec_state["status"]
|
| 339 |
+
else:
|
| 340 |
+
rec_state["status"] = "音声が短すぎます。もう一度話してください"
|
| 341 |
+
rec_state["chunks"] = []
|
| 342 |
+
rec_state["speech_detected"] = False
|
| 343 |
+
rec_state["silence_count"] = 0
|
| 344 |
+
else:
|
| 345 |
+
rec_state["status"] = "待機中... 話してください 🎤"
|
| 346 |
+
|
| 347 |
+
return rec_state, gr.update(), rec_state.get("status", "")
|
| 348 |
+
|
| 349 |
def undo_last_cb():
|
| 350 |
if len(DATA) == 0:
|
| 351 |
return dataset_table()
|
|
|
|
| 408 |
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 409 |
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 410 |
<link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@300;400;500;600&display=swap" rel="stylesheet">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
"""
|
| 412 |
|
| 413 |
CSS = """
|
|
|
|
| 910 |
}
|
| 911 |
|
| 912 |
/* ========================================
|
| 913 |
+
自動録音: ステータス表示
|
| 914 |
======================================== */
|
| 915 |
+
.rec-status {
|
| 916 |
text-align: center;
|
| 917 |
+
padding: 8px 12px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 918 |
font-family: 'Cormorant Garamond', 'Georgia', serif;
|
| 919 |
+
font-size: 14px;
|
| 920 |
+
color: #4a6a4a;
|
| 921 |
+
letter-spacing: 0.06em;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
}
|
| 923 |
@keyframes recPulse {
|
| 924 |
0%, 100% { opacity: 1; }
|
| 925 |
50% { opacity: 0.6; }
|
| 926 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 927 |
|
| 928 |
/* ========================================
|
| 929 |
スクロールバー
|
|
|
|
| 1037 |
label_box = gr.Textbox(label="新ラベル", placeholder="例: yes", scale=3)
|
| 1038 |
add_btn = gr.Button("追加", size="lg", scale=1)
|
| 1039 |
|
| 1040 |
+
# 録音 & サンプル追加(自動停止)
|
| 1041 |
+
gr.Markdown("### 録音(自動停止)")
|
| 1042 |
label_dd = gr.Radio(choices=LABELS, label="ラベル選択", interactive=True, elem_classes=["diamond-radio"])
|
| 1043 |
+
rec_audio = gr.Audio(sources=["microphone"], streaming=True, type="numpy", label="マイク(開始で録音)")
|
| 1044 |
+
rec_status_md = gr.Markdown("待機中... マイクを開始してください", elem_classes=["rec-status"])
|
| 1045 |
+
rec_state = gr.State(None)
|
| 1046 |
+
# 隠し(手動追加用に残す)
|
| 1047 |
+
audio_rec = gr.Audio(type="numpy", visible=False)
|
| 1048 |
+
add_sample_btn = gr.Button("追加", variant="primary", size="lg", visible=False)
|
| 1049 |
+
undo_btn = gr.Button("Undo", size="lg")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1050 |
|
| 1051 |
# データ一覧 & 編集
|
| 1052 |
gr.Markdown("### データ一覧")
|
|
|
|
| 1055 |
value=dataset_table(),
|
| 1056 |
datatype=["number", "str", "number"],
|
| 1057 |
row_count=(6, "dynamic"),
|
| 1058 |
+
column_count=(3, "fixed"),
|
| 1059 |
interactive=False,
|
| 1060 |
elem_id="data_table"
|
| 1061 |
)
|
|
|
|
| 1084 |
add_btn.click(add_label_cb, inputs=[label_box], outputs=[label_dd, table, relabel_dd])
|
| 1085 |
add_sample_btn.click(add_sample_cb, inputs=[audio_rec, label_dd], outputs=[table, audio_rec])
|
| 1086 |
undo_btn.click(undo_last_cb, inputs=[], outputs=[table])
|
|
|
|
| 1087 |
reset_btn.click(reset_all_cb, inputs=[], outputs=[table, label_dd, relabel_dd, audio_rec, selected_idx_state])
|
| 1088 |
|
| 1089 |
+
# ストリーミング録音 → 自動停止 & 保存
|
| 1090 |
+
rec_audio.stream(rec_stream_cb, inputs=[rec_audio, label_dd, rec_state], outputs=[rec_state, table, rec_status_md], stream_every=0.5)
|
| 1091 |
+
|
| 1092 |
# select row -> update state + replay + relabel dropdown value
|
| 1093 |
def _select_and_store(evt: gr.SelectData):
|
| 1094 |
if evt is None or evt.index is None:
|