Spaces:

wmoto-ai
/

moonshine-tiny-ja-demo

Running

App Files Files Community

wmoto-ai committed 13 days ago

Commit

cb5e942

verified ·

1 Parent(s): 4ddfbcd

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

README.md +31 -4
index.html +472 -18

README.md CHANGED Viewed

@@ -1,10 +1,37 @@
 ---
-title: Moonshine Tiny Ja Demo
-emoji: 🌖
 colorFrom: blue
-colorTo: red
 sdk: static
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Moonshine Tiny JA
+emoji: 🎤
 colorFrom: blue
+colorTo: green
 sdk: static
 pinned: false
+license: other
+license_name: moonshine-ai-community-license
+license_link: https://github.com/usefulsensors/moonshine/blob/main/LICENSE
+short_description: ブラウザで動作する日本語リアルタイム音声認識
 ---
+# Moonshine Tiny JA Demo
+ブラウザ上で動作する日本語リアルタイム文字起こしデモです。
+## 特徴
+- **完全ローカル処理**: 音声データはサーバーに送信されません
+- **リアルタイム**: 指定間隔（1〜6秒）ごとに文字起こし
+- **軽量**: 27Mパラメータの小型モデル
+## 使用モデル
+- [wmoto-ai/moonshine-tiny-ja-ONNX](https://huggingface.co/wmoto-ai/moonshine-tiny-ja-ONNX)
+- ベース: [UsefulSensors/moonshine-tiny-ja](https://huggingface.co/UsefulSensors/moonshine-tiny-ja)
+## 技術スタック
+- [Transformers.js](https://huggingface.co/docs/transformers.js) - ブラウザでのML推論
+- ONNX Runtime Web - モデル実行
+- Web Audio API - 音声処理
+## ライセンス
+[Moonshine AI Community License](https://github.com/usefulsensors/moonshine/blob/main/LICENSE)

index.html CHANGED Viewed

@@ -1,19 +1,473 @@
-<!doctype html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
 </html>

+<!DOCTYPE html>
+<html lang="ja">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Moonshine Tiny JA - リアルタイム日本語文字起こし</title>
+  <meta name="description" content="ブラウザ上で動作する日本語音声認識デモ。Moonshine Tiny JAモデルをTransformers.jsで実行。">
+  <style>
+    * {
+      box-sizing: border-box;
+    }
+    body {
+      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
+      max-width: 800px;
+      margin: 0 auto;
+      padding: 20px;
+      background: #1a1a2e;
+      color: #eee;
+      min-height: 100vh;
+    }
+    h1 {
+      text-align: center;
+      color: #00d4ff;
+      margin-bottom: 5px;
+    }
+    .subtitle {
+      text-align: center;
+      color: #888;
+      font-size: 14px;
+      margin-bottom: 20px;
+    }
+    .status {
+      text-align: center;
+      padding: 10px;
+      border-radius: 8px;
+      margin: 20px 0;
+      background: #16213e;
+    }
+    .status.loading {
+      color: #ffa500;
+    }
+    .status.ready {
+      color: #00ff88;
+    }
+    .status.recording {
+      color: #ff4757;
+    }
+    .status.error {
+      color: #ff4757;
+      background: #2d1f1f;
+    }
+    button {
+      display: block;
+      width: 200px;
+      margin: 20px auto;
+      padding: 15px 30px;
+      font-size: 18px;
+      border: none;
+      border-radius: 50px;
+      cursor: pointer;
+      transition: all 0.3s;
+    }
+    button:disabled {
+      background: #555;
+      cursor: not-allowed;
+    }
+    #startBtn {
+      background: linear-gradient(135deg, #00d4ff, #00ff88);
+      color: #1a1a2e;
+      font-weight: bold;
+    }
+    #startBtn:hover:not(:disabled) {
+      transform: scale(1.05);
+      box-shadow: 0 0 20px rgba(0, 212, 255, 0.5);
+    }
+    #startBtn.recording {
+      background: linear-gradient(135deg, #ff4757, #ff6b81);
+      animation: pulse 1s infinite;
+    }
+    @keyframes pulse {
+      0%, 100% {
+        box-shadow: 0 0 0 0 rgba(255, 71, 87, 0.4);
+      }
+      50% {
+        box-shadow: 0 0 0 15px rgba(255, 71, 87, 0);
+      }
+    }
+    .slider-container {
+      margin: 20px 0;
+      padding: 15px;
+      background: #16213e;
+      border-radius: 8px;
+    }
+    .slider-container label {
+      display: block;
+      margin-bottom: 10px;
+    }
+    .slider-container input[type="range"] {
+      width: 100%;
+      cursor: pointer;
+    }
+    .slider-labels {
+      display: flex;
+      justify-content: space-between;
+      font-size: 12px;
+      color: #888;
+      margin-top: 5px;
+    }
+    #transcript {
+      background: #16213e;
+      border-radius: 12px;
+      padding: 20px;
+      min-height: 200px;
+      margin-top: 20px;
+      font-size: 18px;
+      line-height: 1.8;
+      white-space: pre-wrap;
+      word-wrap: break-word;
+    }
+    #transcript:empty::before {
+      content: "文字起こし結果がここに表示されます...";
+      color: #666;
+    }
+    #currentText {
+      color: #00d4ff;
+      font-style: italic;
+      min-height: 30px;
+      margin-top: 10px;
+      text-align: center;
+    }
+    .info {
+      background: #16213e;
+      border-radius: 8px;
+      padding: 15px;
+      margin: 20px 0;
+      font-size: 14px;
+      color: #aaa;
+    }
+    .info a {
+      color: #00d4ff;
+      text-decoration: none;
+    }
+    .info a:hover {
+      text-decoration: underline;
+    }
+    .progress-container {
+      background: #0f0f23;
+      border-radius: 10px;
+      height: 20px;
+      margin: 10px 0;
+      overflow: hidden;
+    }
+    .progress-bar {
+      height: 100%;
+      background: linear-gradient(90deg, #00d4ff, #00ff88);
+      width: 0%;
+      transition: width 0.3s;
+    }
+    .footer {
+      text-align: center;
+      margin-top: 30px;
+      padding-top: 20px;
+      border-top: 1px solid #333;
+      font-size: 12px;
+      color: #666;
+    }
+    .footer a {
+      color: #00d4ff;
+      text-decoration: none;
+    }
+  </style>
+</head>
+<body>
+  <h1>Moonshine Tiny JA</h1>
+  <p class="subtitle">ブラウザで動作する日本語リアルタイム文字起こし</p>
+  <div id="status" class="status loading">
+    モデルを読み込み中...
+    <div class="progress-container">
+      <div id="progressBar" class="progress-bar"></div>
+    </div>
+  </div>
+  <button id="startBtn" disabled>読み込み中...</button>
+  <div class="slider-container">
+    <label for="intervalSlider">
+      録音間隔: <span id="intervalValue">3</span>秒
+    </label>
+    <input type="range" id="intervalSlider" min="1" max="6" step="0.5" value="3">
+    <div class="slider-labels">
+      <span>1秒 (高速)</span>
+      <span>6秒 (高精度)</span>
+    </div>
+  </div>
+  <div id="currentText"></div>
+  <div id="transcript"></div>
+  <div class="info">
+    <strong>使い方:</strong><br>
+    1. モデルの読み込みを待つ（初回は数分かかります）<br>
+    2. 「録音開始」ボタンをクリック<br>
+    3. マイクに向かって話す<br>
+    4. 指定間隔ごとに文字起こし結果が表示されます<br><br>
+    <strong>モデル:</strong> <a href="https://huggingface.co/wmoto-ai/moonshine-tiny-ja-ONNX" target="_blank">wmoto-ai/moonshine-tiny-ja-ONNX</a><br>
+    <strong>ベース:</strong> <a href="https://huggingface.co/UsefulSensors/moonshine-tiny-ja" target="_blank">UsefulSensors/moonshine-tiny-ja</a>
+  </div>
+  <div class="footer">
+    Powered by <a href="https://www.moonshine.ai/" target="_blank">Moonshine AI</a> |
+    <a href="https://huggingface.co/docs/transformers.js" target="_blank">Transformers.js</a><br>
+    Licensed under <a href="https://github.com/usefulsensors/moonshine/blob/main/LICENSE" target="_blank">Moonshine AI Community License</a>
+  </div>
+  <script type="module">
+    import {
+      MoonshineForConditionalGeneration,
+      AutoProcessor,
+      AutoTokenizer,
+    } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3';
+    const statusEl = document.getElementById('status'); // status banner; its text and class (loading/ready/recording/error) are updated by the functions below
+    const startBtn = document.getElementById('startBtn'); // single button that toggles recording on and off
+    const transcriptEl = document.getElementById('transcript'); // accumulated transcript, one recognized segment per line
+    const currentTextEl = document.getElementById('currentText'); // most recent segment or a transient notice
+    const progressBar = document.getElementById('progressBar'); // width is set as a percentage while the model downloads
+    const intervalSlider = document.getElementById('intervalSlider'); // segment length in seconds (range input, 1-6 in 0.5 steps)
+    const intervalValue = document.getElementById('intervalValue'); // label that mirrors the slider value
+    intervalSlider.addEventListener('input', () => { // keep the visible label in sync with the slider
+      intervalValue.textContent = intervalSlider.value;
+    });
+    let model = null; // set by loadModel(); transcribeAudio() is a no-op until model, processor and tokenizer are all set
+    let processor = null; // set by loadModel()
+    let tokenizer = null; // set by loadModel()
+    let isRecording = false; // true between startRecording() and stopRecording()
+    let mediaRecorder = null; // recorder of the current session, created in startRecording()
+    let audioContext = null; // created lazily in transcribeAudio() and reused for decoding
+    let audioChunks = []; // encoded chunks of the segment currently being recorded
+    const MODEL_ID = 'wmoto-ai/moonshine-tiny-ja-ONNX'; // repository id passed to every from_pretrained() call
+    async function loadModel() {
+      try {
+        statusEl.textContent = 'モデルを読み込み中... (初回は数分かかることがあります)';
+        const progressCallback = (progress) => {
+          if (progress.status === 'progress') {
+            const percent = Math.round((progress.loaded / progress.total) * 100);
+            progressBar.style.width = percent + '%';
+            statusEl.textContent = `モデルを読み込み中... ${percent}%`;
+          }
+        };
+        [model, processor, tokenizer] = await Promise.all([
+          MoonshineForConditionalGeneration.from_pretrained(MODEL_ID, {
+            dtype: 'fp32',
+            progress_callback: progressCallback
+          }),
+          AutoProcessor.from_pretrained(MODEL_ID),
+          AutoTokenizer.from_pretrained(MODEL_ID)
+        ]);
+        statusEl.textContent = '準備完了！録音を開始できます';
+        statusEl.className = 'status ready';
+        startBtn.textContent = '録音開始';
+        startBtn.disabled = false;
+      } catch (error) {
+        console.error('Model loading error:', error);
+        statusEl.textContent = `エラー: ${error.message}`;
+        statusEl.className = 'status error';
+      }
+    }
+    async function transcribeAudio(audioBlob) {
+      if (!model || !processor || !tokenizer) return;
+      try {
+        currentTextEl.textContent = '処理中...';
+        const arrayBuffer = await audioBlob.arrayBuffer();
+        if (!audioContext) {
+          audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
+        }
+        const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
+        const targetSampleRate = 16000;
+        const offlineCtx = new OfflineAudioContext(
+          1,
+          Math.ceil(audioBuffer.duration * targetSampleRate),
+          targetSampleRate
+        );
+        const source = offlineCtx.createBufferSource();
+        source.buffer = audioBuffer;
+        source.connect(offlineCtx.destination);
+        source.start();
+        const resampled = await offlineCtx.startRendering();
+        const audioData = resampled.getChannelData(0);
+        if (audioData.length < 1600) {
+          currentTextEl.textContent = '(音声が短すぎます)';
+          return;
+        }
+        let maxLevel = 0;
+        let sumSquares = 0;
+        for (let i = 0; i < audioData.length; i++) {
+          const abs = Math.abs(audioData[i]);
+          if (abs > maxLevel) maxLevel = abs;
+          sumSquares += audioData[i] * audioData[i];
+        }
+        const rms = Math.sqrt(sumSquares / audioData.length);
+        if (rms < 0.01 || maxLevel < 0.05) {
+          currentTextEl.textContent = '(音声が小さすぎます)';
+          return;
+        }
+        const inputs = await processor(audioData);
+        const intervalSec = parseFloat(intervalSlider.value);
+        const maxTokens = Math.min(Math.round(intervalSec * 25), 150);
+        const outputs = await model.generate({
+          ...inputs,
+          max_new_tokens: maxTokens,
+        });
+        let text = tokenizer.decode(outputs[0], { skip_special_tokens: true }).trim();
+        const repeatPattern = /(.{2,}?)\1{4,}/;
+        if (repeatPattern.test(text)) {
+          text = text.replace(/(.{2,}?)\1{3,}/g, '$1');
+        }
+        const hallucinations = ['彼は私', '彼女は私', 'そう、そう'];
+        const isHallucination = hallucinations.some(h => text.includes(h) && text.length > 30);
+        if (text && !isHallucination) {
+          currentTextEl.textContent = text;
+          transcriptEl.textContent += text + '\n';
+        } else if (isHallucination) {
+          currentTextEl.textContent = '(ノイズ検出)';
+        } else {
+          currentTextEl.textContent = '(音声が検出されませんでした)';
+        }
+      } catch (error) {
+        console.error('Transcription error:', error);
+        currentTextEl.textContent = `エラー: ${error.message}`;
+      }
+    }
+    async function startRecording() {
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: {
+            channelCount: 1,
+            sampleRate: 16000,
+          }
+        });
+        audioChunks = [];
+        mediaRecorder = new MediaRecorder(stream, {
+          mimeType: 'audio/webm;codecs=opus'
+        });
+        mediaRecorder.ondataavailable = (event) => {
+          if (event.data.size > 0) {
+            audioChunks.push(event.data);
+          }
+        };
+        const processAndRestart = async () => {
+          if (!isRecording) return;
+          mediaRecorder.stop();
+        };
+        mediaRecorder.onstop = async () => {
+          if (audioChunks.length > 0 && isRecording) {
+            const audioBlob = new Blob(audioChunks, { type: 'audio/webm;codecs=opus' });
+            audioChunks = [];
+            await transcribeAudio(audioBlob);
+            if (isRecording && mediaRecorder.stream.active) {
+              const intervalMs = parseFloat(intervalSlider.value) * 1000;
+              mediaRecorder.start(500);
+              setTimeout(processAndRestart, intervalMs);
+            }
+          }
+        };
+        mediaRecorder.start(500);
+        isRecording = true;
+        const intervalMs = parseFloat(intervalSlider.value) * 1000;
+        setTimeout(processAndRestart, intervalMs);
+        statusEl.textContent = '録音中... マイクに向かって話してください';
+        statusEl.className = 'status recording';
+        startBtn.textContent = '録音停止';
+        startBtn.classList.add('recording');
+      } catch (error) {
+        console.error('Recording error:', error);
+        statusEl.textContent = `マイクエラー: ${error.message}`;
+        statusEl.className = 'status error';
+      }
+    }
+    function stopRecording() {
+      if (mediaRecorder && mediaRecorder.state !== 'inactive') {
+        mediaRecorder.stop();
+        mediaRecorder.stream.getTracks().forEach(track => track.stop());
+      }
+      isRecording = false;
+      audioChunks = [];
+      statusEl.textContent = '録音停止。再開するにはボタンをクリック';
+      statusEl.className = 'status ready';
+      startBtn.textContent = '録音開始';
+      startBtn.classList.remove('recording');
+      currentTextEl.textContent = '';
+    }
+    startBtn.addEventListener('click', () => {
+      if (isRecording) {
+        stopRecording();
+      } else {
+        startRecording();
+      }
+    });
+    loadModel();
+  </script>
+</body>
 </html>