Spaces:

missvector
/

audio_to_text

Sleeping

App Files Community

missvector committed on Feb 12

Commit

8e749fb

1 Parent(s): 3b446d1

fix: use window.location.origin for transcribe endpoint

Browse files

Files changed (3) hide show

Dockerfile +9 -3
app.py +53 -34
static/script.js +1 -1

Dockerfile CHANGED Viewed

@@ -4,17 +4,23 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y \
     ffmpeg \
-    gcc \
     && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY app.py .
 COPY index.html .
-COPY static ./static
 ENV PORT=7860
 EXPOSE 7860
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "300"]

 RUN apt-get update && apt-get install -y \
     ffmpeg \
+    wget \
+    unzip \
     && rm -rf /var/lib/apt/lists/*
+# Скачиваем Vosk модель при сборке
+RUN wget https://alphacephei.com/vosk/models/vosk-model-small-ru-0.22.zip \
+    && unzip vosk-model-small-ru-0.22.zip \
+    && rm vosk-model-small-ru-0.22.zip
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY app.py .
 COPY index.html .
+COPY static ./static
 ENV PORT=7860
 EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -1,16 +1,15 @@
 import os
-import tempfile
 import json
 from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import HTMLResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
-from faster_whisper import WhisperModel
 import shutil
-from pathlib import Path
 import asyncio
-from concurrent.futures import ThreadPoolExecutor
-from functools import partial
 app = FastAPI()
@@ -23,15 +22,11 @@ app.add_middleware(
 app.mount("/static", StaticFiles(directory="static"), name="static")
-print("Loading faster-whisper-tiny...")
-model = WhisperModel(
-    "tiny",
-    device="cpu",
-    compute_type="int8"
-)
-executor = ThreadPoolExecutor(max_workers=1)
-print("Model ready")
 @app.get("/", response_class=HTMLResponse)
 async def root():
@@ -39,36 +34,60 @@ async def root():
         return f.read()
 @app.post("/transcribe")
-async def transcribe_audio_stream(file: UploadFile = File(...)):
     async def generate():
         try:
             suffix = Path(file.filename).suffix
             with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
-                shutil.copyfileobj(file.file, tmp)
                 tmp_path = tmp.name
-            yield json.dumps({"type": "status", "text": "Loading model..."}) + "\n"
-            loop = asyncio.get_event_loop()
-            segments = await loop.run_in_executor(
-                executor,
-                partial(model.transcribe, tmp_path, language="ru", word_timestamps=False)
-            )
-            full_text = ""
-            for segment in segments:
-                full_text += segment.text + " "
-                yield json.dumps({
-                    "type": "segment",
-                    "text": full_text.strip(),
-                    "partial": segment.text
-                }) + "\n"
-            Path(tmp_path).unlink()
-            yield json.dumps({"type": "done", "text": full_text.strip()}) + "\n"
         except Exception as e:
-            yield json.dumps({"type": "error", "text": f"Error: {str(e)}"}) + "\n"
         finally:
             file.file.close()

 import os
 import json
+import wave
+import tempfile
+import subprocess
+from pathlib import Path
 from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
 import shutil
 import asyncio
 app = FastAPI()
 app.mount("/static", StaticFiles(directory="static"), name="static")
+# Vosk инициализируется мгновенно, 40MB
+from vosk import Model, KaldiRecognizer
+print("🟢 Loading Vosk model...")
+model = Model("vosk-model-small-ru-0.22")
+print("✅ Vosk ready!")
 @app.get("/", response_class=HTMLResponse)
 async def root():
         return f.read()
 @app.post("/transcribe")
+async def transcribe_audio(file: UploadFile = File(...)):
     async def generate():
         try:
+            yield json.dumps({"type": "status", "text": "🔄 Конвертация аудио..."}) + "\n"
             suffix = Path(file.filename).suffix
             with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                shutil.copyfileobj(file.file, tmp)
                 tmp_path = tmp.name
+            # Конвертируем в WAV 16kHz моно
+            wav_path = tmp_path + ".wav"
+            subprocess.run([
+                "ffmpeg", "-i", tmp_path,
+                "-ar", "16000", "-ac", "1",
+                "-acodec", "pcm_s16le",
+                wav_path, "-y", "-loglevel", "quiet"
+            ])
+            yield json.dumps({"type": "status", "text": "🔄 Распознавание речи..."}) + "\n"
+            # Открываем WAV и распознаем
+            wf = wave.open(wav_path, "rb")
+            rec = KaldiRecognizer(model, wf.getframerate())
+            rec.SetWords(False)
+            text = ""
+            while True:
+                data = wf.readframes(4000)
+                if len(data) == 0:
+                    break
+                if rec.AcceptWaveform(data):
+                    result = json.loads(rec.Result())
+                    part = result.get("text", "")
+                    if part:
+                        text += part + " "
+                        yield json.dumps({
+                            "type": "segment",
+                            "text": text.strip()
+                        }) + "\n"
+            # Финальный результат
+            final = json.loads(rec.FinalResult())
+            text += final.get("text", "")
+            text = text.strip()
+            # Чистим файлы
+            Path(tmp_path).unlink(missing_ok=True)
+            Path(wav_path).unlink(missing_ok=True)
+            yield json.dumps({"type": "done", "text": text}) + "\n"
         except Exception as e:
+            yield json.dumps({"type": "error", "text": f"❌ Ошибка: {str(e)}"}) + "\n"
         finally:
             file.file.close()

static/script.js CHANGED Viewed

@@ -110,7 +110,7 @@ async function handleFile(file) {
             }
         }
     } catch (error) {
-        transcriptText.value = 'Ошибка соединения';
         console.error(error);
     }
 }

             }
         }
     } catch (error) {
+        transcriptText.value = '❌ Ошибка соединения';
         console.error(error);
     }
 }