missvector committed on
Commit
8e749fb
·
1 Parent(s): 3b446d1

fix: use window.location.origin for transcribe endpoint

Browse files
Files changed (3) hide show
  1. Dockerfile +9 -3
  2. app.py +53 -34
  3. static/script.js +1 -1
Dockerfile CHANGED
@@ -4,17 +4,23 @@ WORKDIR /app
4
 
5
  RUN apt-get update && apt-get install -y \
6
  ffmpeg \
7
- gcc \
 
8
  && rm -rf /var/lib/apt/lists/*
9
 
 
 
 
 
 
10
  COPY requirements.txt .
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
  COPY app.py .
14
  COPY index.html .
15
- COPY static ./static
16
 
17
  ENV PORT=7860
18
  EXPOSE 7860
19
 
20
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "300"]
 
4
 
5
  RUN apt-get update && apt-get install -y \
6
  ffmpeg \
7
+ wget \
8
+ unzip \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
+ # Скачиваем Vosk модель при сборке
12
+ RUN wget https://alphacephei.com/vosk/models/vosk-model-small-ru-0.22.zip \
13
+ && unzip vosk-model-small-ru-0.22.zip \
14
+ && rm vosk-model-small-ru-0.22.zip
15
+
16
  COPY requirements.txt .
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  COPY app.py .
20
  COPY index.html .
21
+ COPY static ./static
22
 
23
  ENV PORT=7860
24
  EXPOSE 7860
25
 
26
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,16 +1,15 @@
1
  import os
2
- import tempfile
3
  import json
 
 
 
 
4
  from fastapi import FastAPI, File, UploadFile
5
- from fastapi.responses import HTMLResponse, StreamingResponse
6
  from fastapi.staticfiles import StaticFiles
7
  from fastapi.middleware.cors import CORSMiddleware
8
- from faster_whisper import WhisperModel
9
  import shutil
10
- from pathlib import Path
11
  import asyncio
12
- from concurrent.futures import ThreadPoolExecutor
13
- from functools import partial
14
 
15
  app = FastAPI()
16
 
@@ -23,15 +22,11 @@ app.add_middleware(
23
 
24
  app.mount("/static", StaticFiles(directory="static"), name="static")
25
 
26
- print("Loading faster-whisper-tiny...")
27
- model = WhisperModel(
28
- "tiny",
29
- device="cpu",
30
- compute_type="int8"
31
- )
32
-
33
- executor = ThreadPoolExecutor(max_workers=1)
34
- print("Model ready")
35
 
36
  @app.get("/", response_class=HTMLResponse)
37
  async def root():
@@ -39,36 +34,60 @@ async def root():
39
  return f.read()
40
 
41
  @app.post("/transcribe")
42
- async def transcribe_audio_stream(file: UploadFile = File(...)):
43
  async def generate():
44
  try:
 
 
45
  suffix = Path(file.filename).suffix
46
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
47
- shutil.copyfileobj(file.file, tmp)
48
  tmp_path = tmp.name
49
 
50
- yield json.dumps({"type": "status", "text": "Loading model..."}) + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- loop = asyncio.get_event_loop()
53
- segments = await loop.run_in_executor(
54
- executor,
55
- partial(model.transcribe, tmp_path, language="ru", word_timestamps=False)
56
- )
57
 
58
- full_text = ""
59
- for segment in segments:
60
- full_text += segment.text + " "
61
- yield json.dumps({
62
- "type": "segment",
63
- "text": full_text.strip(),
64
- "partial": segment.text
65
- }) + "\n"
66
 
67
- Path(tmp_path).unlink()
68
- yield json.dumps({"type": "done", "text": full_text.strip()}) + "\n"
69
 
70
  except Exception as e:
71
- yield json.dumps({"type": "error", "text": f"Error: {str(e)}"}) + "\n"
72
  finally:
73
  file.file.close()
74
 
 
1
  import os
 
2
  import json
3
+ import wave
4
+ import tempfile
5
+ import subprocess
6
+ from pathlib import Path
7
  from fastapi import FastAPI, File, UploadFile
8
+ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
9
  from fastapi.staticfiles import StaticFiles
10
  from fastapi.middleware.cors import CORSMiddleware
 
11
  import shutil
 
12
  import asyncio
 
 
13
 
14
  app = FastAPI()
15
 
 
22
 
23
  app.mount("/static", StaticFiles(directory="static"), name="static")
24
 
25
+ # Vosk инициализируется мгновенно, 40MB
26
+ from vosk import Model, KaldiRecognizer
27
+ print("🟢 Loading Vosk model...")
28
+ model = Model("vosk-model-small-ru-0.22")
29
+ print("✅ Vosk ready!")
 
 
 
 
30
 
31
  @app.get("/", response_class=HTMLResponse)
32
  async def root():
 
34
  return f.read()
35
 
36
  @app.post("/transcribe")
37
+ async def transcribe_audio(file: UploadFile = File(...)):
38
  async def generate():
39
  try:
40
+ yield json.dumps({"type": "status", "text": "🔄 Конвертация аудио..."}) + "\n"
41
+
42
  suffix = Path(file.filename).suffix
43
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
44
+ shutil.copyfileobj(file.file, tmp)
45
  tmp_path = tmp.name
46
 
47
+ # Конвертируем в WAV 16kHz моно
48
+ wav_path = tmp_path + ".wav"
49
+ subprocess.run([
50
+ "ffmpeg", "-i", tmp_path,
51
+ "-ar", "16000", "-ac", "1",
52
+ "-acodec", "pcm_s16le",
53
+ wav_path, "-y", "-loglevel", "quiet"
54
+ ])
55
+
56
+ yield json.dumps({"type": "status", "text": "🔄 Распознавание речи..."}) + "\n"
57
+
58
+ # Открываем WAV и распознаем
59
+ wf = wave.open(wav_path, "rb")
60
+ rec = KaldiRecognizer(model, wf.getframerate())
61
+ rec.SetWords(False)
62
+
63
+ text = ""
64
+ while True:
65
+ data = wf.readframes(4000)
66
+ if len(data) == 0:
67
+ break
68
+ if rec.AcceptWaveform(data):
69
+ result = json.loads(rec.Result())
70
+ part = result.get("text", "")
71
+ if part:
72
+ text += part + " "
73
+ yield json.dumps({
74
+ "type": "segment",
75
+ "text": text.strip()
76
+ }) + "\n"
77
 
78
+ # Финальный результат
79
+ final = json.loads(rec.FinalResult())
80
+ text += final.get("text", "")
81
+ text = text.strip()
 
82
 
83
+ # Чистим файлы
84
+ Path(tmp_path).unlink(missing_ok=True)
85
+ Path(wav_path).unlink(missing_ok=True)
 
 
 
 
 
86
 
87
+ yield json.dumps({"type": "done", "text": text}) + "\n"
 
88
 
89
  except Exception as e:
90
+ yield json.dumps({"type": "error", "text": f"❌ Ошибка: {str(e)}"}) + "\n"
91
  finally:
92
  file.file.close()
93
 
static/script.js CHANGED
@@ -110,7 +110,7 @@ async function handleFile(file) {
110
  }
111
  }
112
  } catch (error) {
113
- transcriptText.value = 'Ошибка соединения';
114
  console.error(error);
115
  }
116
  }
 
110
  }
111
  }
112
  } catch (error) {
113
+ transcriptText.value = 'Ошибка соединения';
114
  console.error(error);
115
  }
116
  }