PrashantGoyal committed on
Commit
9430422
·
1 Parent(s): 2f2d263

Smart-prep

Browse files
Files changed (4) hide show
  1. .gitignore +5 -0
  2. App/app.py +77 -0
  3. DockerFile +27 -0
  4. requirements.txt +21 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Local virtual environment (repo root only)
/venv
# Bytecode caches at any depth (e.g. App/__pycache__), not just the repo root
__pycache__/
*.pyc
# Local secrets / environment overrides
.env
# macOS Finder metadata
.DS_Store
App/app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile
2
+ from fastapi.responses import StreamingResponse
3
+ from faster_whisper import WhisperModel
4
+ import torch
5
+ from transformers import pipeline
6
+ import tempfile
7
+ from scipy.io.wavfile import write as wav_write
8
+ import io
9
+ from io import BytesIO
10
+ import numpy as np
11
+ import uvicorn
12
+
13
app = FastAPI()

# Both pipelines run on the CPU. Use a single spelling of the device so the
# two model definitions cannot drift apart (previously one used "cpu" and the
# other -1).
DEVICE = "cpu"

# The models are created once at import time and reused by the request
# handlers below via the module-level names `stt_model` and `tts`.
print("Loading STT model...")
stt_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=DEVICE,
)

print("Loading TTS model...")
tts = pipeline(
    "text-to-speech",
    model="facebook/mms-tts-eng",
    device=DEVICE,
)
30
+
31
+
32
@app.post("/stt")
async def speech_to_text(file: UploadFile):
    """Transcribe an uploaded audio file.

    The upload is read fully into memory, decoded with soundfile, averaged
    across channels when the decoded array has more than one dimension, and
    handed to the module-level ``stt_model`` pipeline together with its
    sample rate.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: 400 when the upload is empty, cannot be decoded, or
            contains no audio frames.
    """
    # Imported locally: the module never imported soundfile, so the original
    # `sf.read(...)` call failed with NameError on every request.
    import soundfile as sf
    from fastapi import HTTPException

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        audio, sample_rate = sf.read(io.BytesIO(audio_bytes))
    except RuntimeError as exc:
        # soundfile reports unreadable/unsupported data as a RuntimeError
        # subclass; surface it as a client error instead of a 500.
        raise HTTPException(
            status_code=400, detail="Could not decode audio file"
        ) from exc

    # Multi-channel audio is decoded as (frames, channels); collapse to mono.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    if audio.size == 0:
        raise HTTPException(
            status_code=400, detail="Audio file contains no samples"
        )

    result = stt_model({
        "array": audio,
        "sampling_rate": sample_rate,
    })

    return {"text": result["text"]}
45
+
46
+
47
@app.post("/tts")
async def text_to_speech(payload: dict):
    """Synthesize speech for ``payload["text"]`` and return it as WAV audio.

    The text is passed to the module-level ``tts`` pipeline. The resulting
    samples are flattened, cleaned of NaN/inf values, clipped to
    ``[-1.0, 1.0]``, scaled to 16-bit integers and written to an in-memory
    WAV file.

    Args:
        payload: JSON request body; must contain a non-empty string under
            the key ``"text"``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav``.

    Raises:
        HTTPException: 400 when ``"text"`` is missing, not a string, or
            blank (previously a missing key surfaced as a 500 ``KeyError``).
    """
    from fastapi import HTTPException

    text = payload.get("text")
    if not isinstance(text, str) or not text.strip():
        raise HTTPException(
            status_code=400,
            detail='Request body must contain a non-empty "text" string',
        )

    out = tts(text)
    sample_rate = int(out["sampling_rate"])

    # Drop singleton dimensions so the WAV writer receives a flat signal.
    samples = np.asarray(out["audio"]).squeeze()
    # Replace NaN/inf before clipping so the integer cast below is defined.
    samples = np.nan_to_num(samples)
    samples = np.clip(samples, -1.0, 1.0)
    pcm = (samples * 32767).astype(np.int16)

    buffer = BytesIO()
    wav_write(buffer, sample_rate, pcm)
    buffer.seek(0)  # rewind so the response streams from the start
    return StreamingResponse(buffer, media_type="audio/wav")
70
+
71
@app.get("/")
def health():
    """Health-check endpoint: unconditionally reports an "ok" status."""
    status_report = dict(status="ok")
    return status_report
74
+
75
if __name__ == "__main__":
    import os

    # Auto-reload is a development aid, so it is opt-in (UVICORN_RELOAD=1)
    # rather than always on.
    dev_reload = os.environ.get("UVICORN_RELOAD", "").lower() in {
        "1",
        "true",
        "yes",
    }

    if dev_reload:
        # The reloader needs an import string so it can re-import the app
        # in a fresh worker process after each change.
        uvicorn.run("App.app:app", host="0.0.0.0", port=8000, reload=True)
    else:
        # Pass the already-constructed app object: using the import string
        # here would import this module a second time (as `App.app` in
        # addition to `__main__`) and load both models twice.
        uvicorn.run(app, host="0.0.0.0", port=8000)
77
+
DockerFile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /Transformers

# System libraries for building/running the Python dependencies.
# NOTE(review): originally annotated as required for pyarrow / datasets,
# neither of which is listed in requirements.txt - confirm which of these
# packages are still needed.
# --no-install-recommends keeps optional packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    gcc \
    g++ \
    cmake \
    curl \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements first so the dependency layer stays cached
# when application code changes.
COPY requirements.txt .

# Upgrade tooling and install deps
RUN python -m pip install --upgrade pip setuptools wheel \
    && python -m pip install -r requirements.txt --no-cache-dir

COPY . .

EXPOSE 8000

# Start the server directly instead of `python -m App.app`, whose entry
# point enables uvicorn's development auto-reload.
CMD ["uvicorn", "App.app:app", "--host", "0.0.0.0", "--port", "8000"]
27
+
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
torch==2.1.2
torchaudio==2.1.2
numpy==1.26.4
scipy==1.11.4
transformers==4.36.2
tokenizers==0.15.0
huggingface-hub==0.20.1
safetensors==0.4.1
openai-whisper==20231117
# App/app.py imports `faster_whisper` at module level; without this package
# the application fails at import time.
faster-whisper
soundfile
ffmpeg-python
fastapi
uvicorn
python-multipart
python-dotenv
tqdm
regex
pyyaml
requests
threadpoolctl
joblib